extractDomNodes.tsx 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import * as Sentry from '@sentry/react';
  2. import {Replayer} from '@sentry-internal/rrweb';
  3. import first from 'lodash/first';
  4. import type {
  5. BreadcrumbFrame,
  6. RecordingFrame,
  7. SpanFrame,
  8. } from 'sentry/utils/replays/types';
  9. import {EventType} from 'sentry/utils/replays/types';
  10. import requestIdleCallback from 'sentry/utils/window/requestIdleCallback';
  11. export type Extraction = {
  12. frame: BreadcrumbFrame | SpanFrame;
  13. html: string;
  14. timestamp: number;
  15. };
  16. type Args = {
  17. finishedAt: Date | undefined;
  18. frames: (BreadcrumbFrame | SpanFrame)[] | undefined;
  19. rrwebEvents: RecordingFrame[] | undefined;
  20. };
  21. function _extractDomNodes({
  22. frames,
  23. rrwebEvents,
  24. finishedAt,
  25. }: Args): Promise<Extraction[]> {
  26. // Get a list of the BreadcrumbFrames that relate directly to the DOM, for each
  27. // frame we will extract the referenced HTML.
  28. if (!frames || !rrwebEvents || rrwebEvents.length < 2 || !finishedAt) {
  29. return Promise.reject();
  30. }
  31. return new Promise((resolve, reject) => {
  32. const domRoot = document.createElement('div');
  33. domRoot.className = 'sentry-block';
  34. const {style} = domRoot;
  35. style.position = 'fixed';
  36. style.inset = '0';
  37. style.width = '0';
  38. style.height = '0';
  39. style.overflow = 'hidden';
  40. document.body.appendChild(domRoot);
  41. // Grab the last event, but skip the synthetic `replay-end` event that the
  42. // ReplayerReader added. RRWeb will skip that event when it comes time to render
  43. const lastEvent = rrwebEvents[rrwebEvents.length - 2];
  44. const isLastRRWebEvent = (event: RecordingFrame) => lastEvent === event;
  45. const replayerRef = new Replayer(rrwebEvents, {
  46. root: domRoot,
  47. loadTimeout: 1,
  48. showWarning: false,
  49. blockClass: 'sentry-block',
  50. speed: 99999,
  51. skipInactive: true,
  52. triggerFocus: false,
  53. plugins: [
  54. new BreadcrumbReferencesPlugin({
  55. frames,
  56. isFinished: isLastRRWebEvent,
  57. onFinish: rows => {
  58. resolve(rows);
  59. setTimeout(() => {
  60. if (document.body.contains(domRoot)) {
  61. document.body.removeChild(domRoot);
  62. }
  63. }, 0);
  64. },
  65. }),
  66. ],
  67. mouseTail: false,
  68. });
  69. try {
  70. // Run the replay to the end, we will capture data as it streams into the plugin
  71. replayerRef.pause(finishedAt.getTime());
  72. } catch (error) {
  73. Sentry.captureException(error);
  74. reject(error);
  75. }
  76. });
  77. }
  78. export default function extractDomNodes(args: Args): Promise<Extraction[]> {
  79. return new Promise((resolve, reject) => {
  80. requestIdleCallback(
  81. () => {
  82. _extractDomNodes(args).then(resolve).catch(reject);
  83. },
  84. {
  85. timeout: 2500,
  86. }
  87. );
  88. });
  89. }
  90. type PluginOpts = {
  91. frames: (BreadcrumbFrame | SpanFrame)[];
  92. isFinished: (event: RecordingFrame) => boolean;
  93. onFinish: (mutations: Extraction[]) => void;
  94. };
  95. class BreadcrumbReferencesPlugin {
  96. frames: (BreadcrumbFrame | SpanFrame)[];
  97. isFinished: (event: RecordingFrame) => boolean;
  98. onFinish: (mutations: Extraction[]) => void;
  99. nextExtract: null | Extraction['html'] = null;
  100. activities: Extraction[] = [];
  101. constructor({frames, isFinished, onFinish}: PluginOpts) {
  102. this.frames = frames;
  103. this.isFinished = isFinished;
  104. this.onFinish = onFinish;
  105. }
  106. handler(event: RecordingFrame, _isSync: boolean, {replayer}: {replayer: Replayer}) {
  107. if (event.type === EventType.FullSnapshot) {
  108. this.extractNextFrame({replayer});
  109. } else if (event.type === EventType.IncrementalSnapshot) {
  110. this.extractCurrentFrame(event, {replayer});
  111. this.extractNextFrame({replayer});
  112. }
  113. if (this.isFinished(event)) {
  114. this.onFinish(this.activities);
  115. }
  116. }
  117. extractCurrentFrame(event: RecordingFrame, {replayer}: {replayer: Replayer}) {
  118. const frame = first(this.frames);
  119. if (!frame || !frame?.timestampMs || frame.timestampMs > event.timestamp) {
  120. return;
  121. }
  122. const truncated = extractNode(frame, replayer) || this.nextExtract;
  123. if (truncated) {
  124. this.activities.push({
  125. frame,
  126. html: truncated,
  127. timestamp: frame.timestampMs,
  128. });
  129. }
  130. this.nextExtract = null;
  131. this.frames.shift();
  132. }
  133. extractNextFrame({replayer}: {replayer: Replayer}) {
  134. const frame = first(this.frames);
  135. if (!frame || !frame?.timestampMs) {
  136. return;
  137. }
  138. this.nextExtract = extractNode(frame, replayer);
  139. }
  140. }
  141. function extractNode(frame: BreadcrumbFrame | SpanFrame, replayer: Replayer) {
  142. const mirror = replayer.getMirror();
  143. // @ts-expect-error
  144. const nodeId = (frame.data?.nodeId ?? -1) as number;
  145. const node = mirror.getNode(nodeId);
  146. // @ts-expect-error
  147. const html = node?.outerHTML || node?.textContent || '';
  148. // Limit document node depth to 2
  149. let truncated = removeNodesAtLevel(html, 2);
  150. // If still very long and/or removeNodesAtLevel failed, truncate
  151. if (truncated.length > 1500) {
  152. truncated = truncated.substring(0, 1500);
  153. }
  154. return truncated;
  155. }
  156. function removeChildLevel(max: number, collection: HTMLCollection, current: number = 0) {
  157. for (let i = 0; i < collection.length; i++) {
  158. const child = collection[i];
  159. if (child.nodeName === 'STYLE') {
  160. child.textContent = '/* Inline CSS */';
  161. }
  162. if (child.nodeName === 'svg') {
  163. child.innerHTML = '<!-- SVG -->';
  164. }
  165. if (max <= current) {
  166. if (child.childElementCount > 0) {
  167. child.innerHTML = `<!-- ${child.childElementCount} descendents -->`;
  168. }
  169. } else {
  170. removeChildLevel(max, child.children, current + 1);
  171. }
  172. }
  173. }
  174. function removeNodesAtLevel(html: string, level: number) {
  175. const parser = new DOMParser();
  176. try {
  177. const doc = parser.parseFromString(html, 'text/html');
  178. removeChildLevel(level, doc.body.children);
  179. return doc.body.innerHTML;
  180. } catch (err) {
  181. // If we can't parse the HTML, just return the original
  182. return html;
  183. }
  184. }