extractDomNodes.tsx 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. import * as Sentry from '@sentry/react';
  2. import type {eventWithTime} from '@sentry-internal/rrweb';
  3. import {EventType, Replayer} from '@sentry-internal/rrweb';
  4. import first from 'lodash/first';
  5. import type {Crumb} from 'sentry/types/breadcrumbs';
  6. import requestIdleCallback from 'sentry/utils/window/requestIdleCallback';
  /**
   * One extracted row: a breadcrumb paired with the HTML captured for the DOM
   * node that the breadcrumb references.
   */
  export type Extraction = {
    /** The breadcrumb this row was extracted for. */
    crumb: Crumb;
    /** HTML for the crumb's node, as returned by `extractNode`. */
    html: string;
    /** The crumb's timestamp converted to a number via `+new Date(...)`. */
    timestamp: number;
  };
  /**
   * Inputs for DOM extraction. Every field may be `undefined`; extraction is
   * rejected unless all of them are present.
   */
  type Args = {
    /** Breadcrumbs to extract HTML for. */
    crumbs: Crumb[] | undefined;
    /** Point in time that the replay is run up to. */
    finishedAt: Date | undefined;
    /** rrweb events fed to the replayer; at least two are required. */
    rrwebEvents: eventWithTime[] | undefined;
  };
  /**
   * Replays `rrwebEvents` into a hidden container attached to `document.body`
   * and collects, through `BreadcrumbReferencesPlugin`, the HTML referenced by
   * each breadcrumb.
   *
   * The temporary container is removed from the document both after a
   * successful run and when creating or running the replayer fails.
   *
   * @returns A promise that resolves with the extracted rows once the plugin
   * has seen the last real rrweb event. It rejects without a reason when any
   * input is missing or fewer than two events were supplied, and rejects with
   * the thrown error when the replayer cannot be created or run.
   */
  function _extractDomNodes({
    crumbs,
    rrwebEvents,
    finishedAt,
  }: Args): Promise<Extraction[]> {
    // Nothing can be extracted without crumbs, an end time, and at least two
    // events (one real event plus the trailing synthetic one, see below).
    if (!crumbs || !rrwebEvents || rrwebEvents.length < 2 || !finishedAt) {
      return Promise.reject();
    }

    return new Promise((resolve, reject) => {
      // Zero-sized, clipped container so the replayed DOM is never visible.
      const domRoot = document.createElement('div');
      domRoot.className = 'sentry-block';
      const {style} = domRoot;
      style.position = 'fixed';
      style.inset = '0';
      style.width = '0';
      style.height = '0';
      style.overflow = 'hidden';
      document.body.appendChild(domRoot);

      const removeDomRoot = () => {
        if (document.body.contains(domRoot)) {
          document.body.removeChild(domRoot);
        }
      };

      // Grab the last event, but skip the synthetic `replay-end` event that the
      // ReplayerReader added. RRWeb will skip that event when it comes time to render
      const lastEvent = rrwebEvents[rrwebEvents.length - 2];
      const isLastRRWebEvent = (event: eventWithTime) => lastEvent === event;

      try {
        // Constructed inside the `try` so a constructor failure is reported and
        // cleaned up exactly like a failure while replaying.
        const replayerRef = new Replayer(rrwebEvents, {
          root: domRoot,
          loadTimeout: 1,
          showWarning: false,
          blockClass: 'sentry-block',
          speed: 99999,
          skipInactive: true,
          triggerFocus: false,
          plugins: [
            new BreadcrumbReferencesPlugin({
              crumbs,
              isFinished: isLastRRWebEvent,
              onFinish: rows => {
                resolve(rows);
                // Deferred by one task rather than removed synchronously from
                // inside the plugin callback.
                setTimeout(removeDomRoot, 0);
              },
            }),
          ],
          mouseTail: false,
        });

        // Run the replay to the end, we will capture data as it streams into the plugin
        replayerRef.pause(finishedAt.getTime());
      } catch (error) {
        // Without this the hidden container would stay attached to the page
        // forever whenever the replayer throws.
        removeDomRoot();
        Sentry.captureException(error);
        reject(error);
      }
    });
  }
  /**
   * Defers `_extractDomNodes(args)` through `requestIdleCallback` (called with
   * a `timeout` option of 2500) and forwards its outcome.
   *
   * @returns A promise that settles the same way the deferred
   * `_extractDomNodes` call does.
   */
  export default function extractDomNodes(args: Args): Promise<Extraction[]> {
    return new Promise((resolve, reject) => {
      const runExtraction = () => {
        _extractDomNodes(args).then(resolve, reject);
      };

      requestIdleCallback(runExtraction, {timeout: 2500});
    });
  }
  /**
   * Constructor options for `BreadcrumbReferencesPlugin`.
   */
  type PluginOpts = {
    /** Breadcrumbs whose referenced nodes should be extracted. */
    crumbs: Crumb[];
    /** Returns `true` for the event after which extraction is complete. */
    isFinished: (event: eventWithTime) => boolean;
    /** Receives the collected rows once `isFinished` has returned `true`. */
    onFinish: (mutations: Extraction[]) => void;
  };
  /**
   * Replayer plugin that, while events are replayed, captures the HTML of the
   * node referenced by each breadcrumb and reports the collected rows through
   * `onFinish`.
   *
   * Crumbs are consumed from the front of an internal queue, one at a time, as
   * incremental snapshots with a timestamp at or after the crumb's timestamp
   * are handled.
   */
  class BreadcrumbReferencesPlugin {
    /** Crumbs still waiting to be extracted; the head is the next candidate. */
    crumbs: Crumb[];
    isFinished: (event: eventWithTime) => boolean;
    onFinish: (mutations: Extraction[]) => void;

    /** HTML cached for the head crumb by the latest `extractNextCrumb` call. */
    nextExtract: null | Extraction['html'] = null;
    /** Rows collected so far, in the order they were extracted. */
    activities: Extraction[] = [];

    constructor({crumbs, isFinished, onFinish}: PluginOpts) {
      // Keep a private queue: it is consumed with `shift()`, which must not
      // empty the array owned by the caller. Crumbs whose timestamp cannot be
      // parsed are dropped here because they can never be matched against an
      // event and would otherwise block every crumb queued behind them.
      this.crumbs = crumbs.filter(crumb => Boolean(+new Date(crumb.timestamp || '')));
      this.isFinished = isFinished;
      this.onFinish = onFinish;
    }

    /**
     * Called for each replayed event. Full snapshots only refresh the cached
     * HTML for the head crumb; incremental snapshots first try to finalize the
     * head crumb and then refresh the cache for the new head.
     */
    handler(event: eventWithTime, _isSync: boolean, {replayer}: {replayer: Replayer}) {
      if (event.type === EventType.FullSnapshot) {
        this.extractNextCrumb({replayer});
      } else if (event.type === EventType.IncrementalSnapshot) {
        this.extractCurrentCrumb(event, {replayer});
        this.extractNextCrumb({replayer});
      }

      if (this.isFinished(event)) {
        this.onFinish(this.activities);
      }
    }

    /**
     * Finalizes the head crumb when its timestamp is not later than `event`'s.
     * The HTML extracted now is preferred; if that is empty, the HTML cached
     * by the previous `extractNextCrumb` call is used. The crumb is removed
     * from the queue even when no HTML could be found for it.
     */
    extractCurrentCrumb(event: eventWithTime, {replayer}: {replayer: Replayer}) {
      const crumb = first(this.crumbs);
      const crumbTimestamp = +new Date(crumb?.timestamp || '');

      if (!crumb || !crumbTimestamp || crumbTimestamp > event.timestamp) {
        return;
      }

      const truncated = extractNode(crumb, replayer) || this.nextExtract;
      if (truncated) {
        this.activities.push({
          crumb,
          html: truncated,
          timestamp: crumbTimestamp,
        });
      }

      this.nextExtract = null;
      this.crumbs.shift();
    }

    /**
     * Caches the HTML currently available for the head crumb so that
     * `extractCurrentCrumb` can fall back to it later.
     */
    extractNextCrumb({replayer}: {replayer: Replayer}) {
      const crumb = first(this.crumbs);
      const crumbTimestamp = +new Date(crumb?.timestamp || '');

      if (!crumb || !crumbTimestamp) {
        return;
      }

      this.nextExtract = extractNode(crumb, replayer);
    }
  }
  /**
   * Looks up the node referenced by `crumb.data.nodeId` in the replayer's
   * mirror and returns its HTML, reduced with `removeNodesAtLevel(html, 2)` and
   * capped at 1500 characters.
   *
   * Falls back to the node's `textContent` when it has no `outerHTML`, and to
   * an empty string when neither is available.
   */
  function extractNode(crumb: Crumb, replayer: Replayer) {
    // @ts-expect-error
    const nodeId = crumb.data?.nodeId || '';
    const node = replayer.getMirror().getNode(nodeId);
    // @ts-expect-error
    const html = node?.outerHTML || node?.textContent || '';

    // Limit document node depth to 2
    const shallowHtml = removeNodesAtLevel(html, 2);

    // If still very long and/or removeNodesAtLevel failed, truncate
    return shallowHtml.length > 1500 ? shallowHtml.substring(0, 1500) : shallowHtml;
  }
  /**
   * Parses `html` as an HTML document and collapses it to a limited depth.
   *
   * While walking the elements under `<body>`:
   * - the text of every visited `STYLE` element is replaced by a placeholder;
   * - the content of every visited `svg` element is replaced by a comment;
   * - for elements `level` steps below the top-level elements, any child
   *   elements are replaced by a comment stating how many direct child
   *   elements were removed.
   *
   * @param html - Markup to reduce.
   * @param level - Depth at which children are replaced (0 = top-level elements).
   * @returns The serialized `<body>` content, or `html` unchanged if parsing or
   * walking throws.
   */
  function removeNodesAtLevel(html: string, level: number) {
    const parser = new DOMParser();

    // Walks `elements`, which sit `depth` levels below the top of <body>.
    const prune = (elements: HTMLCollection, depth: number): void => {
      for (const element of Array.from(elements)) {
        switch (element.nodeName) {
          case 'STYLE':
            element.textContent = '/* Inline CSS */';
            break;
          case 'svg':
            element.innerHTML = '<!-- SVG -->';
            break;
          default:
            break;
        }

        if (level <= depth) {
          // Depth limit reached: summarize whatever is nested below.
          const childCount = element.childElementCount;
          if (childCount > 0) {
            element.innerHTML = `<!-- ${childCount} descendents -->`;
          }
          continue;
        }

        prune(element.children, depth + 1);
      }
    };

    try {
      const doc = parser.parseFromString(html, 'text/html');
      prune(doc.body.children, 0);
      return doc.body.innerHTML;
    } catch {
      // If we can't parse the HTML, just return the original
      return html;
    }
  }