extractDomNodes.tsx 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import type {Mirror} from '@sentry-internal/rrweb-snapshot';
  2. import replayerStepper from 'sentry/utils/replays/replayerStepper';
  3. import {
  4. getNodeId,
  5. type RecordingFrame,
  6. type ReplayFrame,
  7. } from 'sentry/utils/replays/types';
  8. export type Extraction = {
  9. frame: ReplayFrame;
  10. html: string | null;
  11. timestamp: number;
  12. };
  13. type Args = {
  14. frames: ReplayFrame[] | undefined;
  15. rrwebEvents: RecordingFrame[] | undefined;
  16. startTimestampMs: number;
  17. };
  18. export default function extractDomNodes({
  19. frames,
  20. rrwebEvents,
  21. startTimestampMs,
  22. }: Args): Promise<Map<ReplayFrame, Extraction>> {
  23. return replayerStepper({
  24. frames,
  25. rrwebEvents,
  26. startTimestampMs,
  27. shouldVisitFrame: frame => {
  28. const nodeId = getNodeId(frame);
  29. return nodeId !== undefined && nodeId !== -1;
  30. },
  31. onVisitFrame: (frame, collection, replayer) => {
  32. const mirror = replayer.getMirror();
  33. const nodeId = getNodeId(frame);
  34. const html = extractHtml(nodeId as number, mirror);
  35. collection.set(frame as ReplayFrame, {
  36. frame,
  37. html,
  38. timestamp: frame.timestampMs,
  39. });
  40. },
  41. });
  42. }
  43. function extractHtml(nodeId: number, mirror: Mirror): string | null {
  44. const node = mirror.getNode(nodeId);
  45. const html =
  46. (node && 'outerHTML' in node ? (node.outerHTML as string) : node?.textContent) || '';
  47. // Limit document node depth to 2
  48. let truncated = removeNodesAtLevel(html, 2);
  49. // If still very long and/or removeNodesAtLevel failed, truncate
  50. if (truncated.length > 1500) {
  51. truncated = truncated.substring(0, 1500);
  52. }
  53. return truncated ? truncated : null;
  54. }
  55. function removeChildLevel(max: number, collection: HTMLCollection, current: number = 0) {
  56. for (let i = 0; i < collection.length; i++) {
  57. const child = collection[i];
  58. if (child.nodeName === 'STYLE') {
  59. child.textContent = '/* Inline CSS */';
  60. }
  61. if (child.nodeName === 'svg') {
  62. child.innerHTML = '<!-- SVG -->';
  63. }
  64. if (max <= current) {
  65. if (child.childElementCount > 0) {
  66. child.innerHTML = `<!-- ${child.childElementCount} descendents -->`;
  67. }
  68. } else {
  69. removeChildLevel(max, child.children, current + 1);
  70. }
  71. }
  72. }
  73. function removeNodesAtLevel(html: string, level: number): string {
  74. const parser = new DOMParser();
  75. try {
  76. const doc = parser.parseFromString(html, 'text/html');
  77. removeChildLevel(level, doc.body.children);
  78. return doc.body.innerHTML;
  79. } catch (err) {
  80. // If we can't parse the HTML, just return the original
  81. return html;
  82. }
  83. }