// extractHtml.tsx
  1. import type {Mirror} from '@sentry-internal/rrweb-snapshot';
  2. import type {ReplayFrame} from 'sentry/utils/replays/types';
  3. import constructSelector from 'sentry/views/replays/deadRageClick/constructSelector';
  /**
   * Result record pairing a replay frame with the HTML snippets and selectors
   * extracted for it.
   */
  export type Extraction = {
    /** Replay frame associated with this extraction. */
    frame: ReplayFrame;
    /** Extracted HTML snippets. */
    html: string[];
    /** Selector strings keyed by a numeric id (presumably the node id — confirm with callers). */
    selectors: Map<number, string>;
    /** Numeric timestamp; the unit is not visible in this file. */
    timestamp: number;
  };
  /**
   * Collects an HTML snippet and a selector for each node id that resolves in
   * `mirror`.
   *
   * Ids for which `mirror.getNode` returns nothing are skipped. Because empty
   * snippets are also skipped, `html` is not index-aligned with `nodeIds`;
   * `selectors` is keyed by node id instead.
   *
   * @param nodeIds - Ids to look up in the mirror.
   * @param mirror - Mirror used to resolve ids to DOM nodes.
   * @returns The non-empty HTML snippets (in `nodeIds` order) and a map from
   * node id to its selector, for nodes where a selector could be built.
   */
  export default function extractHtmlAndSelector(
    nodeIds: number[],
    mirror: Mirror
  ): {html: string[]; selectors: Map<number, string>} {
    const html: string[] = [];
    const selectors = new Map<number, string>();

    nodeIds.forEach(id => {
      const resolved = mirror.getNode(id);
      if (!resolved) {
        return;
      }

      const snippet = extractHtml(resolved);
      if (snippet) {
        html.push(snippet);
      }

      const nodeSelector = extractSelector(resolved);
      if (nodeSelector) {
        selectors.set(id, nodeSelector);
      }
    });

    return {html, selectors};
  }
  /**
   * Produces a bounded HTML snippet for `node`.
   *
   * Uses the node's `outerHTML` when it has that property and its
   * `textContent` otherwise, prunes the markup with `removeNodesAtLevel`
   * (level 2), then caps the result at 1500 characters.
   *
   * @param node - DOM node to serialize.
   * @returns The pruned, length-capped snippet, or `null` when it is empty.
   */
  function extractHtml(node: Node): string | null {
    let markup: string | null;
    if ('outerHTML' in node) {
      markup = node.outerHTML as string;
    } else {
      markup = node.textContent;
    }

    // Limit document node depth to 2
    const pruned = removeNodesAtLevel(markup || '', 2);

    // Pruning can leave long output (or hand back the input unchanged when
    // parsing failed), so enforce a hard length cap as well.
    const capped = pruned.length > 1500 ? pruned.slice(0, 1500) : pruned;

    return capped ? capped : null;
  }
  /**
   * Builds a selector string for `node` from its identifying attributes.
   *
   * Only element nodes are supported; any other node type yields `null`.
   * Missing attributes are passed to `constructSelector` as empty strings.
   * The `class` attribute is tokenized on any run of whitespace and empty
   * tokens are dropped, so padded, multi-space or multi-line class lists (and
   * an empty `class=""`) never contribute empty class names.
   *
   * @param node - DOM node to describe.
   * @returns The `selector` produced by `constructSelector`, or `null` when
   * `node` is not an element.
   */
  function extractSelector(node: Node): string | null {
    if (node.nodeType !== Node.ELEMENT_NODE) {
      return null;
    }
    const element = node as HTMLElement;

    // `getAttribute` returns null for absent attributes; normalise to ''.
    const attr = (name: string): string => element.getAttribute(name) ?? '';

    return constructSelector({
      alt: attr('alt'),
      aria_label: attr('aria-label'),
      // Class names are whitespace-separated tokens, not single-space-separated.
      class: attr('class')
        .split(/\s+/)
        .filter(token => token.length > 0),
      component_name: attr('data-sentry-component'),
      id: element.id,
      role: attr('role'),
      tag: element.tagName.toLowerCase(),
      testid: attr('data-test-id'),
      title: attr('title'),
    }).selector;
  }
  /**
   * Recursively prunes the elements in `collection`, mutating them in place.
   *
   * For every element:
   * - a `STYLE` element has its text replaced by a placeholder CSS comment;
   * - an `svg` element has its markup replaced by a placeholder HTML comment;
   * - once `current` has reached `max`, an element that still has child
   *   elements has its content replaced by a comment stating how many child
   *   elements it had; before that depth, its children are visited instead.
   *
   * @param max - Depth at which children stop being visited and are summarized.
   * @param collection - Elements to process at the current depth.
   * @param current - Depth of `collection`; callers normally omit it (starts at 0).
   */
  function removeChildLevel(max: number, collection: HTMLCollection, current: number = 0) {
    const depthReached = max <= current;

    for (const element of Array.from(collection)) {
      switch (element.nodeName) {
        case 'STYLE':
          element.textContent = '/* Inline CSS */';
          break;
        case 'svg':
          element.innerHTML = '<!-- SVG -->';
          break;
        default:
          break;
      }

      if (!depthReached) {
        removeChildLevel(max, element.children, current + 1);
        continue;
      }

      const childCount = element.childElementCount;
      if (childCount > 0) {
        element.innerHTML = `<!-- ${childCount} descendents -->`;
      }
    }
  }
  /**
   * Parses `html` as an HTML document, prunes its body with
   * `removeChildLevel`, and serializes the body back to a string.
   *
   * @param html - Markup to prune.
   * @param level - Depth limit forwarded to `removeChildLevel`.
   * @returns The pruned body markup, or the original `html` unchanged if
   * parsing or pruning throws.
   */
  function removeNodesAtLevel(html: string, level: number): string {
    const domParser = new DOMParser();

    try {
      const {body} = domParser.parseFromString(html, 'text/html');
      removeChildLevel(level, body.children);
      return body.innerHTML;
    } catch {
      // If we can't parse the HTML, just return the original
      return html;
    }
  }