// extractHtml.tsx (1.6 KB)
  1. import type {Mirror} from '@sentry-internal/rrweb-snapshot';
  2. import type {ReplayFrame} from 'sentry/utils/replays/types';
  /**
   * Groups a replay frame with an HTML string and a timestamp.
   */
  export type Extraction = {
    /** The replay frame this record is associated with. */
    frame: ReplayFrame;
    /**
     * HTML snippet for the frame, or `null` when there is none.
     * NOTE(review): presumably the value returned by `extractHtml` — confirm against callers.
     */
    html: string | null;
    /** Numeric timestamp. NOTE(review): unit/epoch is not visible here — confirm. */
    timestamp: number;
  };
  /**
   * Looks up the node registered under `nodeId` in `mirror` and returns a
   * shortened HTML string for it.
   *
   * The node's `outerHTML` is used when it has one; otherwise its `textContent`.
   * The markup is pruned with `removeNodesAtLevel` and then capped in length.
   *
   * @param nodeId Id of the node to look up in the mirror.
   * @param mirror Mirror used to resolve `nodeId` to a node.
   * @returns The shortened markup, or `null` when the result is empty (including
   * when the node is missing).
   */
  export default function extractHtml(nodeId: number, mirror: Mirror): string | null {
    // Depth passed to the pruning helper, and the hard cap on the output length.
    const maxDepth = 2;
    const maxLength = 1500;

    const node = mirror.getNode(nodeId);

    let rawHtml: string | null | undefined;
    if (node && 'outerHTML' in node) {
      rawHtml = node.outerHTML as string;
    } else {
      rawHtml = node?.textContent;
    }

    const pruned = removeNodesAtLevel(rawHtml || '', maxDepth);

    // Pruning may leave the markup long (or may have fallen back to the raw
    // input), so enforce the length cap regardless.
    const capped = pruned.length > maxLength ? pruned.substring(0, maxLength) : pruned;

    return capped || null;
  }
  /**
   * Walks `collection` recursively and mutates its elements in place so that the
   * tree is no deeper than `max` levels below the starting collection.
   *
   * At every visited level, `STYLE` elements get placeholder text and `svg`
   * elements get a placeholder comment instead of their content. Once `current`
   * reaches `max`, any element that still has child elements has its content
   * replaced by a comment stating the number of child elements.
   *
   * @param max Depth at which content is replaced instead of being descended into.
   * @param collection Elements at the current depth.
   * @param current Depth of `collection`; starts at 0.
   */
  function removeChildLevel(max: number, collection: HTMLCollection, current: number = 0) {
    const atDepthLimit = current >= max;

    for (const element of Array.from(collection)) {
      switch (element.nodeName) {
        case 'STYLE':
          element.textContent = '/* Inline CSS */';
          break;
        case 'svg':
          element.innerHTML = '<!-- SVG -->';
          break;
        default:
          break;
      }

      if (!atDepthLimit) {
        removeChildLevel(max, element.children, current + 1);
        continue;
      }

      // Read the count before overwriting the content it describes.
      const childCount = element.childElementCount;
      if (childCount > 0) {
        element.innerHTML = `<!-- ${childCount} descendents -->`;
      }
    }
  }
  /**
   * Parses `html` as an HTML document, prunes it with `removeChildLevel` using
   * `level` as the depth limit, and returns the serialized body content.
   *
   * This is best-effort: if anything in the process throws, the original
   * `html` string is returned unchanged.
   *
   * @param html Markup to prune.
   * @param level Depth limit forwarded to `removeChildLevel`.
   * @returns The pruned markup, or `html` itself when pruning fails.
   */
  function removeNodesAtLevel(html: string, level: number): string {
    try {
      // Construct the parser inside the `try` so that an environment without a
      // usable `DOMParser` falls back to the raw markup instead of throwing.
      const parser = new DOMParser();
      const doc = parser.parseFromString(html, 'text/html');
      removeChildLevel(level, doc.body.children);
      return doc.body.innerHTML;
    } catch {
      // If we can't parse the HTML, just return the original
      return html;
    }
  }