Browse Source

ref(replay): refactor replayerStepper calls to be memoized (#74606)

- closes https://github.com/getsentry/team-replay/issues/450
- this is the branch off from
https://github.com/getsentry/sentry/pull/74540, the experimental
refactor of `replayerStepper`
- TLDR: memoize the `replayerStepper` calls so that we remember the
results and don't have to reload the data every time.

**current state of things**:
we have distinct `replayerStepper` calls in the breadcrumbs + memory
tabs. in total, we could be calling it 3 times because it's used by
(1) the breadcrumbs tab to render the HTML snippet, 
(2) for hydration error text diffs,
(3) and the DOM nodes chart.

**the plan**:
improve things by calling `replayerStepper` once (or at least less) and
memoize the results. side note: it was hard to combine the hydration
error text diff `replayerStepper` call with the others due to the types,
so we had to keep that call separate.

**the question**:
is it faster to run one `replayerStepper` instance, and collect all the
data at one time (iterate over the frames exactly once)? this means the
speed of any tab will always be limited by the slower call, since we're
going through ALL the data at once, even some that we might not need
yet. like this:

https://github.com/getsentry/sentry/blob/70c90544f39dcc3d87dea8c85c0a7669c9585403/static/app/utils/replays/replayReader.tsx#L363-L366

or is it better to continue to have 2 stepper instances that iterate
over 2 different sets of frames (breadcrumbs loops over hundreds of
frames, and DOM node count iterates over thousands, which means the
speeds could be drastically different). in this situation, each tab
would handle their own array of necessary frames.

**the experiments**:
the first PR (https://github.com/getsentry/sentry/pull/74540) tried to
explore a refactor where `replayerStepper` is called once for the two
DOM node actions (counting, for the memory chart, and extracting, for
the breadcrumbs HTML). changes were made to the stepper so it could
accept a list of callbacks, and return all types of data in one shot, so
we only need one hidden `replayer` instance on the page for the DOM node
functions.

unscientifically, it seemed like loading breadcrumbs + memory took the
same amount of time as loading the memory chart. (see video below -- as
soon as the breadcrumbs tab is done loading, the memory tab is already
done).


https://github.com/user-attachments/assets/2c882bcb-c8df-409c-8e2a-69d75c279735

this makes sense because we’re iterating over thousands of frames for
the DOM nodes chart, so that’s the bottleneck. this means the
breadcrumbs tab loads slower.

**this PR**:
compared to that is the approach where we have 1 stepper instance for
breadcrumbs, and one for DOM node count, each iterating over their own
lists (which is what i'm doing in this PR). this approach showed
breadcrumb data on the screen about 2x as fast as the approach above.
therefore 2 stepper instances on the screen is better for users,
especially since breadcrumbs tab is more popular than the memory tab.

the video below demonstrates the memoization in action. once the
breadcrumbs or memory tabs are loaded, switching back to them does not
cause re-loading, because the results are cached. notice that the
breadcrumbs tab loading is not dependent on the loading of the DOM tab,
unlike the video above where the breadcrumbs tab has to "wait" for the
memory tab. (i'm also on slower wifi in this clip below)


https://github.com/user-attachments/assets/be71add1-63b5-4308-9d26-356d544980ef
Michelle Zhang 7 months ago
parent
commit
11e8424a4a

+ 1 - 1
static/app/components/replays/breadcrumbs/breadcrumbItem.tsx

@@ -18,7 +18,7 @@ import {useHasNewTimelineUI} from 'sentry/components/timeline/utils';
 import {Tooltip} from 'sentry/components/tooltip';
 import {t} from 'sentry/locale';
 import {space} from 'sentry/styles/space';
-import type {Extraction} from 'sentry/utils/replays/extractDomNodes';
+import type {Extraction} from 'sentry/utils/replays/extractHtml';
 import {getReplayDiffOffsetsFromFrame} from 'sentry/utils/replays/getDiffTimestamps';
 import getFrameDetails from 'sentry/utils/replays/getFrameDetails';
 import type ReplayReader from 'sentry/utils/replays/replayReader';

+ 3 - 3
static/app/components/replays/diff/replayTextDiff.tsx

@@ -7,7 +7,7 @@ import {CopyToClipboardButton} from 'sentry/components/copyToClipboardButton';
 import SplitDiff from 'sentry/components/splitDiff';
 import {t} from 'sentry/locale';
 import {space} from 'sentry/styles/space';
-import useExtractedPageHtml from 'sentry/utils/replays/hooks/useExtractedPageHtml';
+import useExtractPageHtml from 'sentry/utils/replays/hooks/useExtractPageHtml';
 import type ReplayReader from 'sentry/utils/replays/replayReader';
 
 interface Props {
@@ -16,8 +16,8 @@ interface Props {
   rightOffsetMs: number;
 }
 
-export function ReplayTextDiff({leftOffsetMs, replay, rightOffsetMs}: Props) {
-  const {data} = useExtractedPageHtml({
+export function ReplayTextDiff({replay, leftOffsetMs, rightOffsetMs}: Props) {
+  const {data} = useExtractPageHtml({
     replay,
     offsetMsToStopAt: [leftOffsetMs, rightOffsetMs],
   });

+ 0 - 54
static/app/utils/replays/countDomNodes.tsx

@@ -1,54 +0,0 @@
-import replayerStepper from 'sentry/utils/replays/replayerStepper';
-import type {RecordingFrame} from 'sentry/utils/replays/types';
-
-export type DomNodeChartDatapoint = {
-  added: number;
-  count: number;
-  endTimestampMs: number;
-  removed: number;
-  startTimestampMs: number;
-  timestampMs: number;
-};
-
-type Args = {
-  frames: RecordingFrame[] | undefined;
-  rrwebEvents: RecordingFrame[] | undefined;
-  startTimestampMs: number;
-};
-
-export default function countDomNodes({
-  frames,
-  rrwebEvents,
-  startTimestampMs,
-}: Args): Promise<Map<RecordingFrame, DomNodeChartDatapoint>> {
-  let frameCount = 0;
-  const length = frames?.length ?? 0;
-  const frameStep = Math.max(Math.round(length * 0.007), 1);
-
-  let prevIds: number[] = [];
-
-  return replayerStepper<RecordingFrame, DomNodeChartDatapoint>({
-    frames,
-    rrwebEvents,
-    startTimestampMs,
-    shouldVisitFrame: () => {
-      frameCount++;
-      return frameCount % frameStep === 0;
-    },
-    onVisitFrame: (frame, collection, replayer) => {
-      const ids = replayer.getMirror().getIds(); // gets list of DOM nodes present
-      const count = ids.length;
-      const added = ids.filter(id => !prevIds.includes(id)).length;
-      const removed = prevIds.filter(id => !ids.includes(id)).length;
-      collection.set(frame as RecordingFrame, {
-        count,
-        added,
-        removed,
-        timestampMs: frame.timestamp,
-        startTimestampMs: frame.timestamp,
-        endTimestampMs: frame.timestamp,
-      });
-      prevIds = ids;
-    },
-  });
-}

+ 2 - 50
static/app/utils/replays/extractDomNodes.tsx → static/app/utils/replays/extractHtml.tsx

@@ -1,11 +1,6 @@
 import type {Mirror} from '@sentry-internal/rrweb-snapshot';
 
-import replayerStepper from 'sentry/utils/replays/replayerStepper';
-import {
-  getNodeId,
-  type RecordingFrame,
-  type ReplayFrame,
-} from 'sentry/utils/replays/types';
+import type {ReplayFrame} from 'sentry/utils/replays/types';
 
 export type Extraction = {
   frame: ReplayFrame;
@@ -13,50 +8,7 @@ export type Extraction = {
   timestamp: number;
 };
 
-type Args = {
-  /**
-   * Frames where we should stop and extract html for a given dom node
-   */
-  frames: ReplayFrame[] | undefined;
-
-  /**
-   * The rrweb events that constitute the replay
-   */
-  rrwebEvents: RecordingFrame[] | undefined;
-
-  /**
-   * The replay start time, in ms
-   */
-  startTimestampMs: number;
-};
-
-export default function extractDomNodes({
-  frames,
-  rrwebEvents,
-  startTimestampMs,
-}: Args): Promise<Map<ReplayFrame, Extraction>> {
-  return replayerStepper({
-    frames,
-    rrwebEvents,
-    startTimestampMs,
-    shouldVisitFrame: frame => {
-      const nodeId = getNodeId(frame);
-      return nodeId !== undefined && nodeId !== -1;
-    },
-    onVisitFrame: (frame, collection, replayer) => {
-      const mirror = replayer.getMirror();
-      const nodeId = getNodeId(frame);
-      const html = extractHtml(nodeId as number, mirror);
-      collection.set(frame as ReplayFrame, {
-        frame,
-        html,
-        timestamp: frame.timestampMs,
-      });
-    },
-  });
-}
-
-function extractHtml(nodeId: number, mirror: Mirror): string | null {
+export default function extractHtml(nodeId: number, mirror: Mirror): string | null {
   const node = mirror.getNode(nodeId);
 
   const html =

+ 26 - 0
static/app/utils/replays/hooks/useCountDomNodes.tsx

@@ -0,0 +1,26 @@
+import {useQuery, type UseQueryResult} from 'sentry/utils/queryClient';
+import type ReplayReader from 'sentry/utils/replays/replayReader';
+import type {RecordingFrame} from 'sentry/utils/replays/types';
+
+export type DomNodeChartDatapoint = {
+  added: number;
+  count: number;
+  endTimestampMs: number;
+  removed: number;
+  startTimestampMs: number;
+  timestampMs: number;
+};
+
+export default function useCountDomNodes({
+  replay,
+}: {
+  replay: null | ReplayReader;
+}): UseQueryResult<Map<RecordingFrame, DomNodeChartDatapoint>> {
+  return useQuery(
+    ['countDomNodes', replay],
+    () => {
+      return replay?.getCountDomNodes();
+    },
+    {enabled: Boolean(replay), cacheTime: Infinity}
+  );
+}

+ 18 - 0
static/app/utils/replays/hooks/useExtractDomNodes.tsx

@@ -0,0 +1,18 @@
+import {useQuery, type UseQueryResult} from 'sentry/utils/queryClient';
+import type {Extraction} from 'sentry/utils/replays/extractHtml';
+import type ReplayReader from 'sentry/utils/replays/replayReader';
+import type {ReplayFrame} from 'sentry/utils/replays/types';
+
+export default function useExtractDomNodes({
+  replay,
+}: {
+  replay: null | ReplayReader;
+}): UseQueryResult<Map<ReplayFrame, Extraction>> {
+  return useQuery(
+    ['getDomNodes', replay],
+    () => {
+      return replay?.getExtractDomNodes();
+    },
+    {enabled: Boolean(replay), cacheTime: Infinity}
+  );
+}

+ 23 - 3
static/app/utils/replays/extractPageHtml.tsx → static/app/utils/replays/hooks/useExtractPageHtml.tsx

@@ -1,4 +1,6 @@
+import {useQuery} from 'sentry/utils/queryClient';
 import replayerStepper from 'sentry/utils/replays/replayerStepper';
+import type ReplayReader from 'sentry/utils/replays/replayReader';
 import type {RecordingFrame, ReplayFrame} from 'sentry/utils/replays/types';
 
 type Args = {
@@ -18,7 +20,7 @@ type Args = {
   startTimestampMs: number;
 };
 
-export default async function extactPageHtml({
+async function extractPageHtml({
   offsetMsToStopAt,
   rrwebEvents,
   startTimestampMs,
@@ -32,11 +34,11 @@ export default async function extactPageHtml({
     frames,
     rrwebEvents,
     startTimestampMs,
-    shouldVisitFrame(_frame) {
+    shouldVisitFrame: () => {
       // Visit all the timestamps (converted to frames) that were passed in above
       return true;
     },
-    onVisitFrame(frame, collection, replayer) {
+    onVisitFrame: (frame, collection, replayer) => {
       const doc = replayer.getMirror().getNode(1);
       const html = (doc as Document)?.body.outerHTML ?? '';
       collection.set(frame, html);
@@ -46,3 +48,21 @@ export default async function extactPageHtml({
     return [frame.offsetMs, html];
   });
 }
+
+interface Props {
+  offsetMsToStopAt: number[];
+  replay: ReplayReader | null;
+}
+
+export default function useExtractPageHtml({replay, offsetMsToStopAt}: Props) {
+  return useQuery(
+    ['extactPageHtml', replay, offsetMsToStopAt],
+    () =>
+      extractPageHtml({
+        offsetMsToStopAt,
+        rrwebEvents: replay?.getRRWebFrames(),
+        startTimestampMs: replay?.getReplay().started_at.getTime() ?? 0,
+      }),
+    {enabled: Boolean(replay), cacheTime: Infinity}
+  );
+}

+ 0 - 16
static/app/utils/replays/hooks/useExtractedDomNodes.tsx

@@ -1,16 +0,0 @@
-import {useQuery} from 'sentry/utils/queryClient';
-import extractDomNodes from 'sentry/utils/replays/extractDomNodes';
-import type ReplayReader from 'sentry/utils/replays/replayReader';
-
-export default function useExtractedDomNodes({replay}: {replay: null | ReplayReader}) {
-  return useQuery(
-    ['getDomNodes', replay],
-    () =>
-      extractDomNodes({
-        frames: replay?.getDOMFrames(),
-        rrwebEvents: replay?.getRRWebFrames(),
-        startTimestampMs: replay?.getReplay().started_at.getTime() ?? 0,
-      }),
-    {enabled: Boolean(replay), cacheTime: Infinity}
-  );
-}

+ 0 - 21
static/app/utils/replays/hooks/useExtractedPageHtml.tsx

@@ -1,21 +0,0 @@
-import {useQuery} from 'sentry/utils/queryClient';
-import extractPageHtml from 'sentry/utils/replays/extractPageHtml';
-import type ReplayReader from 'sentry/utils/replays/replayReader';
-
-interface Props {
-  offsetMsToStopAt: number[];
-  replay: ReplayReader | null;
-}
-
-export default function useExtractedPageHtml({replay, offsetMsToStopAt}: Props) {
-  return useQuery(
-    ['extactPageHtml', replay, offsetMsToStopAt],
-    () =>
-      extractPageHtml({
-        offsetMsToStopAt,
-        rrwebEvents: replay?.getRRWebFrames(),
-        startTimestampMs: replay?.getReplay().started_at.getTime() ?? 0,
-      }),
-    {enabled: Boolean(replay), cacheTime: Infinity}
-  );
-}

+ 80 - 0
static/app/utils/replays/replayReader.tsx

@@ -1,4 +1,5 @@
 import * as Sentry from '@sentry/react';
+import type {eventWithTime} from '@sentry-internal/rrweb';
 import memoize from 'lodash/memoize';
 import {type Duration, duration} from 'moment-timezone';
 
@@ -6,6 +7,7 @@ import {defined} from 'sentry/utils';
 import domId from 'sentry/utils/domId';
 import localStorageWrapper from 'sentry/utils/localStorage';
 import clamp from 'sentry/utils/number/clamp';
+import extractHtml from 'sentry/utils/replays/extractHtml';
 import hydrateBreadcrumbs, {
   replayInitBreadcrumb,
 } from 'sentry/utils/replays/hydrateBreadcrumbs';
@@ -17,6 +19,7 @@ import {
 } from 'sentry/utils/replays/hydrateRRWebRecordingFrames';
 import hydrateSpans from 'sentry/utils/replays/hydrateSpans';
 import {replayTimestamps} from 'sentry/utils/replays/replayDataUtils';
+import replayerStepper from 'sentry/utils/replays/replayerStepper';
 import type {
   BreadcrumbFrame,
   ClipWindow,
@@ -26,6 +29,7 @@ import type {
   MemoryFrame,
   OptionFrame,
   RecordingFrame,
+  ReplayFrame,
   serializedNodeWithId,
   SlowClickFrame,
   SpanFrame,
@@ -34,6 +38,7 @@ import type {
 import {
   BreadcrumbCategories,
   EventType,
+  getNodeId,
   IncrementalSource,
   isDeadClick,
   isDeadRageClick,
@@ -137,6 +142,53 @@ function removeDuplicateNavCrumbs(
   return otherBreadcrumbFrames.concat(uniqueNavCrumbs);
 }
 
+const extractDomNodes = {
+  shouldVisitFrame: frame => {
+    const nodeId = getNodeId(frame);
+    return nodeId !== undefined && nodeId !== -1;
+  },
+  onVisitFrame: (frame, collection, replayer) => {
+    const mirror = replayer.getMirror();
+    const nodeId = getNodeId(frame);
+    const html = extractHtml(nodeId as number, mirror);
+    collection.set(frame as ReplayFrame, {
+      frame,
+      html,
+      timestamp: frame.timestampMs,
+    });
+  },
+};
+
+const countDomNodes = function (frames: eventWithTime[]) {
+  let frameCount = 0;
+  const length = frames?.length ?? 0;
+  const frameStep = Math.max(Math.round(length * 0.007), 1);
+
+  let prevIds: number[] = [];
+
+  return {
+    shouldVisitFrame() {
+      frameCount++;
+      return frameCount % frameStep === 0;
+    },
+    onVisitFrame(frame, collection, replayer) {
+      const ids = replayer.getMirror().getIds(); // gets list of DOM nodes present
+      const count = ids.length;
+      const added = ids.filter(id => !prevIds.includes(id)).length;
+      const removed = prevIds.filter(id => !ids.includes(id)).length;
+      collection.set(frame as RecordingFrame, {
+        count,
+        added,
+        removed,
+        timestampMs: frame.timestamp,
+        startTimestampMs: frame.timestamp,
+        endTimestampMs: frame.timestamp,
+      });
+      prevIds = ids;
+    },
+  };
+};
+
 export default class ReplayReader {
   static factory({
     attachments,
@@ -412,6 +464,34 @@ export default class ReplayReader {
     return this.processingErrors().length;
   };
 
+  getCountDomNodes = memoize(async () => {
+    const {onVisitFrame, shouldVisitFrame} = countDomNodes(this.getRRWebMutations());
+
+    const results = await replayerStepper({
+      frames: this.getRRWebMutations(),
+      rrwebEvents: this.getRRWebFrames(),
+      startTimestampMs: this.getReplay().started_at.getTime() ?? 0,
+      onVisitFrame,
+      shouldVisitFrame,
+    });
+
+    return results;
+  });
+
+  getExtractDomNodes = memoize(async () => {
+    const {onVisitFrame, shouldVisitFrame} = extractDomNodes;
+
+    const results = await replayerStepper({
+      frames: this.getDOMFrames(),
+      rrwebEvents: this.getRRWebFrames(),
+      startTimestampMs: this.getReplay().started_at.getTime() ?? 0,
+      onVisitFrame,
+      shouldVisitFrame,
+    });
+
+    return results;
+  });
+
   getClipWindow = () => this._clipWindow;
 
   /**

Some files were not shown because too many files changed in this diff