Browse Source

ref(replay): try 5s rage/dead click timeout for internal orgs (#79163)

Reopened from https://github.com/getsentry/sentry/pull/77325.

Follows up on a slack thread from a while ago with @drguthals and
@bruno-garcia. Once this is merged, we can specify a list of org IDs
(e.g. sentry orgs) and any timeout we want.

Note we still need a way to pass the org_id from
[recording_buffered.py](https://github.com/getsentry/sentry/blob/06884c96f61f9fb30d02ef41d833299123a3e23b/src/sentry/replays/consumers/recording_buffered.py#L323-L323)
Andrew Liu 4 months ago
parent
commit
c9fbb2c5da

+ 13 - 0
src/sentry/options/defaults.py

@@ -466,6 +466,19 @@ register(
     default=[],
     flags=FLAG_ALLOW_EMPTY | FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
 )
+# Used for internal dogfooding of a reduced timeout on rage/dead clicks.
+register(
+    "replay.rage-click.experimental-timeout.org-id-list",
+    type=Sequence,
+    default=[],
+    flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
+)
+register(
+    "replay.rage-click.experimental-timeout.milliseconds",
+    type=Int,
+    default=5000,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
 
 # User Feedback Options
 register(

+ 1 - 0
src/sentry/replays/consumers/recording_buffered.py

@@ -326,6 +326,7 @@ def process_message(buffer: RecordingBuffer, message: bytes) -> None:
             decoded_message["retention_days"],
             parsed_recording_data,
             parsed_replay_event,
+            org_id=decoded_message["org_id"],
         )
 
         if replay_actions is not None:

+ 1 - 0
src/sentry/replays/usecases/ingest/__init__.py

@@ -269,6 +269,7 @@ def recording_post_processor(
                 replay_id=message.replay_id,
                 segment_data=parsed_segment_data,
                 replay_event=parsed_replay_event,
+                org_id=message.org_id,
             )
 
         # Log canvas mutations to bigquery.

+ 26 - 11
src/sentry/replays/usecases/ingest/dom_index.py

@@ -8,6 +8,7 @@ from collections.abc import Generator
 from hashlib import md5
 from typing import Any, Literal, TypedDict
 
+from sentry import options
 from sentry.conf.types.kafka_definition import Topic
 from sentry.models.project import Project
 from sentry.replays.usecases.ingest.issue_creation import (
@@ -66,10 +67,11 @@ def parse_and_emit_replay_actions(
     retention_days: int,
     segment_data: list[dict[str, Any]],
     replay_event: dict[str, Any] | None,
+    org_id: int | None = None,
 ) -> None:
     with metrics.timer("replays.usecases.ingest.dom_index.parse_and_emit_replay_actions"):
         message = parse_replay_actions(
-            project, replay_id, retention_days, segment_data, replay_event
+            project, replay_id, retention_days, segment_data, replay_event, org_id=org_id
         )
         if message is not None:
             emit_replay_actions(message)
@@ -86,9 +88,10 @@ def parse_replay_actions(
     retention_days: int,
     segment_data: list[dict[str, Any]],
     replay_event: dict[str, Any] | None,
+    org_id: int | None = None,
 ) -> ReplayActionsEvent | None:
     """Parse RRWeb payload to ReplayActionsEvent."""
-    actions = get_user_actions(project, replay_id, segment_data, replay_event)
+    actions = get_user_actions(project, replay_id, segment_data, replay_event, org_id=org_id)
     if len(actions) == 0:
         return None
 
@@ -156,6 +159,7 @@ def get_user_actions(
     replay_id: str,
     events: list[dict[str, Any]],
     replay_event: dict[str, Any] | None,
+    org_id: int | None = None,
 ) -> list[ReplayActionsEventPayloadClick]:
     """Return a list of ReplayActionsEventPayloadClick types.
 
@@ -176,9 +180,10 @@ def get_user_actions(
             "textContent": "Helloworld!"
         }
     """
-    # Feature flag and project option queries
+    # Project option and Sentry option queries
     should_report_rage = _should_report_rage_click_issue(project)
     should_report_hydration = _should_report_hydration_error_issue(project)
+    rage_click_timeout_ms = _get_rage_click_timeout(org_id)
 
     result: list[ReplayActionsEventPayloadClick] = []
     for event in _iter_custom_events(events):
@@ -190,9 +195,10 @@ def get_user_actions(
         if tag == "breadcrumb":
             click = _handle_breadcrumb(
                 event,
-                project,
+                project.id,
                 replay_id,
                 replay_event,
+                rage_click_timeout_ms,
                 should_report_rage_click_issue=should_report_rage,
                 should_report_hydration_error_issue=should_report_hydration,
             )
@@ -311,6 +317,14 @@ def _should_report_rage_click_issue(project: Project) -> bool:
     return project.get_option("sentry:replay_rage_click_issues")
 
 
+def _get_rage_click_timeout(org_id: int | None) -> int | float:
+    """Returns the rage click timeout in milliseconds. Queries Sentry options if org_id is not None."""
+    default_timeout = 7000
+    if org_id and org_id in options.get("replay.rage-click.experimental-timeout.org-id-list"):
+        return options.get("replay.rage-click.experimental-timeout.milliseconds")
+    return default_timeout
+
+
 def _iter_custom_events(events: list[dict[str, Any]]) -> Generator[dict[str, Any]]:
     for event in events:
         if event.get("type") == 5:
@@ -381,9 +395,10 @@ def _handle_mutations_event(project_id: int, replay_id: str, event: dict[str, An
 
 def _handle_breadcrumb(
     event: dict[str, Any],
-    project: Project,
+    project_id: int,
     replay_id: str,
     replay_event: dict[str, Any] | None,
+    rage_click_timeout_ms: int | float,
     should_report_rage_click_issue=False,
     should_report_hydration_error_issue=False,
 ) -> ReplayActionsEventPayloadClick | None:
@@ -405,12 +420,12 @@ def _handle_breadcrumb(
         timeout = payload["data"].get("timeAfterClickMs", 0) or payload["data"].get(
             "timeafterclickms", 0
         )
-        if is_timeout_reason and is_target_tagname and timeout >= 7000:
+        if is_timeout_reason and is_target_tagname and timeout >= rage_click_timeout_ms:
             is_rage = (
                 payload["data"].get("clickCount", 0) or payload["data"].get("clickcount", 0)
             ) >= 5
             click = create_click_event(
-                payload, replay_id, is_dead=True, is_rage=is_rage, project_id=project.id
+                payload, replay_id, is_dead=True, is_rage=is_rage, project_id=project_id
             )
             if click is not None:
                 if is_rage:
@@ -418,7 +433,7 @@ def _handle_breadcrumb(
                     if should_report_rage_click_issue:
                         if replay_event is not None:
                             report_rage_click_issue_with_replay_event(
-                                project.id,
+                                project_id,
                                 replay_id,
                                 payload["timestamp"],
                                 payload["message"],
@@ -429,7 +444,7 @@ def _handle_breadcrumb(
                             )
         # Log the event for tracking.
         log = event["data"].get("payload", {}).copy()
-        log["project_id"] = project.id
+        log["project_id"] = project_id
         log["replay_id"] = replay_id
         log["dom_tree"] = log.pop("message")
 
@@ -437,7 +452,7 @@ def _handle_breadcrumb(
 
     elif category == "ui.click":
         click = create_click_event(
-            payload, replay_id, is_dead=False, is_rage=False, project_id=project.id
+            payload, replay_id, is_dead=False, is_rage=False, project_id=project_id
         )
         if click is not None:
             return click
@@ -446,7 +461,7 @@ def _handle_breadcrumb(
         metrics.incr("replay.hydration_error_breadcrumb")
         if replay_event is not None and should_report_hydration_error_issue:
             report_hydration_error_issue_with_replay_event(
-                project.id,
+                project_id,
                 replay_id,
                 payload["timestamp"],
                 payload.get("data", {}).get("url"),

+ 26 - 5
tests/sentry/replays/unit/test_ingest_dom_index.py

@@ -17,6 +17,7 @@ from sentry.replays.usecases.ingest.dom_index import (
     log_canvas_size,
     parse_replay_actions,
 )
+from sentry.testutils.helpers import override_options
 from sentry.testutils.pytest.fixtures import django_db_all
 from sentry.utils import json
 
@@ -274,8 +275,15 @@ def test_parse_replay_actions(mock_project):
     assert len(action["event_hash"]) == 36
 
 
+@pytest.mark.parametrize("use_experimental_timeout", (False, True))
 @django_db_all
-def test_parse_replay_dead_click_actions(patch_rage_click_issue_with_replay_event, default_project):
+def test_parse_replay_dead_click_actions(
+    patch_rage_click_issue_with_replay_event, default_project, use_experimental_timeout
+):
+    experimental_timeout = 5000.0
+    default_timeout = 7000.0
+    time_after_click_ms = experimental_timeout if use_experimental_timeout else default_timeout
+
     events = [
         {
             "type": 5,
@@ -289,7 +297,7 @@ def test_parse_replay_dead_click_actions(patch_rage_click_issue_with_replay_even
                     "message": "div.container > div#root > div > ul > div",
                     "data": {
                         "endReason": "timeout",
-                        "timeafterclickms": 7000.0,
+                        "timeafterclickms": time_after_click_ms,
                         "nodeId": 59,
                         "url": "https://www.sentry.io",
                         "node": {
@@ -324,7 +332,7 @@ def test_parse_replay_dead_click_actions(patch_rage_click_issue_with_replay_even
                     "data": {
                         "clickcount": 5,
                         "endReason": "timeout",
-                        "timeafterclickms": 7000.0,
+                        "timeafterclickms": time_after_click_ms,
                         "nodeId": 59,
                         "url": "https://www.sentry.io",
                         "node": {
@@ -361,7 +369,7 @@ def test_parse_replay_dead_click_actions(patch_rage_click_issue_with_replay_even
                         "url": "https://www.sentry.io",
                         "clickCount": 5,
                         "endReason": "timeout",
-                        "timeAfterClickMs": 7000.0,
+                        "timeAfterClickMs": time_after_click_ms,
                         "nodeId": 59,
                         "node": {
                             "id": 59,
@@ -385,7 +393,20 @@ def test_parse_replay_dead_click_actions(patch_rage_click_issue_with_replay_even
     ]
 
     default_project.update_option("sentry:replay_rage_click_issues", True)
-    replay_actions = parse_replay_actions(default_project, "1", 30, events, mock_replay_event())
+
+    if use_experimental_timeout:
+        with override_options(
+            {
+                "replay.rage-click.experimental-timeout.org-id-list": [1],
+                "replay.rage-click.experimental-timeout.milliseconds": experimental_timeout,
+            }
+        ):
+            replay_actions = parse_replay_actions(
+                default_project, "1", 30, events, mock_replay_event(), org_id=1
+            )
+    else:
+        replay_actions = parse_replay_actions(default_project, "1", 30, events, mock_replay_event())
+
     assert patch_rage_click_issue_with_replay_event.call_count == 2
     assert replay_actions is not None
     assert replay_actions["type"] == "replay_event"