
feat(replays): Use new error column for replays (#59378)

- Use the new error columns for replay error counts and filtering.
- Old error IDs are still returned in final results, but they are no
longer counted or searchable. Replay details already filters out IDs
that have no corresponding error, so effectively this fixes the counts.
- A follow-up PR will remove the old error column from final results
entirely.
Josh Ferge · 1 year ago
commit 38c0733918
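
In ClickHouse terms, the heart of the change is where count_errors comes
from. A minimal before/after sketch using snuba_sdk, with both expressions
lifted from the query.py diff below:

    from snuba_sdk import Column, Function

    # Before: derive the count from the legacy error_ids array column.
    # Orphaned IDs (errors later filtered out of replay details) still
    # inflate this number.
    old_count_errors = Function(
        "sum",
        parameters=[Function("length", parameters=[Column("error_ids")])],
        alias="count_errors",
    )

    # After: sum the per-row counter written alongside the new error columns.
    new_count_errors = Function(
        "sum",
        parameters=[Column("count_error_events")],
        alias="count_errors",
    )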

+ 1 - 6
src/sentry/replays/post_process.py

@@ -40,7 +40,7 @@ class UserResponseType(TypedDict, total=False):
     display_name: Optional[str]
 
 
-@extend_schema_serializer(exclude_fields=["info_ids", "warning_ids", "new_error_ids"])
+@extend_schema_serializer(exclude_fields=["info_ids", "warning_ids"])
 class ReplayDetailsResponse(TypedDict, total=False):
     id: str
     project_id: str
@@ -69,10 +69,8 @@ class ReplayDetailsResponse(TypedDict, total=False):
     platform: Optional[str]
     releases: List[str]
     dist: Optional[str]
-    new_error_ids: Optional[List[str]]
     warning_ids: Optional[List[str]]
     info_ids: Optional[List[str]]
-    new_count_errors: Optional[int]
     count_warnings: Optional[int]
     count_infos: Optional[int]
 
@@ -88,7 +86,6 @@ def generate_restricted_fieldset(
     fields: List[str],
     response: Generator[ReplayDetailsResponse, None, None],
 ) -> Iterator[ReplayDetailsResponse]:
-
     """Return only the fields requested by the client."""
     if fields:
         for item in response:
@@ -180,10 +177,8 @@ def generate_normalized_output(
         ret_item["replay_type"] = item.pop("replay_type", "session")
         ret_item["started_at"] = item.pop("started_at", None)
 
-        ret_item["new_error_ids"] = item.pop("new_error_ids", None)
         ret_item["warning_ids"] = item.pop("warning_ids", None)
         ret_item["info_ids"] = item.pop("info_ids", None)
-        ret_item["new_count_errors"] = item.pop("new_count_errors", None)
         ret_item["count_infos"] = item.pop("count_infos", None)
         ret_item["count_warnings"] = item.pop("count_warnings", None)
         yield ret_item
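
For context, a hedged sketch (not the project's exact code) of how the
normalized output reads after this diff: the new_* keys are gone and error
data flows through the canonical error_ids / count_errors keys.

    from typing import Any, Dict, Iterator

    def normalize_sketch(rows: Iterator[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
        for item in rows:
            out: Dict[str, Any] = {}
            out["error_ids"] = item.pop("error_ids", None)        # backed by the new columns
            out["warning_ids"] = item.pop("warning_ids", None)
            out["info_ids"] = item.pop("info_ids", None)
            out["count_errors"] = item.pop("count_errors", None)  # replaces new_count_errors
            out["count_warnings"] = item.pop("count_warnings", None)
            out["count_infos"] = item.pop("count_infos", None)
            yield out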

+ 4 - 24
src/sentry/replays/query.py

@@ -284,7 +284,6 @@ replay_url_parser_config = SearchConfig(
         "count_dead_clicks",
         "count_rage_clicks",
         "activity",
-        "new_count_errors",
         "count_warnings",
         "count_infos",
     },
@@ -424,7 +423,7 @@ def _collect_new_errors():
                 ],
             ),
         ],
-        alias="new_error_ids",
+        alias="errorIds",
     )
 
 
@@ -539,13 +538,10 @@ FIELD_QUERY_ALIAS_MAP: Dict[str, List[str]] = {
         "click.text",
         "click.title",
     ],
-    "new_error_id": ["new_error_ids"],
     "warning_id": ["warning_ids"],
     "info_id": ["info_ids"],
-    "new_error_ids": ["new_error_ids"],
     "warning_ids": ["warning_ids"],
     "info_ids": ["info_ids"],
-    "new_count_errors": ["new_count_errors"],
     "count_warnings": ["count_warnings"],
     "count_infos": ["count_infos"],
 }
@@ -571,17 +567,6 @@ QUERY_ALIAS_COLUMN_MAP = {
         ],
         alias="traceIds",
     ),
-    "error_ids": Function(
-        "arrayMap",
-        parameters=[
-            Lambda(["id"], _strip_uuid_dashes("id", Identifier("id"))),
-            Function(
-                "groupUniqArrayArray",
-                parameters=[Column("error_ids")],
-            ),
-        ],
-        alias="errorIds",
-    ),
     "started_at": Function(
         "min", parameters=[Column("replay_start_timestamp")], alias="started_at"
     ),
@@ -599,11 +584,6 @@ QUERY_ALIAS_COLUMN_MAP = {
         alias="agg_urls",
     ),
     "count_segments": Function("count", parameters=[Column("segment_id")], alias="count_segments"),
-    "count_errors": Function(
-        "sum",
-        parameters=[Function("length", parameters=[Column("error_ids")])],
-        alias="count_errors",
-    ),
     "count_urls": Function(
         "sum",
         parameters=[Function("length", parameters=[Column("urls")])],
@@ -690,13 +670,13 @@ QUERY_ALIAS_COLUMN_MAP = {
     ),
     "click.text": Function("groupArray", parameters=[Column("click_text")], alias="click_text"),
     "click.title": Function("groupArray", parameters=[Column("click_title")], alias="click_title"),
-    "new_error_ids": _collect_new_errors(),
+    "error_ids": _collect_new_errors(),
     "warning_ids": _collect_event_ids("warning_ids", ["warning_id"]),
     "info_ids": _collect_event_ids("info_ids", ["info_id", "debug_id"]),
-    "new_count_errors": Function(
+    "count_errors": Function(
         "sum",
         parameters=[Column("count_error_events")],
-        alias="new_count_errors",
+        alias="count_errors",
     ),
     "count_warnings": Function(
         "sum",

+ 1 - 3
src/sentry/replays/testutils.py

@@ -79,7 +79,6 @@ def mock_expected_response(
         "duration": (finished_at - started_at).seconds,
         "count_dead_clicks": kwargs.pop("count_dead_clicks", 0),
         "count_rage_clicks": kwargs.pop("count_rage_clicks", 0),
-        "count_errors": kwargs.pop("count_errors", 1),
         "count_segments": kwargs.pop("count_segments", 1),
         "count_urls": len(urls),
         "platform": kwargs.pop("platform", "javascript"),
@@ -115,10 +114,9 @@ def mock_expected_response(
         "activity": kwargs.pop("activity", 0),
         "is_archived": kwargs.pop("is_archived", False),
         "clicks": kwargs.pop("clicks", []),
-        "new_error_ids": kwargs.pop("new_error_ids", ["a3a62ef6ac86415b83c2416fc2f76db1"]),
         "warning_ids": kwargs.pop("warning_ids", []),
         "info_ids": kwargs.pop("info_ids", []),
-        "new_count_errors": kwargs.pop("new_count_errors", 0),
+        "count_errors": kwargs.pop("count_errors", 0),
         "count_warnings": kwargs.pop("count_warnings", 0),
         "count_infos": kwargs.pop("count_infos", 0),
     }
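
Usage sketch for tests built on this helper: the mock now takes the
canonical kwargs. The project fixture and timestamps below are
illustrative, not taken from this diff.

    import datetime
    import uuid

    from sentry.replays.testutils import mock_expected_response

    finished_at = datetime.datetime.now()
    started_at = finished_at - datetime.timedelta(seconds=30)

    expected = mock_expected_response(
        project.id,              # assumes a test fixture project
        uuid.uuid4().hex,        # replay_id
        started_at,
        finished_at,
        urls=["http://localhost:3000/"],
        count_errors=1,          # formerly new_count_errors
    )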

+ 1 - 2
src/sentry/replays/usecases/query/conditions/__init__.py

@@ -11,7 +11,6 @@ __all__ = [
     "SumOfClickScalar",
     "SumOfClickSelectorComposite",
     "SumOfDeadClickSelectorComposite",
-    "SumOfErrorIdsArray",
     "SumOfIPv4Scalar",
     "SumOfRageClickSelectorComposite",
     "SumOfStringArray",
@@ -25,7 +24,7 @@ __all__ = [
 from .activity import AggregateActivityScalar
 from .aggregate import SumOfIPv4Scalar, SumOfStringArray, SumOfStringScalar, SumOfUUIDArray
 from .duration import SimpleAggregateDurationScalar
-from .error_ids import ErrorIdsArray, SumOfErrorIdsArray
+from .error_ids import ErrorIdsArray
 from .selector import (
     ClickArray,
     ClickScalar,

+ 4 - 18
src/sentry/replays/usecases/query/conditions/event_ids.py

@@ -11,7 +11,6 @@ from sentry.replays.lib.new_query.utils import (
     translate_condition_to_function,
 )
 from sentry.replays.usecases.query.conditions.base import ComputedBase
-from sentry.replays.usecases.query.conditions.error_ids import ErrorIdsArray, has_error_id
 
 
 class InfoIdScalar(ComputedBase):
@@ -81,28 +80,21 @@ class ErrorIdScalar(ComputedBase):
     @classmethod
     def visit_eq(cls, value: UUID) -> Condition:
         conditions = _make_conditions_from_column_names(cls.event_id_columns, Op.EQ, to_uuid(value))
-        deprecated_error_column_conditions = has_error_id(value)
 
         return Condition(
-            Function(
-                "or", [Function("or", parameters=conditions), deprecated_error_column_conditions]
-            ),
+            Function("or", conditions),
             Op.EQ,
             1,
         )
 
     @classmethod
     def visit_neq(cls, value: UUID) -> Condition:
-
         conditions = _make_conditions_from_column_names(
             cls.event_id_columns, Op.NEQ, to_uuid(value)
         )
-        deprecated_error_column_conditions = translate_condition_to_function(
-            ErrorIdsArray.visit_neq(value)
-        )
 
         return Condition(
-            Function("and", [Function("and", conditions), deprecated_error_column_conditions]),
+            Function("and", conditions),
             Op.EQ,
             1,
         )
@@ -112,11 +104,8 @@ class ErrorIdScalar(ComputedBase):
         conditions = _make_conditions_from_column_names(
             cls.event_id_columns, Op.IN, [str(v) for v in value]
         )
-        deprecated_error_column_conditions = translate_condition_to_function(
-            ErrorIdsArray.visit_in(value)
-        )
         return Condition(
-            Function("or", [Function("or", conditions), deprecated_error_column_conditions]),
+            Function("or", conditions),
             Op.EQ,
             1,
         )
@@ -126,11 +115,8 @@ class ErrorIdScalar(ComputedBase):
         conditions = _make_conditions_from_column_names(
             cls.event_id_columns, Op.NOT_IN, [str(v) for v in value]
         )
-        deprecated_error_column_conditions = translate_condition_to_function(
-            ErrorIdsArray.visit_not_in(value)
-        )
         return Condition(
-            Function("and", [Function("and", conditions), deprecated_error_column_conditions]),
+            Function("and", conditions),
             Op.EQ,
             1,
         )
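
The simplified visitors OR the per-column checks without the legacy
error_ids branch. A hedged sketch of the resulting condition shape, with
the column names assumed for illustration:

    from uuid import UUID

    from snuba_sdk import Column, Condition, Function, Op

    def visit_eq_sketch(value: UUID) -> Condition:
        event_id_columns = ["error_id", "fatal_id"]  # assumed cls.event_id_columns
        conditions = [
            Function("equals", parameters=[Column(name), str(value)])
            for name in event_id_columns
        ]
        # Matches when any of the new event-id columns equals the given ID.
        return Condition(Function("or", conditions), Op.EQ, 1)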

+ 2 - 6
src/sentry/replays/usecases/query/configs/aggregate.py

@@ -31,7 +31,6 @@ from sentry.replays.usecases.query.conditions import (
     SumOfClickScalar,
     SumOfClickSelectorComposite,
     SumOfDeadClickSelectorComposite,
-    SumOfErrorIdsArray,
     SumOfIPv4Scalar,
     SumOfRageClickSelectorComposite,
     SumOfStringArray,
@@ -86,10 +85,9 @@ search_config: dict[str, FieldProtocol] = {
     "click.textContent": click_field("click_text"),
     "click.title": click_field("click_title"),
     "count_dead_clicks": sum_field("click_is_dead"),
-    "count_errors": sum_field("count_errors"),
     "count_infos": sum_field("count_info_events"),
     "count_warnings": sum_field("count_warning_events"),
-    "new_count_errors": sum_field("count_error_events"),
+    "count_errors": sum_field("count_error_events"),
     "count_rage_clicks": sum_field("click_is_rage"),
     "count_segments": count_field("segment_id"),
     "count_urls": sum_field("count_urls"),
@@ -101,8 +99,7 @@ search_config: dict[str, FieldProtocol] = {
     "dist": string_field("dist"),
     "duration": ComputedField(parse_int, SimpleAggregateDurationScalar),
     "environment": string_field("environment"),
-    "error_ids": ComputedField(parse_uuid, SumOfErrorIdsArray),
-    "new_error_ids": ComputedField(parse_uuid, SumOfErrorIdScalar),
+    "error_ids": ComputedField(parse_uuid, SumOfErrorIdScalar),
     "warning_ids": UUIDColumnField("warning_id", parse_uuid, SumOfUUIDScalar),
     "info_ids": ComputedField(parse_uuid, SumOfInfoIdScalar),
     # Backwards Compat: We pass a simple string to the UUID column. Older versions of ClickHouse
@@ -138,7 +135,6 @@ search_config["user"] = search_config["user.username"]
 # Fields which have multiple names that represent the same search operation are defined here.
 # QQ:JFERG: why dont we have these on the scalar search
 search_config["error_id"] = search_config["error_ids"]
-search_config["new_error_id"] = search_config["new_error_ids"]
 search_config["warning_id"] = search_config["warning_ids"]
 search_config["info_id"] = search_config["info_ids"]
 
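After this change the search grammar resolves both spellings to the
new-column-backed fields, for example:

    from sentry.replays.usecases.query.configs.aggregate import search_config

    # count_errors searches now aggregate the new counter column ...
    count_field = search_config["count_errors"]  # sum_field("count_error_events")
    # ... and the singular alias still points at the same entry.
    assert search_config["error_id"] is search_config["error_ids"]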

+ 1 - 2
src/sentry/replays/usecases/query/configs/aggregate_sort.py

@@ -36,8 +36,7 @@ sort_config = {
     "browser.name": any_if("browser_name"),
     "browser.version": any_if("browser_version"),
     "count_dead_clicks": _click_count_sum_if_after("click_is_dead"),
-    "count_errors": Function("sum", parameters=[Column("count_errors")]),
-    "new_count_errors": Function("sum", parameters=[Column("count_error_events")]),
+    "count_errors": Function("sum", parameters=[Column("count_error_events")]),
     "count_warnings": Function("sum", parameters=[Column("count_warning_events")]),
     "count_infos": Function("sum", parameters=[Column("count_info_events")]),
     "count_rage_clicks": _click_count_sum_if_after("click_is_rage"),
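
Sorting follows suit: ordering by count_errors aggregates the new counter
column. A minimal snuba_sdk sketch of the resulting clause:

    from snuba_sdk import Column, Direction, Function, OrderBy

    # Equivalent to ORDER BY sum(count_error_events) DESC on a replays query.
    order_by = OrderBy(
        Function("sum", parameters=[Column("count_error_events")]),
        Direction.DESC,
    )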

+ 0 - 2
src/sentry/replays/validators.py

@@ -29,8 +29,6 @@ VALID_FIELD_SET = (
     "clicks",
     "info_ids",
     "warning_ids",
-    "new_error_ids",
-    "new_count_errors",
     "count_warnings",
     "count_infos",
 )

+ 10 - 0
tests/sentry/replays/test_organization_replay_details.py

@@ -107,6 +107,15 @@ class OrganizationReplayDetailsTest(APITestCase, ReplaysSnubaTestCase):
                 urls=["http://localhost:3000/"],
             )
         )
+        self.store_replays(
+            self.mock_event_links(
+                seq1_timestamp,
+                self.project.id,
+                "error",
+                replay1_id,
+                "a3a62ef6ac86415b83c2416fc2f76db1",
+            )
+        )
         self.store_replays(
             mock_replay(
                 seq2_timestamp,
@@ -153,5 +162,6 @@ class OrganizationReplayDetailsTest(APITestCase, ReplaysSnubaTestCase):
                 ],
                 count_segments=3,
                 activity=4,
+                count_errors=1,
             )
             assert_expected_response(response_data["data"], expected_response)

+ 31 - 29
tests/sentry/replays/test_organization_replay_index.py

@@ -101,6 +101,11 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                 release=None,
             )
         )
+        self.store_replays(
+            self.mock_event_links(
+                seq1_timestamp, project.id, "error", replay1_id, "a3a62ef6ac86415b83c2416fc2f76db1"
+            )
+        )
 
         with self.feature(REPLAYS_FEATURES):
             response = self.client.get(self.url)
@@ -527,7 +532,12 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
             self.mock_event_links(seq1_timestamp, project.id, "fatal", replay1_id, uuid.uuid4().hex)
         )
         self.store_replays(
-            self.mock_event_links(seq1_timestamp, project.id, "error", replay1_id, uuid.uuid4().hex)
+            self.mock_event_links(seq1_timestamp, project.id, "fatal", replay1_id, uuid.uuid4().hex)
+        )
+        self.store_replays(
+            self.mock_event_links(
+                seq1_timestamp, project.id, "error", replay1_id, "a3a62ef6ac86415b83c2416fc2f76db1"
+            )
         )
         self.store_replays(
             self.mock_event_links(
@@ -607,9 +617,6 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                 "url:example.com",
                 "activity:3",
                 "activity:>2",
-                "new_count_errors:2",
-                "new_count_errors:>1",
-                "new_count_errors:<3",
                 "count_warnings:1",
                 "count_warnings:>0",
                 "count_warnings:<2",
@@ -823,7 +830,6 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                 "user.email",
                 "user.id",
                 "user.username",
-                "new_count_errors",
                 "count_warnings",
                 "count_infos",
             ]
@@ -965,10 +971,8 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                     "count_segments": None,
                     "count_urls": None,
                     "clicks": None,
-                    "new_error_ids": None,
                     "warning_ids": None,
                     "info_ids": None,
-                    "new_count_errors": None,
                     "count_warnings": None,
                     "count_infos": None,
                 }
@@ -1593,6 +1597,11 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
 
         self.store_replays(mock_replay(seq1_timestamp, project.id, replay1_id))
         self.store_replays(mock_replay(seq2_timestamp, project.id, replay1_id, error_ids=[]))
+        self.store_replays(
+            self.mock_event_links(
+                seq1_timestamp, project.id, "error", replay1_id, "a3a62ef6ac86415b83c2416fc2f76db1"
+            )
+        )
 
         with self.feature(REPLAYS_FEATURES):
             queries = [
@@ -1797,23 +1806,21 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
         )
         with self.feature(REPLAYS_FEATURES):
             queries = [
-                f"new_error_id:{uid1}",
-                f"new_error_id:{uid2}",
-                f"new_error_id:[{uid1}]",
-                f"!new_error_id:[{uid3}]",
-                f"!new_error_id:{uid3}",
+                f"error_id:{uid1}",
+                f"error_id:{uid2}",
+                f"error_id:[{uid1}]",
+                f"!error_id:[{uid3}]",
+                f"!error_id:{uid3}",
             ]
             for query in queries:
-                response = self.client.get(
-                    self.url + f"?field=id&field=new_error_ids&query={query}"
-                )
+                response = self.client.get(self.url + f"?field=id&field=error_ids&query={query}")
                 assert response.status_code == 200
                 response_data = response.json()
                 assert len(response_data["data"]) == 1, query
-                assert len(response_data["data"][0]["new_error_ids"]) == 2, query
+                assert len(response_data["data"][0]["error_ids"]) == 2, query
 
             response = self.client.get(
-                self.url + f"?field=id&field=new_error_ids&query=new_error_id:{uid3}"
+                self.url + f"?field=id&field=error_ids&query=error_id:{uid3}"
             )
             assert response.status_code == 200
             response_data = response.json()
@@ -1913,19 +1920,17 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
 
         with self.feature(REPLAYS_FEATURES):
             queries = [
-                f"new_error_ids:{uid1}",
-                f"!new_error_ids:{uid2}",
-                f"new_error_ids:[{uid1},{uid2}]",
-                f"!new_error_ids:[{uid2}]",
+                f"error_ids:{uid1}",
+                f"!error_ids:{uid2}",
+                f"error_ids:[{uid1},{uid2}]",
+                f"!error_ids:[{uid2}]",
             ]
             for query in queries:
-                response = self.client.get(
-                    self.url + f"?field=id&field=new_error_ids&query={query}"
-                )
+                response = self.client.get(self.url + f"?field=id&field=error_ids&query={query}")
                 assert response.status_code == 200
                 response_data = response.json()
                 assert len(response_data["data"]) == 1, query
-                assert len(response_data["data"][0]["new_error_ids"]) == 1, query
+                assert len(response_data["data"][0]["error_ids"]) == 1, query
 
     def test_event_id_count_columns(self):
         project = self.create_project(teams=[self.team])
@@ -1967,11 +1972,8 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
 
         with self.feature(REPLAYS_FEATURES):
             response = self.client.get(
-                self.url
-                + f"?field=id&field=new_count_errors&field=count_warnings&field=count_infos&query=id:{replay1_id}"
+                self.url + f"?field=id&field=count_warnings&field=count_infos&query=id:{replay1_id}"
             )
             assert response.status_code == 200
             response_data = response.json()
-            assert response_data["data"][0]["new_count_errors"] == 2
             assert response_data["data"][0]["count_warnings"] == 1
-            assert response_data["data"][0]["new_count_errors"] == 2
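
End to end, clients now request only the canonical fields. A usage sketch
in the same style as the tests above (the query value is illustrative):

    with self.feature(REPLAYS_FEATURES):
        response = self.client.get(
            self.url + "?field=id&field=error_ids&field=count_errors&query=count_errors:>0"
        )
        assert response.status_code == 200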

Some files were not shown because too many files changed in this diff