Просмотр исходного кода

Aggregate spans waterfall to indexed spans (#69954)

Reopening https://github.com/getsentry/sentry/pull/69016 basically
updated slightly to use options to enable/disable indexed spans
Shruthi 10 месяцев назад
Родитель
Сommit
0d6dd4f646

+ 12 - 11
src/sentry/api/endpoints/organization_spans_aggregation.py

@@ -1,7 +1,7 @@
 import hashlib
 from collections import defaultdict, namedtuple
 from collections.abc import Mapping
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Optional, TypedDict
 
 import sentry_sdk
@@ -10,7 +10,7 @@ from rest_framework.request import Request
 from rest_framework.response import Response
 from snuba_sdk import Column, Function
 
-from sentry import eventstore, features
+from sentry import eventstore, features, options
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase
@@ -22,6 +22,7 @@ from sentry.snuba.referrer import Referrer
 from sentry.utils.snuba import raw_snql_query
 
 ALLOWED_BACKENDS = ["indexedSpans", "nodestore"]
+CUTOVER_DATE = datetime(2024, 3, 22, tzinfo=timezone.utc)
 
 EventSpan = namedtuple(
     "EventSpan",
@@ -336,9 +337,7 @@ class OrganizationSpansAggregationEndpoint(OrganizationEventsEndpointBase):
     }
 
     def get(self, request: Request, organization: Organization) -> Response:
-        if not features.has(
-            "organizations:starfish-aggregate-span-waterfall", organization, actor=request.user
-        ):
+        if not features.has("organizations:spans-first-ui", organization, actor=request.user):
             return Response(status=404)
 
         try:
@@ -346,6 +345,14 @@ class OrganizationSpansAggregationEndpoint(OrganizationEventsEndpointBase):
         except NoProjects:
             return Response(status=404)
 
+        enable_indexed_spans = options.get("indexed-spans.agg-span-waterfall.enable")
+
+        start = params["start"]
+        if start and start >= CUTOVER_DATE and enable_indexed_spans:
+            backend = "indexedSpans"
+        else:
+            backend = "nodestore"
+
         transaction = request.query_params.get("transaction", None)
         http_method = request.query_params.get("http.method", None)
         if transaction is None:
@@ -353,14 +360,8 @@ class OrganizationSpansAggregationEndpoint(OrganizationEventsEndpointBase):
                 status=status.HTTP_400_BAD_REQUEST, data={"details": "Transaction not provided"}
             )
 
-        backend = request.query_params.get("backend", "nodestore")
         sentry_sdk.set_tag("aggregate_spans.backend", backend)
 
-        if backend not in ALLOWED_BACKENDS:
-            return Response(
-                status=status.HTTP_400_BAD_REQUEST, data={"details": "Backend not supported"}
-            )
-
         query = f"transaction:{transaction}"
         if http_method is not None:
             query += f" transaction.method:{http_method}"

+ 5 - 0
src/sentry/options/defaults.py

@@ -2373,6 +2373,11 @@ register(
     default=False,
     flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
 )
+register(
+    "indexed-spans.agg-span-waterfall.enable",
+    default=False,
+    flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
+)
 
 # Deobfuscate profiles using Symbolicator
 register(

+ 241 - 144
tests/snuba/api/endpoints/test_organization_spans_aggregation.py

@@ -9,6 +9,8 @@ from snuba_sdk import Column, Condition, Function, Op
 from sentry.api.endpoints.organization_spans_aggregation import NULL_GROUP
 from sentry.testutils.cases import APITestCase, SnubaTestCase
 from sentry.testutils.helpers.datetime import before_now
+from sentry.testutils.helpers.options import override_options
+from sentry.testutils.pytest.fixtures import django_db_all
 from sentry.utils.samples import load_data
 
 MOCK_SNUBA_RESPONSE = {
@@ -163,10 +165,156 @@ MOCK_SNUBA_RESPONSE = {
 }
 
 
-class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase):
+class OrganizationIndexedSpansAggregationTest(APITestCase, SnubaTestCase):
     url_name = "sentry-api-0-organization-spans-aggregation"
     FEATURES = [
-        "organizations:starfish-aggregate-span-waterfall",
+        "organizations:spans-first-ui",
+        "organizations:performance-view",
+    ]
+
+    def setUp(self):
+        super().setUp()
+        self.login_as(user=self.user)
+
+        self.day_ago = before_now(days=1).replace(hour=10, minute=0, second=0, microsecond=0)
+
+        self.url = reverse(
+            self.url_name,
+            kwargs={"organization_slug": self.project.organization.slug},
+        )
+
+    @override_options({"indexed-spans.agg-span-waterfall.enable": True})
+    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
+    def test_simple(self, mock_query):
+        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
+        with self.feature(self.FEATURES):
+            response = self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo", "statsPeriod": "1d"},
+                format="json",
+            )
+
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["count()"] == 2
+        assert data[root_fingerprint]["description"] == "api/0/foo"
+        assert round(data[root_fingerprint]["avg(duration)"]) == 850
+
+        assert data[root_fingerprint]["samples"] == {
+            ("80fe542aea4945ffbe612646987ee449", "root_1"),
+            ("86b21833d1854d9b811000b91e7fccfa", "root_2"),
+        }
+        assert data[root_fingerprint]["sample_spans"] == [
+            {
+                "transaction": "80fe542aea4945ffbe612646987ee449",
+                "timestamp": 1694625139.1,
+                "span": "root_1",
+                "trace": "a" * 32,
+            },
+            {
+                "transaction": "86b21833d1854d9b811000b91e7fccfa",
+                "timestamp": 1694625159.1,
+                "span": "root_2",
+                "trace": "b" * 32,
+            },
+        ]
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "connect"
+        assert round(data[fingerprint]["avg(duration)"]) == 30
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "resolve_orderby"
+        assert data[fingerprint]["avg(exclusive_time)"] == 15.0
+        assert data[fingerprint]["count()"] == 2
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "resolve_orderby"
+        assert data[fingerprint]["avg(exclusive_time)"] == 20.0
+        assert data[fingerprint]["count()"] == 1
+
+    @override_options({"indexed-spans.agg-span-waterfall.enable": True})
+    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
+    def test_offset_logic(self, mock_query):
+        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
+        with self.feature(self.FEATURES):
+            response = self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo", "statsPeriod": "1d"},
+                format="json",
+            )
+
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 30.0
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 35.0
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 53.5
+
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 1075.0
+
+    @override_options({"indexed-spans.agg-span-waterfall.enable": True})
+    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
+    def test_null_group_fallback(self, mock_query):
+        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
+        with self.feature(self.FEATURES):
+            response = self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo", "statsPeriod": "1d"},
+                format="json",
+            )
+
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["description"] == ""
+        assert data[root_fingerprint]["count()"] == 2
+
+    @override_options({"indexed-spans.agg-span-waterfall.enable": True})
+    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
+    def test_http_method_filter(self, mock_query):
+        with self.feature(self.FEATURES):
+            self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo", "http.method": "GET", "statsPeriod": "1d"},
+                format="json",
+            )
+
+            assert (
+                Condition(
+                    lhs=Function(
+                        function="ifNull",
+                        parameters=[
+                            Column(
+                                name="sentry_tags[transaction.method]",
+                            ),
+                            "",
+                        ],
+                        alias=None,
+                    ),
+                    op=Op.EQ,
+                    rhs="GET",
+                )
+                in mock_query.mock_calls[0].args[0].query.where
+            )
+
+
+class OrganizationNodestoreSpansAggregationTest(APITestCase, SnubaTestCase):
+    url_name = "sentry-api-0-organization-spans-aggregation"
+    FEATURES = [
+        "organizations:spans-first-ui",
         "organizations:performance-view",
     ]
 
@@ -210,7 +358,7 @@ class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase):
             data["environment"] = environment
 
         with self.feature(self.FEATURES):
-            return self.store_event(data, project_id=project_id, **kwargs)
+            return self.store_event(data, project_id=project_id, assert_no_errors=False, **kwargs)
 
     def setUp(self):
         super().setUp()
@@ -418,187 +566,136 @@ class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase):
             kwargs={"organization_slug": self.project.organization.slug},
         )
 
-    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
-    def test_simple(self, mock_query):
-        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
-        for backend in ["indexedSpans", "nodestore"]:
-            with self.feature(self.FEATURES):
-                response = self.client.get(
-                    self.url,
-                    data={"transaction": "api/0/foo", "backend": backend},
-                    format="json",
-                )
-
-            assert response.data
-            data = response.data
-            root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
-            assert root_fingerprint in data
-            assert data[root_fingerprint]["count()"] == 2
-            assert data[root_fingerprint]["description"] == "api/0/foo"
-            assert round(data[root_fingerprint]["avg(duration)"]) == 850
-
-            if backend == "indexedSpans":
-                assert data[root_fingerprint]["samples"] == {
-                    ("80fe542aea4945ffbe612646987ee449", "root_1"),
-                    ("86b21833d1854d9b811000b91e7fccfa", "root_2"),
-                }
-                assert data[root_fingerprint]["sample_spans"] == [
-                    {
-                        "transaction": "80fe542aea4945ffbe612646987ee449",
-                        "timestamp": 1694625139.1,
-                        "span": "root_1",
-                        "trace": "a" * 32,
-                    },
-                    {
-                        "transaction": "86b21833d1854d9b811000b91e7fccfa",
-                        "timestamp": 1694625159.1,
-                        "span": "root_2",
-                        "trace": "b" * 32,
-                    },
-                ]
-            else:
-                assert data[root_fingerprint]["samples"] == {
-                    (
-                        self.root_event_1.event_id,
-                        self.span_ids_event_1["A"],
-                    ),
-                    (
-                        self.root_event_2.event_id,
-                        self.span_ids_event_2["A"],
-                    ),
-                }
-                assert data[root_fingerprint]["sample_spans"] == [
-                    {
-                        "transaction": self.root_event_1.event_id,
-                        "timestamp": self.root_event_1.data["start_timestamp"],
-                        "span": self.span_ids_event_1["A"],
-                        "trace": self.root_event_1.data["contexts"]["trace"]["trace_id"],
-                    },
-                    {
-                        "transaction": self.root_event_2.event_id,
-                        "timestamp": self.root_event_2.data["start_timestamp"],
-                        "span": self.span_ids_event_2["A"],
-                        "trace": self.root_event_2.data["contexts"]["trace"]["trace_id"],
-                    },
-                ]
-
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
-            assert data[fingerprint]["description"] == "connect"
-            assert round(data[fingerprint]["avg(duration)"]) == 30
+    @django_db_all
+    def test_simple(self):
+        with self.feature(self.FEATURES):
+            response = self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo"},
+                format="json",
+            )
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
-            assert data[fingerprint]["description"] == "resolve_orderby"
-            assert data[fingerprint]["avg(exclusive_time)"] == 15.0
-            assert data[fingerprint]["count()"] == 2
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["count()"] == 2
+        assert data[root_fingerprint]["description"] == "api/0/foo"
+        assert round(data[root_fingerprint]["avg(duration)"]) == 850
+
+        assert data[root_fingerprint]["samples"] == {
+            (
+                self.root_event_1.event_id,
+                self.span_ids_event_1["A"],
+            ),
+            (
+                self.root_event_2.event_id,
+                self.span_ids_event_2["A"],
+            ),
+        }
+        assert data[root_fingerprint]["sample_spans"] == [
+            {
+                "transaction": self.root_event_1.event_id,
+                "timestamp": self.root_event_1.data["start_timestamp"],
+                "span": self.span_ids_event_1["A"],
+                "trace": self.root_event_1.data["contexts"]["trace"]["trace_id"],
+            },
+            {
+                "transaction": self.root_event_2.event_id,
+                "timestamp": self.root_event_2.data["start_timestamp"],
+                "span": self.span_ids_event_2["A"],
+                "trace": self.root_event_2.data["contexts"]["trace"]["trace_id"],
+            },
+        ]
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
-            assert data[fingerprint]["description"] == "resolve_orderby"
-            assert data[fingerprint]["avg(exclusive_time)"] == 20.0
-            assert data[fingerprint]["count()"] == 1
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "connect"
+        assert round(data[fingerprint]["avg(duration)"]) == 30
 
-    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
-    def test_offset_logic(self, mock_query):
-        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
-        for backend in ["indexedSpans", "nodestore"]:
-            with self.feature(self.FEATURES):
-                response = self.client.get(
-                    self.url,
-                    data={"transaction": "api/0/foo", "backend": backend},
-                    format="json",
-                )
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "resolve_orderby"
+        assert data[fingerprint]["avg(exclusive_time)"] == 15.0
+        assert data[fingerprint]["count()"] == 2
 
-            assert response.data
-            data = response.data
-            root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
-            assert root_fingerprint in data
-            assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
+        assert data[fingerprint]["description"] == "resolve_orderby"
+        assert data[fingerprint]["avg(exclusive_time)"] == 20.0
+        assert data[fingerprint]["count()"] == 1
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
-            assert data[fingerprint]["avg(absolute_offset)"] == 30.0
+    @django_db_all
+    def test_offset_logic(self):
+        with self.feature(self.FEATURES):
+            response = self.client.get(
+                self.url,
+                data={"transaction": "api/0/foo"},
+                format="json",
+            )
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16]
-            assert data[fingerprint]["avg(absolute_offset)"] == 35.0
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
-            assert data[fingerprint]["avg(absolute_offset)"] == 53.5
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 30.0
 
-            fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
-            assert data[fingerprint]["avg(absolute_offset)"] == 1075.0
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 35.0
 
-    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
-    def test_null_group_fallback(self, mock_query):
-        mock_query.side_effect = [MOCK_SNUBA_RESPONSE]
-        for backend in ["indexedSpans", "nodestore"]:
-            with self.feature(self.FEATURES):
-                response = self.client.get(
-                    self.url,
-                    data={"transaction": "api/0/foo", "backend": backend},
-                    format="json",
-                )
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 53.5
 
-            assert response.data
-            data = response.data
-            root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16]
-            assert root_fingerprint in data
-            assert data[root_fingerprint]["description"] == ""
-            assert data[root_fingerprint]["count()"] == 2
+        fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16]
+        assert data[fingerprint]["avg(absolute_offset)"] == 1075.0
 
-    @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query")
-    def test_http_method_filter(self, mock_query):
+    @django_db_all
+    def test_null_group_fallback(self):
         with self.feature(self.FEATURES):
             response = self.client.get(
                 self.url,
-                data={"transaction": "api/0/foo", "backend": "nodestore", "http.method": "GET"},
+                data={"transaction": "api/0/foo"},
                 format="json",
             )
 
         assert response.data
         data = response.data
-        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16]
         assert root_fingerprint in data
+        assert data[root_fingerprint]["description"] == ""
         assert data[root_fingerprint]["count()"] == 2
 
+    @django_db_all
+    def test_http_method_filter(self):
         with self.feature(self.FEATURES):
             response = self.client.get(
                 self.url,
-                data={"transaction": "api/0/foo", "backend": "nodestore", "http.method": "POST"},
+                data={"transaction": "api/0/foo", "http.method": "GET"},
                 format="json",
             )
 
-        assert response.data == {}
+        assert response.data
+        data = response.data
+        root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16]
+        assert root_fingerprint in data
+        assert data[root_fingerprint]["count()"] == 2
 
         with self.feature(self.FEATURES):
-            self.client.get(
+            response = self.client.get(
                 self.url,
-                data={"transaction": "api/0/foo", "backend": "indexedSpans", "http.method": "GET"},
+                data={"transaction": "api/0/foo", "http.method": "POST"},
                 format="json",
             )
 
-            assert (
-                Condition(
-                    lhs=Function(
-                        function="ifNull",
-                        parameters=[
-                            Column(
-                                name="sentry_tags[transaction.method]",
-                            ),
-                            "",
-                        ],
-                        alias=None,
-                    ),
-                    op=Op.EQ,
-                    rhs="GET",
-                )
-                in mock_query.mock_calls[0].args[0].query.where
-            )
+        assert response.data == {}
 
+    @django_db_all
     def test_environment_filter(self):
         with self.feature(self.FEATURES):
             response = self.client.get(
                 self.url,
                 data={
                     "transaction": "api/0/foo",
-                    "backend": "nodestore",
                     "environment": "production",
                 },
                 format="json",
@@ -615,8 +712,8 @@ class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase):
                 self.url,
                 data={
                     "transaction": "api/0/foo",
-                    "backend": "nodestore",
                     "environment": ["production", "development"],
+                    "forceNodestore": "true",
                 },
                 format="json",
             )