Просмотр исходного кода

feat(perf-detector-threshold-configuration): Extended consecutive http spans detection. (#52309)

- Added new detector: `ConsecutiveHTTPSpanDetectorExtended`
- Detector only collects metrics. Doesn't create issues. 
- New detector settings:
<img width="450" alt="Screenshot 2023-07-05 at 3 21 16 PM"
src="https://github.com/getsentry/sentry/assets/60121741/ddb582f0-228c-4dc6-855e-48ebe858d690">

- We care about time saved from parallelizing for this issue the most. 
- Got rid of individual span duration. Parallelizing 101 spans each
taking 10ms , saves 1s. We should create an issue for this. Currently we
don't.
- Min time saved handles the misalignment of length case, we don't need
min_time_saved_ratio. For example parallelizing 998ms, 1ms, 1ms saves
just 2ms which fails the min time saved check.
- Reduced count threshold to 2, from 3. 
- Added tests of new thresholds and ensured passes all other consecutive
http spans tests.

---------

Co-authored-by: Abdullah Khan <abdullahkhan@PG9Y57YDXQ.local>
Abdkhan14 1 год назад
Родитель
Сommit
2c7c969a69

+ 2 - 0
src/sentry/utils/performance_issues/base.py

@@ -36,6 +36,7 @@ class DetectorType(Enum):
     N_PLUS_ONE_API_CALLS = "n_plus_one_api_calls"
     CONSECUTIVE_DB_OP = "consecutive_db"
     CONSECUTIVE_HTTP_OP = "consecutive_http"
+    CONSECUTIVE_HTTP_OP_EXTENDED = "consecutive_http_ext"
     LARGE_HTTP_PAYLOAD = "large_http_payload"
     FILE_IO_MAIN_THREAD = "file_io_main_thread"
     M_N_PLUS_ONE_DB = "m_n_plus_one_db"
@@ -54,6 +55,7 @@ DETECTOR_TYPE_TO_GROUP_TYPE = {
     DetectorType.M_N_PLUS_ONE_DB: PerformanceMNPlusOneDBQueriesGroupType,
     DetectorType.UNCOMPRESSED_ASSETS: PerformanceUncompressedAssetsGroupType,
     DetectorType.CONSECUTIVE_HTTP_OP: PerformanceConsecutiveHTTPQueriesGroupType,
+    DetectorType.CONSECUTIVE_HTTP_OP_EXTENDED: PerformanceConsecutiveHTTPQueriesGroupType,
     DetectorType.DB_MAIN_THREAD: PerformanceDBMainThreadGroupType,
     DetectorType.LARGE_HTTP_PAYLOAD: PerformanceLargeHTTPPayloadGroupType,
 }

+ 7 - 13
src/sentry/utils/performance_issues/detectors/consecutive_db_detector.py

@@ -11,6 +11,10 @@ from sentry.issues.grouptype import PerformanceConsecutiveDBQueriesGroupType
 from sentry.issues.issue_occurrence import IssueEvidence
 from sentry.models import Organization, Project
 from sentry.utils.event_frames import get_sdk_name
+from sentry.utils.performance_issues.detectors.utils import (
+    get_max_span_duration,
+    get_total_span_duration,
+)
 
 from ..base import (
     DetectorType,
@@ -91,7 +95,7 @@ class ConsecutiveDBSpanDetector(PerformanceDetector):
         )
 
         time_saved = self._calculate_time_saved(self.independent_db_spans)
-        total_time = self._sum_span_duration(self.consecutive_db_spans)
+        total_time = get_total_span_duration(self.consecutive_db_spans)
 
         exceeds_time_saved_threshold = time_saved >= self.settings.get("min_time_saved")
 
@@ -181,13 +185,6 @@ class ConsecutiveDBSpanDetector(PerformanceDetector):
 
         return self.consecutive_db_spans[0].get("description", "")
 
-    def _sum_span_duration(self, spans: list[Span]) -> float:
-        "Given a list of spans, find the sum of the span durations in milliseconds"
-        sum = 0.0
-        for span in spans:
-            sum += get_span_duration(span).total_seconds() * 1000
-        return sum
-
     def _set_independent_spans(self, spans: list[Span]):
         """
         Given a list of spans, checks if there is at least a single span that is independent of the rest.
@@ -213,11 +210,8 @@ class ConsecutiveDBSpanDetector(PerformanceDetector):
         this is where thresholds come in
         """
         consecutive_spans = self.consecutive_db_spans
-        total_duration = self._sum_span_duration(consecutive_spans)
-
-        max_independent_span_duration = max(
-            [get_span_duration(span).total_seconds() * 1000 for span in independent_spans]
-        )
+        total_duration = get_total_span_duration(consecutive_spans)
+        max_independent_span_duration = get_max_span_duration(independent_spans)
 
         sum_of_dependent_span_durations = 0.0
         for span in consecutive_spans:

+ 57 - 9
src/sentry/utils/performance_issues/detectors/consecutive_http_detector.py

@@ -7,6 +7,10 @@ from sentry.issues.grouptype import PerformanceConsecutiveHTTPQueriesGroupType
 from sentry.issues.issue_occurrence import IssueEvidence
 from sentry.models import Organization, Project
 from sentry.utils.event import is_event_from_browser_javascript_sdk
+from sentry.utils.performance_issues.detectors.utils import (
+    get_max_span_duration,
+    get_total_span_duration,
+)
 from sentry.utils.safe import get_path
 
 from ..base import (
@@ -66,7 +70,7 @@ class ConsecutiveHTTPSpanDetector(PerformanceDetector):
             for span in self.consecutive_http_spans
         )
 
-        exceeds_duration_between_spans_threshold = all(
+        subceeds_duration_between_spans_threshold = all(
             get_duration_between_spans(
                 self.consecutive_http_spans[idx - 1], self.consecutive_http_spans[idx]
             )
@@ -77,7 +81,7 @@ class ConsecutiveHTTPSpanDetector(PerformanceDetector):
         if (
             exceeds_count_threshold
             and exceeds_span_duration_threshold
-            and exceeds_duration_between_spans_threshold
+            and subceeds_duration_between_spans_threshold
         ):
             self._store_performance_problem()
 
@@ -121,13 +125,6 @@ class ConsecutiveHTTPSpanDetector(PerformanceDetector):
 
         self._reset_variables()
 
-    def _sum_span_duration(self, spans: list[Span]) -> float:
-        "Given a list of spans, find the sum of the span durations in milliseconds"
-        sum = 0.0
-        for span in spans:
-            sum += get_span_duration(span).total_seconds() * 1000
-        return sum
-
     def _overlaps_last_span(self, span: Span) -> bool:
         if len(self.consecutive_http_spans) == 0:
             return False
@@ -174,3 +171,54 @@ class ConsecutiveHTTPSpanDetector(PerformanceDetector):
 
     def is_creation_allowed_for_project(self, project: Project) -> bool:
         return self.settings["detection_enabled"]
+
+
+class ConsecutiveHTTPSpanDetectorExtended(ConsecutiveHTTPSpanDetector):
+    """
+    Detector goals:
+    - Extend Consecutive HTTP Span Detector to mimic detection using thresholds from
+    - Consecutive DB Queries Detector.
+    """
+
+    type = DetectorType.CONSECUTIVE_HTTP_OP_EXTENDED
+    settings_key = DetectorType.CONSECUTIVE_HTTP_OP_EXTENDED
+
+    def _validate_and_store_performance_problem(self):
+        exceeds_count_threshold = len(self.consecutive_http_spans) >= self.settings.get(
+            "consecutive_count_threshold"
+        )
+
+        exceeds_min_time_saved_duration = False
+        if self.consecutive_http_spans:
+            exceeds_min_time_saved_duration = self._calculate_time_saved() >= self.settings.get(
+                "min_time_saved"
+            )
+
+        subceeds_duration_between_spans_threshold = all(
+            get_duration_between_spans(
+                self.consecutive_http_spans[idx - 1], self.consecutive_http_spans[idx]
+            )
+            < self.settings.get("max_duration_between_spans")
+            for idx in range(1, len(self.consecutive_http_spans))
+        )
+
+        if (
+            exceeds_count_threshold
+            and subceeds_duration_between_spans_threshold
+            and exceeds_min_time_saved_duration
+        ):
+            self._store_performance_problem()
+
+    def _calculate_time_saved(self) -> float:
+        total_time = get_total_span_duration(self.consecutive_http_spans)
+        max_span_duration = get_max_span_duration(self.consecutive_http_spans)
+
+        return total_time - max_span_duration
+
+    def is_creation_allowed_for_organization(self, organization: Organization) -> bool:
+        # Only collecting metrics.
+        return False
+
+    def is_creation_allowed_for_project(self, project: Project) -> bool:
+        # Only collecting metrics.
+        return False

+ 18 - 0
src/sentry/utils/performance_issues/detectors/utils.py

@@ -0,0 +1,18 @@
+from typing import List
+
+from sentry.utils.performance_issues.base import get_span_duration
+
+from ..types import Span
+
+
+def get_total_span_duration(spans: List[Span]) -> float:
+    "Given a list of spans, find the sum of the span durations in milliseconds"
+    sum = 0.0
+    for span in spans:
+        sum += get_span_duration(span).total_seconds() * 1000
+    return sum
+
+
+def get_max_span_duration(spans: List[Span]) -> float:
+    "Given a list of spans, return the duration of the longest span in milliseconds"
+    return max([get_span_duration(span).total_seconds() * 1000 for span in spans])

+ 10 - 0
src/sentry/utils/performance_issues/performance_detection.py

@@ -14,6 +14,9 @@ from sentry.projectoptions.defaults import DEFAULT_PROJECT_PERFORMANCE_DETECTION
 from sentry.utils import metrics
 from sentry.utils.event import is_event_from_browser_javascript_sdk
 from sentry.utils.event_frames import get_sdk_name
+from sentry.utils.performance_issues.detectors.consecutive_http_detector import (
+    ConsecutiveHTTPSpanDetectorExtended,
+)
 from sentry.utils.safe import get_path
 
 from .base import DetectorType, PerformanceDetector
@@ -283,6 +286,12 @@ def get_detection_settings(project_id: Optional[int] = None) -> Dict[DetectorTyp
             ],  # ms
             "detection_enabled": settings["consecutive_http_spans_detection_enabled"],
         },
+        DetectorType.CONSECUTIVE_HTTP_OP_EXTENDED: {
+            # time saved by running all queries in parallel
+            "min_time_saved": 500,
+            "consecutive_count_threshold": 2,
+            "max_duration_between_spans": 1000,  # ms
+        },
         DetectorType.LARGE_HTTP_PAYLOAD: {
             "payload_size_threshold": settings["large_http_payload_size_threshold"],
             "detection_enabled": settings["large_http_payload_detection_enabled"],
@@ -300,6 +309,7 @@ def _detect_performance_problems(
     detectors: List[PerformanceDetector] = [
         ConsecutiveDBSpanDetector(detection_settings, data),
         ConsecutiveHTTPSpanDetector(detection_settings, data),
+        ConsecutiveHTTPSpanDetectorExtended(detection_settings, data),
         DBMainThreadDetector(detection_settings, data),
         SlowDBQueryDetector(detection_settings, data),
         RenderBlockingAssetSpanDetector(detection_settings, data),

+ 250 - 0
tests/sentry/utils/performance_issues/test_consecutive_http_detector_extended.py

@@ -0,0 +1,250 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from sentry.issues.grouptype import PerformanceConsecutiveHTTPQueriesGroupType
+from sentry.spans.grouping.strategy.base import Span
+from sentry.testutils import TestCase
+from sentry.testutils.performance_issues.event_generators import (
+    create_event,
+    create_span,
+    modify_span_start,
+)
+from sentry.testutils.silo import region_silo_test
+from sentry.utils.performance_issues.detectors.consecutive_http_detector import (
+    ConsecutiveHTTPSpanDetectorExtended,
+)
+from sentry.utils.performance_issues.performance_detection import (
+    get_detection_settings,
+    run_detector_on_data,
+)
+from sentry.utils.performance_issues.performance_problem import PerformanceProblem
+
+
+@region_silo_test
+@pytest.mark.django_db
+class ConsecutiveHTTPSpansDetectorExtendedTest(TestCase):
+    def setUp(self):
+        super().setUp()
+        self._settings = get_detection_settings()
+
+    def find_problems(self, event: dict[str, Any]) -> list[PerformanceProblem]:
+        detector = ConsecutiveHTTPSpanDetectorExtended(self._settings, event)
+        run_detector_on_data(detector, event)
+        return list(detector.stored_problems.values())
+
+    def create_issue_spans(self, span_duration=2000) -> list[Span]:
+        spans = [
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint1", "hash1"
+            ),
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint2", "hash2"
+            ),
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint3", "hash3"
+            ),
+        ]
+        spans = [
+            modify_span_start(span, span_duration * spans.index(span)) for span in spans
+        ]  # ensure spans don't overlap
+
+        return spans
+
+    def create_issue_event(self, span_duration=2000):
+        spans = self.create_issue_spans(span_duration)
+        return create_event(spans)
+
+    def test_detects_consecutive_http_issue(self):
+        event = self.create_issue_event()
+        problems = self.find_problems(event)
+
+        assert problems == [
+            PerformanceProblem(
+                fingerprint="1-1009-00b8644b56309c8391aa365783145162ab9c589a",
+                op="http",
+                desc="GET /api/0/organizations/endpoint1",
+                type=PerformanceConsecutiveHTTPQueriesGroupType,
+                parent_span_ids=None,
+                cause_span_ids=[],
+                offender_span_ids=[
+                    "bbbbbbbbbbbbbbbb",
+                    "bbbbbbbbbbbbbbbb",
+                    "bbbbbbbbbbbbbbbb",
+                ],
+                evidence_data={
+                    "parent_span_ids": [],
+                    "cause_span_ids": [],
+                    "offender_span_ids": [
+                        "bbbbbbbbbbbbbbbb",
+                        "bbbbbbbbbbbbbbbb",
+                        "bbbbbbbbbbbbbbbb",
+                    ],
+                    "op": "http",
+                },
+                evidence_display=[],
+            )
+        ]
+
+    def test_does_not_detects_consecutive_http_issue_low_time_saved(self):
+        spans = [  # min time saved by parallelizing is 2s
+            create_span("http.client", 1000, "GET /api/0/organizations/endpoint1", "hash1"),
+            create_span("http.client", 1000, "GET /api/0/organizations/endpoint2", "hash2"),
+            create_span("http.client", 1000, "GET /api/0/organizations/endpoint3", "hash3"),
+        ]
+        spans = [
+            modify_span_start(span, 1000 * spans.index(span)) for span in spans
+        ]  # ensure spans don't overlap
+        problems = self.find_problems(create_event(spans))
+
+        assert len(problems) == 1
+
+        spans = [  # min time saved by parallelizing is 40ms
+            create_span("http.client", 20, "GET /api/0/organizations/endpoint1", "hash1"),
+            create_span("http.client", 20, "GET /api/0/organizations/endpoint2", "hash2"),
+            create_span("http.client", 1000, "GET /api/0/organizations/endpoint3", "hash3"),
+        ]
+        spans = [
+            modify_span_start(span, 1000 * spans.index(span)) for span in spans
+        ]  # ensure spans don't overlap
+
+        problems = self.find_problems(create_event(spans))
+
+        assert problems == []
+
+    def test_does_not_detect_consecutive_http_issue_with_frontend_events(self):
+        event = {
+            **self.create_issue_event(),
+            "sdk": {"name": "sentry.javascript.browser"},
+        }
+        problems = self.find_problems(event)
+        assert problems == []
+
+    def test_does_not_detect_consecutive_http_issue_with_low_count(self):
+        spans = [  # count less than threshold
+            create_span("http.client", 20, "GET /api/0/organizations/endpoint1", "hash1"),
+        ]
+
+        problems = self.find_problems(create_event(spans))
+        assert problems == []
+
+    def test_detects_consecutive_http_issue_with_low_duration(self):
+        event = self.create_issue_event(300)
+        problems = self.find_problems(event)
+
+        assert len(problems) == 1
+
+    def test_detects_consecutive_with_non_http_between_http_spans(self):
+        spans = self.create_issue_spans()
+
+        spans.insert(
+            1, modify_span_start(create_span("resource.script", 500, "/static/js/bundle.js"), 2000)
+        )
+
+        event = create_event(spans)
+
+        problems = self.find_problems(event)
+
+        assert problems == [
+            PerformanceProblem(
+                fingerprint="1-1009-00b8644b56309c8391aa365783145162ab9c589a",
+                op="http",
+                desc="GET /api/0/organizations/endpoint1",
+                type=PerformanceConsecutiveHTTPQueriesGroupType,
+                parent_span_ids=None,
+                cause_span_ids=[],
+                offender_span_ids=[
+                    "bbbbbbbbbbbbbbbb",
+                    "bbbbbbbbbbbbbbbb",
+                    "bbbbbbbbbbbbbbbb",
+                ],
+                evidence_data={
+                    "parent_span_ids": [],
+                    "cause_span_ids": [],
+                    "offender_span_ids": [
+                        "bbbbbbbbbbbbbbbb",
+                        "bbbbbbbbbbbbbbbb",
+                        "bbbbbbbbbbbbbbbb",
+                    ],
+                    "op": "http",
+                },
+                evidence_display=[],
+            )
+        ]
+
+    def test_does_not_detect_nextjs_asset(self):
+        span_duration = 2000  # ms
+        spans = [
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint1", "hash1"
+            ),
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint2", "hash2"
+            ),
+        ]
+        spans = [modify_span_start(span, span_duration * spans.index(span)) for span in spans]
+
+        assert len(self.find_problems(create_event(spans))) == 1
+
+        spans[0] = modify_span_start(
+            create_span("http.client", 2000, "GET /_next/static/css/file-hash-abc.css", "hash4"),
+            0,
+        )
+
+        assert self.find_problems(create_event(spans)) == []
+
+    def test_does_not_detect_with_high_duration_between_spans(self):
+        span_duration = 2000
+        spans = [
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint1", "hash1"
+            ),
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint2", "hash2"
+            ),
+            create_span(
+                "http.client", span_duration, "GET /api/0/organizations/endpoint3", "hash3"
+            ),
+        ]
+
+        spans = [
+            modify_span_start(span, (10000 + span_duration) * spans.index(span)) for span in spans
+        ]  # ensure spans don't overlap
+
+        assert self.find_problems(create_event(spans)) == []
+
+    def test_fingerprints_match_with_duplicate_http(self):
+        span_duration = 2000
+        spans = [
+            create_span("http.client", span_duration, "GET /api/endpoint1", "hash1"),
+            create_span("http.client", span_duration, "GET /api/endpoint2", "hash2"),
+            create_span("http.client", span_duration, "GET /api/endpoint3", "hash3"),
+        ]
+
+        spans = [
+            modify_span_start(span, span_duration * spans.index(span)) for span in spans
+        ]  # ensure spans don't overlap
+
+        problem_1 = self.find_problems(create_event(spans))[0]
+
+        spans.append(
+            modify_span_start(
+                create_span("http.client", span_duration, "GET /api/endpoint3", "hash3"), 6000
+            )
+        )
+
+        problem_2 = self.find_problems(create_event(spans))[0]
+
+        assert problem_2.fingerprint == "1-1009-515a42c2614f98fa886b6d9ad1ddfe1929329f53"
+        assert problem_1.fingerprint == problem_2.fingerprint
+
+    def test_issue_creation_not_allowed_for_project(self):
+        project = self.create_project()
+        event = self.create_issue_event()
+
+        settings = get_detection_settings(project.id)
+        detector = ConsecutiveHTTPSpanDetectorExtended(settings, event)
+
+        assert not detector.is_creation_allowed_for_project(project)

+ 1 - 0
tests/sentry/utils/performance_issues/test_performance_detection.py

@@ -476,6 +476,7 @@ class PerformanceDetectionTest(TestCase):
                     "consecutive_db": False,
                     "large_http_payload": False,
                     "consecutive_http": False,
+                    "consecutive_http_ext": False,
                     "slow_db_query": False,
                     "render_blocking_assets": False,
                     "n_plus_one_db": False,