Browse Source

feat(stats-detector): Create regression issue in stats detectors (#61189)

In preparation for the auto resolution work, this creates a regression
group for new occurrences where we store the baseline values. This will
allow us to check if there is a regression group associated with an
object, then if there is one, we can then check if the moving average is
close the to original baseline and emit anresolved status change.

---------

Co-authored-by: Nar Saynorath <nar.saynorath@sentry.io>
Tony Xiao 1 year ago
parent
commit
27c6d10f72

+ 8 - 3
src/sentry/statistical_detectors/issue_platform_adapter.py

@@ -14,9 +14,12 @@ from sentry.seer.utils import BreakpointData
 from sentry.utils import metrics
 
 
-def fingerprint_regression(transaction):
+def fingerprint_regression(transaction, full=False):
     prehashed_fingerprint = f"p95_transaction_duration_regression-{transaction}"
-    return hashlib.sha1((prehashed_fingerprint).encode()).hexdigest()
+    fingerprint = hashlib.sha1((prehashed_fingerprint).encode()).hexdigest()
+    if not full:
+        fingerprint = fingerprint[:16]
+    return fingerprint
 
 
 def send_regression_to_platform(regression: BreakpointData, released: bool):
@@ -40,7 +43,9 @@ def send_regression_to_platform(regression: BreakpointData, released: bool):
         resource_id=None,
         project_id=project_id,
         event_id=uuid.uuid4().hex,
-        fingerprint=[fingerprint_regression(regression["transaction"])],
+        # This uses the full fingerprint to avoid creating a new group for existing
+        # issues but in theory this could be switched to the abbreviated fingerprint.
+        fingerprint=[fingerprint_regression(regression["transaction"], full=True)],
         type=issue_type,
         issue_title=issue_type.description,
         subtitle=f"Increased from {displayed_old_baseline}ms to {displayed_new_baseline}ms (P95)",

+ 115 - 25
src/sentry/tasks/statistical_detectors.py

@@ -3,9 +3,10 @@ from __future__ import annotations
 import heapq
 import logging
 from collections import defaultdict
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from typing import Any, DefaultDict, Dict, Generator, Iterable, List, Optional, Tuple
 
+from django.db.models import Q
 from django.utils import timezone as django_timezone
 from snuba_sdk import (
     And,
@@ -32,6 +33,7 @@ from sentry.api.endpoints.project_performance_issue_settings import InternalProj
 from sentry.constants import ObjectStatus
 from sentry.models.options.project_option import ProjectOption
 from sentry.models.project import Project
+from sentry.models.statistical_detectors import RegressionGroup, RegressionType
 from sentry.profiles.utils import get_from_profiling_service
 from sentry.seer.utils import BreakpointData
 from sentry.sentry_metrics import indexer
@@ -286,12 +288,16 @@ def detect_transaction_change_points(
         (projects_by_id[item[0]], item[1]) for item in transactions if item[0] in projects_by_id
     ]
 
-    breakpoint_count = 0
-
     regressions = EndpointRegressionDetector.detect_regressions(
         transaction_pairs, start, "p95(transaction.duration)", TIMESERIES_PER_BATCH
     )
 
+    versioned_regressions = redirect_escalations(regressions, RegressionType.ENDPOINT)
+
+    regressions = save_versioned_regressions(versioned_regressions, RegressionType.ENDPOINT)
+
+    breakpoint_count = 0
+
     for regression in regressions:
         breakpoint_count += 1
         send_regression_to_platform(regression, True)
@@ -361,13 +367,17 @@ def detect_function_change_points(
         (projects_by_id[item[0]], item[1]) for item in functions_list if item[0] in projects_by_id
     ]
 
-    breakpoint_count = 0
-    emitted_count = 0
-
     regressions = FunctionRegressionDetector.detect_regressions(
         function_pairs, start, "p95()", TIMESERIES_PER_BATCH
     )
 
+    versioned_regressions = redirect_escalations(regressions, RegressionType.FUNCTION)
+
+    regressions = save_versioned_regressions(versioned_regressions, RegressionType.FUNCTION)
+
+    breakpoint_count = 0
+    emitted_count = 0
+
     for regression_chunk in chunked(regressions, 100):
         breakpoint_count += len(regression_chunk)
         emitted_count += emit_function_regression_issue(projects_by_id, regression_chunk, start)
@@ -389,13 +399,13 @@ def detect_function_change_points(
 
 def emit_function_regression_issue(
     projects_by_id: Dict[int, Project],
-    breakpoints: List[BreakpointData],
+    regressions: List[BreakpointData],
     start: datetime,
 ) -> int:
     start = start - timedelta(hours=1)
     start = start.replace(minute=0, second=0, microsecond=0)
 
-    project_ids = [int(entry["project"]) for entry in breakpoints]
+    project_ids = [int(regression["project"]) for regression in regressions]
     projects = [projects_by_id[project_id] for project_id in project_ids]
 
     params: Dict[str, Any] = {
@@ -408,11 +418,11 @@ def emit_function_regression_issue(
     conditions = [
         And(
             [
-                Condition(Column("project_id"), Op.EQ, int(entry["project"])),
-                Condition(Column("fingerprint"), Op.EQ, int(entry["transaction"])),
+                Condition(Column("project_id"), Op.EQ, int(regression["project"])),
+                Condition(Column("fingerprint"), Op.EQ, int(regression["transaction"])),
             ]
         )
-        for entry in breakpoints
+        for regression in regressions
     ]
 
     result = functions.query(
@@ -420,7 +430,7 @@ def emit_function_regression_issue(
         query="is_application:1",
         params=params,
         orderby=["project.id"],
-        limit=len(breakpoints),
+        limit=len(regressions),
         referrer=Referrer.API_PROFILING_FUNCTIONS_STATISTICAL_DETECTOR_EXAMPLE.value,
         auto_aggregations=True,
         use_aggregate_conditions=True,
@@ -432,9 +442,9 @@ def emit_function_regression_issue(
 
     payloads = []
 
-    for entry in breakpoints:
-        project_id = int(entry["project"])
-        fingerprint = int(entry["transaction"])
+    for regression in regressions:
+        project_id = int(regression["project"])
+        fingerprint = int(regression["transaction"])
         example = examples.get((project_id, fingerprint))
         if example is None:
             continue
@@ -449,14 +459,14 @@ def emit_function_regression_issue(
                 "project_id": project_id,
                 "profile_id": example,
                 "fingerprint": fingerprint,
-                "absolute_percentage_change": entry["absolute_percentage_change"],
-                "aggregate_range_1": entry["aggregate_range_1"],
-                "aggregate_range_2": entry["aggregate_range_2"],
-                "breakpoint": int(entry["breakpoint"]),
-                "trend_difference": entry["trend_difference"],
-                "trend_percentage": entry["trend_percentage"],
-                "unweighted_p_value": entry["unweighted_p_value"],
-                "unweighted_t_value": entry["unweighted_t_value"],
+                "absolute_percentage_change": regression["absolute_percentage_change"],
+                "aggregate_range_1": regression["aggregate_range_1"],
+                "aggregate_range_2": regression["aggregate_range_2"],
+                "breakpoint": int(regression["breakpoint"]),
+                "trend_difference": regression["trend_difference"],
+                "trend_percentage": regression["trend_percentage"],
+                "unweighted_p_value": regression["unweighted_p_value"],
+                "unweighted_t_value": regression["unweighted_t_value"],
                 "released": True,
             }
         )
@@ -597,8 +607,7 @@ def query_transactions(
         DetectorPayload(
             project_id=row["project_id"],
             group=row["transaction_name"],
-            # take the first 16 chars of the fingerprint as that's sufficiently unique
-            fingerprint=fingerprint_regression(row["transaction_name"])[:16],
+            fingerprint=fingerprint_regression(row["transaction_name"]),
             count=row["count"],
             value=row["p95"],
             timestamp=start,
@@ -907,3 +916,84 @@ def limit_regressions_by_project(
     for regressions in regressions_by_project.values():
         for _, regression in regressions:
             yield regression
+
+
+def get_regression_groups(
+    regression_type: RegressionType, pairs: List[Tuple[int, str]]
+) -> List[RegressionGroup]:
+    conditions = Q()
+    for project_id, fingerprint in pairs:
+        conditions |= Q(project_id=project_id, fingerprint=fingerprint)
+
+    return (
+        RegressionGroup.objects.filter(conditions, type=regression_type.value)
+        .order_by("type", "project_id", "fingerprint", "-version")
+        .distinct("type", "project_id", "fingerprint")
+    )
+
+
+def redirect_escalations(
+    regressions: Generator[BreakpointData, None, None],
+    regression_type: RegressionType,
+    batch_size=100,
+) -> Generator[Tuple[int, BreakpointData], None, None]:
+    for regression_chunk in chunked(regressions, batch_size):
+        existing_regression_groups = {
+            (group.project_id, group.fingerprint): group
+            for group in get_regression_groups(
+                regression_type,
+                [
+                    (
+                        int(regression["project"]),
+                        fingerprint_regression(regression["transaction"]),
+                    )
+                    for regression in regression_chunk
+                ],
+            )
+        }
+
+        for regression in regression_chunk:
+            project_id = int(regression["project"])
+            fingerprint = fingerprint_regression(regression["transaction"])
+            group = existing_regression_groups.get((project_id, fingerprint))
+
+            if group is None:
+                yield 1, regression
+            elif not group.active:
+                yield group.version + 1, regression
+            else:
+                # TODO:
+                # If there is an active regression group, we should check
+                # - if the issue group is still unresolved
+                # - if the issue escalted
+                # then emit an status change message if necessary.
+                pass
+
+
+def save_versioned_regressions(
+    versioned_regressions: Generator[Tuple[int, BreakpointData], None, None],
+    regression_type: RegressionType,
+    batch_size=100,
+) -> Generator[BreakpointData, None, None]:
+
+    for regression_chunk in chunked(versioned_regressions, batch_size):
+        RegressionGroup.objects.bulk_create(
+            [
+                RegressionGroup(
+                    type=regression_type.value,
+                    date_regressed=datetime.utcfromtimestamp(regression["breakpoint"]).replace(
+                        tzinfo=timezone.utc
+                    ),
+                    version=version,
+                    active=True,
+                    project_id=int(regression["project"]),
+                    fingerprint=fingerprint_regression(regression["transaction"]),
+                    baseline=regression["aggregate_range_1"],
+                    regressed=regression["aggregate_range_2"],
+                )
+                for version, regression in regression_chunk
+            ]
+        )
+
+        for _, regression in regression_chunk:
+            yield regression

+ 109 - 2
tests/sentry/tasks/test_statistical_detectors.py

@@ -8,11 +8,13 @@ from django.db.models import F
 from sentry.api.endpoints.project_performance_issue_settings import InternalProjectOptions
 from sentry.models.options.project_option import ProjectOption
 from sentry.models.project import Project
+from sentry.models.statistical_detectors import RegressionGroup, RegressionType
 from sentry.seer.utils import BreakpointData
 from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.snuba.discover import zerofill
 from sentry.snuba.metrics.naming_layer.mri import TransactionMRI
 from sentry.statistical_detectors.detector import DetectorPayload, TrendType
+from sentry.statistical_detectors.issue_platform_adapter import fingerprint_regression
 from sentry.tasks.statistical_detectors import (
     detect_function_change_points,
     detect_function_trends,
@@ -23,6 +25,7 @@ from sentry.tasks.statistical_detectors import (
     query_functions,
     query_transactions,
     query_transactions_timeseries,
+    redirect_escalations,
     run_detection,
 )
 from sentry.testutils.cases import MetricsAPIBaseTestCase, ProfilesSnubaTestCase
@@ -424,6 +427,110 @@ def test_limit_regressions_by_project(ratelimit, timestamp, expected_idx):
     assert set(regressions) == set(expected_regressions)
 
 
+@pytest.mark.parametrize(
+    ["regression_type"],
+    [
+        pytest.param(RegressionType.ENDPOINT, id="endpoint"),
+        pytest.param(RegressionType.FUNCTION, id="function"),
+    ],
+)
+@pytest.mark.parametrize(
+    ["existing", "expected_versions"],
+    [
+        pytest.param([], [1], id="no existing"),
+        pytest.param(
+            [
+                (1, False, "transaction_1"),
+                (2, False, "transaction_1"),
+            ],
+            [3],
+            id="existing inactive",
+        ),
+        pytest.param(
+            [
+                (1, False, "transaction_1"),
+                (2, True, "transaction_1"),
+            ],
+            [None],
+            id="existing active",
+        ),
+        pytest.param(
+            [
+                (1, False, "transaction_1"),
+                (2, False, "transaction_1"),
+                (1, False, "transaction_2"),
+                (2, False, "transaction_2"),
+                (3, False, "transaction_2"),
+                (4, True, "transaction_2"),
+            ],
+            [3, None],
+            id="mixed active and inactive",
+        ),
+        pytest.param(
+            [
+                (1, True, "transaction_1"),
+                (2, False, "transaction_1"),
+            ],
+            [3],
+            id="use latest version",
+        ),
+    ],
+)
+@django_db_all
+def test_redirect_escalations(
+    regression_type,
+    existing,
+    expected_versions,
+    project,
+    timestamp,
+):
+    if existing:
+        RegressionGroup.objects.bulk_create(
+            RegressionGroup(
+                type=regression_type.value,
+                date_regressed=timestamp,
+                version=version,
+                active=active,
+                project_id=project.id,
+                fingerprint=fingerprint_regression(transaction),
+                baseline=100000000.0,
+                regressed=500000000.0,
+            )
+            for version, active, transaction in existing
+        )
+
+    breakpoints: List[BreakpointData] = [
+        {
+            "absolute_percentage_change": 5.0,
+            "aggregate_range_1": 100000000.0,
+            "aggregate_range_2": 500000000.0,
+            "breakpoint": 1687323600,
+            "project": str(project.id),
+            "transaction": "transaction_1",
+            "trend_difference": 400000000.0,
+            "trend_percentage": 5.0,
+            "unweighted_p_value": 0.0,
+            "unweighted_t_value": -float("inf"),
+        }
+    ]
+
+    def mock_regressions():
+        yield from breakpoints
+
+    regressions = list(
+        redirect_escalations(
+            mock_regressions(),
+            regression_type,
+        )
+    )
+
+    assert regressions == [
+        (expected_version, breakpoints[i])
+        for i, expected_version in enumerate(expected_versions)
+        if expected_version is not None
+    ]
+
+
 @mock.patch("sentry.tasks.statistical_detectors.query_functions")
 @mock.patch("sentry.tasks.statistical_detectors.detect_function_change_points")
 @django_db_all
@@ -676,7 +783,7 @@ class FunctionsTasksTest(ProfilesSnubaTestCase):
         mock_value.data = b'{"occurrences":5}'
         mock_get_from_profiling_service.return_value = mock_value
 
-        breakpoints: List[BreakpointData] = [
+        regressions: List[BreakpointData] = [
             {
                 "project": str(project.id),
                 "transaction": str(
@@ -694,7 +801,7 @@ class FunctionsTasksTest(ProfilesSnubaTestCase):
             for project in self.projects
         ]
         emitted = emit_function_regression_issue(
-            {project.id: project for project in self.projects}, breakpoints, self.now
+            {project.id: project for project in self.projects}, regressions, self.now
         )
         assert emitted == 5