Browse Source

feat(perf-issues): Switch rates and settings to system options (#38767)

This switches issue creation to the system-option for the
'n_plus_one_db' detector, as well as using system options as the primary
settings for thresholds (so long as a project option isn't set, which
overrides system option settings). Finally, adds a feature flag check so
only organizations with the 'performance-issues-ingest' non-flagr
feature will have any creation allowed at all.

After this PR goes in, we can check that system options are properly
controlling creation of issues, then flip the project-option
'sentry:performance_issue_creation_rate' default from 0.0 to 1.0 in a
subsequent PR. That next PR will enable system/flag based issue creation
by default.
Kev 2 years ago
parent
commit
9f23c75e5a

+ 52 - 15
src/sentry/utils/performance_issues/performance_detection.py

@@ -11,9 +11,9 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 
 import sentry_sdk
 
-from sentry import nodestore, options, projectoptions
+from sentry import features, nodestore, options, projectoptions
 from sentry.eventstore.models import Event
-from sentry.models import ProjectOption
+from sentry.models import Organization, Project, ProjectOption
 from sentry.types.issues import GroupType
 from sentry.utils import metrics
 
@@ -49,6 +49,11 @@ DETECTOR_TYPE_TO_GROUP_TYPE = {
     DetectorType.N_PLUS_ONE_DB_QUERIES: GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES,
 }
 
+# Detector and the corresponding system option must be added to this list to have issues created.
+DETECTOR_TYPE_ISSUE_CREATION_TO_SYSTEM_OPTION = {
+    DetectorType.N_PLUS_ONE_DB_QUERIES: "performance.issues.n_plus_one_db.problem-creation",
+}
+
 
 @dataclass
 class PerformanceProblem:
@@ -143,21 +148,35 @@ def detect_performance_problems(data: Event) -> List[PerformanceProblem]:
     return []
 
 
-# Gets some of the thresholds to perform performance detection. Can be made configurable later.
-# Thresholds are in milliseconds.
+# Gets the thresholds to perform performance detection.
+# Duration thresholds are in milliseconds.
 # Allowed span ops are allowed span prefixes. (eg. 'http' would work for a span with 'http.client' as its op)
 def get_detection_settings(project_id: str):
-    default_settings = projectoptions.get_well_known_default(
+    default_project_settings = projectoptions.get_well_known_default(
         "sentry:performance_issue_settings",
         project=project_id,
     )
-    issue_settings = ProjectOption.objects.get_value(
-        project_id, "sentry:performance_issue_settings", default_settings
+    project_settings = ProjectOption.objects.get_value(
+        project_id, "sentry:performance_issue_settings", default_project_settings
+    )
+
+    use_project_option_settings = default_project_settings != project_settings
+    merged_project_settings = {
+        **default_project_settings,
+        **project_settings,
+    }  # Merge saved project settings into default so updating the default to add new settings works in the future.
+
+    # Use project settings if they've been adjusted at all, to allow customization, otherwise fetch settings from system-wide options.
+    settings = (
+        merged_project_settings
+        if use_project_option_settings
+        else {
+            "n_plus_one_db_count": options.get("performance.issues.n_plus_one_db.count_threshold"),
+            "n_plus_one_db_duration_threshold": options.get(
+                "performance.issues.n_plus_one_db.duration_threshold"
+            ),
+        }
     )
-    settings = {
-        **default_settings,
-        **issue_settings,
-    }  # Merge saved settings into default so updating the default works in the future.
 
     return {
         DetectorType.DUPLICATE_SPANS: [
@@ -236,20 +255,21 @@ def _detect_performance_problems(data: Event, sdk_span: Any) -> List[Performance
         DetectorType.N_PLUS_ONE_DB_QUERIES: NPlusOneDBSpanDetector(detection_settings, data),
     }
 
-    # Create performance issues for N+1 DB queries first
-    used_perf_issue_detectors = {DetectorType.N_PLUS_ONE_DB_QUERIES}
-
     for span in spans:
         for _, detector in detectors.items():
             detector.visit_span(span)
     for _, detector in detectors.items():
         detector.on_complete()
 
+    # Metrics reporting only for detection, not created issues.
     report_metrics_for_detectors(event_id, detectors, sdk_span)
 
+    # Get list of detectors that are allowed to create issues.
+    allowed_perf_issue_detectors = get_allowed_issue_creation_detectors(project_id)
+
     detected_problems = [
         (i, detector_type)
-        for detector_type in used_perf_issue_detectors
+        for detector_type in allowed_perf_issue_detectors
         for _, i in detectors[detector_type].stored_problems.items()
     ]
 
@@ -261,6 +281,23 @@ def _detect_performance_problems(data: Event, sdk_span: Any) -> List[Performance
     ]
 
 
+# Uses options and flags to determine which orgs and which detectors automatically create performance issues.
+def get_allowed_issue_creation_detectors(project_id: str):
+    project = Project.objects.get_from_cache(id=project_id)
+    organization = Organization.objects.get_from_cache(id=project.organization_id)
+    if not features.has("organizations:performance-issues-ingest", organization):
+        # Only organizations with this non-flagr feature have performance issues created.
+        return {}
+
+    allowed_detectors = set()
+    for detector_type, system_option in DETECTOR_TYPE_ISSUE_CREATION_TO_SYSTEM_OPTION.items():
+        rate = options.get(system_option)
+        if rate and rate > random.random():
+            allowed_detectors.add(detector_type)
+
+    return allowed_detectors
+
+
 def prepare_problem_for_grouping(
     problem: Union[PerformanceProblem, PerformanceSpanProblem],
     data: Event,

+ 12 - 2
tests/sentry/api/serializers/test_event.py

@@ -384,10 +384,14 @@ class DetailedEventSerializerTest(TestCase):
     @override_options({"store.use-ingest-performance-detection-only": 1.0})
     @override_options({"performance.issues.all.problem-creation": 1.0})
     @override_options({"performance.issues.all.problem-detection": 1.0})
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_performance_problem(self):
         self.project.update_option("sentry:performance_issue_creation_rate", 1.0)
         with mock.patch("sentry_sdk.tracing.Span.containing_transaction"), self.feature(
-            "projects:performance-suspect-spans-ingestion"
+            {
+                "projects:performance-suspect-spans-ingestion": True,
+                "organizations:performance-issues-ingest": True,
+            }
         ):
             manager = EventManager(make_event(**EVENTS["n-plus-one-in-django-index-view"]))
             manager.normalize()
@@ -420,11 +424,17 @@ class DetailedEventSerializerTest(TestCase):
     @override_options({"store.use-ingest-performance-detection-only": 1.0})
     @override_options({"performance.issues.all.problem-creation": 1.0})
     @override_options({"performance.issues.all.problem-detection": 1.0})
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_performance_problem_no_stored_data(self):
         self.project.update_option("sentry:performance_issue_creation_rate", 1.0)
         with mock.patch("sentry_sdk.tracing.Span.containing_transaction"), mock.patch(
             "sentry.event_manager.EventPerformanceProblem"
-        ), self.feature("projects:performance-suspect-spans-ingestion"):
+        ), self.feature(
+            {
+                "projects:performance-suspect-spans-ingestion": True,
+                "organizations:performance-issues-ingest": True,
+            }
+        ):
             manager = EventManager(make_event(**EVENTS["n-plus-one-in-django-index-view"]))
             manager.normalize()
             event = manager.save(self.project.id)

+ 10 - 2
tests/sentry/event_manager/test_event_manager.py

@@ -2105,11 +2105,15 @@ class EventManagerTest(TestCase, EventManagerTestMixin):
     @override_options({"store.use-ingest-performance-detection-only": 1.0})
     @override_options({"performance.issues.all.problem-creation": 1.0})
     @override_options({"performance.issues.all.problem-detection": 1.0})
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_perf_issue_creation(self):
         self.project.update_option("sentry:performance_issue_creation_rate", 1.0)
 
         with mock.patch("sentry_sdk.tracing.Span.containing_transaction"), self.feature(
-            "projects:performance-suspect-spans-ingestion"
+            {
+                "projects:performance-suspect-spans-ingestion": True,
+                "organizations:performance-issues-ingest": True,
+            }
         ):
             manager = EventManager(make_event(**EVENTS["n-plus-one-in-django-index-view"]))
             manager.normalize()
@@ -2164,11 +2168,15 @@ class EventManagerTest(TestCase, EventManagerTestMixin):
     @override_options({"store.use-ingest-performance-detection-only": 1.0})
     @override_options({"performance.issues.all.problem-creation": 1.0})
     @override_options({"performance.issues.all.problem-detection": 1.0})
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_perf_issue_update(self):
         self.project.update_option("sentry:performance_issue_creation_rate", 1.0)
 
         with mock.patch("sentry_sdk.tracing.Span.containing_transaction"), self.feature(
-            "projects:performance-suspect-spans-ingestion"
+            {
+                "projects:performance-suspect-spans-ingestion": True,
+                "organizations:performance-issues-ingest": True,
+            }
         ):
             manager = EventManager(make_event(**EVENTS["n-plus-one-in-django-index-view"]))
             manager.normalize()

+ 91 - 44
tests/sentry/utils/performance_issues/test_performance_detection.py

@@ -2,6 +2,7 @@ import os
 import unittest
 from unittest.mock import Mock, call, patch
 
+from sentry import projectoptions
 from sentry.eventstore.models import Event
 from sentry.testutils import TestCase
 from sentry.testutils.helpers import override_options
@@ -63,6 +64,31 @@ def create_event(spans, event_id="a" * 16):
     return {"event_id": event_id, "project": PROJECT_ID, "spans": spans}
 
 
+def assert_n_plus_one_db_problem(perf_problems):
+    assert perf_problems == [
+        PerformanceProblem(
+            fingerprint="1-GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES-8d86357da4d8a866b19c97670edee38d037a7bc8",
+            op="db",
+            desc="SELECT `books_author`.`id`, `books_author`.`name` FROM `books_author` WHERE `books_author`.`id` = %s LIMIT 21",
+            type=GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES,
+            parent_span_ids=["8dd7a5869a4f4583"],
+            cause_span_ids=["9179e43ae844b174"],
+            offender_span_ids=[
+                "b8be6138369491dd",
+                "b2d4826e7b618f1b",
+                "b3fdeea42536dbf1",
+                "b409e78a092e642f",
+                "86d2ede57bbf48d4",
+                "8e554c84cdc9731e",
+                "94d6230f3f910e12",
+                "a210b87a2191ceb6",
+                "88a5ccaf25b9bd8f",
+                "bb32cf50fc56b296",
+            ],
+        )
+    ]
+
+
 class PerformanceDetectionTest(unittest.TestCase):
     def setUp(self):
         super().setUp()
@@ -70,6 +96,24 @@ class PerformanceDetectionTest(unittest.TestCase):
         self.project_option_mock = patch_project_option_get.start()
         self.addCleanup(patch_project_option_get.stop)
 
+        patch_project = patch("sentry.models.Project.objects.get_from_cache")
+        self.project_mock = patch_project.start()
+        self.addCleanup(patch_project.stop)
+
+        patch_organization = patch("sentry.models.Organization.objects.get_from_cache")
+        self.organization_mock = patch_organization.start()
+        self.addCleanup(patch_organization.stop)
+
+        self.features = ["organizations:performance-issues-ingest"]
+
+        def has_feature(feature, org):
+            return feature in self.features
+
+        patch_features = patch("sentry.features.has")
+        self.features_mock = patch_features.start()
+        self.features_mock.side_effect = has_feature
+        self.addCleanup(patch_features.stop)
+
     @patch("sentry.utils.performance_issues.performance_detection._detect_performance_problems")
     def test_options_disabled(self, mock):
         event = {}
@@ -84,33 +128,13 @@ class PerformanceDetectionTest(unittest.TestCase):
                 detect_performance_problems(event)
         assert mock.call_count == 1
 
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_project_option_overrides_default(self):
         n_plus_one_event = EVENTS["n-plus-one-in-django-index-view"]
         sdk_span_mock = Mock()
 
         perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
-        assert perf_problems == [
-            PerformanceProblem(
-                fingerprint="1-GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES-8d86357da4d8a866b19c97670edee38d037a7bc8",
-                op="db",
-                desc="SELECT `books_author`.`id`, `books_author`.`name` FROM `books_author` WHERE `books_author`.`id` = %s LIMIT 21",
-                type=GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES,
-                parent_span_ids=["8dd7a5869a4f4583"],
-                cause_span_ids=["9179e43ae844b174"],
-                offender_span_ids=[
-                    "b8be6138369491dd",
-                    "b2d4826e7b618f1b",
-                    "b3fdeea42536dbf1",
-                    "b409e78a092e642f",
-                    "86d2ede57bbf48d4",
-                    "8e554c84cdc9731e",
-                    "94d6230f3f910e12",
-                    "a210b87a2191ceb6",
-                    "88a5ccaf25b9bd8f",
-                    "bb32cf50fc56b296",
-                ],
-            )
-        ]
+        assert_n_plus_one_db_problem(perf_problems)
 
         self.project_option_mock.return_value = {
             "n_plus_one_db_duration_threshold": 100000,
@@ -119,6 +143,49 @@ class PerformanceDetectionTest(unittest.TestCase):
         perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
         assert perf_problems == []
 
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
+    def test_no_feature_flag_disables_creation(self):
+        self.features = []
+        n_plus_one_event = EVENTS["n-plus-one-in-django-index-view"]
+        sdk_span_mock = Mock()
+
+        perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
+        assert perf_problems == []
+
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 0.0})
+    def test_system_option_disables_detector_issue_creation(self):
+        n_plus_one_event = EVENTS["n-plus-one-in-django-index-view"]
+        sdk_span_mock = Mock()
+
+        perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
+        assert perf_problems == []
+
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
+    def test_system_option_used_when_project_option_is_default(self):
+        n_plus_one_event = EVENTS["n-plus-one-in-django-index-view"]
+        sdk_span_mock = Mock()
+
+        self.project_option_mock.return_value = projectoptions.get_well_known_default(
+            "sentry:performance_issue_settings", project=1
+        )
+        with override_options(
+            {
+                "performance.issues.n_plus_one_db.count_threshold": 20,
+                "performance.issues.n_plus_one_db.duration_threshold": 100,
+            }
+        ):
+            perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
+            assert perf_problems == []
+
+        with override_options(
+            {
+                "performance.issues.n_plus_one_db.count_threshold": 5,
+                "performance.issues.n_plus_one_db.duration_threshold": 100,
+            }
+        ):
+            perf_problems = _detect_performance_problems(n_plus_one_event, sdk_span_mock)
+            assert_n_plus_one_db_problem(perf_problems)
+
     def test_calls_detect_duplicate(self):
         no_duplicate_event = create_event([create_span("db")] * 4)
         duplicate_not_allowed_op_event = create_event([create_span("random", 100.0)] * 5)
@@ -492,6 +559,7 @@ class PerformanceDetectionTest(unittest.TestCase):
 
         assert sdk_span_mock.containing_transaction.set_tag.call_count == 0
 
+    @override_options({"performance.issues.n_plus_one_db.problem-creation": 1.0})
     def test_detects_multiple_performance_issues_in_n_plus_one_query(self):
         n_plus_one_event = EVENTS["n-plus-one-in-django-index-view"]
         sdk_span_mock = Mock()
@@ -525,28 +593,7 @@ class PerformanceDetectionTest(unittest.TestCase):
                 call("_pi_n_plus_one_db", "b8be6138369491dd"),
             ]
         )
-        assert perf_problems == [
-            PerformanceProblem(
-                fingerprint="1-GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES-8d86357da4d8a866b19c97670edee38d037a7bc8",
-                op="db",
-                desc="SELECT `books_author`.`id`, `books_author`.`name` FROM `books_author` WHERE `books_author`.`id` = %s LIMIT 21",
-                type=GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES,
-                parent_span_ids=["8dd7a5869a4f4583"],
-                cause_span_ids=["9179e43ae844b174"],
-                offender_span_ids=[
-                    "b8be6138369491dd",
-                    "b2d4826e7b618f1b",
-                    "b3fdeea42536dbf1",
-                    "b409e78a092e642f",
-                    "86d2ede57bbf48d4",
-                    "8e554c84cdc9731e",
-                    "94d6230f3f910e12",
-                    "a210b87a2191ceb6",
-                    "88a5ccaf25b9bd8f",
-                    "bb32cf50fc56b296",
-                ],
-            )
-        ]
+        assert_n_plus_one_db_problem(perf_problems)
 
     @patch("sentry.utils.metrics.incr")
     def test_does_not_report_metric_on_non_truncated_n_plus_one_query(self, incr_mock):