Browse Source

feat(timing-issues): Introduce a db on main thread detector (#45929)

- This introduces the db on main thread detector, which works nearly
identically to the file io on main thread detector, except that it looks
for spans that are db instead of just file io
- Closes #45930
William Mak 2 years ago
parent
commit
7a1a57251f

+ 276 - 0
fixtures/events/performance_problems/db-on-main-thread.json

@@ -0,0 +1,276 @@
+{
+  "event_id": "c119e45a9d724b1891df4651ebf9e6db",
+  "project": 5428559,
+  "release": "io.sentry.samples.android@1.1.0+2",
+  "dist": "2",
+  "platform": "java",
+  "message": "",
+  "datetime": "2022-11-21T11:57:38.806589+00:00",
+  "tags": [
+    [
+      "device",
+      "Android SDK built for x86"
+    ],
+    [
+      "device.family",
+      "Android"
+    ],
+    [
+      "environment",
+      "debug"
+    ],
+    [
+      "isSideLoaded",
+      "true"
+    ],
+    [
+      "level",
+      "info"
+    ],
+    [
+      "os",
+      "Android 10"
+    ],
+    [
+      "os.name",
+      "Android"
+    ],
+    [
+      "os.rooted",
+      "no"
+    ],
+    [
+      "dist",
+      "2"
+    ],
+    [
+      "release",
+      "io.sentry.samples.android@1.1.0+2"
+    ],
+    [
+      "user",
+      "id:0f38dda2-6ef7-4d9a-b7f4-6bcc80b85b2e"
+    ],
+    [
+      "transaction",
+      "MainActivity.add_attachment"
+    ]
+  ],
+  "_metrics": {
+    "bytes.ingested.event": 3725,
+    "bytes.stored.event": 4851
+  },
+  "breadcrumbs": {
+    "values": [
+      {
+        "timestamp": 1669031851.824,
+        "type": "navigation",
+        "category": "ui.lifecycle",
+        "level": "info",
+        "data": {
+          "screen": "MainActivity",
+          "state": "created"
+        }
+      },
+      {
+        "timestamp": 1669031851.942,
+        "type": "navigation",
+        "category": "ui.lifecycle",
+        "level": "info",
+        "data": {
+          "screen": "MainActivity",
+          "state": "started"
+        }
+      },
+      {
+        "timestamp": 1669031851.943,
+        "type": "session",
+        "category": "app.lifecycle",
+        "level": "info",
+        "data": {
+          "state": "start"
+        }
+      },
+      {
+        "timestamp": 1669031851.95,
+        "type": "navigation",
+        "category": "app.lifecycle",
+        "level": "info",
+        "data": {
+          "state": "foreground"
+        }
+      },
+      {
+        "timestamp": 1669031851.953,
+        "type": "navigation",
+        "category": "ui.lifecycle",
+        "level": "info",
+        "data": {
+          "screen": "MainActivity",
+          "state": "resumed"
+        }
+      },
+      {
+        "timestamp": 1669031858.676,
+        "type": "user",
+        "category": "ui.click",
+        "level": "info",
+        "data": {
+          "view.class": "androidx.appcompat.widget.AppCompatButton",
+          "view.id": "add_attachment"
+        }
+      }
+    ]
+  },
+  "breakdowns": {
+    "span_ops": {
+      "total.time": {
+        "value": 94.411134,
+        "unit": "millisecond"
+      }
+    }
+  },
+  "contexts": {
+    "app": {
+      "app_start_time": "2022-11-21T11:57:31.523Z",
+      "app_identifier": "io.sentry.samples.android",
+      "app_name": "Sentry sample",
+      "app_version": "1.1.0",
+      "app_build": "2",
+      "permissions": {
+        "ACCESS_NETWORK_STATE": "granted",
+        "CAMERA": "not_granted",
+        "FOREGROUND_SERVICE": "granted",
+        "INTERNET": "granted",
+        "READ_EXTERNAL_STORAGE": "not_granted",
+        "READ_PHONE_STATE": "not_granted",
+        "WRITE_EXTERNAL_STORAGE": "not_granted"
+      },
+      "type": "app"
+    },
+    "device": {
+      "name": "Android SDK built for x86",
+      "family": "Android",
+      "model": "Android SDK built for x86",
+      "model_id": "QSR1.190920.001",
+      "orientation": "portrait",
+      "manufacturer": "Google",
+      "brand": "google",
+      "screen_density": 2.625,
+      "screen_dpi": 420,
+      "simulator": true,
+      "boot_time": "2022-11-21T10:57:17.182Z",
+      "timezone": "Europe/Vienna",
+      "archs": [
+        "x86"
+      ],
+      "id": "0f38dda2-6ef7-4d9a-b7f4-6bcc80b85b2e",
+      "language": "en",
+      "locale": "en_US",
+      "screen_height_pixels": 1794,
+      "screen_width_pixels": 1080,
+      "type": "device"
+    },
+    "os": {
+      "name": "Android",
+      "version": "10",
+      "build": "QSR1.190920.001",
+      "kernel_version": "4.14.112+",
+      "rooted": false,
+      "type": "os"
+    },
+    "trace": {
+      "trace_id": "b2a33f3f79fe4a7c8de3426725a045cb",
+      "span_id": "b93d2be92cd64fd5",
+      "op": "ui.action.click",
+      "status": "ok",
+      "exclusive_time": 35.177709,
+      "client_sample_rate": 1,
+      "hash": "1ff9a18d6a6b09a8",
+      "type": "trace"
+    }
+  },
+  "culprit": "MainActivity.add_attachment",
+  "environment": "debug",
+  "grouping_config": {
+    "enhancements": "eJybzDRxY3J-bm5-npWRgaGlroGxrpHxxEkT1-Zm5usVp-aVFFXqaWlNZAQAKGsOFg",
+    "id": "newstyle:2019-10-29"
+  },
+  "hashes": [],
+  "ingest_path": [
+    {
+      "version": "22.11.0",
+      "public_key": "XE7QiyuNlja9PZ7I9qJlwQotzecWrUIN91BAO7Q5R38"
+    }
+  ],
+  "key_id": "1336851",
+  "level": "info",
+  "location": "MainActivity.add_attachment",
+  "logger": "",
+  "metadata": {
+    "location": "MainActivity.add_attachment",
+    "title": "MainActivity.add_attachment"
+  },
+  "nodestore_insert": 1669031864.427658,
+  "received": 1669031861.899161,
+  "sdk": {
+    "name": "sentry.java.android.timber",
+    "version": "6.8.0",
+    "packages": [
+      {
+        "name": "maven:io.sentry:sentry",
+        "version": "6.8.0"
+      },
+      {
+        "name": "maven:io.sentry:sentry-android-core",
+        "version": "6.8.0"
+      },
+      {
+        "name": "maven:io.sentry:sentry-android-ndk",
+        "version": "6.8.0"
+      },
+      {
+        "name": "maven:io.sentry:sentry-android-timber",
+        "version": "6.8.0"
+      }
+    ]
+  },
+  "span_grouping_config": {
+    "id": "default:2022-10-27"
+  },
+  "spans": [
+    {
+      "timestamp": 1669031858.806411,
+      "start_timestamp": 1669031857.712,
+      "exclusive_time": 1094.411134,
+      "description": "SELECT * FROM my_cool_database WHERE some_col=some_val",
+      "op": "db",
+      "span_id": "054ba3a374d543eb",
+      "parent_span_id": "b93d2be92cd64fd5",
+      "trace_id": "b2a33f3f79fe4a7c8de3426725a045cb",
+      "status": "ok",
+      "data": {
+        "blocked_main_thread": true
+      },
+      "hash": "8add714f71a52ef2"
+    }
+  ],
+  "start_timestamp": 1669031858.677,
+  "timestamp": 1669031858.806589,
+  "title": "MainActivity.add_attachment",
+  "transaction": "MainActivity.add_attachment",
+  "transaction_info": {
+    "source": "component"
+  },
+  "type": "transaction",
+  "user": {
+    "id": "0f38dda2-6ef7-4d9a-b7f4-6bcc80b85b2e",
+    "ip_address": "127.0.0.1",
+    "geo": {
+      "country_code": "AT",
+      "city": "Enns",
+      "region": "Austria"
+    }
+  },
+  "version": "7"
+}

+ 1 - 0
src/sentry/features/__init__.py

@@ -117,6 +117,7 @@ default_manager.add("organizations:performance-anomaly-detection-ui", Organizati
 default_manager.add("organizations:performance-chart-interpolation", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:performance-consecutive-db-issue", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
 default_manager.add("organizations:performance-consecutive-http-detector", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
+default_manager.add("organizations:performance-db-main-thread-detector", OrganizationFeature)
 default_manager.add("organizations:performance-n-plus-one-api-calls-detector", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
 default_manager.add("organizations:performance-issues-compressed-assets-detector", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
 default_manager.add("organizations:performance-issues-render-blocking-assets-detector", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)

+ 8 - 0
src/sentry/issues/grouptype.py

@@ -256,6 +256,14 @@ class PerformanceUncompressedAssetsGroupType(PerformanceGroupTypeDefaults, Group
     noise_config = NoiseConfig(ignore_limit=100)
 
 
+@dataclass(frozen=True)
+class PerformanceDBMainThreadGroupType(PerformanceGroupTypeDefaults, GroupType):
+    type_id = 1013
+    slug = "performance_db_main_thread"
+    description = "DB on Main Thread"
+    category = GroupCategory.PERFORMANCE.value
+
+
 @dataclass(frozen=True)
 class ProfileFileIOGroupType(GroupType):
     type_id = 2001

+ 1 - 0
src/sentry/options/defaults.py

@@ -621,6 +621,7 @@ register("performance.issues.n_plus_one_db.problem-detection", default=0.0)
 register("performance.issues.n_plus_one_db.problem-creation", default=0.0)
 register("performance.issues.n_plus_one_db_ext.problem-creation", default=0.0)
 register("performance.issues.file_io_main_thread.problem-creation", default=0.0)
+register("performance.issues.db_main_thread.problem-creation", default=0.0)
 register("performance.issues.n_plus_one_api_calls.problem-creation", default=0.0)
 register("performance.issues.n_plus_one_api_calls.la-rollout", default=0.0)
 register("performance.issues.n_plus_one_api_calls.ea-rollout", default=0.0)

+ 4 - 0
src/sentry/utils/performance_issues/base.py

@@ -14,6 +14,7 @@ from sentry.eventstore.models import Event
 from sentry.issues.grouptype import (
     PerformanceConsecutiveDBQueriesGroupType,
     PerformanceConsecutiveHTTPQueriesGroupType,
+    PerformanceDBMainThreadGroupType,
     PerformanceFileIOMainThreadGroupType,
     PerformanceMNPlusOneDBQueriesGroupType,
     PerformanceNPlusOneAPICallsGroupType,
@@ -38,6 +39,7 @@ class DetectorType(Enum):
     FILE_IO_MAIN_THREAD = "file_io_main_thread"
     M_N_PLUS_ONE_DB = "m_n_plus_one_db"
     UNCOMPRESSED_ASSETS = "uncompressed_assets"
+    DB_MAIN_THREAD = "db_main_thread"
 
 
 DETECTOR_TYPE_TO_GROUP_TYPE = {
@@ -51,6 +53,7 @@ DETECTOR_TYPE_TO_GROUP_TYPE = {
     DetectorType.M_N_PLUS_ONE_DB: PerformanceMNPlusOneDBQueriesGroupType,
     DetectorType.UNCOMPRESSED_ASSETS: PerformanceUncompressedAssetsGroupType,
     DetectorType.CONSECUTIVE_HTTP_OP: PerformanceConsecutiveHTTPQueriesGroupType,
+    DetectorType.DB_MAIN_THREAD: PerformanceDBMainThreadGroupType,
 }
 
 
@@ -66,6 +69,7 @@ DETECTOR_TYPE_ISSUE_CREATION_TO_SYSTEM_OPTION = {
     DetectorType.SLOW_DB_QUERY: "performance.issues.slow_db_query.problem-creation",
     DetectorType.RENDER_BLOCKING_ASSET_SPAN: "performance.issues.render_blocking_assets.problem-creation",
     DetectorType.M_N_PLUS_ONE_DB: "performance.issues.m_n_plus_one_db.problem-creation",
+    DetectorType.DB_MAIN_THREAD: "performance.issues.db_main_thread.problem-creation",
 }
 
 

+ 1 - 1
src/sentry/utils/performance_issues/detectors/__init__.py

@@ -1,6 +1,6 @@
 from .consecutive_db_detector import ConsecutiveDBSpanDetector  # NOQA
 from .consecutive_http_detector import ConsecutiveHTTPSpanDetector  # NOQA
-from .file_io_main_thread_detector import FileIOMainThreadDetector  # NOQA
+from .io_main_thread_detector import DBMainThreadDetector, FileIOMainThreadDetector  # NOQA
 from .mn_plus_one_db_span_detector import MNPlusOneDBSpanDetector  # NOQA
 from .n_plus_one_api_calls_detector import NPlusOneAPICallsDetector  # NOQA
 from .n_plus_one_db_span_detector import (  # NOQA

+ 100 - 41
src/sentry/utils/performance_issues/detectors/file_io_main_thread_detector.py → src/sentry/utils/performance_issues/detectors/io_main_thread_detector.py

@@ -7,7 +7,10 @@ from collections import defaultdict
 from symbolic import ProguardMapper  # type: ignore
 
 from sentry import features
-from sentry.issues.grouptype import PerformanceFileIOMainThreadGroupType
+from sentry.issues.grouptype import (
+    PerformanceDBMainThreadGroupType,
+    PerformanceFileIOMainThreadGroupType,
+)
 from sentry.models import Organization, Project, ProjectDebugFile
 
 from ..base import DetectorType, PerformanceDetector, total_span_time
@@ -15,7 +18,52 @@ from ..performance_problem import PerformanceProblem
 from ..types import Span
 
 
-class FileIOMainThreadDetector(PerformanceDetector):
+class BaseIOMainThreadDetector(PerformanceDetector):
+    __slots__ = ("spans_involved", "stored_problems")
+
+    def init(self):
+        self.spans_involved = {}
+        self.most_recent_start_time = {}
+        self.most_recent_hash = {}
+        self.stored_problems = {}
+        self.mapper = None
+        self.parent_to_blocked_span = defaultdict(list)
+
+    def visit_span(self, span: Span):
+        if self._is_io_on_main_thread(span) and span.get("op", "").lower().startswith(
+            self.SPAN_PREFIX
+        ):
+            parent_span_id = span.get("parent_span_id")
+            self.parent_to_blocked_span[parent_span_id].append(span)
+
+    def on_complete(self):
+        for parent_span_id, span_list in self.parent_to_blocked_span.items():
+            span_list = [
+                span for span in span_list if "start_timestamp" in span and "timestamp" in span
+            ]
+            total_duration = total_span_time(span_list)
+            settings_for_span = self.settings_for_span(span_list[0])
+            if not settings_for_span:
+                return
+
+            _, _, _, _, settings = settings_for_span
+            if total_duration >= settings["duration_threshold"]:
+                fingerprint = self._fingerprint(span_list)
+                self.stored_problems[fingerprint] = PerformanceProblem(
+                    fingerprint=fingerprint,
+                    op=span_list[0].get("op"),
+                    desc=span_list[0].get("description", ""),
+                    parent_span_ids=[parent_span_id],
+                    type=self.group_type,
+                    cause_span_ids=[],
+                    offender_span_ids=[span["span_id"] for span in span_list if "span_id" in span],
+                )
+
+    def is_creation_allowed_for_project(self, project: Project) -> bool:
+        return True
+
+
+class FileIOMainThreadDetector(BaseIOMainThreadDetector):
     """
     Checks for a file io span on the main thread
     """
@@ -23,16 +71,13 @@ class FileIOMainThreadDetector(PerformanceDetector):
     __slots__ = ("spans_involved", "stored_problems")
 
     IGNORED_EXTENSIONS = {".nib", ".plist"}
+    SPAN_PREFIX = "file"
     type: DetectorType = DetectorType.FILE_IO_MAIN_THREAD
     settings_key = DetectorType.FILE_IO_MAIN_THREAD
+    group_type = PerformanceFileIOMainThreadGroupType
 
     def init(self):
-        self.spans_involved = {}
-        self.most_recent_start_time = {}
-        self.most_recent_hash = {}
-        self.stored_problems = {}
-        self.mapper = None
-        self.parent_to_blocked_span = defaultdict(list)
+        super().init()
         self._prepare_deobfuscation()
 
     def _prepare_deobfuscation(self):
@@ -78,34 +123,6 @@ class FileIOMainThreadDetector(PerformanceDetector):
         else:
             return frame.get("function", "")
 
-    def visit_span(self, span: Span):
-        if self._is_file_io_on_main_thread(span):
-            parent_span_id = span.get("parent_span_id")
-            self.parent_to_blocked_span[parent_span_id].append(span)
-
-    def on_complete(self):
-        for parent_span_id, span_list in self.parent_to_blocked_span.items():
-            span_list = [
-                span for span in span_list if "start_timestamp" in span and "timestamp" in span
-            ]
-            total_duration = total_span_time(span_list)
-            settings_for_span = self.settings_for_span(span_list[0])
-            if not settings_for_span:
-                return
-
-            _, _, _, _, settings = settings_for_span
-            if total_duration >= settings["duration_threshold"]:
-                fingerprint = self._fingerprint(span_list)
-                self.stored_problems[fingerprint] = PerformanceProblem(
-                    fingerprint=fingerprint,
-                    op=span_list[0].get("op"),
-                    desc=span_list[0].get("description", ""),
-                    parent_span_ids=[parent_span_id],
-                    type=PerformanceFileIOMainThreadGroupType,
-                    cause_span_ids=[],
-                    offender_span_ids=[span["span_id"] for span in span_list if "span_id" in span],
-                )
-
     def _fingerprint(self, span_list) -> str:
         call_stack_strings = []
         overall_stack = []
@@ -118,13 +135,11 @@ class FileIOMainThreadDetector(PerformanceDetector):
             overall_stack.append(
                 ".".join(sorted(set(call_stack_strings), key=lambda c: call_stack_strings.index(c)))
             )
-        call_stack = "-".join(
-            sorted(set(overall_stack), key=lambda s: overall_stack.index(s))
-        ).encode("utf8")
-        hashed_stack = hashlib.sha1(call_stack).hexdigest()
+        call_stack = "-".join(sorted(set(overall_stack), key=lambda s: overall_stack.index(s)))
+        hashed_stack = hashlib.sha1(call_stack.encode("utf8")).hexdigest()
         return f"1-{PerformanceFileIOMainThreadGroupType.type_id}-{hashed_stack}"
 
-    def _is_file_io_on_main_thread(self, span: Span) -> bool:
+    def _is_io_on_main_thread(self, span: Span) -> bool:
         data = span.get("data", {})
         if data is None:
             return False
@@ -139,5 +154,49 @@ class FileIOMainThreadDetector(PerformanceDetector):
             "organizations:performance-file-io-main-thread-detector", organization, actor=None
         )
 
+
+class DBMainThreadDetector(BaseIOMainThreadDetector):
+    """
+    Checks for a file io span on the main thread
+    """
+
+    __slots__ = ("spans_involved", "stored_problems")
+
+    SPAN_PREFIX = "db"
+    type: DetectorType = DetectorType.DB_MAIN_THREAD
+    settings_key = DetectorType.DB_MAIN_THREAD
+    group_type = PerformanceDBMainThreadGroupType
+
+    def init(self):
+        self.spans_involved = {}
+        self.most_recent_start_time = {}
+        self.most_recent_hash = {}
+        self.stored_problems = {}
+        self.mapper = None
+        self.parent_to_blocked_span = defaultdict(list)
+
+    def _fingerprint(self, span_list) -> str:
+        description_strings = []
+        for span in span_list:
+            description_strings.append(span.get("description"))
+        # Use set to remove dupes, and list index to preserve order
+        joined_queries = "-".join(
+            sorted(set(description_strings), key=lambda c: description_strings.index(c))
+        )
+        hashed_queries = hashlib.sha1(joined_queries.encode("utf8")).hexdigest()
+        return f"1-{PerformanceDBMainThreadGroupType.type_id}-{hashed_queries}"
+
+    def _is_io_on_main_thread(self, span: Span) -> bool:
+        data = span.get("data", {})
+        if data is None:
+            return False
+        # doing is True since the value can be any type
+        return data.get("blocked_main_thread", False) is True
+
+    def is_creation_allowed_for_organization(self, organization: Organization) -> bool:
+        return features.has(
+            "organizations:performance-db-main-thread-detector", organization, actor=None
+        )
+
     def is_creation_allowed_for_project(self, project: Project) -> bool:
         return True

+ 6 - 0
src/sentry/utils/performance_issues/performance_detection.py

@@ -200,6 +200,12 @@ def get_detection_settings(project_id: Optional[int] = None) -> Dict[DetectorTyp
                 "duration_threshold": 16,
             }
         ],
+        DetectorType.DB_MAIN_THREAD: [
+            {
+                # Basically the same as file io, but db instead, so continue using 16ms
+                "duration_threshold": 16,
+            }
+        ],
         DetectorType.N_PLUS_ONE_API_CALLS: {
             "detection_rate": settings["n_plus_one_api_calls_detection_rate"],
             "duration_threshold": 50,  # ms

+ 65 - 0
tests/sentry/utils/performance_issues/test_db_main_thread_detector.py

@@ -0,0 +1,65 @@
+from typing import List
+
+import pytest
+
+from sentry.eventstore.models import Event
+from sentry.issues.grouptype import PerformanceDBMainThreadGroupType
+from sentry.testutils import TestCase
+from sentry.testutils.performance_issues.event_generators import get_event
+from sentry.testutils.silo import region_silo_test
+from sentry.utils.performance_issues.detectors import DBMainThreadDetector
+from sentry.utils.performance_issues.performance_detection import (
+    PerformanceProblem,
+    get_detection_settings,
+    run_detector_on_data,
+)
+
+
+@region_silo_test
+@pytest.mark.django_db
+class DBMainThreadDetectorTest(TestCase):
+    def setUp(self):
+        super().setUp()
+        self.settings = get_detection_settings()
+
+    def find_problems(self, event: Event) -> List[PerformanceProblem]:
+        detector = DBMainThreadDetector(self.settings, event)
+        run_detector_on_data(detector, event)
+        return list(detector.stored_problems.values())
+
+    def test_detects_db_main_thread(self):
+        event = get_event("db-on-main-thread")
+
+        assert self.find_problems(event) == [
+            PerformanceProblem(
+                fingerprint=f"1-{PerformanceDBMainThreadGroupType.type_id}-86f1961bdc10a14809866c6a6ec0033797123ba9",
+                op="db",
+                desc="SELECT * FROM my_cool_database WHERE some_col=some_val",
+                type=PerformanceDBMainThreadGroupType,
+                parent_span_ids=["b93d2be92cd64fd5"],
+                cause_span_ids=[],
+                offender_span_ids=["054ba3a374d543eb"],
+            )
+        ]
+
+    def test_does_not_detect_db_main_thread(self):
+        event = get_event("db-on-main-thread")
+        event["spans"][0]["data"]["blocked_main_thread"] = False
+
+        assert self.find_problems(event) == []
+
+    def test_gives_problem_correct_title(self):
+        event = get_event("db-on-main-thread")
+        event["spans"][0]["data"]["blocked_main_thread"] = True
+        problem = self.find_problems(event)[0]
+        assert problem.title == "DB on Main Thread"
+
+    def test_duplicate_calls_do_not_change_callstack(self):
+        event = get_event("db-on-main-thread")
+        event["spans"][0]["data"]["blocked_main_thread"] = True
+        single_span_problem = self.find_problems(event)[0]
+        single_problem_fingerprint = single_span_problem.fingerprint
+        event["spans"].append(event["spans"][0])
+        double_span_problem = self.find_problems(event)[0]
+        assert double_span_problem.title == "DB on Main Thread"
+        assert double_span_problem.fingerprint == single_problem_fingerprint

+ 1 - 0
tests/snuba/api/endpoints/test_organization_events_trace.py

@@ -55,6 +55,7 @@ class OrganizationEventsTraceEndpointBase(APITestCase, SnubaTestCase):
             span = data["spans"][0]
             if "data" not in span:
                 span["data"] = {}
+            span["op"] = "file.write"
             span["data"].update({"duration": 1, "blocked_main_thread": True})
         with self.feature(self.FEATURES):
             return self.store_event(data, project_id=project_id, **kwargs)