Browse Source

feat(sdk-crashes): Collect ART Tracer Crashes (#81420)

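The Android Runtime (ART) Tracer can crash when users enable profiling in
the Sentry Android SDK. While the Sentry Android SDK doesn't directly cause
these crashes, we want to know when they occur. As Sentry doesn't appear
in the stacktrace, we detect these crashes by matching specific function
names (pthread_getcpuclockid, art::Trace::StopTracing and
art::Thread::DumpState) in specific Android APEX packages
(/apex/com.android.art/lib64/bionic/libc.so and
/apex/com.android.art/lib64/libart.so).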

Fixes GH-81172
Philipp Hofmann 3 months ago
parent
commit
9a60418aab

+ 45 - 1
fixtures/sdk_crash_detection/crash_event_android.py

@@ -44,7 +44,9 @@ def get_frames(
 
 
 def get_crash_event(
-    sdk_frame_module="io.sentry.Hub", system_frame_module="java.lang.reflect.Method", **kwargs
+    sdk_frame_module="io.sentry.Hub",
+    system_frame_module="java.lang.reflect.Method",
+    **kwargs,
 ) -> dict[str, object]:
     return get_crash_event_with_frames(
         get_frames(sdk_frame_module, system_frame_module),
@@ -52,6 +54,48 @@ def get_crash_event(
     )
 
 
+def get_apex_frames(
+    apex_frame_function: str,
+    apex_frame_package: str,
+    system_frame_package: str,
+) -> Sequence[MutableMapping[str, str]]:
+    frames = [
+        {
+            "function": "__pthread_start",
+            "raw_function": "__pthread_start(void*)",
+            "symbol": "_ZL15__pthread_startPv",
+            "package": "/apex/com.android.runtime/lib/bionic/libc.so",
+        },
+        {
+            "function": "__start_thread",
+            "symbol": "__start_thread",
+            "package": "/apex/com.android.art/lib64/bionic/libc.so",
+        },
+        {
+            "function": apex_frame_function,
+            "symbol": apex_frame_function,
+            "package": apex_frame_package,
+        },
+        {
+            "function": "invoke",
+            "package": system_frame_package,
+        },
+    ]
+    return frames
+
+
+def get_apex_crash_event(
+    apex_frame_function="__start_thread",
+    apex_frame_package="/apex/com.android.art/lib64/bionic/libc.so",
+    system_frame_package="/apex/com.android.art/lib64/libart.so",
+    **kwargs,
+) -> dict[str, object]:
+    return get_crash_event_with_frames(
+        get_apex_frames(apex_frame_function, apex_frame_package, system_frame_package),
+        **kwargs,
+    )
+
+
 def get_crash_event_with_frames(frames: Sequence[Mapping[str, str]], **kwargs) -> dict[str, object]:
     result = {
         "event_id": "0a52a8331d3b45089ebd74f8118d4fa1",

+ 34 - 2
src/sentry/utils/sdk_crashes/sdk_crash_detection_config.py

@@ -1,5 +1,5 @@
 from collections.abc import Sequence
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum, unique
 from typing import TypedDict
 
@@ -12,6 +12,14 @@ from sentry.utils.sdk_crashes.path_replacer import (
 )
 
 
+@dataclass
+class FunctionAndPathPattern:
+    """Both the function and path pattern must match for a frame to be considered an SDK frame."""
+
+    function_pattern: str
+    path_pattern: str
+
+
 @dataclass
 class SDKFrameConfig:
     function_patterns: set[str]
@@ -20,6 +28,8 @@ class SDKFrameConfig:
 
     path_replacer: PathReplacer
 
+    function_and_path_patterns: list[FunctionAndPathPattern] = field(default_factory=list)
+
 
 @unique
 class SdkName(Enum):
@@ -183,6 +193,8 @@ def build_sdk_crash_detection_configs() -> Sequence[SDKCrashDetectionConfig]:
                 "sentry.java.spring-boot",
                 "sentry.java.spring-boot.jakarta",
                 "sentry.java.spring.jakarta",
+                # Required for getting Android Runtime Tracer crashes.
+                "sentry.native.android",
             ],
             # The sentry-java SDK sends SDK frames for uncaught exceptions since 7.0.0, which is required for detecting SDK crashes.
             # 7.0.0 was released in Nov 2023, see https://github.com/getsentry/sentry-java/releases/tag/7.0.0
@@ -196,13 +208,33 @@ def build_sdk_crash_detection_configs() -> Sequence[SDKCrashDetectionConfig]:
                 r"com.android.internal.**",
                 r"kotlin.**",
                 r"dalvik.**",
+                r"/apex/com.android.*/lib*/**",
             },
             sdk_frame_config=SDKFrameConfig(
                 function_patterns=set(),
                 path_patterns={
                     r"io.sentry.**",
                 },
-                path_replacer=KeepFieldPathReplacer(fields={"module", "filename"}),
+                # The Android Runtime Tracer can crash when users enable profiling in the
+                # Sentry Android SDK. While the Sentry Android SDK doesn't directly cause
+                # these crashes, we must know when they occur. As Sentry doesn't appear in
+                # the stacktrace, we filter for the following specific methods in the
+                # specified Android apex packages.
+                function_and_path_patterns=[
+                    FunctionAndPathPattern(
+                        function_pattern=r"*pthread_getcpuclockid*",
+                        path_pattern=r"/apex/com.android.art/lib64/bionic/libc.so",
+                    ),
+                    FunctionAndPathPattern(
+                        function_pattern=r"*art::Trace::StopTracing*",
+                        path_pattern=r"/apex/com.android.art/lib64/libart.so",
+                    ),
+                    FunctionAndPathPattern(
+                        function_pattern=r"*art::Thread::DumpState*",
+                        path_pattern=r"/apex/com.android.art/lib64/libart.so",
+                    ),
+                ],
+                path_replacer=KeepFieldPathReplacer(fields={"module", "filename", "package"}),
             ),
             sdk_crash_ignore_functions_matchers=set(),
         )

+ 12 - 0
src/sentry/utils/sdk_crashes/sdk_crash_detector.py

@@ -103,6 +103,18 @@ class SDKCrashDetector:
 
         function = frame.get("function")
         if function:
+            for (
+                function_and_path_pattern
+            ) in self.config.sdk_frame_config.function_and_path_patterns:
+                function_pattern = function_and_path_pattern.function_pattern
+                path_pattern = function_and_path_pattern.path_pattern
+
+                function_matches = glob_match(function, function_pattern, ignorecase=True)
+                path_matches = self._path_patters_match_frame({path_pattern}, frame)
+
+                if function_matches and path_matches:
+                    return True
+
             for patterns in self.config.sdk_frame_config.function_patterns:
                 if glob_match(function, patterns, ignorecase=True):
                     return True

+ 2 - 1
tests/sentry/utils/sdk_crashes/test_event_stripper.py

@@ -372,8 +372,8 @@ def test_strip_frames_with_keep_for_fields_path_replacer(store_and_strip_event,
 
     sentry_sdk_frame["module"] = "io.sentry.android.core.SentryAndroidOptions"
     sentry_sdk_frame["filename"] = "SentryAndroidOptions.java"
+    sentry_sdk_frame["package"] = "/apex/com.android.art/lib64/libart.so"
     sentry_sdk_frame["abs_path"] = "remove_me"
-    sentry_sdk_frame["package"] = "remove_me"
 
     event_data = get_crash_event_with_frames(frames)
 
@@ -389,6 +389,7 @@ def test_strip_frames_with_keep_for_fields_path_replacer(store_and_strip_event,
         "function": "register",
         "module": "io.sentry.android.core.SentryAndroidOptions",
         "filename": "SentryAndroidOptions.java",
+        "package": "/apex/com.android.art/lib64/libart.so",
         "in_app": True,
         "image_addr": "0x100304000",
     }

+ 127 - 1
tests/sentry/utils/sdk_crashes/test_sdk_crash_detection_java.py

@@ -4,7 +4,7 @@ from unittest.mock import patch
 
 import pytest
 
-from fixtures.sdk_crash_detection.crash_event_android import get_crash_event
+from fixtures.sdk_crash_detection.crash_event_android import get_apex_crash_event, get_crash_event
 from sentry.testutils.helpers.options import override_options
 from sentry.testutils.pytest.fixtures import django_db_all
 from sentry.utils.safe import get_path, set_path
@@ -143,6 +143,132 @@ def test_sdk_crash_is_reported_with_android_paths(
         assert mock_sdk_crash_reporter.report.call_count == 0
 
 
+@pytest.mark.parametrize(
+    ["apex_frame_function", "apex_frame_package", "system_frame_package", "detected"],
+    [
+        (
+            "pthread_getcpuclockid",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            "/apex/com.android.art/lib64/libart.so",
+            True,
+        ),
+        (
+            "__pthread_getcpuclockid",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            "/apex/com.android.art/lib64/libart.so",
+            True,
+        ),
+        (
+            "pthread_getcpuclockid(void*)",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            "/apex/com.android.art/lib64/libart.so",
+            True,
+        ),
+        (
+            "pthread_getcpuclocki",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            "/apex/com.android.art/lib64/libart.so",
+            False,
+        ),
+        (
+            "pthread_getcpuclockid",
+            "/apex/com.android.art/lib64/bionic/libc.s",
+            "/apex/com.android.art/lib64/libart.so",
+            False,
+        ),
+        (
+            "art::Trace::StopTracing",
+            "/apex/com.android.art/lib64/libart.so",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            True,
+        ),
+        (
+            "art::Trace::StopTracing_",
+            "/apex/com.android.art/lib64/libart.so",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            True,
+        ),
+        (
+            "art::Trace::StopTracing_",
+            "/apex/com.android.art/lib64/libart.s",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            False,
+        ),
+        (
+            "art::Thread::DumpState",
+            "/apex/com.android.art/lib64/libart.so",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            True,
+        ),
+        (
+            "_art::Thread::DumpState",
+            "/apex/com.android.art/lib64/libart.so",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            True,
+        ),
+        (
+            "_art::Thread::DumpState",
+            "/apex/com.android.art/lib64/libar.so",
+            "/apex/com.android.art/lib64/bionic/libc.so",
+            False,
+        ),
+    ],
+)
+@decorators
+def test_sdk_crash_is_reported_for_android_runtime_tracer_crashes(
+    mock_sdk_crash_reporter,
+    mock_random,
+    store_event,
+    configs,
+    apex_frame_function,
+    apex_frame_package,
+    system_frame_package,
+    detected,
+):
+    event = store_event(
+        data=get_apex_crash_event(
+            apex_frame_function=apex_frame_function,
+            apex_frame_package=apex_frame_package,
+            system_frame_package=system_frame_package,
+        )
+    )
+
+    configs[1].organization_allowlist = [event.project.organization_id]
+
+    sdk_crash_detection.detect_sdk_crash(event=event, configs=configs)
+
+    if detected:
+        assert mock_sdk_crash_reporter.report.call_count == 1
+        reported_event_data = mock_sdk_crash_reporter.report.call_args.args[0]
+
+        stripped_frames = get_path(
+            reported_event_data, "exception", "values", -1, "stacktrace", "frames"
+        )
+
+        assert len(stripped_frames) == 4
+
+        system_frame1 = stripped_frames[0]
+        assert system_frame1["function"] == "__pthread_start"
+        assert system_frame1["raw_function"] == "__pthread_start(void*)"
+        assert system_frame1["symbol"] == "_ZL15__pthread_startPv"
+        assert system_frame1["package"] == "/apex/com.android.runtime/lib/bionic/libc.so"
+        assert system_frame1["in_app"] is False
+
+        apex_frame = stripped_frames[2]
+        assert apex_frame["function"] == apex_frame_function
+        assert apex_frame["symbol"] == apex_frame_function
+        assert apex_frame["package"] == apex_frame_package
+        assert apex_frame["in_app"] is True
+
+        system_frame2 = stripped_frames[3]
+        assert system_frame2["function"] == "invoke"
+        assert system_frame2["package"] == system_frame_package
+        assert system_frame2["in_app"] is False
+
+    else:
+        assert mock_sdk_crash_reporter.report.call_count == 0
+
+
 @decorators
 def test_beta_sdk_version_detected(mock_sdk_crash_reporter, mock_random, store_event, configs):
     event_data = get_crash_event()