Browse Source

feat(java): deobfuscate exception value (#42349)

Adds a preprocessor to the Java plugin that deobfuscates the exception
values of error events. The mapping is built from the last frames of the
raw and deobfuscated stack traces. Exception values are used as the
subheading/culprit for issues in the Sentry UI, so it is useful to have
them deobfuscated when possible.
edwardgou-sentry 2 years ago
parent
commit
1e6dba0aa0

+ 6 - 0
src/sentry/lang/java/plugin.py

@@ -1,5 +1,7 @@
 from symbolic import ProguardMapper
 
+from sentry.lang.java.processing import deobfuscate_exception_value
+from sentry.lang.java.utils import has_proguard_file
 from sentry.models import EventError, ProjectDebugFile
 from sentry.plugins.base.v2 import Plugin2
 from sentry.reprocessing import report_processing_issue
@@ -134,3 +136,7 @@ class JavaPlugin(Plugin2):
     def get_stacktrace_processors(self, data, stacktrace_infos, platforms, **kwargs):
         if "java" in platforms:
             return [JavaStacktraceProcessor]
+
+    def get_event_preprocessors(self, data):
+        """
+        Return the preprocessors to run on ``data``.
+
+        Yields the exception value deobfuscator when ``has_proguard_file``
+        accepts the event; otherwise nothing is returned (``None``).
+        """
+        if not has_proguard_file(data):
+            return None
+        return [deobfuscate_exception_value]

+ 25 - 0
src/sentry/lang/java/processing.py

@@ -0,0 +1,25 @@
+import re
+
+from sentry import features
+from sentry.models import Organization, Project
+from sentry.utils.safe import get_path
+
+
+def deobfuscate_exception_value(data):
+    """
+    Deobfuscate the value of the last exception in ``data`` in place.
+
+    The obfuscated ``module.function`` name taken from the last raw stack frame
+    is replaced, wherever it occurs in the exception value, by the name taken
+    from the last deobfuscated frame. Nothing is changed unless the
+    ``organizations:java-exception-value-deobfuscation`` feature is enabled for
+    the organization owning ``data["project"]``.
+
+    Returns ``data``.
+    """
+    project = Project.objects.get_from_cache(id=data["project"])
+    organization = Organization.objects.get_from_cache(id=project.organization_id)
+
+    if not features.has("organizations:java-exception-value-deobfuscation", organization):
+        return data
+
+    # The mapping is built from the last frame of the deobfuscated stacktrace
+    # and the last frame of the raw (still obfuscated) stacktrace.
+    exception = get_path(data, "exception", "values", -1)
+    frame = get_path(exception, "stacktrace", "frames", -1)
+    raw_frame = get_path(exception, "raw_stacktrace", "frames", -1)
+    if not (frame and raw_frame):
+        return data
+
+    value = get_path(exception, "value")
+    name_parts = [
+        get_path(source, key) for source in (raw_frame, frame) for key in ("module", "function")
+    ]
+    # Incomplete events are passed through untouched: a missing value would
+    # make the substitution raise, and a missing module/function would build
+    # a bogus "None.<name>" mapping.
+    if not isinstance(value, str) or not all(isinstance(part, str) for part in name_parts):
+        return data
+
+    raw_module, raw_function, module, function = name_parts
+    # Plain substring replacement: unlike a regex substitution, the
+    # replacement text is never interpreted as a template, so backslashes in
+    # the deobfuscated name are inserted literally.
+    exception["value"] = value.replace(f"{raw_module}.{raw_function}", f"{module}.{function}")
+    return data

+ 9 - 0
src/sentry/lang/java/utils.py

@@ -0,0 +1,9 @@
+from sentry.utils.safe import get_path
+
+
+def has_proguard_file(data):
+    """
+    Checks whether an event contains a proguard file.
+
+    Returns True when any entry under ``debug_meta.images`` has the type
+    ``"proguard"``, regardless of its position in the list, and False when
+    there are no images at all.
+    """
+    # ``filter=True`` is passed through unchanged from the original lookup;
+    # ``or ()`` covers events where the images list is missing.
+    images = get_path(data, "debug_meta", "images", filter=True) or ()
+    return any(get_path(image, "type") == "proguard" for image in images)

+ 89 - 84
tests/relay_integration/lang/java/test_plugin.py

@@ -45,99 +45,104 @@ PROGUARD_BUG_SOURCE = b"x"
 
 class BasicResolvingIntegrationTest(RelayStoreHelper, TransactionTestCase):
     def test_basic_resolving(self):
-        url = reverse(
-            "sentry-api-0-dsym-files",
-            kwargs={
-                "organization_slug": self.project.organization.slug,
-                "project_slug": self.project.slug,
-            },
-        )
+        with self.feature("organizations:java-exception-value-deobfuscation"):
+            url = reverse(
+                "sentry-api-0-dsym-files",
+                kwargs={
+                    "organization_slug": self.project.organization.slug,
+                    "project_slug": self.project.slug,
+                },
+            )
 
-        self.login_as(user=self.user)
+            self.login_as(user=self.user)
 
-        out = BytesIO()
-        f = zipfile.ZipFile(out, "w")
-        f.writestr("proguard/%s.txt" % PROGUARD_UUID, PROGUARD_SOURCE)
-        f.writestr("ignored-file.txt", b"This is just some stuff")
-        f.close()
+            out = BytesIO()
+            f = zipfile.ZipFile(out, "w")
+            f.writestr("proguard/%s.txt" % PROGUARD_UUID, PROGUARD_SOURCE)
+            f.writestr("ignored-file.txt", b"This is just some stuff")
+            f.close()
 
-        response = self.client.post(
-            url,
-            {
-                "file": SimpleUploadedFile(
-                    "symbols.zip", out.getvalue(), content_type="application/zip"
-                )
-            },
-            format="multipart",
-        )
-        assert response.status_code == 201, response.content
-        assert len(response.data) == 1
+            response = self.client.post(
+                url,
+                {
+                    "file": SimpleUploadedFile(
+                        "symbols.zip", out.getvalue(), content_type="application/zip"
+                    )
+                },
+                format="multipart",
+            )
+            assert response.status_code == 201, response.content
+            assert len(response.data) == 1
 
-        event_data = {
-            "user": {"ip_address": "31.172.207.97"},
-            "extra": {},
-            "project": self.project.id,
-            "platform": "java",
-            "debug_meta": {"images": [{"type": "proguard", "uuid": PROGUARD_UUID}]},
-            "exception": {
-                "values": [
+            event_data = {
+                "user": {"ip_address": "31.172.207.97"},
+                "extra": {},
+                "project": self.project.id,
+                "platform": "java",
+                "debug_meta": {"images": [{"type": "proguard", "uuid": PROGUARD_UUID}]},
+                "exception": {
+                    "values": [
+                        {
+                            "stacktrace": {
+                                "frames": [
+                                    {
+                                        "function": "a",
+                                        "abs_path": None,
+                                        "module": "org.a.b.g$a",
+                                        "filename": None,
+                                        "lineno": 67,
+                                    },
+                                    {
+                                        "function": "a",
+                                        "abs_path": None,
+                                        "module": "org.a.b.g$a",
+                                        "filename": None,
+                                        "lineno": 69,
+                                    },
+                                ]
+                            },
+                            "module": "org.a.b",
+                            "type": "g$a",
+                            "value": "Attempt to invoke virtual method 'org.a.b.g$a.a' on a null object reference",
+                        }
+                    ]
+                },
+                "timestamp": iso_format(before_now(seconds=1)),
+            }
+
+            event = self.post_and_retrieve_event(event_data)
+            if not self.use_relay():
+                # We measure the number of queries after an initial post,
+                # because there are many queries polluting the array
+                # before the actual "processing" happens (like, auth_user)
+                with self.assertWriteQueries(
                     {
-                        "stacktrace": {
-                            "frames": [
-                                {
-                                    "function": "a",
-                                    "abs_path": None,
-                                    "module": "org.a.b.g$a",
-                                    "filename": None,
-                                    "lineno": 67,
-                                },
-                                {
-                                    "function": "a",
-                                    "abs_path": None,
-                                    "module": "org.a.b.g$a",
-                                    "filename": None,
-                                    "lineno": 69,
-                                },
-                            ]
-                        },
-                        "module": "org.a.b",
-                        "type": "g$a",
-                        "value": "Oh no",
+                        "nodestore_node": 2,
+                        "sentry_eventuser": 1,
+                        "sentry_groupedmessage": 1,
+                        "sentry_userreport": 1,
                     }
-                ]
-            },
-            "timestamp": iso_format(before_now(seconds=1)),
-        }
+                ):
+                    self.post_and_retrieve_event(event_data)
 
-        event = self.post_and_retrieve_event(event_data)
-        if not self.use_relay():
-            # We measure the number of queries after an initial post,
-            # because there are many queries polluting the array
-            # before the actual "processing" happens (like, auth_user)
-            with self.assertWriteQueries(
-                {
-                    "nodestore_node": 2,
-                    "sentry_eventuser": 1,
-                    "sentry_groupedmessage": 1,
-                    "sentry_userreport": 1,
-                }
-            ):
-                self.post_and_retrieve_event(event_data)
+            exc = event.interfaces["exception"].values[0]
+            bt = exc.stacktrace
+            frames = bt.frames
 
-        exc = event.interfaces["exception"].values[0]
-        bt = exc.stacktrace
-        frames = bt.frames
+            assert exc.type == "Util$ClassContextSecurityManager"
+            assert (
+                exc.value
+                == "Attempt to invoke virtual method 'org.slf4j.helpers.Util$ClassContextSecurityManager.getExtraClassContext' on a null object reference"
+            )
+            assert exc.module == "org.slf4j.helpers"
+            assert frames[0].function == "getClassContext"
+            assert frames[0].module == "org.slf4j.helpers.Util$ClassContextSecurityManager"
+            assert frames[1].function == "getExtraClassContext"
+            assert frames[1].module == "org.slf4j.helpers.Util$ClassContextSecurityManager"
 
-        assert exc.type == "Util$ClassContextSecurityManager"
-        assert exc.module == "org.slf4j.helpers"
-        assert frames[0].function == "getClassContext"
-        assert frames[0].module == "org.slf4j.helpers.Util$ClassContextSecurityManager"
-        assert frames[1].function == "getExtraClassContext"
-        assert frames[1].module == "org.slf4j.helpers.Util$ClassContextSecurityManager"
-
-        assert event.culprit == (
-            "org.slf4j.helpers.Util$ClassContextSecurityManager " "in getExtraClassContext"
-        )
+            assert event.culprit == (
+                "org.slf4j.helpers.Util$ClassContextSecurityManager " "in getExtraClassContext"
+            )
 
     def test_resolving_inline(self):
         url = reverse(