Browse Source

feat(profiling): Deobfuscate Android methods' signature (#53427)

Pierre Massat 1 year ago
parent
commit
6584123828

+ 97 - 0
src/sentry/profiles/java.py

@@ -0,0 +1,97 @@
+from typing import List, Tuple
+
+# Single-letter type codes that can appear in a bytecode signature, mapped to
+# the Java type name used when rendering the human-readable signature.
+JAVA_BASE_TYPES = {
+    "Z": "boolean",
+    "B": "byte",
+    "C": "char",
+    "S": "short",
+    "I": "int",
+    "J": "long",
+    "F": "float",
+    "D": "double",
+    "V": "void",
+}
+
+
+# parse_obfuscated_signature parses an obfuscated signature into parameter
+# and return types that can then be deobfuscated
+def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]:
+    """Split a bytecode method signature into its parameter and return types.
+
+    For example ``"([Ljava/lang/String;I)V"`` yields
+    ``(["[Ljava/lang/String;", "I"], "V")``. Array markers (``[``) are kept
+    attached to the type that follows them.
+
+    A malformed signature (empty, not starting with ``(``, missing the closing
+    ``)``, or containing an ``L`` class type without its terminating ``;``)
+    yields ``([], "")`` instead of raising.
+    """
+    # startswith() is also safe on an empty string, unlike signature[0]
+    if not signature.startswith("("):
+        return [], ""
+
+    parameter_types, closing, return_type = signature[1:].rpartition(")")
+    if not closing:
+        # no closing parenthesis: invalid signature
+        return [], ""
+
+    types = []
+    i = 0
+    # number of "[" seen immediately before the type currently being read
+    arrays = 0
+
+    while i < len(parameter_types):
+        t = parameter_types[i]
+
+        if t == "[":
+            arrays += 1
+        elif t == "L":
+            # a class type runs up to and including the next ";"
+            end_index = parameter_types.find(";", i)
+            if end_index == -1:
+                # unterminated class type: invalid signature
+                return [], ""
+            types.append(parameter_types[i - arrays : end_index + 1])
+            arrays = 0
+            i = end_index
+        elif t in JAVA_BASE_TYPES:
+            types.append(parameter_types[i - arrays : i + 1])
+            arrays = 0
+        else:
+            # unknown character: drop any pending array markers and move on
+            arrays = 0
+
+        i += 1
+
+    return types, return_type
+
+
+# format_signature formats the types into a human-readable signature
+def format_signature(parameter_java_types: List[str], return_java_type: str) -> str:
+    """Render Java types as ``(param, param): return``.
+
+    The ``: return`` suffix is left out when the return type is empty or
+    ``"void"``.
+    """
+    parameters = ", ".join(parameter_java_types)
+    if not return_java_type or return_java_type == "void":
+        return f"({parameters})"
+    return f"({parameters}): {return_java_type}"
+
+
+def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str:
+    """Convert a single bytecode type into a Java type name.
+
+    Base type codes are looked up in JAVA_BASE_TYPES, ``[`` prefixes become
+    ``[]`` suffixes, and ``L...;`` class types are passed to
+    ``mapper.remap_class`` (with ``/`` turned into ``.``), falling back to the
+    unmapped class name when the mapper returns nothing. An empty input yields
+    an empty string; anything else is returned unchanged.
+    """
+    if not byte_code_type:
+        return ""
+
+    head = byte_code_type[0]
+
+    if head == "[":
+        element_type = byte_code_type_to_java_type(mapper, byte_code_type[1:])
+        return f"{element_type}[]"
+
+    base_type = JAVA_BASE_TYPES.get(head)
+    if base_type is not None:
+        return base_type
+
+    # neither a class type nor a well-formed one: hand it back untouched
+    if head != "L" or not byte_code_type.endswith(";"):
+        return byte_code_type
+
+    class_name = byte_code_type[1:-1].replace("/", ".")
+    return mapper.remap_class(class_name) or class_name
+
+
+# deobfuscate_signature parses, then deobfuscates, a signature and
+# formats it appropriately
+def deobfuscate_signature(mapper, signature: str) -> str:
+    """Turn an obfuscated bytecode signature into a readable Java signature.
+
+    The signature is parsed, each parameter type and the return type are
+    converted with byte_code_type_to_java_type, and the result is rendered by
+    format_signature. Returns an empty string when the signature is empty or
+    parsing yields neither parameter types nor a return type.
+    """
+    if not signature:
+        return ""
+
+    parameter_types, return_type = parse_obfuscated_signature(signature)
+    if not parameter_types and not return_type:
+        return ""
+
+    parameter_java_types = [
+        byte_code_type_to_java_type(mapper, parameter_type)
+        for parameter_type in parameter_types
+    ]
+    return format_signature(
+        parameter_java_types,
+        byte_code_type_to_java_type(mapper, return_type),
+    )

+ 34 - 16
src/sentry/profiles/task.py

@@ -18,6 +18,7 @@ from sentry.lang.javascript.processing import generate_scraping_config
 from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind
 from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind
 from sentry.models import EventError, Organization, Project, ProjectDebugFile
 from sentry.models import EventError, Organization, Project, ProjectDebugFile
 from sentry.profiles.device import classify_device
 from sentry.profiles.device import classify_device
+from sentry.profiles.java import deobfuscate_signature
 from sentry.profiles.utils import get_from_profiling_service
 from sentry.profiles.utils import get_from_profiling_service
 from sentry.signals import first_profile_received
 from sentry.signals import first_profile_received
 from sentry.tasks.base import instrumented_task
 from sentry.tasks.base import instrumented_task
@@ -618,35 +619,52 @@ def _deobfuscate(profile: Profile, project: Project) -> None:
 
 
     with sentry_sdk.start_span(op="proguard.remap"):
     with sentry_sdk.start_span(op="proguard.remap"):
         for method in profile["profile"]["methods"]:
         for method in profile["profile"]["methods"]:
+            method.setdefault("data", {})
+
             mapped = mapper.remap_frame(
             mapped = mapper.remap_frame(
                 method["class_name"], method["name"], method["source_line"] or 0
                 method["class_name"], method["name"], method["source_line"] or 0
             )
             )
-            method.setdefault("data", {})
-            if len(mapped) == 1:
-                new_frame = mapped[0]
-                method.update(
-                    {
-                        "class_name": new_frame.class_name,
-                        "name": new_frame.method,
-                        "source_file": new_frame.file,
-                        "source_line": new_frame.line,
-                    }
-                )
-                method["data"]["deobfuscation_status"] = "deobfuscated"
-            elif len(mapped) > 1:
+
+            if "signature" in method and method["signature"]:
+                method["signature"] = deobfuscate_signature(mapper, method["signature"])
+
+            if len(mapped) >= 1:
+                new_frame = mapped[-1]
+                method["class_name"] = new_frame.class_name
+                method["name"] = new_frame.method
+                method["data"] = {
+                    "deobfuscation_status": "deobfuscated"
+                    if method.get("signature", None)
+                    else "partial"
+                }
+
+                if new_frame.file:
+                    method["source_file"] = new_frame.file
+
+                if new_frame.line:
+                    method["source_line"] = new_frame.line
+
                 bottom_class = mapped[-1].class_name
                 bottom_class = mapped[-1].class_name
                 method["inline_frames"] = [
                 method["inline_frames"] = [
                     {
                     {
                         "class_name": new_frame.class_name,
                         "class_name": new_frame.class_name,
+                        "data": {"deobfuscation_status": "deobfuscated"},
                         "name": new_frame.method,
                         "name": new_frame.method,
                         "source_file": method["source_file"]
                         "source_file": method["source_file"]
                         if bottom_class == new_frame.class_name
                         if bottom_class == new_frame.class_name
-                        else None,
+                        else "",
                         "source_line": new_frame.line,
                         "source_line": new_frame.line,
-                        "data": {"deobfuscation_status": "deobfuscated"},
                     }
                     }
-                    for new_frame in mapped
+                    for new_frame in reversed(mapped)
                 ]
                 ]
+
+                # vroom will only take into account frames in this list
+                # if it exists. since symbolic does not return a signature for
+                # the frame we deobfuscated, we update it to set
+                # the deobfuscated signature.
+                if len(method["inline_frames"]) > 0:
+                    method["inline_frames"][0]["data"] = method["data"]
+                    method["inline_frames"][0]["signature"] = method.get("signature", "")
             else:
             else:
                 mapped_class = mapper.remap_class(method["class_name"])
                 mapped_class = mapper.remap_class(method["class_name"])
                 if mapped_class:
                 if mapped_class:

+ 50 - 0
tests/sentry/profiles/test_java.py

@@ -0,0 +1,50 @@
+from tempfile import mkstemp
+
+import pytest
+from symbolic.proguard import ProguardMapper
+
+from sentry.profiles.java import deobfuscate_signature
+
+PROGUARD_SOURCE = b"""\
+# compiler: R8
+# compiler_version: 2.0.74
+# min_api: 16
+# pg_map_id: 5b46fdc
+# common_typos_disable
+# {"id":"com.android.tools.r8.mapping","version":"1.0"}
+org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a:
+    65:65:void <init>() -> <init>
+    67:67:java.lang.Class[] getClassContext() -> a
+    69:69:java.lang.Class[] getExtraClassContext() -> a
+    65:65:void <init>(org.slf4j.helpers.Util$1) -> <init>
+"""
+
+
+@pytest.fixture
+def mapper():
+    """Return a ProguardMapper opened on a temporary file holding PROGUARD_SOURCE."""
+    # mkstemp() hands back an already-open file descriptor; giving it to
+    # open() lets the with-block close it instead of leaking it.
+    fd, mapping_file_path = mkstemp()
+    with open(fd, "wb") as f:
+        f.write(PROGUARD_SOURCE)
+    mapper = ProguardMapper.open(mapping_file_path)
+    assert mapper.has_line_info
+    return mapper
+
+
+# Each case pairs an obfuscated bytecode signature with the readable
+# signature deobfuscate_signature is expected to produce for it.
+@pytest.mark.parametrize(
+    ["obfuscated", "expected"],
+    [
+        # invalid signatures
+        ("", ""),
+        ("()", ""),
+        # valid signatures
+        ("()V", "()"),
+        ("([I)V", "(int[])"),
+        ("(III)V", "(int, int, int)"),
+        ("([Ljava/lang/String;)V", "(java.lang.String[])"),
+        ("([[J)V", "(long[][])"),
+        ("(I)I", "(int): int"),
+        ("([B)V", "(byte[])"),
+    ],
+)
+def test_deobfuscate_signature(mapper, obfuscated, expected):
+    """Check deobfuscate_signature against the expected readable signature."""
+    assert deobfuscate_signature(mapper, obfuscated) == expected

+ 16 - 9
tests/sentry/profiles/test_task.py

@@ -127,16 +127,18 @@ class ProfilesProcessTaskTest(TestCase):
                 "profile": {
                 "profile": {
                     "methods": [
                     "methods": [
                         {
                         {
-                            "name": "a",
                             "abs_path": None,
                             "abs_path": None,
                             "class_name": "org.a.b.g$a",
                             "class_name": "org.a.b.g$a",
+                            "name": "a",
+                            "signature": "()V",
                             "source_file": None,
                             "source_file": None,
                             "source_line": 67,
                             "source_line": 67,
                         },
                         },
                         {
                         {
-                            "name": "a",
                             "abs_path": None,
                             "abs_path": None,
                             "class_name": "org.a.b.g$a",
                             "class_name": "org.a.b.g$a",
+                            "name": "a",
+                            "signature": "()V",
                             "source_file": None,
                             "source_file": None,
                             "source_line": 69,
                             "source_line": 69,
                         },
                         },
@@ -178,16 +180,18 @@ class ProfilesProcessTaskTest(TestCase):
                 "profile": {
                 "profile": {
                     "methods": [
                     "methods": [
                         {
                         {
-                            "name": "onClick",
                             "abs_path": None,
                             "abs_path": None,
                             "class_name": "e.a.c.a",
                             "class_name": "e.a.c.a",
+                            "name": "onClick",
+                            "signature": "()V",
                             "source_file": None,
                             "source_file": None,
                             "source_line": 2,
                             "source_line": 2,
                         },
                         },
                         {
                         {
-                            "name": "t",
                             "abs_path": None,
                             "abs_path": None,
                             "class_name": "io.sentry.sample.MainActivity",
                             "class_name": "io.sentry.sample.MainActivity",
+                            "name": "t",
+                            "signature": "()V",
                             "source_file": "MainActivity.java",
                             "source_file": "MainActivity.java",
                             "source_line": 1,
                             "source_line": 1,
                         },
                         },
@@ -200,21 +204,24 @@ class ProfilesProcessTaskTest(TestCase):
         _deobfuscate(profile, project)
         _deobfuscate(profile, project)
         frames = profile["profile"]["methods"]
         frames = profile["profile"]["methods"]
 
 
-        assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4
+        assert sum(len(f.get("inline_frames", [])) for f in frames) == 3
 
 
         assert frames[0]["name"] == "onClick"
         assert frames[0]["name"] == "onClick"
         assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4"
         assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4"
 
 
+        assert frames[1]["inline_frames"][0]["name"] == "onClickHandler"
+        assert frames[1]["inline_frames"][0]["source_line"] == 40
         assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity"
         assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity"
-        assert frames[1]["inline_frames"][0]["name"] == "bar"
-        assert frames[1]["inline_frames"][0]["source_line"] == 54
+        assert frames[1]["inline_frames"][0]["signature"] == "()"
+
         assert frames[1]["inline_frames"][1]["name"] == "foo"
         assert frames[1]["inline_frames"][1]["name"] == "foo"
         assert frames[1]["inline_frames"][1]["source_line"] == 44
         assert frames[1]["inline_frames"][1]["source_line"] == 44
-        assert frames[1]["inline_frames"][2]["name"] == "onClickHandler"
-        assert frames[1]["inline_frames"][2]["source_line"] == 40
+
         assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity"
         assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity"
+        assert frames[1]["inline_frames"][2]["name"] == "bar"
+        assert frames[1]["inline_frames"][2]["source_line"] == 54
 
 
     def test_error_on_resolving(self):
     def test_error_on_resolving(self):
         out = BytesIO()
         out = BytesIO()