Просмотр исходного кода

feat(profiling): Deobfuscate Android methods' signature (#53427)

Pierre Massat 1 год назад
Родитель
Сommit
6584123828

+ 97 - 0
src/sentry/profiles/java.py

@@ -0,0 +1,97 @@
+from typing import List, Tuple
+
+JAVA_BASE_TYPES = {
+    "Z": "boolean",
+    "B": "byte",
+    "C": "char",
+    "S": "short",
+    "I": "int",
+    "J": "long",
+    "F": "float",
+    "D": "double",
+    "V": "void",
+}
+
+
+# parse_obfuscated_signature will parse an obfuscated signatures into parameter
+# and return types that can be then deobfuscated
+def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]:
+    if signature[0] != "(":
+        return [], ""
+
+    signature = signature[1:]
+    parameter_types, return_type = signature.rsplit(")", 1)
+    types = []
+    i = 0
+    arrays = 0
+
+    while i < len(parameter_types):
+        t = parameter_types[i]
+
+        if t in JAVA_BASE_TYPES:
+            start_index = i - arrays
+            types.append(parameter_types[start_index : i + 1])
+            arrays = 0
+        elif t == "L":
+            start_index = i - arrays
+            end_index = parameter_types[i:].index(";")
+            types.append(parameter_types[start_index : i + end_index + 1])
+            arrays = 0
+            i += end_index
+        elif t == "[":
+            arrays += 1
+        else:
+            arrays = 0
+
+        i += 1
+
+    return types, return_type
+
+
+# format_signature formats the types into a human-readable signature
+def format_signature(parameter_java_types: List[str], return_java_type: str) -> str:
+    signature = f"({', '.join(parameter_java_types)})"
+    if return_java_type and return_java_type != "void":
+        signature += f": {return_java_type}"
+    return signature
+
+
+def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str:
+    if not byte_code_type:
+        return ""
+
+    token = byte_code_type[0]
+    if token in JAVA_BASE_TYPES:
+        return JAVA_BASE_TYPES[token]
+    elif token == "L":
+        # invalid signature
+        if byte_code_type[-1] != ";":
+            return byte_code_type
+        obfuscated = byte_code_type[1:-1].replace("/", ".")
+        mapped = mapper.remap_class(obfuscated)
+        if mapped:
+            return mapped
+        return obfuscated
+    elif token == "[":
+        return f"{byte_code_type_to_java_type(mapper, byte_code_type[1:])}[]"
+    else:
+        return byte_code_type
+
+
+# map_obfucated_signature will parse then deobfuscated a signature and
+# format it appropriately
+def deobfuscate_signature(mapper, signature: str) -> str:
+    if not signature:
+        return ""
+
+    parameter_types, return_type = parse_obfuscated_signature(signature)
+    if not (parameter_types or return_type):
+        return ""
+
+    parameter_java_types = []
+    for parameter_type in parameter_types:
+        new_class = byte_code_type_to_java_type(mapper, parameter_type)
+        parameter_java_types.append(new_class)
+
+    return_java_type = byte_code_type_to_java_type(mapper, return_type)
+    return format_signature(parameter_java_types, return_java_type)

+ 34 - 16
src/sentry/profiles/task.py

@@ -18,6 +18,7 @@ from sentry.lang.javascript.processing import generate_scraping_config
 from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind
 from sentry.models import EventError, Organization, Project, ProjectDebugFile
 from sentry.profiles.device import classify_device
+from sentry.profiles.java import deobfuscate_signature
 from sentry.profiles.utils import get_from_profiling_service
 from sentry.signals import first_profile_received
 from sentry.tasks.base import instrumented_task
@@ -618,35 +619,52 @@ def _deobfuscate(profile: Profile, project: Project) -> None:
 
     with sentry_sdk.start_span(op="proguard.remap"):
         for method in profile["profile"]["methods"]:
+            method.setdefault("data", {})
+
             mapped = mapper.remap_frame(
                 method["class_name"], method["name"], method["source_line"] or 0
             )
-            method.setdefault("data", {})
-            if len(mapped) == 1:
-                new_frame = mapped[0]
-                method.update(
-                    {
-                        "class_name": new_frame.class_name,
-                        "name": new_frame.method,
-                        "source_file": new_frame.file,
-                        "source_line": new_frame.line,
-                    }
-                )
-                method["data"]["deobfuscation_status"] = "deobfuscated"
-            elif len(mapped) > 1:
+
+            if "signature" in method and method["signature"]:
+                method["signature"] = deobfuscate_signature(mapper, method["signature"])
+
+            if len(mapped) >= 1:
+                new_frame = mapped[-1]
+                method["class_name"] = new_frame.class_name
+                method["name"] = new_frame.method
+                method["data"] = {
+                    "deobfuscation_status": "deobfuscated"
+                    if method.get("signature", None)
+                    else "partial"
+                }
+
+                if new_frame.file:
+                    method["source_file"] = new_frame.file
+
+                if new_frame.line:
+                    method["source_line"] = new_frame.line
+
                 bottom_class = mapped[-1].class_name
                 method["inline_frames"] = [
                     {
                         "class_name": new_frame.class_name,
+                        "data": {"deobfuscation_status": "deobfuscated"},
                         "name": new_frame.method,
                         "source_file": method["source_file"]
                         if bottom_class == new_frame.class_name
-                        else None,
+                        else "",
                         "source_line": new_frame.line,
-                        "data": {"deobfuscation_status": "deobfuscated"},
                     }
-                    for new_frame in mapped
+                    for new_frame in reversed(mapped)
                 ]
+
+                # vroom will only take into account frames in this list
+                # if it exists. since symbolic does not return a signature for
+                # the frame we deobfuscated, we update it to set
+                # the deobfuscated signature.
+                if len(method["inline_frames"]) > 0:
+                    method["inline_frames"][0]["data"] = method["data"]
+                    method["inline_frames"][0]["signature"] = method.get("signature", "")
             else:
                 mapped_class = mapper.remap_class(method["class_name"])
                 if mapped_class:

+ 50 - 0
tests/sentry/profiles/test_java.py

@@ -0,0 +1,50 @@
+from tempfile import mkstemp
+
+import pytest
+from symbolic.proguard import ProguardMapper
+
+from sentry.profiles.java import deobfuscate_signature
+
+PROGUARD_SOURCE = b"""\
+# compiler: R8
+# compiler_version: 2.0.74
+# min_api: 16
+# pg_map_id: 5b46fdc
+# common_typos_disable
+# {"id":"com.android.tools.r8.mapping","version":"1.0"}
+org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a:
+    65:65:void <init>() -> <init>
+    67:67:java.lang.Class[] getClassContext() -> a
+    69:69:java.lang.Class[] getExtraClassContext() -> a
+    65:65:void <init>(org.slf4j.helpers.Util$1) -> <init>
+"""
+
+
+@pytest.fixture
+def mapper():
+    _, mapping_file_path = mkstemp()
+    with open(mapping_file_path, "wb") as f:
+        f.write(PROGUARD_SOURCE)
+    mapper = ProguardMapper.open(mapping_file_path)
+    assert mapper.has_line_info
+    return mapper
+
+
+@pytest.mark.parametrize(
+    ["obfuscated", "expected"],
+    [
+        # invalid signatures
+        ("", ""),
+        ("()", ""),
+        # valid signatures
+        ("()V", "()"),
+        ("([I)V", "(int[])"),
+        ("(III)V", "(int, int, int)"),
+        ("([Ljava/lang/String;)V", "(java.lang.String[])"),
+        ("([[J)V", "(long[][])"),
+        ("(I)I", "(int): int"),
+        ("([B)V", "(byte[])"),
+    ],
+)
+def test_deobfuscate_signature(mapper, obfuscated, expected):
+    assert deobfuscate_signature(mapper, obfuscated) == expected

+ 16 - 9
tests/sentry/profiles/test_task.py

@@ -127,16 +127,18 @@ class ProfilesProcessTaskTest(TestCase):
                 "profile": {
                     "methods": [
                         {
-                            "name": "a",
                             "abs_path": None,
                             "class_name": "org.a.b.g$a",
+                            "name": "a",
+                            "signature": "()V",
                             "source_file": None,
                             "source_line": 67,
                         },
                         {
-                            "name": "a",
                             "abs_path": None,
                             "class_name": "org.a.b.g$a",
+                            "name": "a",
+                            "signature": "()V",
                             "source_file": None,
                             "source_line": 69,
                         },
@@ -178,16 +180,18 @@ class ProfilesProcessTaskTest(TestCase):
                 "profile": {
                     "methods": [
                         {
-                            "name": "onClick",
                             "abs_path": None,
                             "class_name": "e.a.c.a",
+                            "name": "onClick",
+                            "signature": "()V",
                             "source_file": None,
                             "source_line": 2,
                         },
                         {
-                            "name": "t",
                             "abs_path": None,
                             "class_name": "io.sentry.sample.MainActivity",
+                            "name": "t",
+                            "signature": "()V",
                             "source_file": "MainActivity.java",
                             "source_line": 1,
                         },
@@ -200,21 +204,24 @@ class ProfilesProcessTaskTest(TestCase):
         _deobfuscate(profile, project)
         frames = profile["profile"]["methods"]
 
-        assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4
+        assert sum(len(f.get("inline_frames", [])) for f in frames) == 3
 
         assert frames[0]["name"] == "onClick"
         assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4"
 
+        assert frames[1]["inline_frames"][0]["name"] == "onClickHandler"
+        assert frames[1]["inline_frames"][0]["source_line"] == 40
         assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity"
-        assert frames[1]["inline_frames"][0]["name"] == "bar"
-        assert frames[1]["inline_frames"][0]["source_line"] == 54
+        assert frames[1]["inline_frames"][0]["signature"] == "()"
+
         assert frames[1]["inline_frames"][1]["name"] == "foo"
         assert frames[1]["inline_frames"][1]["source_line"] == 44
-        assert frames[1]["inline_frames"][2]["name"] == "onClickHandler"
-        assert frames[1]["inline_frames"][2]["source_line"] == 40
+
         assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java"
         assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity"
+        assert frames[1]["inline_frames"][2]["name"] == "bar"
+        assert frames[1]["inline_frames"][2]["source_line"] == 54
 
     def test_error_on_resolving(self):
         out = BytesIO()