Browse Source

feat(native): Support uploading BCSymbolMap DIFs (#25712)

Adds support for uploading BCSymbolMap and their associated PLists
which map their DebugIDs.  This allows sentry-cli to upload these
files and allows symbolicator to fetch them.
Floris Bruynooghe 3 years ago
parent
commit
b66b7d4e9a

+ 1 - 1
requirements-base.txt

@@ -55,7 +55,7 @@ snuba-sdk>=0.0.14,<1.0.0
 simplejson==3.17.2
 statsd==3.3
 structlog==17.1.0
-symbolic==8.0.3
+symbolic==8.1.0
 toronado==0.1.0
 ua-parser==0.10.0
 unidiff==0.6.0

+ 1 - 0
src/sentry/api/endpoints/chunk.py

@@ -24,6 +24,7 @@ CHUNK_UPLOAD_ACCEPT = (
     "release_files",  # Release files assemble
     "pdbs",  # PDB upload and debug id override
     "sources",  # Source artifact bundle upload
+    "bcsymbolmaps",  # BCSymbolMaps and associated PLists
 )
 
 

+ 2 - 0
src/sentry/constants.py

@@ -275,6 +275,8 @@ KNOWN_DIF_FORMATS = {
     "application/wasm": "wasm",
     "text/x-proguard+plain": "proguard",
     "application/x-sentry-bundle+zip": "sourcebundle",
+    "application/x-bcsymbolmap": "bcsymbolmap",
+    "application/x-debugid-map": "plist",
 }
 
 NATIVE_UNKNOWN_STRING = "<unknown>"

+ 105 - 23
src/sentry/models/debugfile.py

@@ -1,3 +1,4 @@
+import enum
 import errno
 import hashlib
 import logging
@@ -9,6 +10,7 @@ import uuid
 
 from django.db import models
 from symbolic import Archive, ObjectErrorUnsupportedObject, SymbolicError, normalize_debug_id
+from symbolic.debuginfo import BcSymbolMap, UuidMapping
 
 from sentry import options
 from sentry.constants import KNOWN_DIF_FORMATS
@@ -170,24 +172,55 @@ def clean_redundant_difs(project, debug_id):
     )
 
     all_features = set()
+    bcsymbolmap_seen = False
+    plist_seen = False
     for i, dif in enumerate(difs):
-        # We always keep the latest file. If it has no features, likely the
-        # previous files did not have features either and will be removed, or we
-        # keep both. Subsequent uploads will remove this file later.
-        if i > 0 and dif.features <= all_features:
-            dif.delete()
+        mime_type = dif.file.headers.get("Content-Type")
+        if mime_type == DIF_MIMETYPES["bcsymbolmap"]:
+            if not bcsymbolmap_seen:
+                bcsymbolmap_seen = True
+            else:
+                dif.delete()
+        elif mime_type == DIF_MIMETYPES["plist"]:
+            if not plist_seen:
+                plist_seen = True
+            else:
+                dif.delete()
         else:
-            all_features.update(dif.features)
+            # We always keep the latest file. If it has no features, likely the
+            # previous files did not have features either and will be removed, or we
+            # keep both. Subsequent uploads will remove this file later.
+            if i > 0 and dif.features <= all_features:
+                dif.delete()
+            else:
+                all_features.update(dif.features)
 
 
 def create_dif_from_id(project, meta, fileobj=None, file=None):
-    """This creates a mach dsym file or proguard mapping from the given
-    debug id and open file object to a debug file.  This will not verify the
-    debug id (intentionally so).  Use `detect_dif_from_path` to do that.
+    """Creates the :class:`ProjectDebugFile` entry for the provided DIF.
+
+    This creates the :class:`ProjectDebugFile` entry for the DIF provided in `meta` (a
+    :class:`DifMeta` object).  If the correct entry already exists this simply returns the
+    existing entry.
+
+    It intentionally does not validate the file, only will ensure a :class:`File` entry
+    exists and set its `ContentType` according to the provided :class:DifMeta`.
+
+    Returns a tuple of `(dif, created)` where `dif` is the `ProjectDebugFile` instance and
+    `created` is a bool.
     """
     if meta.file_format == "proguard":
         object_name = "proguard-mapping"
-    elif meta.file_format in ("macho", "elf", "pdb", "pe", "wasm", "sourcebundle"):
+    elif meta.file_format in (
+        "macho",
+        "elf",
+        "pdb",
+        "pe",
+        "wasm",
+        "sourcebundle",
+        "bcsymbolmap",
+        "plist",
+    ):
         object_name = meta.name
     elif meta.file_format == "breakpad":
         object_name = meta.name[:-4] if meta.name.endswith(".sym") else meta.name
@@ -269,7 +302,7 @@ class DifMeta:
     def __init__(self, file_format, arch, debug_id, path, code_id=None, name=None, data=None):
         self.file_format = file_format
         self.arch = arch
-        self.debug_id = debug_id
+        self.debug_id = debug_id  # TODO(flub): should this use normalize_debug_id()?
         self.code_id = code_id
         self.path = path
         self.data = data
@@ -310,6 +343,31 @@ class DifMeta:
         return os.path.basename(self.path)
 
 
+class DifKind(enum.Enum):
+    """The different kind of DIF files we can handle."""
+
+    Object = enum.auto()
+    BcSymbolMap = enum.auto()
+    PList = enum.auto()
+    # TODO(flub): should we include proguard?  The tradeoff is that we'd be matching the
+    # regex of it twice.  That cost is probably not too great to worry about.
+
+
+def determine_dif_kind(path):
+    """Returns the :class:`DifKind` detected at `path`."""
+    # TODO(flub): Using just the filename might be sufficient.  But the cost of opening a
+    # file that we'll open and parse right away anyway is rather minimal, though it would
+    # save a syscall.
+    with open(path, "rb") as fp:
+        data = fp.read(11)
+        if data.startswith(b"BCSymbolMap"):
+            return DifKind.BcSymbolMap
+        elif data.startswith(b"<?xml"):
+            return DifKind.PList
+        else:
+            return DifKind.Object
+
+
 def detect_dif_from_path(path, name=None, debug_id=None):
     """This detects which kind of dif(Debug Information File) the path
     provided is. It returns an array since an Archive can contain more than
@@ -332,19 +390,43 @@ def detect_dif_from_path(path, name=None, debug_id=None):
             )
         ]
 
-    # native debug information files (MachO, ELF or Breakpad)
-    try:
-        archive = Archive.open(path)
-    except ObjectErrorUnsupportedObject as e:
-        raise BadDif("Unsupported debug information file: %s" % e)
-    except SymbolicError as e:
-        logger.warning("dsymfile.bad-fat-object", exc_info=True)
-        raise BadDif("Invalid debug information file: %s" % e)
+    dif_kind = determine_dif_kind(path)
+    if dif_kind == DifKind.BcSymbolMap:
+        try:
+            BcSymbolMap.open(path)
+        except SymbolicError as e:
+            logger.warning("bcsymbolmap.bad-file", exc_info=True)
+            raise BadDif("Invalid BCSymbolMap: %s" % e)
+        else:
+            return [
+                DifMeta(
+                    file_format="bcsymbolmap", arch="any", debug_id=debug_id, name=name, path=path
+                )
+            ]
+    elif dif_kind == DifKind.PList:
+        try:
+            UuidMapping.from_plist(debug_id, path)
+        except SymbolicError as e:
+            logger.warning("plist.bad-file", exc_info=True)
+            raise BadDif("Invalid PList: %s" % e)
+        else:
+            return [
+                DifMeta(file_format="plist", arch="any", debug_id=debug_id, name=name, path=path)
+            ]
     else:
-        objs = []
-        for obj in archive.iter_objects():
-            objs.append(DifMeta.from_object(obj, path, name=name, debug_id=debug_id))
-        return objs
+        # native debug information files (MachO, ELF or Breakpad)
+        try:
+            archive = Archive.open(path)
+        except ObjectErrorUnsupportedObject as e:
+            raise BadDif("Unsupported debug information file: %s" % e)
+        except SymbolicError as e:
+            logger.warning("dsymfile.bad-fat-object", exc_info=True)
+            raise BadDif("Invalid debug information file: %s" % e)
+        else:
+            objs = []
+            for obj in archive.iter_objects():
+                objs.append(DifMeta.from_object(obj, path, name=name, debug_id=debug_id))
+            return objs
 
 
 def create_debug_file_from_dif(to_create, project):

+ 11 - 1
static/app/views/settings/projectDebugFiles/index.tsx

@@ -59,7 +59,17 @@ class ProjectDebugSymbols extends AsyncView<Props, State> {
         {
           query: {
             query: location.query.query,
-            file_formats: ['breakpad', 'macho', 'elf', 'pe', 'pdb', 'sourcebundle'],
+            file_formats: [
+              'breakpad',
+              'macho',
+              'elf',
+              'pe',
+              'pdb',
+              'sourcebundle',
+              'wasm',
+              'bcsymbolmap',
+              'plist',
+            ],
           },
         },
       ],