Browse Source

Add support to ingest inline attachments (#71522)

For individual attachments, it is now possible to pass the contents
within the `data` property directly instead of needing to do a
processing attachments store roundtrip via `attachment_chunk` messages.
Arpad Borsos 9 months ago
parent
commit
23f98725cb

+ 4 - 0
src/sentry/attachments/base.py

@@ -51,6 +51,7 @@ class CachedAttachment:
         self._data = data
         self.chunks = chunks
         self._cache = cache
+        self._has_initial_data = data is not UNINITIALIZED_DATA
 
     @classmethod
     def from_upload(cls, file, **kwargs):
@@ -75,6 +76,9 @@ class CachedAttachment:
         assert self.key is not None
         assert self.id is not None
 
+        if self._has_initial_data:
+            return
+
         if self.chunks is None:
             yield ATTACHMENT_UNCHUNKED_DATA_KEY.format(key=self.key, id=self.id)
             return

+ 30 - 29
src/sentry/event_manager.py

@@ -2796,42 +2796,43 @@ def save_attachment(
             project_id=project.id,
             key_id=key_id,
             outcome=Outcome.RATE_LIMITED,
-            reason="Too many attachments ({num_requests}} uploaded in a 5 minute window, will reset at {reset_time}",
+            reason=f"Too many attachments ({num_requests}) uploaded in a 5 minute window, will reset at {reset_time}",
             timestamp=timestamp,
             event_id=event_id,
             category=DataCategory.ATTACHMENT,
             quantity=attachment.size or 1,
         )
-    else:
-        file = EventAttachment.putfile(project.id, attachment)
+        return
 
-        EventAttachment.objects.create(
-            # lookup:
-            project_id=project.id,
-            group_id=group_id,
-            event_id=event_id,
-            # metadata:
-            type=attachment.type,
-            name=attachment.name,
-            content_type=file.content_type,
-            size=file.size,
-            sha1=file.sha1,
-            # storage:
-            file_id=file.file_id,
-            blob_path=file.blob_path,
-        )
+    file = EventAttachment.putfile(project.id, attachment)
 
-        track_outcome(
-            org_id=project.organization_id,
-            project_id=project.id,
-            key_id=key_id,
-            outcome=Outcome.ACCEPTED,
-            reason=None,
-            timestamp=timestamp,
-            event_id=event_id,
-            category=DataCategory.ATTACHMENT,
-            quantity=attachment.size or 1,
-        )
+    EventAttachment.objects.create(
+        # lookup:
+        project_id=project.id,
+        group_id=group_id,
+        event_id=event_id,
+        # metadata:
+        type=attachment.type,
+        name=attachment.name,
+        content_type=file.content_type,
+        size=file.size,
+        sha1=file.sha1,
+        # storage:
+        file_id=file.file_id,
+        blob_path=file.blob_path,
+    )
+
+    track_outcome(
+        org_id=project.organization_id,
+        project_id=project.id,
+        key_id=key_id,
+        outcome=Outcome.ACCEPTED,
+        reason=None,
+        timestamp=timestamp,
+        event_id=event_id,
+        category=DataCategory.ATTACHMENT,
+        quantity=attachment.size or 1,
+    )
 
 
 def save_attachments(cache_key: str | None, attachments: list[Attachment], job: Job) -> None:

+ 17 - 14
src/sentry/ingest/consumer/processors.py

@@ -279,23 +279,26 @@ def process_individual_attachment(message: IngestMessage, project: Project) -> N
     if event is not None:
         group_id = event.group_id
 
-    attachment = message["attachment"]
+    attachment_msg = message["attachment"]
+    attachment_type = attachment_msg.pop("attachment_type")
+
+    # NOTE: `get_from_chunks` will avoid the cache if `attachment_msg` contains `data` inline
     attachment = attachment_cache.get_from_chunks(
-        key=cache_key, type=attachment.pop("attachment_type"), **attachment
+        key=cache_key, type=attachment_type, **attachment_msg
     )
-    if attachment.type not in ("event.attachment", "event.view_hierarchy"):
-        logger.error("invalid individual attachment type: %s", attachment.type)
-        return
 
-    save_attachment(
-        cache_key,
-        attachment,
-        project,
-        event_id,
-        key_id=None,  # TODO: Inject this from Relay
-        group_id=group_id,
-        start_time=None,  # TODO: Inject this from Relay
-    )
+    if attachment_type in ("event.attachment", "event.view_hierarchy"):
+        save_attachment(
+            cache_key,
+            attachment,
+            project,
+            event_id,
+            key_id=None,  # TODO: Inject this from Relay
+            group_id=group_id,
+            start_time=None,  # TODO: Inject this from Relay
+        )
+    else:
+        logger.error("invalid individual attachment type: %s", attachment_type)
 
     attachment.delete()
 

+ 37 - 31
tests/sentry/ingest/ingest_consumer/test_ingest_consumer_processing.py

@@ -401,28 +401,27 @@ def test_deobfuscate_view_hierarchy(default_project, task_runner):
 
 
 @django_db_all
+@pytest.mark.parametrize("feature_enabled", [True, False], ids=["with_feature", "without_feature"])
 @pytest.mark.parametrize(
-    "event_attachments", [True, False], ids=["with_feature", "without_feature"]
-)
-@pytest.mark.parametrize(
-    "chunks",
+    "attachment",
     [
-        ((b"Hello ", b"World!"), "event.attachment", "application/octet-stream"),
-        ((b"",), "event.attachment", "application/octet-stream"),
-        ((), "event.attachment", "application/octet-stream"),
+        ([b"Hello ", b"World!"], "event.attachment", "application/octet-stream"),
+        ([b""], "event.attachment", "application/octet-stream"),
+        ([], "event.attachment", "application/octet-stream"),
         (
-            (b'{"rendering_system":"flutter","windows":[]}',),
+            [b'{"rendering_system":"flutter","windows":[]}'],
             "event.view_hierarchy",
             "application/json",
         ),
+        (b"inline attachment", "event.attachment", "application/octet-stream"),
     ],
-    ids=["basic", "zerolen", "nochunks", "view_hierarchy"],
+    ids=["basic", "zerolen", "nochunks", "view_hierarchy", "inline"],
 )
 @pytest.mark.parametrize("with_group", [True, False], ids=["with_group", "without_group"])
 def test_individual_attachments(
-    default_project, factories, monkeypatch, event_attachments, chunks, with_group, django_cache
+    default_project, factories, monkeypatch, feature_enabled, attachment, with_group, django_cache
 ):
-    monkeypatch.setattr("sentry.features.has", lambda *a, **kw: event_attachments)
+    monkeypatch.setattr("sentry.features.has", lambda *a, **kw: feature_enabled)
 
     event_id = uuid.uuid4().hex
     attachment_id = "ca90fb45-6dd9-40a0-a18f-8693aa621abb"
@@ -437,27 +436,34 @@ def test_individual_attachments(
         group_id = event.group.id
         assert group_id, "this test requires a group to work"
 
-    for i, chunk in enumerate(chunks[0]):
-        process_attachment_chunk(
-            {
-                "payload": chunk,
-                "event_id": event_id,
-                "project_id": project_id,
-                "id": attachment_id,
-                "chunk_index": i,
-            }
-        )
+    chunks, attachment_type, content_type = attachment
+    attachment_meta = {
+        "attachment_type": attachment_type,
+        "chunks": len(chunks),
+        "content_type": content_type,
+        "id": attachment_id,
+        "name": "foo.txt",
+    }
+    if isinstance(chunks, bytes):
+        attachment_meta["data"] = chunks
+        expected_content = chunks
+    else:
+        for i, chunk in enumerate(chunks):
+            process_attachment_chunk(
+                {
+                    "payload": chunk,
+                    "event_id": event_id,
+                    "project_id": project_id,
+                    "id": attachment_id,
+                    "chunk_index": i,
+                }
+            )
+        expected_content = b"".join(chunks)
 
     process_individual_attachment(
         {
             "type": "attachment",
-            "attachment": {
-                "attachment_type": chunks[1],
-                "chunks": len(chunks[0]),
-                "content_type": chunks[2],
-                "id": attachment_id,
-                "name": "foo.txt",
-            },
+            "attachment": attachment_meta,
             "event_id": event_id,
             "project_id": project_id,
         },
@@ -466,16 +472,16 @@ def test_individual_attachments(
 
     attachments = list(EventAttachment.objects.filter(project_id=project_id, event_id=event_id))
 
-    if not event_attachments:
+    if not feature_enabled:
         assert not attachments
     else:
         (attachment,) = attachments
         assert attachment.name == "foo.txt"
         assert attachment.group_id == group_id
-        assert attachment.content_type == chunks[2]
+        assert attachment.content_type == content_type
 
         with attachment.getfile() as file_contents:
-            assert file_contents.read() == b"".join(chunks[0])
+            assert file_contents.read() == expected_content
 
 
 @django_db_all