
ref: Add success metrics to event payload (#19068)

Add flags to the event payload indicating whether Sentry considered event processing to have been successful and, if not, to what degree the event is degraded. We want this for some internal high-level stats we are collecting, and having it in the event payload may also be useful to the data team at some point.
Markus Unterwaditzer, 4 years ago · commit ace5ed6b57
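
For context, a minimal sketch (not part of this diff) of how the new "_metrics" flags might look on an event payload and how a consumer could turn them into a coarse outcome label. Only the two "flag.processing.*" keys come from this change; the payload shape and the processing_outcome helper are illustrative assumptions.

    # Illustrative only: an event payload carrying the flags this commit introduces.
    # Everything except the two "flag.processing.*" keys is an assumed example.
    event_data = {
        "event_id": "0123456789abcdef0123456789abcdef",
        "_metrics": {
            # recoverable processing problem (also surfaced via data["errors"])
            "flag.processing.error": True,
            # processing failed badly enough that the event is essentially unusable
            "flag.processing.fatal": False,
        },
    }

    def processing_outcome(data):
        # Hypothetical helper: collapse the flags into a single label for stats.
        event_metrics = data.get("_metrics") or {}
        if event_metrics.get("flag.processing.fatal"):
            return "fatal"
        if event_metrics.get("flag.processing.error"):
            return "error"
        return "ok"

    print(processing_outcome(event_data))  # -> "error"
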

+ 5 - 0
src/sentry/event_manager.py

@@ -1346,6 +1346,11 @@ def _materialize_event_metrics(jobs):
 
         # Capture the actual size that goes into node store.
         event_metrics["bytes.stored.event"] = len(json.dumps(dict(job["event"].data.items())))
+
+        for metric_name in ("flag.processing.error", "flag.processing.fatal"):
+            if event_metrics.get(metric_name):
+                metrics.incr("event_manager.save.event_metrics.%s" % (metric_name,))
+
         job["event_metrics"] = event_metrics
 
 

+ 2 - 0
src/sentry/lang/java/plugin.py

@@ -67,6 +67,8 @@ class JavaStacktraceProcessor(StacktraceProcessor):
             if error_type is None:
                 continue
 
+            self.data.setdefault("_metrics", {})["flag.processing.error"] = True
+
             self.data.setdefault("errors", []).append(
                 {"type": error_type, "mapping_uuid": debug_id}
             )

+ 1 - 0
src/sentry/lang/javascript/errormapping.py

@@ -122,5 +122,6 @@ def rewrite_exception(data):
                     break
             except Exception as e:
                 logger.error('Failed to run processor "%s": %s', processor.vendor, e, exc_info=True)
+                data.setdefault("_metrics", {})["flag.processing.error"] = True
 
     return rv

+ 8 - 3
src/sentry/lang/native/error.py

@@ -88,7 +88,7 @@ class SymbolicationFailed(Exception):
         return u"".join(rv)
 
 
-def write_error(e, data, errors=None):
+def write_error(e, data):
     # User fixable but fatal errors are reported as processing
     # issues. We skip this for minidumps, as reprocessing is not
     # possible without persisting minidumps.
@@ -106,8 +106,13 @@ def write_error(e, data, errors=None):
     # do not want to report some processing issues (eg:
     # optional difs)
     if e.is_user_fixable or e.is_sdk_failure:
-        if errors is None:
-            errors = data.setdefault("errors", [])
+        errors = data.setdefault("errors", [])
         errors.append(e.get_data())
     else:
         logger.debug("Failed to symbolicate with native backend", exc_info=True)
+
+    if not e.is_user_fixable:
+        data.setdefault("_metrics", {})["flag.processing.error"] = True
+
+    if e.is_fatal:
+        data.setdefault("_metrics", {})["flag.processing.fatal"] = True

+ 7 - 6
src/sentry/lang/native/processing.py

@@ -73,7 +73,7 @@ def _merge_frame(new_frame, symbolicated):
         frame_meta["symbolicator_status"] = symbolicated["status"]
 
 
-def _handle_image_status(status, image, sdk_info, handle_symbolication_failed):
+def _handle_image_status(status, image, sdk_info, data):
     if status in ("found", "unused"):
         return
     elif status == "missing":
@@ -104,10 +104,11 @@ def _handle_image_status(status, image, sdk_info, handle_symbolication_failed):
     error.image_path = image.get("code_file")
     error.image_name = image_name(image.get("code_file"))
     error.image_uuid = image.get("debug_id")
-    handle_symbolication_failed(error)
 
+    write_error(error, data)
 
-def _merge_image(raw_image, complete_image, sdk_info, handle_symbolication_failed):
+
+def _merge_image(raw_image, complete_image, sdk_info, data):
     statuses = set()
 
     # Set image data from symbolicator as symbolicator might know more
@@ -119,7 +120,7 @@ def _merge_image(raw_image, complete_image, sdk_info, handle_symbolication_faile
             raw_image[k] = v
 
     for status in set(statuses):
-        _handle_image_status(status, raw_image, sdk_info, handle_symbolication_failed)
+        _handle_image_status(status, raw_image, sdk_info, data)
 
 
 def _handle_response_status(event_data, response_json):
@@ -179,7 +180,7 @@ def _merge_full_response(data, response):
 
     for complete_image in response["modules"]:
         image = {}
-        _merge_image(image, complete_image, sdk_info, lambda e: write_error(e, data))
+        _merge_image(image, complete_image, sdk_info, data)
         images.append(image)
 
     # Extract the crash reason and infos
@@ -316,7 +317,7 @@ def process_payload(data):
     sdk_info = get_sdk_from_event(data)
 
     for raw_image, complete_image in zip(modules, response["modules"]):
-        _merge_image(raw_image, complete_image, sdk_info, lambda e: write_error(e, data))
+        _merge_image(raw_image, complete_image, sdk_info, data)
 
     assert len(stacktraces) == len(response["stacktraces"]), (stacktraces, response)
 

+ 6 - 0
src/sentry/stacktraces/processing.py

@@ -559,8 +559,14 @@ def process_stacktraces(data, make_processors=None, set_raw_stacktrace=True):
                 changed = True
             if errors:
                 data.setdefault("errors", []).extend(dedup_errors(errors))
+                data.setdefault("_metrics", {})["flag.processing.error"] = True
                 changed = True
 
+    except Exception:
+        logger.exception("stacktraces.processing.crash")
+        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
+        data.setdefault("_metrics", {})["flag.processing.error"] = True
+        changed = True
     finally:
         for processor in processors:
             processor.close()

+ 18 - 6
src/sentry/tasks/store.py

@@ -219,9 +219,7 @@ def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, dat
             span.set_data("symbolicaton_function", symbolication_function.__name__)
 
             with metrics.timer("tasks.store.symbolicate_event.symbolication"):
-                symbolicated_data = safe_execute(
-                    symbolication_function, data, _passthrough_errors=(RetrySymbolication,)
-                )
+                symbolicated_data = symbolication_function(data)
 
             span.set_data("symbolicated_data", bool(symbolicated_data))
             if symbolicated_data:
@@ -241,6 +239,9 @@ def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, dat
                 "symbolicate.failed.infinite_retry",
                 extra={"project_id": project_id, "event_id": event_id},
             )
+            data.setdefault("_metrics", {})["flag.processing.error"] = True
+            data.setdefault("_metrics", {})["flag.processing.fatal"] = True
+            has_changed = True
         else:
             # Requeue the task in the "sleep" queue
             retry_symbolicate_event.apply_async(
@@ -256,6 +257,11 @@ def _do_symbolicate_event(cache_key, start_time, event_id, symbolicate_task, dat
                 countdown=e.retry_after,
             )
             return
+    except Exception:
+        error_logger.exception("tasks.store.symbolicate_event.symbolication")
+        data.setdefault("_metrics", {})["flag.processing.error"] = True
+        data.setdefault("_metrics", {})["flag.processing.fatal"] = True
+        has_changed = True
 
     # We cannot persist canonical types in the cache, so we need to
     # downgrade this.
@@ -482,10 +488,16 @@ def _do_process_event(
                     plugin.get_event_preprocessors, data=data, _with_transaction=False
                 )
                 for processor in processors or ():
-                    result = safe_execute(processor, data)
-                    if result:
-                        data = result
+                    try:
+                        result = processor(data)
+                    except Exception:
+                        error_logger.exception("tasks.store.preprocessors.error")
+                        data.setdefault("_metrics", {})["flag.processing.error"] = True
                         has_changed = True
+                    else:
+                        if result:
+                            data = result
+                            has_changed = True
 
     assert data["project"] == project_id, "Project cannot be mutated by plugins"
 

+ 0 - 3
src/sentry/utils/safe.py

@@ -18,7 +18,6 @@ def safe_execute(func, *args, **kwargs):
     # side if we execute a query)
     _with_transaction = kwargs.pop("_with_transaction", True)
     expected_errors = kwargs.pop("expected_errors", None)
-    _passthrough_errors = kwargs.pop("_passthrough_errors", None)
     try:
         if _with_transaction:
             with transaction.atomic():
@@ -26,8 +25,6 @@ def safe_execute(func, *args, **kwargs):
         else:
             result = func(*args, **kwargs)
     except Exception as e:
-        if _passthrough_errors and isinstance(e, _passthrough_errors):
-            raise
         if hasattr(func, "im_class"):
             cls = func.im_class
         else:

+ 18 - 17
tests/sentry/lang/native/test_processing.py

@@ -13,9 +13,9 @@ from sentry.lang.native.processing import _merge_image
 
 
 def test_merge_symbolicator_image_empty():
-    errors = []
-    _merge_image({}, {}, None, errors.append)
-    assert not errors
+    data = {}
+    _merge_image({}, {}, None, data)
+    assert not data.get("errors")
 
 
 def test_merge_symbolicator_image_basic():
@@ -27,11 +27,12 @@ def test_merge_symbolicator_image_basic():
         "other2": "bar",
         "arch": "unknown",
     }
-    errors = []
 
-    _merge_image(raw_image, complete_image, sdk_info, errors.append)
+    data = {}
 
-    assert not errors
+    _merge_image(raw_image, complete_image, sdk_info, data)
+
+    assert not data.get("errors")
     assert raw_image == {
         "debug_status": "found",
         "unwind_status": "found",
@@ -50,11 +51,11 @@ def test_merge_symbolicator_image_basic_success():
         "other2": "bar",
         "arch": "foo",
     }
-    errors = []
+    data = {}
 
-    _merge_image(raw_image, complete_image, sdk_info, errors.append)
+    _merge_image(raw_image, complete_image, sdk_info, data)
 
-    assert not errors
+    assert not data.get("errors")
     assert raw_image == {
         "debug_status": "found",
         "unwind_status": "found",
@@ -69,11 +70,11 @@ def test_merge_symbolicator_image_remove_unknown_arch():
     raw_image = {"instruction_addr": 0xFEEBEE}
     sdk_info = {"sdk_name": "linux"}
     complete_image = {"debug_status": "found", "unwind_status": "found", "arch": "unknown"}
-    errors = []
+    data = {}
 
-    _merge_image(raw_image, complete_image, sdk_info, errors.append)
+    _merge_image(raw_image, complete_image, sdk_info, data)
 
-    assert not errors
+    assert not data.get("errors")
     assert raw_image == {
         "debug_status": "found",
         "unwind_status": "found",
@@ -100,14 +101,14 @@ def test_merge_symbolicator_image_errors(code_file, error):
         "other2": "bar",
         "arch": "unknown",
     }
-    errors = []
+    data = {}
 
-    _merge_image(raw_image, complete_image, sdk_info, errors.append)
+    _merge_image(raw_image, complete_image, sdk_info, data)
 
-    (e,) = errors
+    (e,) = data["errors"]
 
-    assert e.image_name == "foo"
-    assert e.type == error
+    assert e["image_path"].endswith("/foo")
+    assert e["type"] == error
 
     assert raw_image == {
         "debug_status": "found",