Browse Source

feat(perf): Add detection for render-blocking asset performance issues (#37826)

* feat(perf): Add detection for render-blocking asset performance issues

Tag transactions that have slow asset load spans before a slow FCP as having
render-blocking assets. The thresholds are configurable, but currently we're
looking for transactions with an FCP between 2s and 10s, where an asset load
takes up at least 25% of that time.

The thresholds will be tuned before we start generating
actual Performance Issues from this data - tagging the transactions will let us
see what we're detecting it and validate/tune it before it becomes visible to
users.

This detector's use of event properties is a little awkward given the current
`PerformanceDetector` interface, but I thought it would be better to get the
rest of our planned detectors in before we refactor too much.

Fixes PERF-1677
Matt Quinn 2 years ago
parent
commit
cdca9910c0

+ 78 - 6
src/sentry/utils/performance_issues/performance_detection.py

@@ -24,6 +24,7 @@ class DetectorType(Enum):
     DUPLICATE_SPANS = "duplicates"
     SEQUENTIAL_SLOW_SPANS = "sequential"
     LONG_TASK_SPANS = "long_task"
+    RENDER_BLOCKING_ASSET_SPAN = "render_blocking_assets"
 
 
 # Facade in front of performance detection to limit impact of detection on our events ingestion
@@ -85,6 +86,12 @@ def get_default_detection_settings():
                 "allowed_span_ops": ["ui.long-task", "ui.sentry.long-task"],
             }
         ],
+        DetectorType.RENDER_BLOCKING_ASSET_SPAN: {
+            "fcp_minimum_threshold": 2000.0,  # ms
+            "fcp_maximum_threshold": 10000.0,  # ms
+            "fcp_ratio_threshold": 0.25,
+            "allowed_span_ops": ["resource.link", "resource.script"],
+        },
     }
 
 
@@ -94,11 +101,14 @@ def _detect_performance_issue(data: Event, sdk_span: Any):
 
     detection_settings = get_default_detection_settings()
     detectors = {
-        DetectorType.DUPLICATE_SPANS: DuplicateSpanDetector(detection_settings),
-        DetectorType.DUPLICATE_SPANS_HASH: DuplicateSpanHashDetector(detection_settings),
-        DetectorType.SLOW_SPAN: SlowSpanDetector(detection_settings),
-        DetectorType.SEQUENTIAL_SLOW_SPANS: SequentialSlowSpanDetector(detection_settings),
-        DetectorType.LONG_TASK_SPANS: LongTaskSpanDetector(detection_settings),
+        DetectorType.DUPLICATE_SPANS: DuplicateSpanDetector(detection_settings, data),
+        DetectorType.DUPLICATE_SPANS_HASH: DuplicateSpanHashDetector(detection_settings, data),
+        DetectorType.SLOW_SPAN: SlowSpanDetector(detection_settings, data),
+        DetectorType.SEQUENTIAL_SLOW_SPANS: SequentialSlowSpanDetector(detection_settings, data),
+        DetectorType.LONG_TASK_SPANS: LongTaskSpanDetector(detection_settings, data),
+        DetectorType.RENDER_BLOCKING_ASSET_SPAN: RenderBlockingAssetSpanDetector(
+            detection_settings, data
+        ),
     }
 
     for span in spans:
@@ -143,8 +153,9 @@ class PerformanceDetector(ABC):
     Classes of this type have their visit functions called as the event is walked once and will store a performance issue if one is detected.
     """
 
-    def __init__(self, settings: Dict[str, Any]):
+    def __init__(self, settings: Dict[str, Any], event: Event):
         self.settings = settings[self.settings_key]
+        self._event = event
         self.init()
 
     @abstractmethod
@@ -170,6 +181,9 @@ class PerformanceDetector(ABC):
                 return op, span_id, op_prefix, span_duration, setting
         return None
 
+    def event(self) -> Event:
+        return self._event
+
     @property
     @abstractmethod
     def settings_key(self) -> DetectorType:
@@ -411,6 +425,64 @@ class LongTaskSpanDetector(PerformanceDetector):
             )
 
 
+class RenderBlockingAssetSpanDetector(PerformanceDetector):
+    __slots__ = ("stored_issues", "fcp", "transaction_start")
+
+    settings_key = DetectorType.RENDER_BLOCKING_ASSET_SPAN
+
+    def init(self):
+        self.stored_issues = {}
+        self.transaction_start = timedelta(seconds=self.event().get("transaction_start", 0))
+        self.fcp = None
+
+        # Only concern ourselves with transactions where the FCP is within the
+        # range we care about.
+        fcp_hash = self.event().get("measurements", {}).get("fcp", {})
+        if "value" in fcp_hash and ("unit" not in fcp_hash or fcp_hash["unit"] == "millisecond"):
+            fcp = timedelta(milliseconds=fcp_hash.get("value"))
+            fcp_minimum_threshold = timedelta(
+                milliseconds=self.settings.get("fcp_minimum_threshold")
+            )
+            fcp_maximum_threshold = timedelta(
+                milliseconds=self.settings.get("fcp_maximum_threshold")
+            )
+            if fcp >= fcp_minimum_threshold and fcp < fcp_maximum_threshold:
+                self.fcp = fcp
+
+    def visit_span(self, span: Span):
+        if not self.fcp:
+            return
+
+        op = span.get("op", None)
+        allowed_span_ops = self.settings.get("allowed_span_ops")
+        if op not in allowed_span_ops:
+            return False
+
+        if self._is_blocking_render(span):
+            span_id = span.get("span_id", None)
+            fingerprint = fingerprint_span(span)
+            if span_id and fingerprint:
+                self.stored_issues[fingerprint] = PerformanceSpanIssue(span_id, op, [span_id])
+
+        # If we visit a span that starts after FCP, then we know we've already
+        # seen all possible render-blocking resource spans.
+        span_start_timestamp = timedelta(seconds=span.get("start_timestamp", 0))
+        fcp_timestamp = self.transaction_start + self.fcp
+        if span_start_timestamp >= fcp_timestamp:
+            # Early return for all future span visits.
+            self.fcp = None
+
+    def _is_blocking_render(self, span):
+        span_end_timestamp = timedelta(seconds=span.get("timestamp", 0))
+        fcp_timestamp = self.transaction_start + self.fcp
+        if span_end_timestamp >= fcp_timestamp:
+            return False
+
+        span_duration = get_span_duration(span)
+        fcp_ratio_threshold = self.settings.get("fcp_ratio_threshold")
+        return span_duration / self.fcp > fcp_ratio_threshold
+
+
 # Reports metrics and creates spans for detection
 def report_metrics_for_detectors(
     event_id: Optional[str], detectors: Dict[str, PerformanceDetector], sdk_span: Any

+ 68 - 0
tests/sentry/utils/performance_issues/test_performance_detection.py

@@ -283,3 +283,71 @@ class PerformanceDetectionTest(unittest.TestCase):
                 ),
             ]
         )
+
+    def test_calls_detect_render_blocking_asset(self):
+        render_blocking_asset_event = {
+            "event_id": "a" * 16,
+            "measurements": {
+                "fcp": {
+                    "value": 2500.0,
+                    "unit": "millisecond",
+                }
+            },
+            "spans": [
+                create_span("resource.script", duration=1000.0),
+            ],
+        }
+        non_render_blocking_asset_event = {
+            "event_id": "a" * 16,
+            "measurements": {
+                "fcp": {
+                    "value": 2500.0,
+                    "unit": "millisecond",
+                }
+            },
+            "spans": [
+                modify_span_start(
+                    create_span("resource.script", duration=1000.0),
+                    2000.0,
+                ),
+            ],
+        }
+        short_render_blocking_asset_event = {
+            "event_id": "a" * 16,
+            "measurements": {
+                "fcp": {
+                    "value": 2500.0,
+                    "unit": "millisecond",
+                }
+            },
+            "spans": [
+                create_span("resource.script", duration=200.0),
+            ],
+        }
+
+        sdk_span_mock = Mock()
+
+        _detect_performance_issue(non_render_blocking_asset_event, sdk_span_mock)
+        assert sdk_span_mock.containing_transaction.set_tag.call_count == 0
+
+        _detect_performance_issue(short_render_blocking_asset_event, sdk_span_mock)
+        assert sdk_span_mock.containing_transaction.set_tag.call_count == 0
+
+        _detect_performance_issue(render_blocking_asset_event, sdk_span_mock)
+        assert sdk_span_mock.containing_transaction.set_tag.call_count == 3
+        sdk_span_mock.containing_transaction.set_tag.assert_has_calls(
+            [
+                call(
+                    "_pi_all_issue_count",
+                    1,
+                ),
+                call(
+                    "_pi_transaction",
+                    "aaaaaaaaaaaaaaaa",
+                ),
+                call(
+                    "_pi_render_blocking_assets",
+                    "bbbbbbbbbbbbbbbb",
+                ),
+            ]
+        )