Browse Source

feat(perf-issues): Add new span hashing strategy (#40673)

The new strategy config is a clone of the previous strategy for
calibrating metrics. Once we see that the metrics for the old and new
configs are the same, we'll roll out the actual strategy changes so that
metric changes can be observed.

For more context, the last time this was done:
* https://github.com/getsentry/sentry/pull/39636
* https://github.com/getsentry/sentry/pull/39732

Kicks off PERF-1763
Matt Quinn 2 years ago
parent
commit
3934191716

+ 18 - 0
src/sentry/event_manager.py

@@ -97,6 +97,7 @@ from sentry.ratelimits.sliding_windows import Quota, RedisSlidingWindowRateLimit
 from sentry.reprocessing2 import is_reprocessed_event, save_unprocessed_event
 from sentry.shared_integrations.exceptions import ApiError
 from sentry.signals import first_event_received, first_transaction_received, issue_unresolved
+from sentry.spans.grouping.strategy.config import INCOMING_DEFAULT_CONFIG_ID
 from sentry.tasks.commits import fetch_commits
 from sentry.tasks.integrations import kick_off_status_syncs
 from sentry.tasks.process_buffer import buffer_incr
@@ -2089,6 +2090,23 @@ def _calculate_span_grouping(jobs, projects):
                 amount=len(unique_default_hashes),
                 tags={"platform": job["platform"] or "unknown"},
             )
+
+            # Try the new hashing config that more aggresively parametrizes DB
+            # spans, and record the difference with the default hashing config.
+            with metrics.timer("event_manager.save.get_span_groupings.incoming"):
+                experimental_groupings = event.get_span_groupings(
+                    {"id": INCOMING_DEFAULT_CONFIG_ID}
+                )
+
+            unique_incoming_hashes = set(experimental_groupings.results.values())
+            metrics.incr(
+                "save_event.transaction.span_group_count.incoming",
+                amount=len(unique_incoming_hashes),
+            )
+            metrics.incr(
+                "save_event.transaction.span_group_count.difference",
+                amount=len(unique_default_hashes ^ unique_incoming_hashes),
+            )
         except Exception:
             sentry_sdk.capture_exception()
 

+ 12 - 0
src/sentry/spans/grouping/strategy/config.py

@@ -41,6 +41,7 @@ def register_configuration(config_id: str, strategies: Sequence[CallableStrategy
 
 
 DEFAULT_CONFIG_ID = "default:2022-10-04"
+INCOMING_DEFAULT_CONFIG_ID = "default:2022-10-27"
 
 register_configuration(
     "default:2021-08-25",
@@ -59,3 +60,14 @@ register_configuration(
         remove_redis_command_arguments_strategy,
     ],
 )
+
+# Currently just a duplicate of the previous config to calibrate metrics before
+# making strategy changes.
+register_configuration(
+    "default:2022-10-27",
+    strategies=[
+        loose_normalized_db_span_in_condition_strategy,
+        remove_http_client_query_string_strategy,
+        remove_redis_command_arguments_strategy,
+    ],
+)

+ 49 - 0
tests/sentry/spans/grouping/test_strategy.py

@@ -450,3 +450,52 @@ def test_default_2022_10_04_strategy(spans: List[Span], expected: Mapping[str, L
         key: hash_values(values)
         for key, values in {**expected, "a" * 16: ["transaction name"]}.items()
     }
+
+
+# Currently just a duplicate of the 2022_10_04 strategy tests until actual
+# strategy changes are made.
+@pytest.mark.parametrize(
+    "spans,expected",
+    [
+        ([], {}),
+        (
+            [
+                SpanBuilder()
+                .with_span_id("b" * 16)
+                .with_op("db.sql.query")
+                .with_description("SELECT * FROM table WHERE id IN (1, 2, 3)")
+                .build(),
+                SpanBuilder()
+                .with_span_id("c" * 16)
+                .with_op("db.sql.query")
+                .with_description("SELECT * FROM table WHERE id IN (4, 5, 6)")
+                .build(),
+                SpanBuilder()
+                .with_span_id("d" * 16)
+                .with_op("db.sql.query")
+                .with_description("SELECT * FROM table WHERE id IN (7, 8, 9)")
+                .build(),
+            ],
+            {
+                "b" * 16: ["SELECT * FROM table WHERE id IN (%s)"],
+                "c" * 16: ["SELECT * FROM table WHERE id IN (%s)"],
+                "d" * 16: ["SELECT * FROM table WHERE id IN (%s)"],
+            },
+        ),
+    ],
+)
+def test_default_2022_10_27_strategy(spans: List[Span], expected: Mapping[str, List[str]]) -> None:
+    event = {
+        "transaction": "transaction name",
+        "contexts": {
+            "trace": {
+                "span_id": "a" * 16,
+            },
+        },
+        "spans": spans,
+    }
+    configuration = CONFIGURATIONS["default:2022-10-27"]
+    assert configuration.execute_strategy(event).results == {
+        key: hash_values(values)
+        for key, values in {**expected, "a" * 16: ["transaction name"]}.items()
+    }