
feat(metrics_indexer): Add a gauge to track the number of strings looked up per batch (#35289)

We do not currently have a metric for the number of strings we look up per batch.
This metric would be useful for tuning the batch size in a world with many more tags per message.

This adds two gauges: one in the consumer (before resolving the hardcoded strings), and one in the postgres indexer, which counts only what we actually try to resolve from memcache/postgres.
Filippo Pacifici, 2 years ago
commit b8cb9a934d

+ 2 - 0
src/sentry/sentry_metrics/indexer/postgres_v2.py

@@ -65,7 +65,9 @@ class PGStringIndexerV2(StringIndexer):
         unmapped keys the key_results objects are merged and returned:
             e.g. return cache_key_results.merge(db_read_key_results)
         """
+
         cache_keys = KeyCollection(org_strings)
+        metrics.gauge("sentry_metrics.indexer.lookups_per_batch", value=cache_keys.size)
         cache_key_strs = cache_keys.as_strings()
         cache_results = indexer_cache.get_many(cache_key_strs)
 
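For illustration, a gauge reports a point-in-time value for the current batch rather than accumulating like a counter. Below is a minimal standalone sketch of an equivalent emission, assuming a statsd-style backend; in Sentry this actually goes through the sentry.utils.metrics wrapper, and the host, port, and sample value here are placeholders:

    from statsd import StatsClient

    # Placeholder statsd client; Sentry's metrics wrapper hides this detail.
    client = StatsClient(host="localhost", port=8125)

    # Number of keys this batch will try to resolve from memcache/postgres.
    batch_size = 128  # example value; in the diff this is cache_keys.size
    client.gauge("sentry_metrics.indexer.lookups_per_batch", batch_size)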

+ 4 - 0
src/sentry/sentry_metrics/multiprocess.py

@@ -436,6 +436,10 @@ def process_messages(
             org_strings[org_id].update(parsed_strings)
             strings.update(parsed_strings)
 
+    string_count = 0
+    for org_id in org_strings:
+        string_count += len(org_strings[org_id])
+    metrics.gauge("process_messages.lookups_per_batch", value=string_count)
     metrics.incr("process_messages.total_strings_indexer_lookup", amount=len(strings))
 
     with metrics.timer("metrics_consumer.bulk_record"):
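
The added loop totals the per-org string sets. A functionally equivalent standalone sketch (not part of the commit; the helper name is hypothetical):

    from typing import Mapping, Set

    def lookups_per_batch(org_strings: Mapping[int, Set[str]]) -> int:
        # Total strings across all orgs in the batch -- the value fed to
        # the "process_messages.lookups_per_batch" gauge above.
        return sum(len(strings) for strings in org_strings.values())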