@@ -1,5 +1,6 @@
 import logging
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import asdict
 from datetime import UTC, datetime, timedelta
 from typing import Any, TypedDict
@@ -11,7 +12,7 @@ from redis.client import StrictRedis
 from rediscluster import RedisCluster
 from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request

-from sentry import features, nodestore
+from sentry import features, nodestore, options
 from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
 from sentry.eventstore.models import Event
 from sentry.grouping.grouping_info import get_grouping_info
@@ -377,26 +378,12 @@ def update_groups(project, seer_response, group_id_batch_filtered, group_hashes_
     )


-@metrics.wraps(f"{BACKFILL_NAME}.lookup_event_bulk", sample_rate=1.0)
-@sentry_sdk.tracing.trace
-def lookup_group_data_stacktrace_bulk(
-    project: Project, rows: list[GroupEventRow]
-) -> dict[int, Event]:
-    project_id = project.id
-    node_id_to_group_data = {
-        Event.generate_node_id(project_id, event_id=row["event_id"]): (
-            row["event_id"],
-            row["group_id"],
-        )
-        for row in rows
-    }
-
-    groups_to_event = {}
-
+def _make_nodestore_call(project, node_keys):
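+    # Fetch event payloads from nodestore for the given node keys, retrying on failure.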
     try:
         bulk_data = _retry_operation(
             nodestore.backend.get_multi,
-            list(node_id_to_group_data.keys()),
+            node_keys,
             retries=3,
             delay=2,
         )
@@ -404,7 +391,7 @@ def lookup_group_data_stacktrace_bulk(
         extra = {
             "organization_id": project.organization.id,
             "project_id": project.id,
-            "group_data": json.dumps(rows),
+            "node_keys": json.dumps(node_keys),
             "error": e.message,
         }
         logger.exception(
@@ -413,6 +399,46 @@ def lookup_group_data_stacktrace_bulk(
|
|
|
)
|
|
|
raise
|
|
|
|
|
|
+ return bulk_data
|
|
|
+
|
|
|
+
|
|
|
+def make_nodestore_call_multithreaded(project, node_keys):
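+    # Parallelize nodestore reads by splitting the keys across a small thread pool.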
+    def process_chunk(chunk):
+        return _make_nodestore_call(project, chunk)
+
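+    # Each chunk becomes one nodestore get_multi call, executed on a worker thread;
+    # results are merged as the futures complete.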
+    chunk_size = 5
+    chunks = [node_keys[i : i + chunk_size] for i in range(0, len(node_keys), chunk_size)]
+
+    bulk_data = {}
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        future_to_chunk = {executor.submit(process_chunk, chunk): chunk for chunk in chunks}
+        for future in as_completed(future_to_chunk):
+            bulk_data.update(future.result())
+
+    return bulk_data
+
+
+@metrics.wraps(f"{BACKFILL_NAME}.lookup_event_bulk", sample_rate=1.0)
+@sentry_sdk.tracing.trace
+def lookup_group_data_stacktrace_bulk(
+    project: Project, rows: list[GroupEventRow]
+) -> dict[int, Event]:
+    project_id = project.id
+    node_id_to_group_data = {
+        Event.generate_node_id(project_id, event_id=row["event_id"]): (
+            row["event_id"],
+            row["group_id"],
+        )
+        for row in rows
+    }
+
+    groups_to_event = {}
+
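+    # The threaded path is gated behind an option so it can be toggled at runtime.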
+    if options.get("similarity.backfill_nodestore_use_multithread"):
+        bulk_data = make_nodestore_call_multithreaded(project, list(node_id_to_group_data.keys()))
+    else:
+        bulk_data = _make_nodestore_call(project, list(node_id_to_group_data.keys()))
+
     for node_id, data in bulk_data.items():
         if node_id in node_id_to_group_data:
             event_id, group_id = (