@@ -6,7 +6,6 @@ import logging
 import math
 import pytz
 import time
-from collections import defaultdict
 from datetime import timedelta, datetime

 from django.utils import timezone
@@ -14,7 +13,7 @@ from django.utils import timezone
 from sentry import options
 from sentry.api.paginator import SequencePaginator, Paginator
 from sentry.event_manager import ALLOWED_FUTURE_DELTA
-from sentry.models import Release, Group, GroupEnvironment, GroupHash
+from sentry.models import Release, Group, GroupEnvironment
 from sentry.search.django import backend as ds
 from sentry.utils import snuba, metrics
 from sentry.utils.dates import to_timestamp
@@ -37,14 +36,14 @@ def snuba_str_to_datetime(d):


 def calculate_priority_cursor(data):
-    times_seen = sum(data['times_seen'])
-    last_seen = max(int(to_timestamp(snuba_str_to_datetime(d)) * 1000) for d in data['last_seen'])
+    times_seen = data['times_seen']
+    last_seen = int(to_timestamp(snuba_str_to_datetime(data['last_seen'])) * 1000)
     return ((math.log(times_seen) * 600) + last_seen)


-def _datetime_cursor_calculator(field, fn):
+def _datetime_cursor_calculator(field):
     def calculate(data):
-        datetime = fn(snuba_str_to_datetime(d) for d in data[field])
+        datetime = snuba_str_to_datetime(data[field])
         return int(to_timestamp(datetime) * 1000)

     return calculate
@@ -60,13 +59,13 @@ sort_strategies = {
         'priority', ['last_seen', 'times_seen'], calculate_priority_cursor,
     ),
     'date': (
-        'last_seen', [], _datetime_cursor_calculator('last_seen', max),
+        'last_seen', [], _datetime_cursor_calculator('last_seen'),
     ),
     'new': (
-        'first_seen', [], _datetime_cursor_calculator('first_seen', min),
+        'first_seen', [], _datetime_cursor_calculator('first_seen'),
     ),
     'freq': (
-        'times_seen', [], lambda data: sum(data['times_seen']),
+        'times_seen', [], lambda data: data['times_seen'],
     ),
 }

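# --- Illustration (not part of the patch) ---
# A rough sketch of the new per-group cursor math: Snuba now returns one
# scalar per group instead of a list of values per hash, so the old
# sum()/max()/min() aggregation steps disappear. Sample values are made up:
#
#     data = {'times_seen': 100, 'last_seen': '2018-06-01T12:00:00+00:00'}
#     last_seen_ms = int(to_timestamp(snuba_str_to_datetime(data['last_seen'])) * 1000)
#     score = (math.log(data['times_seen']) * 600) + last_seen_ms
#
# i.e. the 'priority' cursor is the last-seen timestamp in milliseconds,
# bumped up by roughly 600 * ln(times_seen) (about 2800 ms for times_seen=100).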
@@ -221,29 +220,25 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
             parameters,
         )

-        # maximum number of GroupHashes to send down to Snuba,
-        # if more GroupHash candidates are found, a "bare" Snuba
+        # maximum number of Group IDs to send down to Snuba,
+        # if more Group ID candidates are found, a "bare" Snuba
         # search is performed and the result groups are then
         # post-filtered via queries to the Sentry DB
         max_pre_snuba_candidates = options.get('snuba.search.max-pre-snuba-candidates')

         # pre-filter query
-        candidate_hashes = None
+        candidate_ids = None
         if max_pre_snuba_candidates and limit <= max_pre_snuba_candidates:
-            candidate_hashes = dict(
-                GroupHash.objects.filter(
-                    group__in=group_queryset
-                ).values_list(
-                    'hash', 'group_id'
-                )[:max_pre_snuba_candidates + 1]
+            candidate_ids = list(
+                group_queryset.values_list('id', flat=True)[:max_pre_snuba_candidates + 1]
             )
-            metrics.timing('snuba.search.num_candidates', len(candidate_hashes))
+            metrics.timing('snuba.search.num_candidates', len(candidate_ids))

-            if not candidate_hashes:
+            if not candidate_ids:
                 # no matches could possibly be found from this point on
                 metrics.incr('snuba.search.no_candidates')
                 return Paginator(Group.objects.none()).get_result()
-            elif len(candidate_hashes) > max_pre_snuba_candidates:
+            elif len(candidate_ids) > max_pre_snuba_candidates:
                 # If the pre-filter query didn't include anything to significantly
                 # filter down the number of results (from 'first_release', 'query',
                 # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
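# --- Illustration (not part of the patch) ---
# The pre-filter deliberately asks Postgres for one more ID than the cap so
# it can tell "exactly at the cap" apart from "over the cap" without a COUNT
# query. Sketch of the three outcomes (made-up shapes, abbreviated names):
#
#     ids = list(group_queryset.values_list('id', flat=True)[:max_pre_snuba_candidates + 1])
#     if not ids:
#         pass            # nothing can match; return an empty result immediately
#     elif len(ids) > max_pre_snuba_candidates:
#         ids = None      # too many to be a useful filter; do a bare Snuba
#                         # search and post-filter the groups in Postgres
#     # else: hand ids to Snuba as filter_keys['issue']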
@@ -254,7 +249,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
                 # this queryset to the results from Snuba, which we call
                 # post-filtering.
                 metrics.incr('snuba.search.too_many_candidates')
-                candidate_hashes = None
+                candidate_ids = None

         sort, extra_aggregations, score_fn = sort_strategies[sort_by]

@@ -277,7 +272,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
         # to answer the query (or hit the end of possible results). We do
         # this because a common case for search is to return 100 groups
         # sorted by `last_seen`, and we want to avoid returning all of
-        # a project's hashes and then post-sorting them all in Postgres
+        # a project's groups and then post-sorting them all in Postgres
         # when typically the first N results will do.
         while (time.time() - time_start) < max_time:
             num_chunks += 1
@@ -285,8 +280,8 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
             # grow the chunk size on each iteration to account for huge projects
             # and weird queries, up to a max size
             chunk_limit = min(int(chunk_limit * chunk_growth), max_chunk_size)
-            # but if we have candidate_hashes always query for at least that many items
-            chunk_limit = max(chunk_limit, len(candidate_hashes) if candidate_hashes else 0)
+            # but if we have candidate_ids always query for at least that many items
+            chunk_limit = max(chunk_limit, len(candidate_ids) if candidate_ids else 0)

             # {group_id: group_score, ...}
             snuba_groups, more_results = snuba_search(
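# --- Illustration (not part of the patch) ---
# The chunked scan grows its page size geometrically up to a ceiling. With
# hypothetical option values chunk_growth = 1.5 and max_chunk_size = 10000,
# successive chunk_limit values would be roughly:
#
#     1000 -> 1500 -> 2250 -> 3375 -> ... -> 10000 (capped)
#
# and when candidate_ids were pre-fetched, each chunk is at least
# len(candidate_ids), so the whole candidate set fits into a single query.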
@@ -298,7 +293,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
                 sort=sort,
                 extra_aggregations=extra_aggregations,
                 score_fn=score_fn,
-                candidate_hashes=candidate_hashes,
+                candidate_ids=candidate_ids,
                 limit=chunk_limit,
                 offset=offset,
                 **parameters
@@ -309,7 +304,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
             if not snuba_groups:
                 break

-            if candidate_hashes:
+            if candidate_ids:
                 # pre-filtered candidates were passed down to Snuba,
                 # so we're finished with filtering and these are the
                 # only results
@@ -350,7 +345,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
             # something other than partially leak internal paginator logic up to here.
             # Or make separate Paginator implementation just for Snuba search?
             if cursor is not None \
-                    and not candidate_hashes \
+                    and not candidate_ids \
                     and more_results:
                 if cursor.is_prev and min_score < cursor.value:
                     continue
@@ -367,7 +362,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):
             # * we started with Postgres candidates and so only do one Snuba query max
             # * the paginator is returning enough results to satisfy the query (>= the limit)
             # * there are no more groups in Snuba to post-filter
-            if candidate_hashes \
+            if candidate_ids \
                     or len(paginator_results.results) >= limit \
                     or not more_results:
                 break
@@ -381,7 +376,7 @@ class SnubaSearchBackend(ds.DjangoSearchBackend):


 def snuba_search(project_id, environment_id, tags, start, end,
-                 sort, extra_aggregations, score_fn, candidate_hashes,
+                 sort, extra_aggregations, score_fn, candidate_ids,
                  limit, offset, **parameters):
     """
     This function doesn't strictly benefit from or require being pulled out of the main
@@ -402,8 +397,8 @@ def snuba_search(project_id, environment_id, tags, start, end,
     if environment_id is not None:
         filters['environment'] = [environment_id]

-    if candidate_hashes is not None:
-        filters['primary_hash'] = candidate_hashes.keys()
+    if candidate_ids is not None:
+        filters['issue'] = candidate_ids

     having = SnubaConditionBuilder({
         'age_from': ScalarCondition('first_seen', '>'),
@@ -434,17 +429,17 @@ def snuba_search(project_id, environment_id, tags, start, end,
     for alias in required_aggregations:
         aggregations.append(aggregation_defs[alias] + [alias])

-    # {hash -> {<agg_alias> -> <agg_value>,
-    #           <agg_alias> -> <agg_value>,
-    #           ...},
+    # {group_id -> {<agg_alias> -> <agg_value>,
+    #               <agg_alias> -> <agg_value>,
+    #               ...},
     #  ...}
     # _OR_ if there's only one <agg_alias> in use
-    # {hash -> <agg_value>,
+    # {group_id -> <agg_value>,
     #  ...}
     snuba_results = snuba.query(
         start=start,
         end=end,
-        groupby=['primary_hash'],
+        groupby=['issue'],
         conditions=conditions,
         having=having,
         filter_keys=filters,
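# --- Illustration (not part of the patch) ---
# With the groupby switched from 'primary_hash' to 'issue', the query result
# is keyed by group ID directly. For a single-aggregate query the shape is
# roughly (values made up):
#
#     snuba.query(groupby=['issue'], filter_keys={'issue': [10, 11], ...}, ...)
#     # => {10: '2018-06-01T12:00:00+00:00', 11: '2018-06-02T08:30:00+00:00'}
#
# which is what allows the hash -> group_id translation (and the GroupHash
# lookup) below to be deleted outright.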
@@ -454,57 +449,32 @@ def snuba_search(project_id, environment_id, tags, start, end,
         limit=limit + 1,
         offset=offset,
     )
-    metrics.timing('snuba.search.num_result_hashes', len(snuba_results.keys()))
+    metrics.timing('snuba.search.num_result_groups', len(snuba_results.keys()))
     more_results = len(snuba_results) == limit + 1

-    # {hash -> group_id, ...}
-    if candidate_hashes is not None:
-        # any hash coming back had to come from our candidate set
-        hash_to_group = candidate_hashes
-    else:
-        hash_to_group = dict(
-            GroupHash.objects.filter(
-                project_id=project_id,
-                hash__in=snuba_results.keys()
-            ).values_list(
-                'hash', 'group_id'
-            )
-        )
-
-    # {group_id -> {field1: [...all values from field1 for all hashes...],
-    #               field2: [...all values from field2 for all hashes...]
+    # {group_id -> {field1: value1,
+    #               field2: value2,
     #               ...}
     #  ...}
     group_data = {}
-    for hash, obj in snuba_results.items():
-        if hash in hash_to_group:
-            group_id = hash_to_group[hash]
-
-            if group_id not in group_data:
-                group_data[group_id] = defaultdict(list)
-
-            dest = group_data[group_id]
-
-            # NOTE: The Snuba utility code is trying to be helpful by collapsing
-            # results with only one aggregate down to the single value. It's a
-            # bit of a hack that we then immediately undo that work here, but
-            # many other callers get value out of that functionality. If we see
-            # this pattern again we should either add an option to opt-out of
-            # the 'help' here or remove it from the Snuba code altogether.
-            if len(required_aggregations) == 1:
-                alias = list(required_aggregations)[0]
-                dest[alias].append(obj)
-            else:
-                for k, v in obj.items():
-                    dest[k].append(v)
+    for group_id, obj in snuba_results.items():
+        if group_id not in group_data:
+            group_data[group_id] = {}
+
+        dest = group_data[group_id]
+
+        # NOTE: The Snuba utility code is trying to be helpful by collapsing
+        # results with only one aggregate down to the single value. It's a
+        # bit of a hack that we then immediately undo that work here, but
+        # many other callers get value out of that functionality. If we see
+        # this pattern again we should either add an option to opt-out of
+        # the 'help' here or remove it from the Snuba code altogether.
+        if len(required_aggregations) == 1:
+            alias = list(required_aggregations)[0]
+            dest[alias] = obj
         else:
-            logger.warning(
-                'search.hash_not_found',
-                extra={
-                    'project_id': project_id,
-                    'hash': hash,
-                },
-            )
+            for k, v in obj.items():
+                dest[k] = v

     return (
         list(
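# --- Illustration (not part of the patch) ---
# The unpacking above handles both shapes the Snuba helper can return
# (made-up values):
#
#     # one aggregate requested ('last_seen' only) -> collapsed to a scalar
#     snuba_results = {10: '2018-06-01T12:00:00+00:00'}
#
#     # several aggregates requested -> a dict of alias -> value per group
#     snuba_results = {10: {'last_seen': '2018-06-01T12:00:00+00:00', 'times_seen': 7}}
#
# Either way group_data ends up as {group_id: {alias: value, ...}}, which is
# the single-value-per-field shape the sort strategies' score functions now
# expect.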