Browse Source

ref(sessions): Remove deprecated get_filter usage [TET-226 TET-354] (#38438)

Refactors the deprecated get_filter usage,
replacing it with SessionsV2QueryBuilder and
TimeseriesSessionsV2QueryBuilder, both of which
inherit from QueryBuilder.

This was previously reverted because it dropped
support for release semver version filters, and
because we required better error handling for
invalid search queries.
This PR adds those missing pieces.

Reverts getsentry/sentry#38431
Ahmed Etefy 2 years ago
parent
commit
c977357b73

+ 2 - 1
src/sentry/api/endpoints/organization_sessions.py

@@ -11,6 +11,7 @@ from sentry import release_health
 from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase
 from sentry.api.paginator import GenericOffsetPaginator
 from sentry.api.utils import get_date_range_from_params
+from sentry.exceptions import InvalidSearchQuery
 from sentry.models import Organization
 from sentry.snuba.sessions_v2 import SNUBA_LIMIT, InvalidField, InvalidParams, QueryDefinition
 from sentry.utils.cursors import Cursor, CursorResult
@@ -80,7 +81,7 @@ class OrganizationSessionsEndpoint(OrganizationEventsEndpointBase):
             # TODO: this context manager should be decoupled from `OrganizationEventsEndpointBase`?
             with super().handle_query_errors():
                 yield
-        except (InvalidField, InvalidParams, NoProjects) as error:
+        except (InvalidField, InvalidParams, NoProjects, InvalidSearchQuery) as error:
             raise ParseError(detail=str(error))
 
 

+ 1 - 1
src/sentry/release_health/duplex.py

@@ -973,7 +973,7 @@ class DuplexReleaseHealthBackend(ReleaseHealthBackend):
         relative_hours = math.ceil((query.end - now).total_seconds() / 3600)
         sentry_tags = {"run_sessions_query.rel_end": f"{relative_hours}h"}
 
-        project_ids = query.filter_keys.get("project_id")
+        project_ids = query.params.get("project_id")
         if project_ids and len(project_ids) == 1:
             project_id = project_ids[0]
             sentry_tags["run_sessions_query.project_id"] = str(project_id)

+ 3 - 15
src/sentry/release_health/metrics_sessions_v2.py

@@ -40,7 +40,6 @@ from snuba_sdk import (
     Op,
 )
 from snuba_sdk.conditions import ConditionGroup
-from snuba_sdk.legacy import json_to_snql
 
 from sentry.api.utils import InvalidParams as UtilsInvalidParams
 from sentry.models import Release
@@ -54,7 +53,6 @@ from sentry.release_health.base import (
     SessionsQueryValue,
 )
 from sentry.sentry_metrics.configuration import UseCaseKey
-from sentry.snuba.dataset import EntityKey
 from sentry.snuba.metrics.datasource import get_series
 from sentry.snuba.metrics.naming_layer.public import SessionMetricKey
 from sentry.snuba.metrics.query import MetricField, MetricsQuery, OrderBy
@@ -415,7 +413,8 @@ def run_sessions_query(
     if not intervals:
         return _empty_result(query)
 
-    conditions = _get_filter_conditions(query.conditions)
+    conditions = query.get_filter_conditions()
+
     where, status_filter = _extract_status_filter_from_conditions(conditions)
     if status_filter == frozenset():
         # There was a condition that cannot be met, such as 'session:status:foo'
@@ -433,10 +432,7 @@ def run_sessions_query(
     if not fields:
         return _empty_result(query)
 
-    filter_keys = query.filter_keys.copy()
-    project_ids = filter_keys.pop("project_id")
-    assert not filter_keys
-
+    project_ids = query.params["project_id"]
     limit = Limit(query.limit) if query.limit else None
 
     ordered_preflight_filters: Dict[GroupByFieldName, Sequence[str]] = {}
@@ -702,14 +698,6 @@ def _empty_result(query: QueryDefinition) -> SessionsQueryResult:
     }
 
 
-def _get_filter_conditions(conditions: Any) -> ConditionGroup:
-    """Translate given conditions to snql"""
-    dummy_entity = EntityKey.MetricsSets.value
-    return json_to_snql(
-        {"selected_columns": ["value"], "conditions": conditions}, entity=dummy_entity
-    ).query.where
-
-
 def _extract_status_filter_from_conditions(
     conditions: ConditionGroup,
 ) -> Tuple[ConditionGroup, StatusFilter]:

+ 81 - 2
src/sentry/search/events/builder.py

@@ -1,6 +1,19 @@
 from collections import defaultdict
 from datetime import datetime, timedelta
-from typing import Any, Callable, Dict, List, Mapping, Match, Optional, Set, Tuple, Union, cast
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Match,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+    cast,
+)
 
 import sentry_sdk
 from django.utils import timezone
@@ -1165,7 +1178,7 @@ class QueryBuilder:
         if (
             search_filter.operator in ("!=", "NOT IN")
             and not search_filter.key.is_tag
-            and name != "event.type"
+            and name not in self.config.non_nullable_keys
         ):
             # Handle null columns on inequality comparisons. Any comparison
             # between a value and a null will result to null, so we need to
@@ -1645,12 +1658,78 @@ class HistogramQueryBuilder(QueryBuilder):
 
 
 class SessionsQueryBuilder(QueryBuilder):
+    # ToDo(ahmed): Rename this to AlertsSessionsQueryBuilder as it is exclusively used for crash rate alerts
     def resolve_params(self) -> List[WhereType]:
         conditions = super().resolve_params()
         conditions.append(Condition(self.column("org_id"), Op.EQ, self.organization_id))
         return conditions
 
 
+class SessionsV2QueryBuilder(QueryBuilder):
+    filter_allowlist_fields = {"project", "project_id", "environment", "release"}
+
+    def __init__(
+        self,
+        *args: Any,
+        granularity: Optional[int] = None,
+        extra_filter_allowlist_fields: Optional[Sequence[str]] = None,
+        **kwargs: Any,
+    ):
+        self._extra_filter_allowlist_fields = extra_filter_allowlist_fields or []
+        self.granularity = Granularity(granularity) if granularity is not None else None
+        super().__init__(*args, **kwargs)
+
+    def resolve_params(self) -> List[WhereType]:
+        conditions = super().resolve_params()
+        conditions.append(Condition(self.column("org_id"), Op.EQ, self.organization_id))
+        return conditions
+
+    def resolve_groupby(self, groupby_columns: Optional[List[str]] = None) -> List[SelectType]:
+        """
+        The default QueryBuilder `resolve_groupby` function needs to be overridden here because, it only adds the
+        columns in the groupBy clause to the query if the query has `aggregates` present in it. For this specific case
+        of the `sessions` dataset, the session fields are aggregates but these aggregate definitions are hidden away in
+        snuba so if we rely on the default QueryBuilder `resolve_groupby` method, then it won't add the requested
+        groupBy columns as it does not consider these fields as aggregates, and so we end up with clickhouse error that
+        the column is not under an aggregate function or in the `groupBy` basically.
+        """
+        if groupby_columns is None:
+            return []
+        return list({self.resolve_column(column) for column in groupby_columns})
+
+    def _default_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        name = search_filter.key.name
+        if name in self.filter_allowlist_fields or name in self._extra_filter_allowlist_fields:
+            return super()._default_filter_converter(search_filter)
+        raise InvalidSearchQuery(f"Invalid search filter: {name}")
+
+
+class TimeseriesSessionsV2QueryBuilder(SessionsV2QueryBuilder):
+    time_column = "bucketed_started"
+
+    def get_snql_query(self) -> Request:
+        self.validate_having_clause()
+
+        return Request(
+            dataset=self.dataset.value,
+            app_id="default",
+            query=Query(
+                match=Entity(self.dataset.value, sample=self.sample_rate),
+                select=[Column(self.time_column)] + self.columns,
+                array_join=self.array_join,
+                where=self.where,
+                having=self.having,
+                groupby=[Column(self.time_column)] + self.groupby,
+                orderby=self.orderby,
+                limit=self.limit,
+                offset=self.offset,
+                granularity=self.granularity,
+                limitby=self.limitby,
+            ),
+            flags=Flags(turbo=self.turbo),
+        )
+
+
 class MetricsQueryBuilder(QueryBuilder):
     def __init__(
         self,

+ 1 - 0
src/sentry/search/events/datasets/base.py

@@ -9,6 +9,7 @@ from sentry.search.events.types import SelectType, WhereType
 
 class DatasetConfig(abc.ABC):
     custom_threshold_columns = {}
+    non_nullable_keys = set()
     missing_function_error = InvalidSearchQuery
 
     @property

+ 7 - 177
src/sentry/search/events/datasets/discover.py

@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Callable, List, Mapping, Optional, Union
+from typing import Callable, Mapping, Optional, Union
 
 import sentry_sdk
 from django.utils.functional import cached_property
@@ -11,7 +11,6 @@ from snuba_sdk.function import Function, Identifier, Lambda
 
 from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
 from sentry.exceptions import InvalidSearchQuery
-from sentry.models import Environment, Release, SemverFilter
 from sentry.models.group import Group
 from sentry.models.transaction_threshold import (
     TRANSACTION_METRICS,
@@ -29,15 +28,12 @@ from sentry.search.events.constants import (
     ISSUE_ALIAS,
     ISSUE_ID_ALIAS,
     MAX_QUERYABLE_TRANSACTION_THRESHOLDS,
-    MAX_SEARCH_RELEASES,
     MEASUREMENTS_FRAMES_FROZEN_RATE,
     MEASUREMENTS_FRAMES_SLOW_RATE,
     MEASUREMENTS_STALL_PERCENTAGE,
     MISERY_ALPHA,
     MISERY_BETA,
     NON_FAILURE_STATUS,
-    OPERATOR_NEGATION_MAP,
-    OPERATOR_TO_DJANGO,
     PROJECT_ALIAS,
     PROJECT_NAME_ALIAS,
     PROJECT_THRESHOLD_CONFIG_ALIAS,
@@ -47,7 +43,6 @@ from sentry.search.events.constants import (
     RELEASE_STAGE_ALIAS,
     SEMVER_ALIAS,
     SEMVER_BUILD_ALIAS,
-    SEMVER_EMPTY_RELEASE,
     SEMVER_PACKAGE_ALIAS,
     TEAM_KEY_TRANSACTION_ALIAS,
     TIMESTAMP_TO_DAY_ALIAS,
@@ -60,6 +55,9 @@ from sentry.search.events.constants import (
 )
 from sentry.search.events.datasets import field_aliases, filter_aliases
 from sentry.search.events.datasets.base import DatasetConfig
+from sentry.search.events.datasets.semver_and_stage_aliases import (
+    SemverAndStageFilterConverterMixin,
+)
 from sentry.search.events.fields import (
     ColumnArg,
     ColumnTagArg,
@@ -79,23 +77,18 @@ from sentry.search.events.fields import (
     normalize_percentile_alias,
     with_default,
 )
-from sentry.search.events.filter import (
-    _flip_field_sort,
-    handle_operator_negation,
-    parse_semver,
-    to_list,
-    translate_transaction_status,
-)
+from sentry.search.events.filter import to_list, translate_transaction_status
 from sentry.search.events.types import SelectType, WhereType
 from sentry.utils.numbers import format_grouped_length
 
 
-class DiscoverDatasetConfig(DatasetConfig):
+class DiscoverDatasetConfig(DatasetConfig, SemverAndStageFilterConverterMixin):
     custom_threshold_columns = {
         "apdex()",
         "count_miserable(user)",
         "user_misery()",
     }
+    non_nullable_keys = {"event.type"}
 
     def __init__(self, builder: QueryBuilder):
         self.builder = builder
@@ -1580,166 +1573,3 @@ class DiscoverDatasetConfig(DatasetConfig):
 
     def _key_transaction_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
         return filter_aliases.team_key_transaction_filter(self.builder, search_filter)
-
-    def _release_stage_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
-        """
-        Parses a release stage search and returns a snuba condition to filter to the
-        requested releases.
-        """
-        # TODO: Filter by project here as well. It's done elsewhere, but could critically limit versions
-        # for orgs with thousands of projects, each with their own releases (potentially drowning out ones we care about)
-
-        if "organization_id" not in self.builder.params:
-            raise ValueError("organization_id is a required param")
-
-        organization_id: int = self.builder.params["organization_id"]
-        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
-        environments: Optional[List[Environment]] = self.builder.params.get(
-            "environment_objects", []
-        )
-        qs = (
-            Release.objects.filter_by_stage(
-                organization_id,
-                search_filter.operator,
-                search_filter.value.value,
-                project_ids=project_ids,
-                environments=environments,
-            )
-            .values_list("version", flat=True)
-            .order_by("date_added")[:MAX_SEARCH_RELEASES]
-        )
-        versions = list(qs)
-
-        if not versions:
-            # XXX: Just return a filter that will return no results if we have no versions
-            versions = [SEMVER_EMPTY_RELEASE]
-
-        return Condition(self.builder.column("release"), Op.IN, versions)
-
-    def _semver_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
-        """
-        Parses a semver query search and returns a snuba condition to filter to the
-        requested releases.
-
-        Since we only have semver information available in Postgres currently, we query
-        Postgres and return a list of versions to include/exclude. For most customers this
-        will work well, however some have extremely large numbers of releases, and we can't
-        pass them all to Snuba. To try and serve reasonable results, we:
-         - Attempt to query based on the initial semver query. If this returns
-           MAX_SEMVER_SEARCH_RELEASES results, we invert the query and see if it returns
-           fewer results. If so, we use a `NOT IN` snuba condition instead of an `IN`.
-         - Order the results such that the versions we return are semantically closest to
-           the passed filter. This means that when searching for `>= 1.0.0`, we'll return
-           version 1.0.0, 1.0.1, 1.1.0 before 9.x.x.
-        """
-        if "organization_id" not in self.builder.params:
-            raise ValueError("organization_id is a required param")
-
-        organization_id: int = self.builder.params["organization_id"]
-        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
-        # We explicitly use `raw_value` here to avoid converting wildcards to shell values
-        version: str = search_filter.value.raw_value
-        operator: str = search_filter.operator
-
-        # Note that we sort this such that if we end up fetching more than
-        # MAX_SEMVER_SEARCH_RELEASES, we will return the releases that are closest to
-        # the passed filter.
-        order_by = Release.SEMVER_COLS
-        if operator.startswith("<"):
-            order_by = list(map(_flip_field_sort, order_by))
-        qs = (
-            Release.objects.filter_by_semver(
-                organization_id,
-                parse_semver(version, operator),
-                project_ids=project_ids,
-            )
-            .values_list("version", flat=True)
-            .order_by(*order_by)[:MAX_SEARCH_RELEASES]
-        )
-        versions = list(qs)
-        final_operator = Op.IN
-        if len(versions) == MAX_SEARCH_RELEASES:
-            # We want to limit how many versions we pass through to Snuba. If we've hit
-            # the limit, make an extra query and see whether the inverse has fewer ids.
-            # If so, we can do a NOT IN query with these ids instead. Otherwise, we just
-            # do our best.
-            operator = OPERATOR_NEGATION_MAP[operator]
-            # Note that the `order_by` here is important for index usage. Postgres seems
-            # to seq scan with this query if the `order_by` isn't included, so we
-            # include it even though we don't really care about order for this query
-            qs_flipped = (
-                Release.objects.filter_by_semver(organization_id, parse_semver(version, operator))
-                .order_by(*map(_flip_field_sort, order_by))
-                .values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
-            )
-
-            exclude_versions = list(qs_flipped)
-            if exclude_versions and len(exclude_versions) < len(versions):
-                # Do a negative search instead
-                final_operator = Op.NOT_IN
-                versions = exclude_versions
-
-        if not versions:
-            # XXX: Just return a filter that will return no results if we have no versions
-            versions = [SEMVER_EMPTY_RELEASE]
-
-        return Condition(self.builder.column("release"), final_operator, versions)
-
-    def _semver_package_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
-        """
-        Applies a semver package filter to the search. Note that if the query returns more than
-        `MAX_SEARCH_RELEASES` here we arbitrarily return a subset of the releases.
-        """
-        if "organization_id" not in self.builder.params:
-            raise ValueError("organization_id is a required param")
-
-        organization_id: int = self.builder.params["organization_id"]
-        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
-        package: str = search_filter.value.raw_value
-
-        versions = list(
-            Release.objects.filter_by_semver(
-                organization_id,
-                SemverFilter("exact", [], package),
-                project_ids=project_ids,
-            ).values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
-        )
-
-        if not versions:
-            # XXX: Just return a filter that will return no results if we have no versions
-            versions = [SEMVER_EMPTY_RELEASE]
-
-        return Condition(self.builder.column("release"), Op.IN, versions)
-
-    def _semver_build_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
-        """
-        Applies a semver build filter to the search. Note that if the query returns more than
-        `MAX_SEARCH_RELEASES` here we arbitrarily return a subset of the releases.
-        """
-        if "organization_id" not in self.builder.params:
-            raise ValueError("organization_id is a required param")
-
-        organization_id: int = self.builder.params["organization_id"]
-        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
-        build: str = search_filter.value.raw_value
-
-        operator, negated = handle_operator_negation(search_filter.operator)
-        try:
-            django_op = OPERATOR_TO_DJANGO[operator]
-        except KeyError:
-            raise InvalidSearchQuery("Invalid operation 'IN' for semantic version filter.")
-        versions = list(
-            Release.objects.filter_by_semver_build(
-                organization_id,
-                django_op,
-                build,
-                project_ids=project_ids,
-                negated=negated,
-            ).values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
-        )
-
-        if not versions:
-            # XXX: Just return a filter that will return no results if we have no versions
-            versions = [SEMVER_EMPTY_RELEASE]
-
-        return Condition(self.builder.column("release"), Op.IN, versions)

+ 185 - 0
src/sentry/search/events/datasets/semver_and_stage_aliases.py

@@ -0,0 +1,185 @@
+from __future__ import annotations
+
+from abc import ABC
+from typing import List, Optional
+
+from snuba_sdk.conditions import Condition, Op
+
+from sentry.api.event_search import SearchFilter
+from sentry.exceptions import InvalidSearchQuery
+from sentry.models import Environment, Release, SemverFilter
+from sentry.search.events.constants import (
+    MAX_SEARCH_RELEASES,
+    OPERATOR_NEGATION_MAP,
+    OPERATOR_TO_DJANGO,
+    SEMVER_EMPTY_RELEASE,
+)
+from sentry.search.events.filter import _flip_field_sort, handle_operator_negation, parse_semver
+from sentry.search.events.types import WhereType
+
+
+class SemverAndStageFilterConverterMixin(ABC):
+    builder = None
+
+    def _release_stage_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        """
+        Parses a release stage search and returns a snuba condition to filter to the
+        requested releases.
+        """
+        # TODO: Filter by project here as well. It's done elsewhere, but could critically limit versions
+        # for orgs with thousands of projects, each with their own releases (potentially drowning out ones we care about)
+
+        if "organization_id" not in self.builder.params:
+            raise ValueError("organization_id is a required param")
+
+        organization_id: int = self.builder.params["organization_id"]
+        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
+        environments: Optional[List[Environment]] = self.builder.params.get(
+            "environment_objects", []
+        )
+        qs = (
+            Release.objects.filter_by_stage(
+                organization_id,
+                search_filter.operator,
+                search_filter.value.value,
+                project_ids=project_ids,
+                environments=environments,
+            )
+            .values_list("version", flat=True)
+            .order_by("date_added")[:MAX_SEARCH_RELEASES]
+        )
+        versions = list(qs)
+
+        if not versions:
+            # XXX: Just return a filter that will return no results if we have no versions
+            versions = [SEMVER_EMPTY_RELEASE]
+
+        return Condition(self.builder.column("release"), Op.IN, versions)
+
+    def _semver_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        """
+        Parses a semver query search and returns a snuba condition to filter to the
+        requested releases.
+
+        Since we only have semver information available in Postgres currently, we query
+        Postgres and return a list of versions to include/exclude. For most customers this
+        will work well, however some have extremely large numbers of releases, and we can't
+        pass them all to Snuba. To try and serve reasonable results, we:
+         - Attempt to query based on the initial semver query. If this returns
+           MAX_SEARCH_RELEASES results, we invert the query and see if it returns
+           fewer results. If so, we use a `NOT IN` snuba condition instead of an `IN`.
+         - Order the results such that the versions we return are semantically closest to
+           the passed filter. This means that when searching for `>= 1.0.0`, we'll return
+           version 1.0.0, 1.0.1, 1.1.0 before 9.x.x.
+        """
+        if "organization_id" not in self.builder.params:
+            raise ValueError("organization_id is a required param")
+
+        organization_id: int = self.builder.params["organization_id"]
+        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
+        # We explicitly use `raw_value` here to avoid converting wildcards to shell values
+        version: str = search_filter.value.raw_value
+        operator: str = search_filter.operator
+
+        # Note that we sort this such that if we end up fetching more than
+        # MAX_SEARCH_RELEASES, we will return the releases that are closest to
+        # the passed filter.
+        order_by = Release.SEMVER_COLS
+        if operator.startswith("<"):
+            order_by = list(map(_flip_field_sort, order_by))
+        qs = (
+            Release.objects.filter_by_semver(
+                organization_id,
+                parse_semver(version, operator),
+                project_ids=project_ids,
+            )
+            .values_list("version", flat=True)
+            .order_by(*order_by)[:MAX_SEARCH_RELEASES]
+        )
+        versions = list(qs)
+        final_operator = Op.IN
+        if len(versions) == MAX_SEARCH_RELEASES:
+            # We want to limit how many versions we pass through to Snuba. If we've hit
+            # the limit, make an extra query and see whether the inverse has fewer ids.
+            # If so, we can do a NOT IN query with these ids instead. Otherwise, we just
+            # do our best.
+            operator = OPERATOR_NEGATION_MAP[operator]
+            # Note that the `order_by` here is important for index usage. Postgres seems
+            # to seq scan with this query if the `order_by` isn't included, so we
+            # include it even though we don't really care about order for this query
+            qs_flipped = (
+                Release.objects.filter_by_semver(organization_id, parse_semver(version, operator))
+                .order_by(*map(_flip_field_sort, order_by))
+                .values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
+            )
+
+            exclude_versions = list(qs_flipped)
+            if exclude_versions and len(exclude_versions) < len(versions):
+                # Do a negative search instead
+                final_operator = Op.NOT_IN
+                versions = exclude_versions
+
+        if not versions:
+            # XXX: Just return a filter that will return no results if we have no versions
+            versions = [SEMVER_EMPTY_RELEASE]
+
+        return Condition(self.builder.column("release"), final_operator, versions)
+
+    def _semver_package_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        """
+        Applies a semver package filter to the search. Note that if the query returns more than
+        `MAX_SEARCH_RELEASES` here we arbitrarily return a subset of the releases.
+        """
+        if "organization_id" not in self.builder.params:
+            raise ValueError("organization_id is a required param")
+
+        organization_id: int = self.builder.params["organization_id"]
+        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
+        package: str = search_filter.value.raw_value
+
+        versions = list(
+            Release.objects.filter_by_semver(
+                organization_id,
+                SemverFilter("exact", [], package),
+                project_ids=project_ids,
+            ).values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
+        )
+
+        if not versions:
+            # XXX: Just return a filter that will return no results if we have no versions
+            versions = [SEMVER_EMPTY_RELEASE]
+
+        return Condition(self.builder.column("release"), Op.IN, versions)
+
+    def _semver_build_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        """
+        Applies a semver build filter to the search. Note that if the query returns more than
+        `MAX_SEARCH_RELEASES` here we arbitrarily return a subset of the releases.
+        """
+        if "organization_id" not in self.builder.params:
+            raise ValueError("organization_id is a required param")
+
+        organization_id: int = self.builder.params["organization_id"]
+        project_ids: Optional[List[int]] = self.builder.params.get("project_id")
+        build: str = search_filter.value.raw_value
+
+        operator, negated = handle_operator_negation(search_filter.operator)
+        try:
+            django_op = OPERATOR_TO_DJANGO[operator]
+        except KeyError:
+            raise InvalidSearchQuery("Invalid operation 'IN' for semantic version filter.")
+        versions = list(
+            Release.objects.filter_by_semver_build(
+                organization_id,
+                django_op,
+                build,
+                project_ids=project_ids,
+                negated=negated,
+            ).values_list("version", flat=True)[:MAX_SEARCH_RELEASES]
+        )
+
+        if not versions:
+            # XXX: Just return a filter that will return no results if we have no versions
+            versions = [SEMVER_EMPTY_RELEASE]
+
+        return Condition(self.builder.column("release"), Op.IN, versions)

+ 17 - 2
src/sentry/search/events/datasets/sessions.py

@@ -4,14 +4,25 @@ from snuba_sdk import Function
 
 from sentry.api.event_search import SearchFilter
 from sentry.search.events.builder import QueryBuilder
-from sentry.search.events.constants import RELEASE_ALIAS
+from sentry.search.events.constants import (
+    RELEASE_ALIAS,
+    RELEASE_STAGE_ALIAS,
+    SEMVER_ALIAS,
+    SEMVER_BUILD_ALIAS,
+    SEMVER_PACKAGE_ALIAS,
+)
 from sentry.search.events.datasets import filter_aliases
 from sentry.search.events.datasets.base import DatasetConfig
+from sentry.search.events.datasets.semver_and_stage_aliases import (
+    SemverAndStageFilterConverterMixin,
+)
 from sentry.search.events.fields import SessionColumnArg, SnQLFunction
 from sentry.search.events.types import SelectType, WhereType
 
 
-class SessionsDatasetConfig(DatasetConfig):
+class SessionsDatasetConfig(DatasetConfig, SemverAndStageFilterConverterMixin):
+    non_nullable_keys = {"project", "project_id", "environment", "release"}
+
     def __init__(self, builder: QueryBuilder):
         self.builder = builder
 
@@ -21,6 +32,10 @@ class SessionsDatasetConfig(DatasetConfig):
     ) -> Mapping[str, Callable[[SearchFilter], Optional[WhereType]]]:
         return {
             RELEASE_ALIAS: self._release_filter_converter,
+            RELEASE_STAGE_ALIAS: self._release_stage_filter_converter,
+            SEMVER_ALIAS: self._semver_filter_converter,
+            SEMVER_PACKAGE_ALIAS: self._semver_package_filter_converter,
+            SEMVER_BUILD_ALIAS: self._semver_build_filter_converter,
         }
 
     @property

+ 4 - 10
src/sentry/snuba/metrics/query.py

@@ -5,7 +5,7 @@ from dataclasses import dataclass
 from datetime import datetime, timedelta
 from typing import Literal, Optional, Sequence, Set, Union
 
-from snuba_sdk import Column, Direction, Function, Granularity, Limit, Offset
+from snuba_sdk import Column, Direction, Granularity, Limit, Offset
 from snuba_sdk.conditions import Condition, ConditionGroup
 
 from sentry.api.utils import InvalidParams
@@ -134,16 +134,10 @@ class MetricsQuery(MetricsQueryValidationRunner):
         for condition in self.where:
             if (
                 isinstance(condition, Condition)
-                and isinstance(condition.lhs, Function)
-                and condition.lhs.function == "ifNull"
+                and isinstance(condition.lhs, Column)
+                and condition.lhs.name in UNALLOWED_TAGS
             ):
-                parameter = condition.lhs.parameters[0]
-                if isinstance(parameter, Column) and parameter.name.startswith(
-                    ("tags_raw[", "tags[")
-                ):
-                    tag_name = parameter.name.split("[")[1].split("]")[0]
-                    if tag_name in UNALLOWED_TAGS:
-                        raise InvalidParams(f"Tag name {tag_name} is not a valid query filter")
+                raise InvalidParams(f"Tag name {condition.lhs.name} is not a valid query filter")
 
     def validate_orderby(self) -> None:
         if not self.orderby:

+ 77 - 80
src/sentry/snuba/sessions_v2.py

@@ -5,12 +5,13 @@ from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional, Tuple
 
 import pytz
+from snuba_sdk import Column, Condition, Function, Limit, Op
 
 from sentry.api.utils import get_date_range_from_params
 from sentry.release_health.base import AllowedResolution, SessionsQueryConfig
-from sentry.search.events.filter import get_filter
+from sentry.search.events.builder import SessionsV2QueryBuilder, TimeseriesSessionsV2QueryBuilder
 from sentry.utils.dates import parse_stats_period, to_datetime, to_timestamp
-from sentry.utils.snuba import Dataset, raw_query, resolve_condition
+from sentry.utils.snuba import Dataset
 
 logger = logging.getLogger(__name__)
 
@@ -220,24 +221,12 @@ GROUPBY_MAP = {
     "session.status": SessionStatusGroupBy(),
 }
 
-CONDITION_COLUMNS = ["project", "project_id", "environment", "release"]
-FILTER_KEY_COLUMNS = ["project_id"]
 
-
-def resolve_column(col, extra_columns=None):
-    condition_columns = CONDITION_COLUMNS + (extra_columns or [])
-    if col in condition_columns:
-        return col
-    raise InvalidField(f'Invalid query field: "{col}"')
-
-
-def resolve_filter_key(col):
-    if col in FILTER_KEY_COLUMNS:
-        return col
-    raise InvalidField(f'Invalid query field: "{col}"')
+class InvalidField(Exception):
+    pass
 
 
-class InvalidField(Exception):
+class ZeroIntervalsException(Exception):
     pass
 
 
@@ -262,6 +251,7 @@ class QueryDefinition:
         self.raw_orderby = query.getlist("orderBy")  # only respected by metrics implementation
         self.limit = limit
         self.offset = offset
+        self._query_config = query_config
 
         if len(raw_fields) == 0:
             raise InvalidField('Request is missing a "field"')
@@ -311,28 +301,49 @@ class QueryDefinition:
             query_groupby.update(groupby.get_snuba_groupby())
         self.query_groupby = list(query_groupby)
 
-        # the `params` are:
-        # project_id, organization_id, environment;
-        # also: start, end; but we got those ourselves.
-        snuba_filter = get_filter(self.query, params)
-
-        # this makes sure that literals in complex queries are properly quoted,
-        # and unknown fields are raised as errors
-        if query_config.allow_session_status_query:
-            # NOTE: "''" is added because we use the event search parser, which
-            # resolves "session.status" to ifNull(..., "''")
-            column_resolver = lambda col: resolve_column(col, ["session.status", "''"])
-        else:
-            column_resolver = resolve_column
-
-        conditions = [resolve_condition(c, column_resolver) for c in snuba_filter.conditions]
-        filter_keys = {
-            resolve_filter_key(key): value for key, value in snuba_filter.filter_keys.items()
+    def to_query_builder_dict(self, orderby=None):
+        num_intervals = len(get_timestamps(self))
+        if num_intervals == 0:
+            raise ZeroIntervalsException
+
+        max_groups = SNUBA_LIMIT // num_intervals
+
+        query_builder_dict = {
+            "dataset": Dataset.Sessions,
+            "params": {
+                **self.params,
+                "start": self.start,
+                "end": self.end,
+            },
+            "selected_columns": self.query_columns,
+            "groupby_columns": self.query_groupby,
+            "query": self.query,
+            "orderby": orderby,
+            "limit": max_groups,
+            "auto_aggregations": True,
+            "granularity": self.rollup,
         }
-
-        self.aggregations = snuba_filter.aggregations
-        self.conditions = conditions
-        self.filter_keys = filter_keys
+        if self._query_config.allow_session_status_query:
+            query_builder_dict.update({"extra_filter_allowlist_fields": ["session.status"]})
+        return query_builder_dict
+
+    def get_filter_conditions(self):
+        """
+        Return the filter conditions for this query, for use in metrics queries; the timestamp and
+        organization id conditions are excluded, since the metrics layer adds them later.
+        """
+        conditions = SessionsV2QueryBuilder(**self.to_query_builder_dict()).where
+        filter_conditions = []
+        for condition in conditions:
+            # Skip the sessions "started" timestamp condition and the org_id condition, as they are not needed for metrics queries.
+            if (
+                isinstance(condition, Condition)
+                and isinstance(condition.lhs, Column)
+                and condition.lhs.name in ["started", "org_id"]
+            ):
+                continue
+            filter_conditions.append(condition)
+        return filter_conditions
 
     def __repr__(self):
         return f"{self.__class__.__name__}({repr(self.__dict__)})"
@@ -465,65 +476,51 @@ def _run_sessions_query(query):
     `totals` and again for the actual time-series data grouped by the requested
     interval.
     """
-
-    num_intervals = len(get_timestamps(query))
-    if num_intervals == 0:
-        return [], []
-
     # We only return the top-N groups, based on the first field that is being
     # queried, assuming that those are the most relevant to the user.
     # In a future iteration we might expose an `orderBy` query parameter.
     orderby = [f"-{query.primary_column}"]
-    max_groups = SNUBA_LIMIT // num_intervals
-
-    result_totals = raw_query(
-        dataset=Dataset.Sessions,
-        selected_columns=query.query_columns,
-        groupby=query.query_groupby,
-        aggregations=query.aggregations,
-        conditions=query.conditions,
-        filter_keys=query.filter_keys,
-        start=query.start,
-        end=query.end,
-        rollup=query.rollup,
-        orderby=orderby,
-        limit=max_groups,
-        referrer="sessions.totals",
-    )
 
-    totals = result_totals["data"]
-    if not totals:
+    try:
+        query_builder_dict = query.to_query_builder_dict(orderby=orderby)
+    except ZeroIntervalsException:
+        return [], []
+
+    result_totals = SessionsV2QueryBuilder(**query_builder_dict).run_query("sessions.totals")[
+        "data"
+    ]
+    if not result_totals:
         # No need to query time series if totals is already empty
         return [], []
 
     # We only get the time series for groups which also have a total:
     if query.query_groupby:
         # E.g. (release, environment) IN [(1, 2), (3, 4), ...]
-        groups = {tuple(row[column] for column in query.query_groupby) for row in totals}
-        extra_conditions = [[["tuple", query.query_groupby], "IN", groups]] + [
-            # This condition is redundant but might lead to better query performance
-            # Eg. [release IN [1, 3]], [environment IN [2, 4]]
-            [column, "IN", {row[column] for row in totals}]
+        groups = {tuple(row[column] for column in query.query_groupby) for row in result_totals}
+
+        extra_conditions = [
+            Condition(
+                Function("tuple", [Column(col) for col in query.query_groupby]),
+                Op.IN,
+                Function("tuple", list(groups)),
+            )
+        ] + [
+            Condition(
+                Column(column),
+                Op.IN,
+                Function("tuple", list({row[column] for row in result_totals})),
+            )
             for column in query.query_groupby
         ]
     else:
         extra_conditions = []
 
-    result_timeseries = raw_query(
-        dataset=Dataset.Sessions,
-        selected_columns=[TS_COL] + query.query_columns,
-        groupby=[TS_COL] + query.query_groupby,
-        aggregations=query.aggregations,
-        conditions=query.conditions + extra_conditions,
-        filter_keys=query.filter_keys,
-        start=query.start,
-        end=query.end,
-        rollup=query.rollup,
-        limit=SNUBA_LIMIT,
-        referrer="sessions.timeseries",
-    )
+    timeseries_query_builder = TimeseriesSessionsV2QueryBuilder(**query_builder_dict)
+    timeseries_query_builder.where.extend(extra_conditions)
+    timeseries_query_builder.limit = Limit(SNUBA_LIMIT)
+    result_timeseries = timeseries_query_builder.run_query("sessions.timeseries")["data"]
 
-    return totals, result_timeseries["data"]
+    return result_totals, result_timeseries
 
 
 def massage_sessions_result(

Some files were not shown because too many files changed in this diff