feat(discover): Only transform when ordering project (#39468)

- This updates the query builder with an orderby resolver so we can
implement more custom orderbys (orderbies?) in the future
- This changes the project field to select only the project_id, which
requires a new post-processing capability in the query builder
- This is done via the `value_resolver_map` and the `meta_resolver_map`
(see the sketch after this list)
- Removes the snuba_filter param from transform_results since we no
longer use it
- Removes the old discover 1 query endpoint since it shouldn't be
supported and is no longer being used
- The dynamic sampling (ds) code also needed updating, since it relied on
the old project behaviour but doesn't use `discover.query`
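
To make the post-processing idea concrete, here is a minimal sketch of how a `value_resolver_map` entry turns a selected `project_id` back into a project slug, mirroring the `transform_discover_data` helper added in `project_dynamic_sampling.py` below. The lookup table stands in for the builder's `project_ids` property, and the data is hypothetical:

```python
from typing import Any, Callable, Dict, List

# Hypothetical id -> slug lookup, standing in for QueryBuilder.project_ids.
project_ids: Dict[int, str] = {1: "backend", 2: "frontend"}

# field -> post-processing function, as described for value_resolver_map:
# resolve the raw project_id that was selected into its slug.
value_resolver_map: Dict[str, Callable[[Any], Any]] = {
    "project": lambda project_id: project_ids.get(project_id, ""),
}

rows: List[Dict[str, Any]] = [{"project": 1, "count()": 10}]

transformed = [
    {
        key: value_resolver_map[key](value) if key in value_resolver_map else value
        for key, value in row.items()
    }
    for row in rows
]
assert transformed == [{"project": "backend", "count()": 10}]
```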
William Mak · 2 years ago · commit bf416f7ad2

+ 1 - 1
src/sentry/api/endpoints/organization_events_facets_performance.py

@@ -456,7 +456,7 @@ def query_facet_performance(
         )
 
         results = tag_query.run_query(f"{referrer}.tag_values".format(referrer, "tag_values"))
-        results = discover.transform_results(results, tag_query, {}, None)
+        results = discover.transform_results(results, tag_query, {})
 
         return results
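
The same mechanical change repeats in the next two files: the trailing `snuba_filter` argument is dropped from each `transform_results` call. The function's definition is not part of this diff; inferred from the call sites alone, a hedged sketch of the slimmed-down helper could look like the following (the resolver-based row processing is an assumption, not the confirmed implementation):

```python
from typing import Any, Dict

def transform_results(
    results: Dict[str, Any],
    builder: Any,  # a QueryBuilder; typed loosely to keep the sketch self-contained
    translated_columns: Dict[str, str],
) -> Dict[str, Any]:
    # Assumed behaviour: post-process each row with the builder's value
    # resolvers instead of consulting the removed snuba_filter.
    results["data"] = [
        {
            key: builder.value_resolver_map.get(key, lambda value: value)(value)
            for key, value in row.items()
        }
        for row in results["data"]
    ]
    return results
```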
 

+ 1 - 1
src/sentry/api/endpoints/organization_events_trace.py

@@ -256,7 +256,7 @@ def query_trace_data(
         referrer="api.trace-view.get-events",
     )
     transformed_results = [
-        discover.transform_results(result, query, {}, None)["data"]
+        discover.transform_results(result, query, {})["data"]
         for result, query in zip(results, [transaction_query, error_query])
     ]
     return cast(Sequence[SnubaTransaction], transformed_results[0]), cast(

+ 1 - 1
src/sentry/api/endpoints/organization_events_trends.py

@@ -490,7 +490,7 @@ class OrganizationEventsTrendsEndpointBase(OrganizationEventsV2EndpointBase):
                 trend_query.get_snql_query(),
                 referrer="api.trends.get-percentage-change",
             )
-            result = discover.transform_results(result, trend_query, {}, None)
+            result = discover.transform_results(result, trend_query, {})
             return result
 
         with self.handle_query_errors():

+ 14 - 1
src/sentry/api/endpoints/project_dynamic_sampling.py

@@ -65,6 +65,19 @@ class ProjectDynamicSamplingDistributionEndpoint(ProjectEndpoint):
             referrer=referrer,
         )["data"]
 
+    def transform_discover_data(self, data, builder):
+        transformed_data = []
+        for row in data:
+            transformed_data.append(
+                {
+                    key: builder.value_resolver_map[key](value)
+                    if key in builder.value_resolver_map
+                    else value
+                    for key, value in row.items()
+                }
+            )
+        return transformed_data
+
     def __project_stats_query(self, query, projects_in_org, org_id, query_time_range, trace_ids):
         """
         Smart query to get:
@@ -115,7 +128,7 @@ class ProjectDynamicSamplingDistributionEndpoint(ProjectEndpoint):
             SnubaRequest(dataset=Dataset.Discover.value, app_id="default", query=snuba_query),
             referrer=Referrer.DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECT_STATS.value,
         )["data"]
-        return data
+        return self.transform_discover_data(data, builder)
 
     def __get_transactions_count(self, project, query, sample_size, query_time_range):
         # Run query that gets total count of transactions with these conditions for the specified

+ 0 - 6
src/sentry/api/urls.py

@@ -25,7 +25,6 @@ from sentry.discover.endpoints.discover_key_transactions import (
     KeyTransactionEndpoint,
     KeyTransactionListEndpoint,
 )
-from sentry.discover.endpoints.discover_query import DiscoverQueryEndpoint
 from sentry.discover.endpoints.discover_saved_queries import DiscoverSavedQueriesEndpoint
 from sentry.discover.endpoints.discover_saved_query_detail import (
     DiscoverSavedQueryDetailEndpoint,
@@ -962,11 +961,6 @@ urlpatterns = [
                     name="sentry-api-0-organization-codeowners-associations",
                 ),
                 # Discover
-                url(
-                    r"^(?P<organization_slug>[^\/]+)/discover/query/$",
-                    DiscoverQueryEndpoint.as_view(),
-                    name="sentry-api-0-discover-query",
-                ),
                 url(
                     r"^(?P<organization_slug>[^\/]+)/discover/homepage/$",
                     DiscoverHomepageQueryEndpoint.as_view(),

+ 0 - 2
src/sentry/apidocs/public_exclusion_list.py

@@ -451,7 +451,6 @@ from sentry.discover.endpoints.discover_key_transactions import (
     KeyTransactionEndpoint,
     KeyTransactionListEndpoint,
 )
-from sentry.discover.endpoints.discover_query import DiscoverQueryEndpoint
 from sentry.discover.endpoints.discover_saved_queries import DiscoverSavedQueriesEndpoint
 from sentry.discover.endpoints.discover_saved_query_detail import (
     DiscoverSavedQueryDetailEndpoint,
@@ -671,7 +670,6 @@ __EXCLUDED_FROM_PUBLIC_ENDPOINTS = {
     OrganizationCodeMappingsEndpoint,
     OrganizationCodeMappingDetailsEndpoint,
     OrganizationCodeMappingCodeOwnersEndpoint,
-    DiscoverQueryEndpoint,
     DiscoverSavedQueriesEndpoint,
     DiscoverSavedQueryDetailEndpoint,
     DiscoverSavedQueryVisitEndpoint,

+ 0 - 173
src/sentry/discover/endpoints/discover_query.py

@@ -1,173 +0,0 @@
-import logging
-from copy import deepcopy
-from functools import partial
-
-from rest_framework.request import Request
-from rest_framework.response import Response
-
-from sentry import features
-from sentry.api.base import pending_silo_endpoint
-from sentry.api.bases import OrganizationEndpoint
-from sentry.api.bases.organization import OrganizationPermission
-from sentry.api.exceptions import ResourceDoesNotExist
-from sentry.api.paginator import GenericOffsetPaginator
-from sentry.discover.utils import transform_aliases_and_query
-from sentry.types.ratelimit import RateLimit, RateLimitCategory
-from sentry.utils import snuba
-
-from .serializers import DiscoverQuerySerializer
-
-logger = logging.getLogger(__name__)
-
-
-class DiscoverQueryPermission(OrganizationPermission):
-    scope_map = {"POST": ["org:read", "project:read"]}
-
-
-@pending_silo_endpoint
-class DiscoverQueryEndpoint(OrganizationEndpoint):
-    permission_classes = (DiscoverQueryPermission,)
-    enforce_rate_limit = True
-
-    rate_limits = {
-        "POST": {
-            RateLimitCategory.IP: RateLimit(4, 1),
-            RateLimitCategory.USER: RateLimit(4, 1),
-            RateLimitCategory.ORGANIZATION: RateLimit(4, 1),
-        }
-    }
-
-    def has_feature(self, request: Request, organization):
-        return features.has(
-            "organizations:discover", organization, actor=request.user
-        ) or features.has("organizations:discover-basic", organization, actor=request.user)
-
-    def handle_results(self, snuba_results, requested_query, projects):
-        if "project.name" in requested_query["selected_columns"]:
-            project_name_index = requested_query["selected_columns"].index("project.name")
-            snuba_results["meta"].insert(
-                project_name_index, {"name": "project.name", "type": "String"}
-            )
-            if "project.id" not in requested_query["selected_columns"]:
-                snuba_results["meta"] = [
-                    field for field in snuba_results["meta"] if field["name"] != "project.id"
-                ]
-
-            for result in snuba_results["data"]:
-                if "project.id" in result:
-                    result["project.name"] = projects[result["project.id"]]
-                    if "project.id" not in requested_query["selected_columns"]:
-                        del result["project.id"]
-
-        if "project.name" in requested_query["groupby"]:
-            project_name_index = requested_query["groupby"].index("project.name")
-            snuba_results["meta"].insert(
-                project_name_index, {"name": "project.name", "type": "String"}
-            )
-            if "project.id" not in requested_query["groupby"]:
-                snuba_results["meta"] = [
-                    field for field in snuba_results["meta"] if field["name"] != "project.id"
-                ]
-
-            for result in snuba_results["data"]:
-                if "project.id" in result:
-                    result["project.name"] = projects[result["project.id"]]
-                    if "project.id" not in requested_query["groupby"]:
-                        del result["project.id"]
-
-        # Convert snuba types to json types
-        for col in snuba_results["meta"]:
-            col["type"] = snuba.get_json_type(col.get("type"))
-
-        return snuba_results
-
-    def do_query(self, projects, request, **kwargs):
-        requested_query = deepcopy(kwargs)
-
-        selected_columns = kwargs["selected_columns"]
-        groupby_columns = kwargs["groupby"]
-
-        if "project.name" in requested_query["selected_columns"]:
-            selected_columns.remove("project.name")
-            if "project.id" not in selected_columns:
-                selected_columns.append("project.id")
-
-        if "project.name" in requested_query["groupby"]:
-            groupby_columns.remove("project.name")
-            if "project.id" not in groupby_columns:
-                groupby_columns.append("project.id")
-
-        for aggregation in kwargs["aggregations"]:
-            if aggregation[1] == "project.name":
-                aggregation[1] = "project.id"
-
-        if not kwargs["aggregations"]:
-
-            data_fn = partial(transform_aliases_and_query, referrer="discover", **kwargs)
-            return self.paginate(
-                request=request,
-                on_results=lambda results: self.handle_results(results, requested_query, projects),
-                paginator=GenericOffsetPaginator(data_fn=data_fn),
-                max_per_page=1000,
-            )
-        else:
-            snuba_results = transform_aliases_and_query(referrer="discover", **kwargs)
-            return Response(
-                self.handle_results(snuba_results, requested_query, projects), status=200
-            )
-
-    def post(self, request: Request, organization) -> Response:
-        if not self.has_feature(request, organization):
-            return Response(status=404)
-        logger.info("discover1.request", extra={"organization_id": organization.id})
-
-        try:
-            requested_projects = set(map(int, request.data.get("projects", [])))
-        except (ValueError, TypeError):
-            raise ResourceDoesNotExist()
-        projects = self._get_projects_by_id(requested_projects, request, organization)
-
-        serializer = DiscoverQuerySerializer(data=request.data)
-
-        if not serializer.is_valid():
-            return Response(serializer.errors, status=400)
-
-        serialized = serializer.validated_data
-
-        has_aggregations = len(serialized.get("aggregations")) > 0
-
-        selected_columns = (
-            serialized.get("conditionFields", []) + []
-            if has_aggregations
-            else serialized.get("fields", [])
-        )
-
-        projects_map = {}
-        for project in projects:
-            projects_map[project.id] = project.slug
-
-        # Make sure that all selected fields are in the group by clause if there
-        # are aggregations
-        groupby = serialized.get("groupby") or []
-        fields = serialized.get("fields") or []
-        if has_aggregations:
-            for field in fields:
-                if field not in groupby:
-                    groupby.append(field)
-
-        return self.do_query(
-            projects=projects_map,
-            start=serialized.get("start"),
-            end=serialized.get("end"),
-            groupby=groupby,
-            selected_columns=selected_columns,
-            conditions=serialized.get("conditions"),
-            orderby=serialized.get("orderby"),
-            limit=serialized.get("limit"),
-            aggregations=serialized.get("aggregations"),
-            rollup=serialized.get("rollup"),
-            filter_keys={"project.id": list(projects_map.keys())},
-            arrayjoin=serialized.get("arrayjoin"),
-            request=request,
-            turbo=serialized.get("turbo"),
-        )

+ 0 - 149
src/sentry/discover/utils.py

@@ -1,149 +0,0 @@
-# TODO(mark) Once this import is removed, transform_data should not
-# be exported.
-from sentry import eventstore
-from sentry.snuba.discover import transform_data
-from sentry.utils.snuba import Dataset, aliased_query, get_function_index, get_snuba_column_name
-
-
-def parse_columns_in_functions(col, context=None, index=None):
-    """
-    Checks expressions for arguments that should be considered a column while
-    ignoring strings that represent clickhouse function names
-
-    if col is a list, means the expression has functions and we need
-    to parse for arguments that should be considered column names.
-
-    Assumptions here:
-     * strings that represent clickhouse function names are always followed by a list or tuple
-     * strings that are quoted with single quotes are used as string literals for CH
-     * otherwise we should attempt to get the snuba column name (or custom tag)
-    """
-
-    function_name_index = get_function_index(col)
-
-    if function_name_index is not None:
-        # if this is non zero, that means there are strings before this index
-        # that should be converted to snuba column names
-        # e.g. ['func1', ['column', 'func2', ['arg1']]]
-        if function_name_index > 0:
-            for i in range(0, function_name_index):
-                if context is not None:
-                    context[i] = get_snuba_column_name(col[i])
-
-        args = col[function_name_index + 1]
-
-        # check for nested functions in args
-        if get_function_index(args):
-            # look for columns
-            return parse_columns_in_functions(args, args)
-
-        # check each argument for column names
-        else:
-            for (i, arg) in enumerate(args):
-                parse_columns_in_functions(arg, args, i)
-    else:
-        # probably a column name
-        if context is not None and index is not None:
-            context[index] = get_snuba_column_name(col)
-
-
-def transform_aliases_and_query(**kwargs):
-    """
-    Convert aliases in selected_columns, groupby, aggregation, conditions,
-    orderby and arrayjoin fields to their internal Snuba format and post the
-    query to Snuba. Convert back translated aliases before returning snuba
-    results.
-
-    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
-    """
-
-    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}
-
-    translated_columns = {}
-    derived_columns = set()
-
-    selected_columns = kwargs.get("selected_columns")
-    groupby = kwargs.get("groupby")
-    aggregations = kwargs.get("aggregations")
-    conditions = kwargs.get("conditions")
-    filter_keys = kwargs["filter_keys"]
-    arrayjoin = kwargs.get("arrayjoin")
-    orderby = kwargs.get("orderby")
-    having = kwargs.get("having", [])
-    dataset = Dataset.Events
-
-    if selected_columns:
-        for (idx, col) in enumerate(selected_columns):
-            if isinstance(col, list):
-                # if list, means there are potentially nested functions and need to
-                # iterate and translate potential columns
-                parse_columns_in_functions(col)
-                selected_columns[idx] = col
-                translated_columns[col[2]] = col[2]
-                derived_columns.add(col[2])
-            else:
-                name = get_snuba_column_name(col)
-                selected_columns[idx] = name
-                translated_columns[name] = col
-
-    if groupby:
-        for (idx, col) in enumerate(groupby):
-            if col not in derived_columns:
-                name = get_snuba_column_name(col)
-            else:
-                name = col
-
-            groupby[idx] = name
-            translated_columns[name] = col
-
-    for aggregation in aggregations or []:
-        derived_columns.add(aggregation[2])
-        if isinstance(aggregation[1], str):
-            aggregation[1] = get_snuba_column_name(aggregation[1])
-        elif isinstance(aggregation[1], (set, tuple, list)):
-            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]
-
-    for col in list(filter_keys.keys()):
-        name = get_snuba_column_name(col)
-        filter_keys[name] = filter_keys.pop(col)
-
-    if conditions:
-        aliased_conditions = []
-        for condition in conditions:
-            field = condition[0]
-            if not isinstance(field, (list, tuple)) and field in derived_columns:
-                having.append(condition)
-            else:
-                aliased_conditions.append(condition)
-        kwargs["conditions"] = aliased_conditions
-
-    if having:
-        kwargs["having"] = having
-
-    if orderby:
-        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
-        translated_orderby = []
-
-        for field_with_order in orderby:
-            field = field_with_order.lstrip("-")
-            translated_orderby.append(
-                "{}{}".format(
-                    "-" if field_with_order.startswith("-") else "",
-                    field if field in derived_columns else get_snuba_column_name(field),
-                )
-            )
-
-        kwargs["orderby"] = translated_orderby
-
-    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
-    kwargs["dataset"] = dataset
-
-    result = aliased_query(**kwargs)
-
-    snuba_filter = eventstore.Filter(
-        rollup=kwargs.get("rollup"),
-        start=kwargs.get("start"),
-        end=kwargs.get("end"),
-        orderby=kwargs.get("orderby"),
-    )
-    return transform_data(result, translated_columns, snuba_filter)

+ 24 - 2
src/sentry/search/events/builder.py

@@ -166,6 +166,10 @@ class QueryBuilder:
         self.projects_to_filter: Set[int] = set()
         self.function_alias_map: Dict[str, FunctionDetails] = {}
         self.equation_alias_map: Dict[str, SelectType] = {}
+        # field: function map for post-processing values
+        self.value_resolver_map: Dict[str, Callable[[Any], Any]] = {}
+        # value_resolver_map may change type
+        self.meta_resolver_map: Dict[str, str] = {}
 
         self.auto_aggregations = auto_aggregations
         self.limit = self.resolve_limit(limit)
@@ -179,6 +183,7 @@ class QueryBuilder:
             self.field_alias_converter,
             self.function_converter,
             self.search_filter_converter,
+            self.orderby_converter,
         ) = self.load_config()
 
         self.limitby = self.resolve_limitby(limitby)
@@ -251,6 +256,7 @@ class QueryBuilder:
         Mapping[str, Callable[[str], SelectType]],
         Mapping[str, SnQLFunction],
         Mapping[str, Callable[[SearchFilter], Optional[WhereType]]],
+        Mapping[str, Callable[[Direction], OrderBy]],
     ]:
         from sentry.search.events.datasets.discover import DiscoverDatasetConfig
         from sentry.search.events.datasets.metrics import MetricsDatasetConfig
@@ -269,8 +275,9 @@ class QueryBuilder:
         field_alias_converter = self.config.field_alias_converter
         function_converter = self.config.function_converter
         search_filter_converter = self.config.search_filter_converter
+        orderby_converter = self.config.orderby_converter
 
-        return field_alias_converter, function_converter, search_filter_converter
+        return field_alias_converter, function_converter, search_filter_converter, orderby_converter
 
     def resolve_limit(self, limit: Optional[int]) -> Optional[Limit]:
         return None if limit is None else Limit(limit)
@@ -625,7 +632,11 @@ class QueryBuilder:
                         "arrayJoin", [self.resolve_column(arguments[arg.name])]
                     )
                 else:
-                    arguments[arg.name] = self.resolve_column(arguments[arg.name])
+                    column = self.resolve_column(arguments[arg.name])
+                    # Can't keep aliased expressions
+                    if isinstance(column, AliasedExpression):
+                        column = column.exp
+                    arguments[arg.name] = column
             if combinator is not None and combinator.is_applicable(arg.name):
                 arguments[arg.name] = combinator.apply(arguments[arg.name])
                 combinator_applied = True
@@ -763,6 +774,9 @@ class QueryBuilder:
                     isinstance(selected_column, AliasedExpression)
                     and selected_column.alias == bare_orderby
                 ):
+                    if bare_orderby in self.orderby_converter:
+                        validated.append(self.orderby_converter[bare_orderby](direction))
+                        break
                     # We cannot directly order by an `AliasedExpression`.
                     # Instead, we order by the column inside.
                     validated.append(OrderBy(selected_column.exp, direction))
@@ -772,6 +786,8 @@ class QueryBuilder:
                     isinstance(selected_column, CurriedFunction)
                     and selected_column.alias == bare_orderby
                 ):
+                    if bare_orderby in self.orderby_converter:
+                        validated.append(self.orderby_converter[bare_orderby](direction))
                     validated.append(OrderBy(selected_column, direction))
                     break
 
@@ -883,6 +899,8 @@ class QueryBuilder:
         return None
 
     def get_field_type(self, field: str) -> Optional[str]:
+        if field in self.meta_resolver_map:
+            return self.meta_resolver_map[field]
         if (
             field == "transaction.duration"
             or is_duration_measurement(field)
@@ -920,6 +938,10 @@ class QueryBuilder:
 
         return {p.slug: p.id for p in project_slugs}
 
+    @cached_property  # type: ignore
+    def project_ids(self) -> Mapping[int, str]:
+        return {project_id: slug for slug, project_id in self.project_slugs.items()}
+
     def validate_having_clause(self) -> None:
         """Validate that the functions in having are selected columns
 
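Tying the pieces together: `resolve_orderby` now consults `orderby_converter` before the generic `AliasedExpression`/`CurriedFunction` handling, and the new `project_ids` property gives resolvers an id-to-slug lookup. A hedged sketch (not taken from this diff; the real entries live in the dataset configs) of what a custom project orderby could look like:

```python
from typing import List

from snuba_sdk import Column, Direction, Function, OrderBy

def project_orderby(
    direction: Direction, ids: List[int], slugs: List[str]
) -> OrderBy:
    # Hypothetical: with only project_id selected, ordering by "project"
    # can still sort by slug, translating ids at query time via
    # ClickHouse's transform(value, from_array, to_array, default).
    return OrderBy(
        Function("transform", [Column("project_id"), ids, slugs, ""]),
        direction,
    )

# Example orderby_converter entry; the builder calls it with a Direction.
orderby_converter = {
    "project": lambda direction: project_orderby(
        direction, [1, 2], ["backend", "frontend"]
    ),
}
```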

+ 7 - 0
src/sentry/search/events/datasets/base.py

@@ -1,6 +1,8 @@
 import abc
 from typing import Any, Callable, Dict, List, Mapping, Optional
 
+from snuba_sdk import OrderBy
+
 from sentry.api.event_search import SearchFilter
 from sentry.exceptions import InvalidSearchQuery
 from sentry.search.events import fields
@@ -29,6 +31,11 @@ class DatasetConfig(abc.ABC):
     def function_converter(self) -> Mapping[str, fields.SnQLFunction]:
         pass
 
+    @property
+    @abc.abstractmethod
+    def orderby_converter(self) -> Mapping[str, OrderBy]:
+        pass
+
     def reflective_result_type(
         self, index: Optional[int] = 0
     ) -> Callable[[List[fields.FunctionArg], Dict[str, Any]], str]:
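
One wrinkle worth noting: the abstract property is annotated `Mapping[str, OrderBy]`, but the builder invokes its entries with a `Direction` (`self.orderby_converter[bare_orderby](direction)` above), so concrete configs effectively return callables. A minimal sketch of a config satisfying the property, assuming a dataset with no custom orderby behaviour (the other abstract members are omitted for brevity):

```python
from typing import Callable, Mapping

from snuba_sdk import Direction, OrderBy

class ExampleDatasetConfig:
    # Hypothetical standalone config; a real one would subclass
    # DatasetConfig and also implement the other abstract properties.
    @property
    def orderby_converter(self) -> Mapping[str, Callable[[Direction], OrderBy]]:
        # No dataset-specific orderby behaviour.
        return {}
```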

Some files were not shown because too many files changed in this diff