Browse Source

feat(profiling): Introduce profile query builder (#40557)

This change exposes the profiles table via the events API. It allows us to
make discover-style queries against the profiles table with the freedom to pick
any of the available columns, aggregate them in various ways, sort the results
as needed, and filter them with numerous filters. To access the profiles table,
simply specify `dataset=profiles` as a query parameter.
Tony Xiao 2 years ago
parent
commit
147cbe529f

+ 2 - 1
src/sentry/api/bases/organization_events.py

@@ -20,7 +20,7 @@ from sentry.models import Organization, Project, Team
 from sentry.models.group import Group
 from sentry.search.events.constants import DURATION_UNITS, SIZE_UNITS, TIMEOUT_ERROR_MESSAGE
 from sentry.search.events.fields import get_function_alias
-from sentry.snuba import discover, metrics_enhanced_performance, metrics_performance
+from sentry.snuba import discover, metrics_enhanced_performance, metrics_performance, profiles
 from sentry.utils import snuba
 from sentry.utils.cursors import Cursor
 from sentry.utils.dates import get_interval_from_range, get_rollup_from_request, parse_stats_period
@@ -33,6 +33,7 @@ DATASET_OPTIONS = {
     "discover": discover,
     "metricsEnhanced": metrics_enhanced_performance,
     "metrics": metrics_performance,
+    "profiles": profiles,
 }
 
 

+ 10 - 3
src/sentry/api/endpoints/organization_events.py

@@ -17,7 +17,7 @@ from sentry.apidocs.utils import inline_sentry_response_serializer
 from sentry.models.organization import Organization
 from sentry.ratelimits.config import RateLimitConfig
 from sentry.search.events.fields import is_function
-from sentry.snuba import discover, metrics_enhanced_performance
+from sentry.snuba import discover, metrics_enhanced_performance, metrics_performance
 from sentry.snuba.referrer import Referrer
 from sentry.types.ratelimit import RateLimit, RateLimitCategory
 
@@ -217,6 +217,12 @@ class OrganizationEventsEndpoint(OrganizationEventsV2EndpointBase):
             )
         )
 
+        use_profiles = features.has(
+            "organizations:profiling",
+            organization=organization,
+            actor=request.user,
+        )
+
         performance_dry_run_mep = features.has(
             "organizations:performance-dry-run-mep", organization=organization, actor=request.user
         )
@@ -224,8 +230,9 @@ class OrganizationEventsEndpoint(OrganizationEventsV2EndpointBase):
             "organizations:use-metrics-layer", organization=organization, actor=request.user
         )
 
-        dataset = self.get_dataset(request) if use_metrics else discover
-        metrics_enhanced = dataset != discover
+        use_custom_dataset = use_metrics or use_profiles
+        dataset = self.get_dataset(request) if use_custom_dataset else discover
+        metrics_enhanced = dataset in {metrics_performance, metrics_enhanced_performance}
 
         sentry_sdk.set_tag("performance.metrics_enhanced", metrics_enhanced)
         allow_metric_aggregates = request.GET.get("preventMetricAggregates") != "1"

+ 4 - 1
src/sentry/api/event_search.py

@@ -511,6 +511,7 @@ class SearchVisitor(NodeVisitor):
             or is_measurement(key)
             or is_span_op_breakdown(key)
             or self.builder.get_field_type(key) == "number"
+            or self.is_duration_key(key)
         )
 
     def is_duration_key(self, key):
@@ -705,7 +706,9 @@ class SearchVisitor(NodeVisitor):
 
         # Numeric and boolean filters overlap on 1 and 0 values.
         if self.is_numeric_key(search_key.name):
-            return self._handle_numeric_filter(search_key, "=", [search_value.text, ""])
+            return self._handle_numeric_filter(
+                search_key, "!=" if negated else "=", [search_value.text, ""]
+            )
 
         if self.is_boolean_key(search_key.name):
             if search_value.text.lower() in ("true", "1"):

+ 9 - 6
src/sentry/search/events/builder.py

@@ -272,6 +272,7 @@ class QueryBuilder:
         from sentry.search.events.datasets.discover import DiscoverDatasetConfig
         from sentry.search.events.datasets.metrics import MetricsDatasetConfig
         from sentry.search.events.datasets.metrics_layer import MetricsLayerDatasetConfig
+        from sentry.search.events.datasets.profiles import ProfilesDatasetConfig
         from sentry.search.events.datasets.sessions import SessionsDatasetConfig
 
         self.config: DatasetConfig
@@ -284,6 +285,8 @@ class QueryBuilder:
                 self.config = MetricsLayerDatasetConfig(self)
             else:
                 self.config = MetricsDatasetConfig(self)
+        elif self.dataset == Dataset.Profiles:
+            self.config = ProfilesDatasetConfig(self)
         else:
             raise NotImplementedError(f"Data Set configuration not found for {self.dataset}.")
 
@@ -1142,12 +1145,12 @@ class QueryBuilder:
     ) -> Optional[WhereType]:
         name = search_filter.key.name
         value = search_filter.value.value
-        if value and (measurement_meta := self.get_measument_by_name(name)):
-            unit = measurement_meta.get("unit")
-            value = self.resolve_measurement_value(unit, value)
-            search_filter = SearchFilter(
-                search_filter.key, search_filter.operator, SearchValue(value)
-            )
+        if value and (unit := self.get_field_type(name)):
+            if unit in SIZE_UNITS or unit in DURATION_UNITS:
+                value = self.resolve_measurement_value(unit, value)
+                search_filter = SearchFilter(
+                    search_filter.key, search_filter.operator, SearchValue(value)
+                )
 
         if name in NO_CONVERSION_FIELDS:
             return None

+ 350 - 0
src/sentry/search/events/datasets/profiles.py

@@ -0,0 +1,350 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Callable, Mapping, Optional, Union
+
+from snuba_sdk import OrderBy
+
+from sentry.api.event_search import SearchFilter
+from sentry.search.events.constants import PROJECT_ALIAS, PROJECT_NAME_ALIAS
+from sentry.search.events.datasets import field_aliases, filter_aliases
+from sentry.search.events.datasets.base import DatasetConfig
+from sentry.search.events.fields import (
+    ColumnArg,
+    Combinator,
+    Function,
+    InvalidFunctionArgument,
+    InvalidSearchQuery,
+    NumberRange,
+    NumericColumn,
+    SnQLFunction,
+    with_default,
+)
+from sentry.search.events.types import NormalizedArg, ParamsType, SelectType, WhereType
+
+
+class Kind(Enum):
+    """Logical type of a profiles column; drives filter parsing and result typing."""
+
+    DATE = "date"
+    DURATION = "duration"
+    INTEGER = "integer"
+    NUMBER = "number"
+    STRING = "string"
+
+
+class Duration(Enum):
+    """Units a DURATION-kind column may be stored in (see Column.unit)."""
+
+    NANOSECOND = "nanosecond"
+    MICROSECOND = "microsecond"
+    MILLISECOND = "millisecond"
+    SECOND = "second"
+    MINUTE = "minute"
+    HOUR = "hour"
+    DAY = "day"
+    WEEK = "week"
+
+
+# The only units available right now are duration based
+Unit = Duration
+
+
+@dataclass(frozen=True)
+class Column:
+    # the external name to expose in queries and results
+    alias: str
+    # the internal column name in snuba
+    column: str
+    # kind/type associated with this column
+    kind: Kind
+    # some kinds (e.g. DURATION) have a unit associated with them
+    unit: Optional[Unit] = None
+
+
+# Maps each externally queryable alias to its snuba column and type metadata.
+COLUMNS = [
+    Column(alias="organization.id", column="organization_id", kind=Kind.INTEGER),
+    Column(alias="project.id", column="project_id", kind=Kind.INTEGER),
+    Column(alias="trace.transaction", column="transaction_id", kind=Kind.STRING),
+    Column(alias="id", column="profile_id", kind=Kind.STRING),
+    Column(alias="timestamp", column="received", kind=Kind.DATE),
+    Column(alias="device.arch", column="architecture", kind=Kind.STRING),
+    Column(alias="device.classification", column="device_classification", kind=Kind.STRING),
+    Column(alias="device.locale", column="device_locale", kind=Kind.STRING),
+    Column(alias="device.manufacturer", column="device_manufacturer", kind=Kind.STRING),
+    Column(alias="device.model", column="device_model", kind=Kind.STRING),
+    Column(alias="os.build", column="device_os_build_number", kind=Kind.STRING),
+    Column(alias="os.name", column="device_os_name", kind=Kind.STRING),
+    Column(alias="os.version", column="device_os_version", kind=Kind.STRING),
+    Column(
+        alias="profile.duration", column="duration_ns", kind=Kind.DURATION, unit=Duration.NANOSECOND
+    ),
+    Column(alias="environment", column="environment", kind=Kind.STRING),
+    Column(alias="platform.name", column="platform", kind=Kind.STRING),
+    Column(alias="trace", column="trace_id", kind=Kind.STRING),
+    Column(alias="transaction", column="transaction_name", kind=Kind.STRING),
+    # There is a `version_code` column that exists for
+    # legacy profiles, we've decided not to support that.
+    Column(alias="release", column="version_name", kind=Kind.STRING),
+    # We want to alias `project_id` to the column as well
+    # because the query builder uses that internally
+    Column(alias="project_id", column="project_id", kind=Kind.INTEGER),
+]
+
+# Lookup table from alias -> Column for O(1) resolution.
+COLUMN_MAP = {column.alias: column for column in COLUMNS}
+
+
+class ProfileColumnArg(ColumnArg):
+    """Function argument accepting any known profiles column or project field alias."""
+
+    def normalize(
+        self, value: str, params: ParamsType, combinator: Optional[Combinator]
+    ) -> NormalizedArg:
+        column = COLUMN_MAP.get(value)
+
+        # must be a known column or field alias
+        if column is None and value not in {PROJECT_ALIAS, PROJECT_NAME_ALIAS}:
+            raise InvalidFunctionArgument(f"{value} is not a valid column")
+
+        # the alias is returned unresolved; mapping to the snuba column
+        # happens later in the query builder
+        return value
+
+
+class ProfileNumericColumn(NumericColumn):
+    """Function argument restricted to numeric (integer/duration/number) profiles columns."""
+
+    def _normalize(self, value: str) -> str:
+        column = COLUMN_MAP.get(value)
+
+        if column is None:
+            raise InvalidFunctionArgument(f"{value} is not a valid column")
+
+        if (
+            column.kind == Kind.INTEGER
+            or column.kind == Kind.DURATION
+            or column.kind == Kind.NUMBER
+        ):
+            # resolves directly to the internal snuba column name
+            return column.column
+
+        raise InvalidFunctionArgument(f"{value} is not a numeric column")
+
+    def get_type(self, value: str) -> str:
+        # falls back to "number" for unknown columns instead of raising
+        try:
+            return COLUMN_MAP[value].kind.value
+        except KeyError:
+            return Kind.NUMBER.value
+
+
+class ProfilesDatasetConfig(DatasetConfig):
+    """Dataset configuration wiring the profiles columns, filter aliases and
+    aggregate functions into the shared QueryBuilder machinery."""
+
+    # Aliases backed by non-nullable snuba columns; note os.build and
+    # environment are intentionally absent (nullable — see the negated
+    # os.build filter test which expects an extra isNull branch).
+    non_nullable_keys = {
+        "organization.id",
+        "project.id",
+        "trace.transaction",
+        "id",
+        "timestamp",
+        "device.arch",
+        "device.classification",
+        "device.locale",
+        "device.manufacturer",
+        "device.model",
+        "os.name",
+        "os.version",
+        "profile.duration",
+        "platform.name",
+        "trace",
+        "transaction",
+        "release",
+        "project_id",
+    }
+
+    def __init__(self, builder: Any):
+        self.builder = builder
+
+    @property
+    def search_filter_converter(
+        self,
+    ) -> Mapping[str, Callable[[SearchFilter], Optional[WhereType]]]:
+        """Custom search-filter handlers for field aliases without a direct column."""
+        return {
+            PROJECT_ALIAS: self._project_slug_filter_converter,
+            PROJECT_NAME_ALIAS: self._project_slug_filter_converter,
+        }
+
+    def _project_slug_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        # delegate to the shared project-slug filter implementation
+        return filter_aliases.project_slug_converter(self.builder, search_filter)
+
+    @property
+    def field_alias_converter(self) -> Mapping[str, Callable[[str], SelectType]]:
+        """Resolvers for selectable field aliases (project slug variants)."""
+        return {
+            PROJECT_ALIAS: self._resolve_project_slug_alias,
+            PROJECT_NAME_ALIAS: self._resolve_project_slug_alias,
+        }
+
+    def _resolve_project_slug_alias(self, alias: str) -> SelectType:
+        # delegate to the shared project-slug alias implementation
+        return field_aliases.resolve_project_slug_alias(self.builder, alias)
+
+    @property
+    def function_converter(self) -> Mapping[str, SnQLFunction]:
+        """Aggregate functions supported on the profiles dataset."""
+        return {
+            function.name: function
+            for function in [
+                # TODO: A lot of this is duplicated from the discover dataset.
+                # Ideally, we refactor it to be shared across datasets.
+                SnQLFunction(
+                    "last_seen",
+                    snql_aggregate=lambda _, alias: Function(
+                        "max",
+                        [self.builder.column("timestamp")],
+                        alias,
+                    ),
+                    default_result_type="date",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "latest_event",
+                    snql_aggregate=lambda _, alias: Function(
+                        "argMax",
+                        [self.builder.column("id"), self.builder.column("timestamp")],
+                        alias,
+                    ),
+                    default_result_type="string",
+                ),
+                SnQLFunction(
+                    "count",
+                    snql_aggregate=lambda _, alias: Function(
+                        "count",
+                        [],
+                        alias,
+                    ),
+                    default_result_type="integer",
+                ),
+                SnQLFunction(
+                    "count_unique",
+                    required_args=[ProfileColumnArg("column")],
+                    snql_aggregate=lambda args, alias: Function("uniq", [args["column"]], alias),
+                    default_result_type="integer",
+                ),
+                SnQLFunction(
+                    "percentile",
+                    required_args=[
+                        ProfileNumericColumn("column"),
+                        NumberRange("percentile", 0, 1),
+                    ],
+                    snql_aggregate=self._resolve_percentile,
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "p50",
+                    optional_args=[
+                        with_default("profile.duration", ProfileNumericColumn("column")),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile(args, alias, 0.5),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "p75",
+                    optional_args=[
+                        with_default("profile.duration", ProfileNumericColumn("column")),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile(args, alias, 0.75),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "p95",
+                    optional_args=[
+                        with_default("profile.duration", ProfileNumericColumn("column")),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile(args, alias, 0.95),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "p99",
+                    optional_args=[
+                        with_default("profile.duration", ProfileNumericColumn("column")),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile(args, alias, 0.99),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "p100",
+                    optional_args=[
+                        with_default("profile.duration", ProfileNumericColumn("column")),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile(args, alias, 1),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "min",
+                    required_args=[ProfileNumericColumn("column")],
+                    snql_aggregate=lambda args, alias: Function("min", [args["column"]], alias),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "max",
+                    required_args=[ProfileNumericColumn("column")],
+                    snql_aggregate=lambda args, alias: Function("max", [args["column"]], alias),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "avg",
+                    required_args=[ProfileNumericColumn("column")],
+                    snql_aggregate=lambda args, alias: Function("avg", [args["column"]], alias),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "sum",
+                    required_args=[ProfileNumericColumn("column")],
+                    snql_aggregate=lambda args, alias: Function("sum", [args["column"]], alias),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                ),
+            ]
+        }
+
+    @property
+    def orderby_converter(self) -> Mapping[str, OrderBy]:
+        # no custom orderby handling for profiles
+        return {}
+
+    def resolve_column(self, column: str) -> str:
+        """Map an external alias to its snuba column; raises InvalidSearchQuery if unknown."""
+        try:
+            return COLUMN_MAP[column].column
+        except KeyError:
+            raise InvalidSearchQuery(f"Unknown field: {column}")
+
+    def resolve_column_type(self, column: str) -> Optional[str]:
+        """Return the column's unit (preferred) or kind value, or None if unknown."""
+        try:
+            col = COLUMN_MAP[column]
+            if col.unit:
+                # if the column has an associated unit,
+                # prioritize that over the kind
+                return col.unit.value
+            return col.kind.value
+        except KeyError:
+            return None
+
+    def _resolve_percentile(
+        self,
+        args: Mapping[str, Union[str, Column, SelectType, int, float]],
+        alias: str,
+        fixed_percentile: Optional[float] = None,
+    ) -> SelectType:
+        # p100 is simply max(); other percentiles use clickhouse's quantile()
+        return (
+            Function(
+                "max",
+                [args["column"]],
+                alias,
+            )
+            if fixed_percentile == 1
+            else Function(
+                f'quantile({fixed_percentile if fixed_percentile is not None else args["percentile"]})',
+                [args["column"]],
+                alias,
+            )
+        )

+ 1 - 0
src/sentry/snuba/dataset.py

@@ -13,6 +13,7 @@ class Dataset(Enum):
     Metrics = "metrics"
     PerformanceMetrics = "generic_metrics"
     Replays = "replays"
+    Profiles = "profiles"
 
 
 @unique

+ 71 - 0
src/sentry/snuba/profiles.py

@@ -0,0 +1,71 @@
+from typing import Any, List, Optional, Sequence
+
+from snuba_sdk.conditions import Condition, Op
+
+from sentry.search.events.builder import QueryBuilder
+from sentry.search.events.fields import InvalidSearchQuery
+from sentry.search.events.types import ParamsType, WhereType
+from sentry.snuba.discover import transform_tips
+from sentry.utils.snuba import Dataset
+
+
+def query(
+    selected_columns: Sequence[str],
+    query: Optional[str],
+    params: ParamsType,
+    equations: Optional[Sequence[str]] = None,
+    orderby: Optional[Sequence[str]] = None,
+    offset: int = 0,
+    limit: int = 50,
+    referrer: Optional[str] = None,
+    auto_fields: bool = False,
+    auto_aggregations: bool = False,
+    use_aggregate_conditions: bool = False,
+    allow_metric_aggregates: bool = False,
+    transform_alias_to_input_format: bool = False,
+    has_metrics: bool = False,
+    functions_acl: Optional[Sequence[str]] = None,
+    use_metrics_layer: bool = False,
+) -> Any:
+    """Run a discover-style query against the profiles dataset.
+
+    Some parameters (equations, allow_metric_aggregates,
+    transform_alias_to_input_format, has_metrics, use_metrics_layer) are
+    accepted only for signature compatibility with the other dataset query
+    functions and are not forwarded to the builder.
+
+    Raises InvalidSearchQuery when no columns are selected.
+    """
+    if not selected_columns:
+        raise InvalidSearchQuery("No columns selected")
+
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        query=query,
+        selected_columns=selected_columns,
+        orderby=orderby,
+        auto_fields=auto_fields,
+        auto_aggregations=auto_aggregations,
+        use_aggregate_conditions=use_aggregate_conditions,
+        functions_acl=functions_acl,
+        limit=limit,
+        offset=offset,
+    )
+    result = builder.process_results(builder.run_query(referrer))
+    result["meta"]["tips"] = transform_tips(builder.tips)
+    return result
+
+
+class ProfilesQueryBuilder(QueryBuilder):  # type: ignore
+    """QueryBuilder specialization for the profiles dataset."""
+
+    def resolve_column_name(self, col: str) -> str:
+        # delegate alias -> snuba column mapping to the dataset config
+        return self.config.resolve_column(col)
+
+    def resolve_params(self) -> List[WhereType]:
+        """Standard conditions plus the mandatory organization_id filter.
+
+        Requires params["organization_id"] to be present (KeyError otherwise).
+        """
+        conditions = super().resolve_params()
+
+        # the profiles dataset requires a condition
+        # on the organization_id in the query
+        conditions.append(
+            Condition(
+                self.column("organization.id"),
+                Op.EQ,
+                self.params["organization_id"],
+            )
+        )
+
+        return conditions
+
+    def get_field_type(self, field: str) -> Optional[str]:
+        # unit (e.g. "nanosecond") takes precedence over kind; None if unknown
+        return self.config.resolve_column_type(field)

+ 550 - 0
tests/sentry/snuba/test_profiles.py

@@ -0,0 +1,550 @@
+from datetime import datetime, timedelta
+
+import pytest
+from django.utils import timezone
+from snuba_sdk.aliased_expression import AliasedExpression
+from snuba_sdk.column import Column
+from snuba_sdk.conditions import Condition, Op, Or
+from snuba_sdk.function import Function
+
+from sentry.search.events.datasets.profiles import COLUMNS as PROFILE_COLUMNS
+from sentry.search.events.datasets.profiles import ProfilesDatasetConfig
+from sentry.search.events.fields import InvalidSearchQuery
+from sentry.snuba.profiles import ProfilesQueryBuilder
+from sentry.testutils.factories import Factories
+from sentry.utils.snuba import Dataset
+
+# pin a timestamp for now so test results don't change
+now = datetime(2022, 10, 31, 0, 0, tzinfo=timezone.utc)
+today = now.replace(hour=0, minute=0, second=0, microsecond=0)
+
+
+@pytest.fixture
+def params():
+    """Minimal org/team/project/user query params spanning the last 7 days."""
+    organization = Factories.create_organization()
+    team = Factories.create_team(organization=organization)
+    project = Factories.create_project(organization=organization, teams=[team])
+
+    user = Factories.create_user()
+    Factories.create_team_membership(team=team, user=user)
+
+    return {
+        "start": now - timedelta(days=7),
+        "end": now - timedelta(seconds=1),
+        "project_id": [project.id],
+        "project_objects": [project],
+        "organization_id": organization.id,
+        "user_id": user.id,
+        "team_id": [team.id],
+    }
+
+
+@pytest.mark.parametrize(
+    "field,resolved",
+    [pytest.param(column.alias, column.column, id=column.alias) for column in PROFILE_COLUMNS],
+)
+@pytest.mark.django_db
+def test_field_resolution(params, field, resolved):
+    """Every external alias resolves to its snuba column, aliased when the names differ."""
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=[field],
+    )
+    if field == resolved:
+        # e.g. project_id maps to itself, so no AliasedExpression is needed
+        assert builder.columns == [Column(field)]
+    else:
+        assert builder.columns == [AliasedExpression(Column(resolved), alias=field)]
+
+
+@pytest.mark.parametrize(
+    "field,resolved",
+    [
+        pytest.param(
+            "last_seen()",
+            Function("max", parameters=[Column("received")], alias="last_seen"),
+            id="last_seen()",
+        ),
+        pytest.param(
+            "latest_event()",
+            Function(
+                "argMax",
+                parameters=[Column("profile_id"), Column("received")],
+                alias="latest_event",
+            ),
+            id="latest_event()",
+        ),
+        pytest.param("count()", Function("count", parameters=[], alias="count"), id="count()"),
+        pytest.param(
+            "count_unique(transaction)",
+            Function(
+                "uniq", parameters=[Column("transaction_name")], alias="count_unique_transaction"
+            ),
+            id="count_unique(transaction)",
+        ),
+        pytest.param(
+            "percentile(profile.duration,0.25)",
+            Function(
+                "quantile(0.25)",
+                parameters=[Column("duration_ns")],
+                alias="percentile_profile_duration_0_25",
+            ),
+            id="percentile(profile.duration,0.25)",
+        ),
+        # p50/p75/p95/p99 without an argument default to profile.duration
+        *[
+            pytest.param(
+                f"p{qt}()",
+                Function(
+                    f"quantile(0.{qt.rstrip('0')})",
+                    parameters=[Column("duration_ns")],
+                    alias=f"p{qt}",
+                ),
+                id=f"p{qt}()",
+            )
+            for qt in ["50", "75", "95", "99"]
+        ],
+        pytest.param(
+            "p100()",
+            Function(
+                "max",
+                parameters=[Column("duration_ns")],
+                alias="p100",
+            ),
+            id="p100()",
+        ),
+        *[
+            pytest.param(
+                f"p{qt}(profile.duration)",
+                Function(
+                    f"quantile(0.{qt.rstrip('0')})",
+                    parameters=[Column("duration_ns")],
+                    alias=f"p{qt}_profile_duration",
+                ),
+                id=f"p{qt}(profile.duration)",
+            )
+            for qt in ["50", "75", "95", "99"]
+        ],
+        pytest.param(
+            "p100(profile.duration)",
+            Function(
+                "max",
+                parameters=[Column("duration_ns")],
+                alias="p100_profile_duration",
+            ),
+            id="p100(profile.duration)",
+        ),
+        *[
+            pytest.param(
+                f"{fn}(profile.duration)",
+                Function(
+                    fn,
+                    parameters=[Column("duration_ns")],
+                    alias=f"{fn}_profile_duration",
+                ),
+                id=f"{fn}(profile.duration)",
+            )
+            for fn in ["min", "max", "avg", "sum"]
+        ],
+    ],
+)
+@pytest.mark.django_db
+def test_aggregate_resolution(params, field, resolved):
+    """Aggregate function strings resolve to the expected snuba expressions."""
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=[field],
+    )
+    assert builder.columns == [resolved]
+
+
+@pytest.mark.parametrize(
+    "field,message",
+    [
+        pytest.param("foo", "Unknown field: foo", id="foo"),
+        pytest.param("count(id)", "count: expected 0 argument\\(s\\)", id="count(id)"),
+        pytest.param(
+            "count_unique(foo)",
+            "count_unique: column argument invalid: foo is not a valid column",
+            id="count_unique(foo)",
+        ),
+        *[
+            pytest.param(
+                f"p{qt}(foo)",
+                f"p{qt}: column argument invalid: foo is not a valid column",
+                id=f"p{qt}(foo)",
+            )
+            for qt in ["50", "75", "95", "99"]
+        ],
+        # `id` is a known column but a string, not numeric
+        *[
+            pytest.param(
+                f"p{qt}(id)",
+                f"p{qt}: column argument invalid: id is not a numeric column",
+                id=f"p{qt}(id)",
+            )
+            for qt in ["50", "75", "95", "99"]
+        ],
+        pytest.param(
+            "percentile(foo,0.25)",
+            "percentile: column argument invalid: foo is not a valid column",
+            id="percentile(foo,0.25)",
+        ),
+        pytest.param(
+            "percentile(id,0.25)",
+            "percentile: column argument invalid: id is not a numeric column",
+            id="percentile(id,0.25)",
+        ),
+        *[
+            pytest.param(
+                f"{fn}(foo)",
+                f"{fn}: column argument invalid: foo is not a valid column",
+                id=f"{fn}(foo)",
+            )
+            for fn in ["min", "max", "avg", "sum"]
+        ],
+        *[
+            pytest.param(
+                f"{fn}(id)",
+                f"{fn}: column argument invalid: id is not a numeric column",
+                id=f"{fn}(id)",
+            )
+            for fn in ["min", "max", "avg", "sum"]
+        ],
+    ],
+)
+@pytest.mark.django_db
+def test_invalid_field_resolution(params, field, message):
+    """Unknown or non-numeric columns raise InvalidSearchQuery with a descriptive message."""
+    with pytest.raises(InvalidSearchQuery, match=message):
+        ProfilesQueryBuilder(
+            dataset=Dataset.Profiles,
+            params=params,
+            selected_columns=[field],
+        )
+
+
+def is_null(column: str) -> Function:
+    """Shorthand for the snuba isNull(column) expression used in negated-filter tests."""
+    return Function("isNull", parameters=[Column(column)])
+
+
+@pytest.mark.parametrize(
+    "query,conditions",
+    [
+        pytest.param(
+            "project.id:1", [Condition(Column("project_id"), Op.EQ, 1.0)], id="project.id:1"
+        ),
+        pytest.param(
+            "!project.id:1",
+            [Condition(Column("project_id"), Op.NEQ, 1.0)],
+            id="!project.id:1",
+        ),
+        pytest.param(
+            f"trace.transaction:{'a' * 32}",
+            [Condition(Column("transaction_id"), Op.EQ, "a" * 32)],
+            id=f"trace.transaction:{'a' * 32}",
+        ),
+        pytest.param(
+            f"!trace.transaction:{'a' * 32}",
+            [Condition(Column("transaction_id"), Op.NEQ, "a" * 32)],
+            id=f"!trace.transaction:{'a' * 32}",
+        ),
+        pytest.param(
+            f"id:{'a' * 32}",
+            [Condition(Column("profile_id"), Op.EQ, "a" * 32)],
+            id=f"id:{'a' * 32}",
+        ),
+        pytest.param(
+            f"!id:{'a' * 32}",
+            [Condition(Column("profile_id"), Op.NEQ, "a" * 32)],
+            id=f"!id:{'a' * 32}",
+        ),
+        pytest.param(
+            f"timestamp:{today.isoformat()}",
+            [
+                # filtering for a timestamp means we search for a window around it
+                Condition(Column("received"), Op.GTE, today - timedelta(minutes=5)),
+                Condition(Column("received"), Op.LT, today + timedelta(minutes=6)),
+            ],
+            id=f"timestamp:{today.isoformat()}",
+        ),
+        pytest.param(
+            f"!timestamp:{today.isoformat()}",
+            [],  # not sure what this should be yet
+            id=f"!timestamp:{today.isoformat()}",
+            marks=pytest.mark.xfail(reason="date filters cannot negated"),
+        ),
+        pytest.param(
+            "device.arch:x86_64",
+            [Condition(Column("architecture"), Op.EQ, "x86_64")],
+            id="device.arch:x86_64",
+        ),
+        pytest.param(
+            "!device.arch:x86_64",
+            [Condition(Column("architecture"), Op.NEQ, "x86_64")],
+            id="!device.arch:x86_64",
+        ),
+        pytest.param(
+            "device.classification:high",
+            [Condition(Column("device_classification"), Op.EQ, "high")],
+            id="device.classification:high",
+        ),
+        pytest.param(
+            "!device.classification:high",
+            [Condition(Column("device_classification"), Op.NEQ, "high")],
+            id="!device.classification:high",
+        ),
+        pytest.param(
+            "device.locale:en_US",
+            [Condition(Column("device_locale"), Op.EQ, "en_US")],
+            id="device.locale:en_US",
+        ),
+        pytest.param(
+            "!device.locale:en_US",
+            [Condition(Column("device_locale"), Op.NEQ, "en_US")],
+            id="!device.locale:en_US",
+        ),
+        pytest.param(
+            "device.manufacturer:Apple",
+            [Condition(Column("device_manufacturer"), Op.EQ, "Apple")],
+            id="device.manufacturer:Apple",
+        ),
+        pytest.param(
+            "!device.manufacturer:Apple",
+            [Condition(Column("device_manufacturer"), Op.NEQ, "Apple")],
+            id="!device.manufacturer:Apple",
+        ),
+        pytest.param(
+            "device.model:iPhone14,2",
+            [Condition(Column("device_model"), Op.EQ, "iPhone14,2")],
+            id="device.model:iPhone14,2",
+        ),
+        pytest.param(
+            "!device.model:iPhone14,2",
+            [Condition(Column("device_model"), Op.NEQ, "iPhone14,2")],
+            id="!device.model:iPhone14,2",
+        ),
+        pytest.param(
+            "device.model:iPhone14,2",
+            [Condition(Column("device_model"), Op.EQ, "iPhone14,2")],
+            id="device.model:iPhone14,2",
+        ),
+        pytest.param(
+            "os.build:20G817",
+            [Condition(Column("device_os_build_number"), Op.EQ, "20G817")],
+            id="os.build:20G817",
+        ),
+        pytest.param(
+            "!os.build:20G817",
+            [
+                # os.build is a nullable column
+                Or(
+                    conditions=[
+                        Condition(is_null("device_os_build_number"), Op.EQ, 1),
+                        Condition(Column("device_os_build_number"), Op.NEQ, "20G817"),
+                    ]
+                )
+            ],
+            id="!os.build:20G817",
+        ),
+        pytest.param(
+            "os.name:iOS",
+            [Condition(Column("device_os_name"), Op.EQ, "iOS")],
+            id="os.name:iOS",
+        ),
+        pytest.param(
+            "!os.name:iOS",
+            [Condition(Column("device_os_name"), Op.NEQ, "iOS")],
+            id="!os.name:iOS",
+        ),
+        pytest.param(
+            "os.version:15.2",
+            [Condition(Column("device_os_version"), Op.EQ, "15.2")],
+            id="os.version:15.2",
+        ),
+        pytest.param(
+            "!os.version:15.2",
+            [Condition(Column("device_os_version"), Op.NEQ, "15.2")],
+            id="!os.version:15.2",
+        ),
+        pytest.param(
+            "profile.duration:1",
+            # since 1 means 1 millisecond; converted to nanoseconds it is 1e6
+            [Condition(Column("duration_ns"), Op.EQ, 1e6)],
+            id="profile.duration:1",
+        ),
+        pytest.param(
+            "!profile.duration:1",
+            # since 1 means 1 millisecond; converted to nanoseconds it is 1e6
+            [Condition(Column("duration_ns"), Op.NEQ, 1e6)],
+            id="!profile.duration:1",
+        ),
+        pytest.param(
+            "profile.duration:>1",
+            # since 1 means 1 millisecond; converted to nanoseconds it is 1e6
+            [Condition(Column("duration_ns"), Op.GT, 1e6)],
+            id="profile.duration:>1",
+        ),
+        pytest.param(
+            "profile.duration:<1",
+            # since 1 means 1 millisecond; converted to nanoseconds it is 1e6
+            [Condition(Column("duration_ns"), Op.LT, 1e6)],
+            id="profile.duration:<1",
+        ),
+        pytest.param(
+            "profile.duration:1s",
+            # since 1s means 1 second; converted to nanoseconds it is 1e9
+            [Condition(Column("duration_ns"), Op.EQ, 1e9)],
+            id="profile.duration:1s",
+        ),
+        pytest.param(
+            "environment:dev",
+            [Condition(Column("environment"), Op.EQ, "dev")],
+            id="environment:dev",
+        ),
+        pytest.param(
+            "!environment:dev",
+            [
+                # environment is a nullable column
+                Or(
+                    conditions=[
+                        Condition(is_null("environment"), Op.EQ, 1),
+                        Condition(Column("environment"), Op.NEQ, "dev"),
+                    ]
+                )
+            ],
+            id="!environment:dev",
+        ),
+        pytest.param(
+            "platform.name:cocoa",
+            [Condition(Column("platform"), Op.EQ, "cocoa")],
+            id="platform.name:cocoa",
+        ),
+        pytest.param(
+            "!platform.name:cocoa",
+            [Condition(Column("platform"), Op.NEQ, "cocoa")],
+            id="!platform.name:cocoa",
+        ),
+        pytest.param(
+            f"trace:{'a' * 32}",
+            [Condition(Column("trace_id"), Op.EQ, "a" * 32)],
+            id=f"trace:{'a' * 32}",
+        ),
+        pytest.param(
+            f"!trace:{'a' * 32}",
+            [Condition(Column("trace_id"), Op.NEQ, "a" * 32)],
+            id=f"!trace:{'a' * 32}",
+        ),
+        pytest.param(
+            "transaction:foo",
+            [Condition(Column("transaction_name"), Op.EQ, "foo")],
+            id="transaction:foo",
+        ),
+        pytest.param(
+            "!transaction:foo",
+            [Condition(Column("transaction_name"), Op.NEQ, "foo")],
+            id="!transaction:foo",
+        ),
+        pytest.param(
+            "release:foo",
+            [Condition(Column("version_name"), Op.EQ, "foo")],
+            id="release:foo",
+        ),
+        pytest.param(
+            "!release:foo",
+            [Condition(Column("version_name"), Op.NEQ, "foo")],
+            id="!release:foo",
+        ),
+        pytest.param(
+            "project_id:1",
+            [Condition(Column("project_id"), Op.EQ, 1)],
+            id="project_id:1",
+        ),
+        pytest.param(
+            "!project_id:1",
+            [Condition(Column("project_id"), Op.NEQ, 1)],
+            id="!project_id:1",
+        ),
+    ],
+)
+@pytest.mark.django_db
+def test_where_resolution(params, query, conditions):
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=["count()"],
+        query=query,
+    )
+
+    for condition in conditions:
+        assert condition in builder.where, condition
+
+
+@pytest.mark.parametrize("field", [pytest.param("project"), pytest.param("project.name")])
+@pytest.mark.django_db
+def test_where_resolution_project_slug(params, field):
+    project = params["project_objects"][0]
+
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=["count()"],
+        query=f"{field}:{project.slug}",
+    )
+    assert Condition(Column("project_id"), Op.EQ, project.id), builder.condition
+
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=["count()"],
+        query=f"!{field}:{project.slug}",
+    )
+    assert Condition(Column("project_id"), Op.NEQ, project.id), builder.condition
+
+
+@pytest.mark.parametrize(
+    "field,column",
+    [
+        pytest.param(
+            column.alias,
+            column.column,
+            id=f"has:{column.alias}",
+            marks=pytest.mark.skip(reason="has not working yet"),
+        )
+        for column in PROFILE_COLUMNS
+    ],
+)
+@pytest.mark.django_db
+def test_has_resolution(params, field, column):
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=["count()"],
+        query=f"has:{field}",
+    )
+    if field in ProfilesDatasetConfig.non_nullable_keys:
+        assert Condition(Column(column), Op.NEQ, "") in builder.where
+    else:
+        assert Condition(is_null(column), Op.NEQ, 1) in builder.where
+
+
+@pytest.mark.parametrize(
+    "field,column",
+    [
+        pytest.param(
+            column.alias,
+            column.column,
+            id=f"!has:{column.alias}",
+            marks=pytest.mark.skip(reason="!has not working yet"),
+        )
+        for column in PROFILE_COLUMNS
+    ],
+)
+@pytest.mark.django_db
+def test_not_has_resolution(params, field, column):
+    builder = ProfilesQueryBuilder(
+        dataset=Dataset.Profiles,
+        params=params,
+        selected_columns=["count()"],
+        query=f"!has:{field}",
+    )
+    if field in ProfilesDatasetConfig.non_nullable_keys:
+        assert Condition(Column(column), Op.EQ, "") in builder.where
+    else:
+        assert Condition(is_null(column), Op.EQ, 1) in builder.where

+ 29 - 0
tests/snuba/api/endpoints/test_organization_events.py

@@ -5285,3 +5285,32 @@ class OrganizationEventsEndpointTest(APITestCase, SnubaTestCase):
         }
         response = self.do_request(query)
         assert response.status_code == 200, response.content
+
+    @mock.patch("sentry.search.events.builder.raw_snql_query")
+    def test_profiles_dataset_simple(self, mock_snql_query):
+        # Smoke test for `dataset=profiles`: snuba is mocked out with one empty
+        # result, so this only verifies that every profiling field/aggregate
+        # below is accepted and resolved by the endpoint (status 200), not the
+        # actual query results.
+        mock_snql_query.side_effect = [{"meta": {}, "data": []}]
+
+        query = {
+            "field": [
+                "project",
+                "transaction",
+                "last_seen()",
+                "latest_event()",
+                "count()",
+                "count_unique(transaction)",
+                "percentile(profile.duration, 0.25)",
+                "p50(profile.duration)",
+                "p75(profile.duration)",
+                "p95(profile.duration)",
+                "p99(profile.duration)",
+                "p100(profile.duration)",
+                "min(profile.duration)",
+                "max(profile.duration)",
+                "avg(profile.duration)",
+                "sum(profile.duration)",
+            ],
+            "project": [self.project.id],
+            "dataset": "profiles",
+        }
+        # request is made with the profiling feature flag enabled
+        response = self.do_request(query, features={"organizations:profiling": True})
+        assert response.status_code == 200, response.content

+ 2 - 1
tests/snuba/api/endpoints/test_organization_events_mep.py

@@ -84,7 +84,8 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
 
         assert response.status_code == 400, response.content
         assert (
-            response.data["detail"] == "dataset must be one of: discover, metricsEnhanced, metrics"
+            response.data["detail"]
+            == "dataset must be one of: discover, metricsEnhanced, metrics, profiles"
         )
 
     def test_out_of_retention(self):