Browse Source

fix(on-demand-metrics): is on demand metric check (#54092)

Ogi 1 year ago
parent
commit
780d7e5320

+ 6 - 2
src/sentry/search/events/builder/metrics.py

@@ -40,7 +40,11 @@ from sentry.search.events.types import (
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.snuba.dataset import Dataset
-from sentry.snuba.metrics.extraction import QUERY_HASH_KEY, OndemandMetricSpec, is_on_demand_query
+from sentry.snuba.metrics.extraction import (
+    QUERY_HASH_KEY,
+    OndemandMetricSpec,
+    is_on_demand_metric_query,
+)
 from sentry.snuba.metrics.fields import histogram as metrics_histogram
 from sentry.snuba.metrics.query import MetricField, MetricsQuery
 from sentry.utils.dates import to_timestamp
@@ -117,7 +121,7 @@ class MetricsQueryBuilder(QueryBuilder):
         if not field:
             return None
 
-        if not is_on_demand_query(dataset, field, query):
+        if not is_on_demand_metric_query(dataset, field, query):
             return None
 
         try:

+ 65 - 8
src/sentry/snuba/metrics/extraction.py

@@ -211,10 +211,10 @@ class MetricSpec(TypedDict):
 
 def is_on_demand_snuba_query(snuba_query: SnubaQuery) -> bool:
     """Returns ``True`` if the snuba query can't be supported by standard metrics."""
-    return is_on_demand_query(snuba_query.dataset, snuba_query.aggregate, snuba_query.query)
+    return is_on_demand_metric_query(snuba_query.dataset, snuba_query.aggregate, snuba_query.query)
 
 
-def is_on_demand_query(
+def is_on_demand_metric_query(
     dataset: Optional[Union[str, Dataset]], aggregate: str, query: Optional[str]
 ) -> bool:
     """Returns ``True`` if the dataset is performance metrics and query contains non-standard search fields."""
@@ -222,11 +222,32 @@ def is_on_demand_query(
     if not dataset or Dataset(dataset) != Dataset.PerformanceMetrics:
         return False
 
+    if is_standard_metrics_compatible(dataset, aggregate, query):
+        return False
+
+    for field in _get_aggregate_fields(aggregate):
+        if not _is_on_demand_supported_field(field):
+            return False
+    try:
+        return _is_on_demand_supported_query(event_search.parse_search_query(query))
+    except InvalidSearchQuery:
+        logger.error(f"Failed to parse search query: {query}", exc_info=True)
+        return False
+
+
+def is_standard_metrics_compatible(
+    dataset: Optional[Union[str, Dataset]], aggregate: str, query: Optional[str]
+) -> bool:
+    """Returns ``True`` if the query can be supported by standard metrics."""
+
+    if not dataset or Dataset(dataset) not in [Dataset.Metrics, Dataset.PerformanceMetrics]:
+        return False
+
     for field in _get_aggregate_fields(aggregate):
         if not _is_standard_metrics_field(field):
-            return True
+            return False
     try:
-        return not _is_standard_metrics_query(event_search.parse_search_query(query))
+        return _is_standard_metrics_query(event_search.parse_search_query(query))
     except InvalidSearchQuery:
         logger.error(f"Failed to parse search query: {query}", exc_info=True)
         return False
@@ -274,10 +295,40 @@ def _is_standard_metrics_search_filter(token: QueryToken) -> bool:
     return True
 
 
+def _is_on_demand_supported_query(tokens: Sequence[QueryToken]) -> bool:
+    """
+    Recursively checks if any of the supplied token contain search filters that can't be handled by standard metrics.
+    """
+
+    for token in tokens:
+        if not _is_on_demand_supported_search_filter(token):
+            return False
+
+    return True
+
+
+def _is_on_demand_supported_search_filter(token: QueryToken) -> bool:
+    if isinstance(token, SearchFilter):
+        return _is_on_demand_supported_field(token.key.name)
+
+    if isinstance(token, ParenExpression):
+        return _is_on_demand_supported_query(token.children)
+
+    return True
+
+
 def _is_standard_metrics_field(field: str) -> bool:
     return field in _STANDARD_METRIC_FIELDS
 
 
+def _is_on_demand_supported_field(field: str) -> bool:
+    try:
+        _map_field_name(field)
+        return True
+    except ValueError:
+        return False
+
+
 class OndemandMetricSpec:
     """
     Contains the information required to query or extract an on-demand metric.
@@ -352,9 +403,8 @@ class OndemandMetricSpec:
 
     def query_hash(self) -> str:
         """Returns a hash of the query and field to be used as a unique identifier for the on-demand metric."""
-
-        # TODO: Figure out how to support multiple fields and different but equivalent queries
-        str_to_hash = f"{self.field};{self._query}"
+        sorted_conditions = str(_deep_sorted(self.condition()))
+        str_to_hash = f"{self.field};{sorted_conditions}"
         return hashlib.shake_128(bytes(str_to_hash, encoding="ascii")).hexdigest(4)
 
     def condition(self) -> RuleCondition:
@@ -394,7 +444,7 @@ def _convert_countif_filter(key: str, op: str, value: str) -> RuleCondition:
 
 def _map_field_name(search_key: str) -> str:
     """
-    Maps a the name of a field in a search query to the event protocol path.
+    Maps a name of a field in a search query to the event protocol path.
 
     Raises an exception if the field is not supported.
     """
@@ -533,3 +583,10 @@ class SearchQueryConverter:
             condition = {"op": "not", "inner": condition}
 
         return condition
+
+
+def _deep_sorted(value: Union[Any, Dict[Any, Any]]) -> Union[Any, Dict[Any, Any]]:
+    if isinstance(value, dict):
+        return {key: _deep_sorted(value) for key, value in sorted(value.items())}
+    else:
+        return value

+ 1 - 1
tests/sentry/search/events/builder/test_metrics.py

@@ -2166,7 +2166,7 @@ class AlertMetricsQueryBuilderTest(MetricBuilderBaseTest):
         query_hash_index = indexer.resolve(UseCaseID.TRANSACTIONS, None, QUERY_HASH_KEY)
 
         query_hash_clause = Condition(
-            lhs=Column(name=f"tags_raw[{query_hash_index}]"), op=Op.EQ, rhs="80237309"
+            lhs=Column(name=f"tags_raw[{query_hash_index}]"), op=Op.EQ, rhs="3b902501"
         )
 
         assert query_hash_clause in snql_query.where

+ 153 - 43
tests/sentry/snuba/test_extraction.py

@@ -1,64 +1,174 @@
 from sentry.snuba.dataset import Dataset
-from sentry.snuba.metrics.extraction import OndemandMetricSpec, is_on_demand_query
+from sentry.snuba.metrics.extraction import (
+    OndemandMetricSpec,
+    is_on_demand_metric_query,
+    is_standard_metrics_compatible,
+)
 
 
-def test_is_on_demand_query_wrong_dataset():
-    assert is_on_demand_query(Dataset.Transactions, "count()", "geo.city:Vienna") is False
-    assert (
-        is_on_demand_query(Dataset.Metrics, "count()", "browser.version:1 os.name:android") is False
-    )
+class TestIsOnDemandMetricQuery:
+    perf_metrics = Dataset.PerformanceMetrics
 
+    def test_wrong_dataset(self):
+        assert (
+            is_on_demand_metric_query(Dataset.Transactions, "count()", "geo.city:Vienna") is False
+        )
+        assert (
+            is_on_demand_metric_query(
+                Dataset.Metrics, "count()", "browser.version:1 os.name:android"
+            )
+            is False
+        )
 
-def test_is_on_demand_query_no_query():
-    assert is_on_demand_query(Dataset.PerformanceMetrics, "count()", "") is False
+    def test_no_query(self):
+        assert is_on_demand_metric_query(Dataset.PerformanceMetrics, "count()", "") is False
 
+    def test_invalid_query(self):
+        assert is_on_demand_metric_query(self.perf_metrics, "count()", "AND") is False
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", ")AND transaction.duration:>=1")
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", "transaction.duration:>=abc")
+            is False
+        )
+        assert is_on_demand_metric_query(self.perf_metrics, "count_if(}", "") is False
 
-def test_is_on_demand_query_invalid_query():
-    dataset = Dataset.PerformanceMetrics
+    def test_on_demand_queries(self):
+        # # transaction.duration is a non-standard field
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", "transaction.duration:>=1")
+            is True
+        )
+        # # geo.city is a non-standard field
+        assert is_on_demand_metric_query(self.perf_metrics, "count()", "geo.city:Vienna") is True
+        # os.name is a standard field, browser.version is not
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "count()", "geo.city:Vienna os.name:android"
+            )
+            is True
+        )
+        # os.version is not a standard field
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics,
+                "count()",
+                "(release:a OR transaction.op:b) transaction.duration:>1s",
+            )
+            is True
+        )
 
-    assert is_on_demand_query(dataset, "count()", "AND") is False
-    assert is_on_demand_query(dataset, "count()", ")AND transaction.duration:>=1") is False
-    assert is_on_demand_query(dataset, "count()", "transaction.duration:>=abc") is False
-    assert is_on_demand_query(dataset, "count_if(}", "") is False
+    def test_standard_comaptible_queries(self):
+        assert is_on_demand_metric_query(self.perf_metrics, "count()", "") is False
+        assert is_on_demand_metric_query(self.perf_metrics, "count()", "environment:dev") is False
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "count()", "release:initial OR os.name:android"
+            )
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics,
+                "count()",
+                "(http.method:POST OR http.status_code:404) browser.name:chrome",
+            )
+            is False
+        )
+        assert is_on_demand_metric_query(self.perf_metrics, "foo.bar", "") is False
+        assert is_on_demand_metric_query(self.perf_metrics, "count()", "foo.bar") is False
+
+    def test_countif(self):
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "count_if(transaction.duration,equals,300)", ""
+            )
+            is True
+        )
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, 'count_if(release,equals,"foo")', "")
+            is False
+        )
 
 
-def test_is_on_demand_query_true():
-    dataset = Dataset.PerformanceMetrics
+class TestIsStandardMetricsCompatible:
+    perf_metrics = Dataset.PerformanceMetrics
 
-    # transaction.duration is a non-standard field
-    assert is_on_demand_query(dataset, "count()", "transaction.duration:>=1") is True
-    # transaction.duration is a non-standard field
-    assert is_on_demand_query(dataset, "count()", "geo.city:Vienna") is True
-    # os.name is a standard field, browser.version is not
-    assert is_on_demand_query(dataset, "count()", "browser.version:1 os.name:android") is True
-    # os.version is not a standard field
-    assert (
-        is_on_demand_query(
-            dataset, "count()", "(release:a OR transaction.op:b) transaction.duration:>1s"
+    def test_wrong_dataset(self):
+        assert is_standard_metrics_compatible(Dataset.Transactions, "count()", "") is False
+        assert (
+            is_standard_metrics_compatible(Dataset.Discover, "count()", "os.name:android") is False
         )
-        is True
-    )
 
+    def test_no_query(self):
+        assert is_standard_metrics_compatible(Dataset.PerformanceMetrics, "count()", "") is True
 
-def test_is_on_demand_query_false():
-    dataset = Dataset.PerformanceMetrics
+    def test_invalid_query(self):
+        dataset = Dataset.PerformanceMetrics
 
-    assert is_on_demand_query(dataset, "count()", "") is False
-    assert is_on_demand_query(dataset, "count()", "environment:dev") is False
-    assert is_on_demand_query(dataset, "count()", "release:initial OR os.name:android") is False
-    assert (
-        is_on_demand_query(
-            dataset, "count()", "(http.method:POST OR http.status_code:404) browser.name:chrome"
-        )
-        is False
-    )
+        assert is_standard_metrics_compatible(dataset, "count()", ")AND os.name:>=1") is False
+        assert is_standard_metrics_compatible(dataset, "count()", "os.name><=abc") is False
 
+    def test_on_demand_queries(self):
+        # # transaction.duration is a non-standard field
+        assert (
+            is_standard_metrics_compatible(self.perf_metrics, "count()", "transaction.duration:>=1")
+            is False
+        )
+        # # geo.city is a non-standard field
+        assert (
+            is_standard_metrics_compatible(self.perf_metrics, "count()", "geo.city:Vienna") is False
+        )
+        # os.name is a standard field, browser.version is not
+        assert (
+            is_standard_metrics_compatible(
+                self.perf_metrics, "count()", "geo.city:Vienna os.name:android"
+            )
+            is False
+        )
+        # os.version is not a standard field
+        assert (
+            is_standard_metrics_compatible(
+                self.perf_metrics,
+                "count()",
+                "(release:a OR transaction.op:b) transaction.duration:>1s",
+            )
+            is False
+        )
 
-def test_is_on_demand_query_countif():
-    dataset = Dataset.PerformanceMetrics
+    def test_standard_comaptible_queries(self):
+        assert is_standard_metrics_compatible(self.perf_metrics, "count()", "") is True
+        assert (
+            is_standard_metrics_compatible(self.perf_metrics, "count()", "environment:dev") is True
+        )
+        assert (
+            is_standard_metrics_compatible(
+                self.perf_metrics, "count()", "release:initial OR os.name:android"
+            )
+            is True
+        )
+        assert (
+            is_standard_metrics_compatible(
+                self.perf_metrics,
+                "count()",
+                "(http.method:POST OR http.status_code:404) browser.name:chrome",
+            )
+            is True
+        )
 
-    assert is_on_demand_query(dataset, "count_if(transaction.duration,equals,300)", "") is True
-    assert is_on_demand_query(dataset, 'count_if(release,equals,"foo")', "") is False
+    def test_countif(self):
+        assert (
+            is_standard_metrics_compatible(
+                self.perf_metrics, "count_if(transaction.duration,equals,300)", ""
+            )
+            is False
+        )
+        assert (
+            is_standard_metrics_compatible(self.perf_metrics, 'count_if(release,equals,"foo")', "")
+            is True
+        )
 
 
 def test_spec_simple_query_count():