Browse Source

chore(metrics): Remove tag values are strings option (#41092)

- This removes the tag value option since we're now fully on using tag
values as strings instead of indexed integers
- This is needed so we can start on wildcard searching
William Mak 2 years ago
parent
commit
4c9c03f8a9

+ 0 - 48
.github/workflows/backend.yml

@@ -107,54 +107,6 @@ jobs:
       - name: Handle artifacts
         uses: ./.github/actions/artifacts
 
-  backend-test-snuba-contains-metrics-tag-values:
-    if: needs.files-changed.outputs.backend == 'true'
-    needs: files-changed
-    name: backend test (snuba contains metrics tag values)
-    runs-on: ubuntu-20.04
-    timeout-minutes: 40
-    strategy:
-      # This helps not having to run multiple jobs because one fails, thus, reducing resource usage
-      # and reducing the risk that one of many runs would turn red again (read: intermittent tests)
-      fail-fast: false
-      matrix:
-        # XXX: When updating this, make sure you also update MATRIX_INSTANCE_TOTAL.
-        instance: [0]
-        pg-version: ['9.6']
-
-    env:
-      # XXX: MATRIX_INSTANCE_TOTAL must be hardcoded to the length of strategy.matrix.instance.
-      MATRIX_INSTANCE_TOTAL: 1
-      MIGRATIONS_TEST_MIGRATE: 1
-
-    steps:
-      - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8  # v3.1.0
-        with:
-          # Avoid codecov error message related to SHA resolution:
-          # https://github.com/codecov/codecov-bash/blob/7100762afbc822b91806a6574658129fe0d23a7d/codecov#L891
-          fetch-depth: '2'
-
-      - name: Setup sentry env
-        uses: ./.github/actions/setup-sentry
-        id: setup
-        with:
-          snuba: true
-          # Right now, we run so few bigtable related tests that the
-          # overhead of running bigtable in all backend tests
-          # is way smaller than the time it would take to run in its own job.
-          bigtable: true
-          pg-version: ${{ matrix.pg-version }}
-
-      - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
-        run: |
-          export PYTEST_ADDOPTS="-m 'sentry_metrics and not broken_under_tags_values_as_strings'"
-          export SENTRY_METRICS_SIMULATE_TAG_VALUES_IN_CLICKHOUSE=1
-          make test-python-ci
-          make test-snuba
-
-      - name: Handle artifacts
-        uses: ./.github/actions/artifacts
-
   cli:
     if: needs.files-changed.outputs.backend == 'true'
     needs: files-changed

+ 0 - 3
src/sentry/options/defaults.py

@@ -460,9 +460,6 @@ register("api.deprecation.brownout-cron", default="0 12 * * *", type=String)
 # Brownout duration to be stored in ISO8601 format for durations (See https://en.wikipedia.org/wiki/ISO_8601#Durations)
 register("api.deprecation.brownout-duration", default="PT1M")
 
-# switch all metrics usage over to using strings for tag values
-register("sentry-metrics.performance.tags-values-are-strings", default=False)
-
 # Flag to determine whether performance metrics indexer should index tag
 # values or not
 register("sentry-metrics.performance.index-tag-values", default=True)

+ 3 - 45
src/sentry/search/events/builder/metrics.py

@@ -22,14 +22,12 @@ from snuba_sdk import (
     Request,
 )
 
-from sentry import options
 from sentry.api.event_search import SearchFilter
 from sentry.exceptions import IncompatibleMetricsQuery, InvalidSearchQuery
 from sentry.search.events import constants, fields
 from sentry.search.events.builder import QueryBuilder
 from sentry.search.events.filter import ParsedTerms
 from sentry.search.events.types import (
-    EventsResponse,
     HistogramParams,
     ParamsType,
     QueryFramework,
@@ -60,9 +58,6 @@ class MetricsQueryBuilder(QueryBuilder):
         self.metric_ids: Set[int] = set()
         self.allow_metric_aggregates = allow_metric_aggregates
         self._indexer_cache: Dict[str, Optional[int]] = {}
-        self.tag_values_are_strings = options.get(
-            "sentry-metrics.performance.tags-values-are-strings"
-        )
         # Don't do any of the actions that would impact performance in anyway
         # Skips all indexer checks, and won't interact with clickhouse
         self.dry_run = dry_run
@@ -142,7 +137,7 @@ class MetricsQueryBuilder(QueryBuilder):
         tag_id = self.resolve_metric_index(col)
         if tag_id is None:
             raise InvalidSearchQuery(f"Unknown field: {col}")
-        if self.is_performance and self.tag_values_are_strings:
+        if self.is_performance:
             return f"tags_raw[{tag_id}]"
         else:
             return f"tags[{tag_id}]"
@@ -330,7 +325,7 @@ class MetricsQueryBuilder(QueryBuilder):
         return self._indexer_cache[value]
 
     def resolve_tag_value(self, value: str) -> Optional[Union[int, str]]:
-        if self.is_performance and self.tag_values_are_strings or self.use_metrics_layer:
+        if self.is_performance or self.use_metrics_layer:
             return value
         if self.dry_run:
             return -1
@@ -415,10 +410,8 @@ class MetricsQueryBuilder(QueryBuilder):
         for value in sorted_values:
             if value:
                 values.append(self._resolve_environment_filter_value(value))
-            elif self.tag_values_are_strings:
-                values.append("")
             else:
-                values.append(0)
+                values.append("")
         values.sort()
         environment = self.column("environment")
         if len(values) == 1:
@@ -738,41 +731,6 @@ class MetricsQueryBuilder(QueryBuilder):
 
         return result
 
-    def process_results(self, results: Any) -> EventsResponse:
-        """Go through the results of a metrics query and reverse resolve its tags"""
-        processed_results: EventsResponse = super().process_results(results)
-        tags: List[str] = []
-        cached_resolves: Dict[int, Optional[str]] = {}
-        # no-op if they're already strings
-        if self.tag_values_are_strings:
-            return processed_results
-
-        with sentry_sdk.start_span(op="mep", description="resolve_tags"):
-            for column in self.columns:
-                if (
-                    isinstance(column, AliasedExpression)
-                    and column.exp.subscriptable == "tags"
-                    and column.alias
-                ):
-                    tags.append(column.alias)
-                # transaction is a special case since we use a transform null & unparam
-                if column.alias in ["transaction", "title"]:
-                    tags.append(column.alias)
-
-            for tag in tags:
-                for row in processed_results["data"]:
-                    if isinstance(row[tag], int):
-                        if row[tag] not in cached_resolves:
-                            resolved_tag = indexer.reverse_resolve(
-                                UseCaseKey.PERFORMANCE, self.organization_id, row[tag]
-                            )
-                            cached_resolves[row[tag]] = resolved_tag
-                        row[tag] = cached_resolves[row[tag]]
-                if tag in processed_results["meta"]["fields"]:
-                    processed_results["meta"]["fields"][tag] = "string"
-
-        return processed_results
-
     @staticmethod
     def get_default_value(meta_type: str) -> Any:
         """Given a meta type return the expected default type

+ 1 - 4
src/sentry/search/events/datasets/function_aliases.py

@@ -2,7 +2,6 @@ from typing import Callable, Optional, Sequence, Union
 
 from snuba_sdk import Column, Function
 
-from sentry import options
 from sentry.exceptions import InvalidSearchQuery
 from sentry.models.transaction_threshold import (
     TRANSACTION_METRICS,
@@ -93,9 +92,7 @@ def resolve_project_threshold_config(
         project_threshold_override_config_keys.append(
             (
                 Function("toUInt64", [project_id]),
-                transaction_id
-                if options.get("sentry-metrics.performance.tags-values-are-strings")
-                else Function("toUInt64", [transaction_id]),
+                transaction_id,
             )
         )
         project_threshold_override_config_values.append(metric)

+ 3 - 5
src/sentry/search/events/datasets/metrics.py

@@ -183,7 +183,7 @@ class MetricsDatasetConfig(DatasetConfig):
                                         "equals",
                                         [
                                             self.builder.column("transaction"),
-                                            "" if self.builder.tag_values_are_strings else 0,
+                                            "",
                                         ],
                                     ),
                                 ],
@@ -216,9 +216,7 @@ class MetricsDatasetConfig(DatasetConfig):
                                                 "notEquals",
                                                 [
                                                     self.builder.column("transaction"),
-                                                    ""
-                                                    if self.builder.tag_values_are_strings
-                                                    else 0,
+                                                    "",
                                                 ],
                                             ),
                                             Function(
@@ -651,7 +649,7 @@ class MetricsDatasetConfig(DatasetConfig):
             "transform",
             [
                 Column(self.builder.resolve_column_name("transaction")),
-                [0 if not self.builder.tag_values_are_strings else ""],
+                [""],
                 [self.builder.resolve_tag_value("<< unparameterized >>")],
             ],
             alias,

+ 2 - 10
src/sentry/sentry_metrics/utils.py

@@ -1,6 +1,5 @@
 from typing import Optional, Sequence, Union
 
-from sentry import options
 from sentry.api.utils import InvalidParams
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
@@ -19,9 +18,6 @@ class MetricIndexNotFound(InvalidParams):
 def reverse_resolve_tag_value(
     use_case_id: UseCaseKey, org_id: int, index: Union[int, str, None], weak: bool = False
 ) -> Optional[str]:
-    # XXX(markus): Normally there would be a check for the option
-    # "sentry-metrics.performance.tags-values-are-strings", but this function
-    # is sometimes called with metric IDs for reasons I haven't figured out.
     if isinstance(index, str) or index is None:
         return index
     else:
@@ -70,9 +66,7 @@ def resolve(
 def resolve_tag_key(use_case_id: UseCaseKey, org_id: int, string: str) -> str:
     resolved = resolve(use_case_id, org_id, string)
     assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
-    if use_case_id == UseCaseKey.PERFORMANCE and options.get(
-        "sentry-metrics.performance.tags-values-are-strings"
-    ):
+    if use_case_id == UseCaseKey.PERFORMANCE:
         return f"tags_raw[{resolved}]"
     else:
         return f"tags[{resolved}]"
@@ -81,9 +75,7 @@ def resolve_tag_key(use_case_id: UseCaseKey, org_id: int, string: str) -> str:
 def resolve_tag_value(use_case_id: UseCaseKey, org_id: int, string: str) -> Union[str, int]:
     assert isinstance(string, str)
     assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
-    if use_case_id == UseCaseKey.PERFORMANCE and options.get(
-        "sentry-metrics.performance.tags-values-are-strings"
-    ):
+    if use_case_id == UseCaseKey.PERFORMANCE:
         return string
     return resolve_weak(use_case_id, org_id, string)
 

+ 2 - 7
src/sentry/snuba/metrics/fields/snql.py

@@ -2,7 +2,6 @@ from typing import List, Optional, Sequence, Set
 
 from snuba_sdk import Column, Function
 
-from sentry import options
 from sentry.api.utils import InvalidParams
 from sentry.search.events.datasets.function_aliases import resolve_project_threshold_config
 from sentry.sentry_metrics.configuration import UseCaseKey
@@ -486,9 +485,7 @@ def count_transaction_name_snql_factory(aggregate_filter, org_id, transaction_na
                 UseCaseKey.PERFORMANCE, org_id, "<< unparameterized >>"
             )
         elif transaction_name_identifier == is_null:
-            inner_tag_value = (
-                "" if options.get("sentry-metrics.performance.tags-values-are-strings") else 0
-            )
+            inner_tag_value = ""
         else:
             raise InvalidParams("Invalid condition for tag value filter")
 
@@ -560,8 +557,6 @@ def team_key_transaction_snql(org_id, team_key_condition_rhs, alias=None):
 
 
 def transform_null_to_unparameterized_snql(org_id, tag_key, alias=None):
-    tags_values_are_strings = options.get("sentry-metrics.performance.tags-values-are-strings")
-
     return Function(
         "transform",
         [
@@ -569,7 +564,7 @@ def transform_null_to_unparameterized_snql(org_id, tag_key, alias=None):
             # Here we support the case in which the given tag value for "tag_key" is not set. In that
             # case ClickHouse will return 0 or "" from the expression based on the array type, and we want to interpret
             # that as "<< unparameterized >>".
-            ["" if tags_values_are_strings else 0],
+            [""],
             [resolve_tag_value(UseCaseKey.PERFORMANCE, org_id, "<< unparameterized >>")],
         ],
         alias,

+ 1 - 40
src/sentry/snuba/metrics_performance.py

@@ -1,8 +1,7 @@
 from datetime import timedelta
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Dict, List, Optional, Sequence
 
 import sentry_sdk
-from snuba_sdk import AliasedExpression
 
 from sentry.discover.arithmetic import categorize_columns
 from sentry.search.events.builder import (
@@ -11,48 +10,10 @@ from sentry.search.events.builder import (
     TimeseriesMetricQueryBuilder,
 )
 from sentry.search.events.fields import get_function_alias
-from sentry.sentry_metrics import indexer
-from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.snuba import discover
 from sentry.utils.snuba import Dataset, SnubaTSResult
 
 
-def resolve_tags(results: Any, query_definition: MetricsQueryBuilder) -> Any:
-    """Go through the results of a metrics query and reverse resolve its tags"""
-    if query_definition.use_metrics_layer:
-        return results
-    tags: List[str] = []
-    cached_resolves: Dict[int, str] = {}
-    # no-op if they're already strings
-    if query_definition.tag_values_are_strings:
-        return results
-
-    with sentry_sdk.start_span(op="mep", description="resolve_tags"):
-        for column in query_definition.columns:
-            if (
-                isinstance(column, AliasedExpression)
-                and column.exp.subscriptable == "tags"
-                and column.alias
-            ):
-                tags.append(column.alias)
-            # transaction is a special case since we use a transform null & unparam
-            if column.alias in ["transaction", "title"]:
-                tags.append(column.alias)
-
-        for tag in tags:
-            for row in results["data"]:
-                if row[tag] not in cached_resolves:
-                    resolved_tag = indexer.reverse_resolve(
-                        UseCaseKey.PERFORMANCE, query_definition.organization_id, row[tag]
-                    )
-                    cached_resolves[row[tag]] = resolved_tag
-                row[tag] = cached_resolves[row[tag]]
-            if tag in results["meta"]["fields"]:
-                results["meta"]["fields"][tag] = "string"
-
-    return results
-
-
 def query(
     selected_columns,
     query,

+ 2 - 13
src/sentry/utils/pytest/metrics.py

@@ -1,6 +1,5 @@
 import dataclasses
 import functools
-import os
 
 import pytest
 
@@ -26,9 +25,6 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
     from sentry.snuba import tasks
     from sentry.utils import snuba
 
-    # Explicitly set option to avoid hitting DB from within resolve_tag_values
-    set_sentry_option("sentry-metrics.performance.tags-values-are-strings", False)
-
     if "sentry_metrics" in {mark.name for mark in request.node.iter_markers()}:
         mock_indexer = MockIndexer()
         monkeypatch.setattr("sentry.sentry_metrics.indexer.backend", mock_indexer)
@@ -39,18 +35,11 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
             "sentry.sentry_metrics.indexer.reverse_resolve", mock_indexer.reverse_resolve
         )
 
-        tag_values_are_strings = (
-            os.environ.get("SENTRY_METRICS_SIMULATE_TAG_VALUES_IN_CLICKHOUSE") == "1"
-        )
-        set_sentry_option(
-            "sentry-metrics.performance.tags-values-are-strings", tag_values_are_strings
-        )
         old_resolve = indexer.resolve
 
         def new_resolve(use_case_id, org_id, string):
             if (
                 use_case_id == UseCaseKey.PERFORMANCE
-                and tag_values_are_strings
                 and string in STRINGS_THAT_LOOK_LIKE_TAG_VALUES
             ):
                 pytest.fail(
@@ -71,7 +60,7 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
             is_metrics = "metrics" in query.match.name
 
             if is_performance_metrics:
-                _validate_query(query, tag_values_are_strings)
+                _validate_query(query, True)
             elif is_metrics:
                 _validate_query(query, False)
 
@@ -87,7 +76,7 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
             is_metrics = "metrics" in query.match.name
 
             if is_performance_metrics:
-                _validate_query(query, tag_values_are_strings)
+                _validate_query(query, True)
             elif is_metrics:
                 _validate_query(query, False)
 

+ 4 - 94
tests/sentry/search/events/builder/test_metrics.py

@@ -1,6 +1,5 @@
 import datetime
 import math
-import re
 from typing import List
 from unittest import mock
 
@@ -8,7 +7,6 @@ import pytest
 from django.utils import timezone
 from snuba_sdk import AliasedExpression, Column, Condition, Function, Op
 
-from sentry import options
 from sentry.exceptions import IncompatibleMetricsQuery
 from sentry.search.events import constants
 from sentry.search.events.builder import (
@@ -101,14 +99,7 @@ class MetricBuilderBaseTest(MetricsEnhancedPerformanceTestCase):
             Condition(Column("org_id"), Op.EQ, self.organization.id),
         ]
 
-        if not options.get("sentry-metrics.performance.tags-values-are-strings"):
-            for string in self.METRIC_STRINGS:
-                indexer.record(
-                    use_case_id=UseCaseKey.PERFORMANCE, org_id=self.organization.id, string=string
-                )
-            self.expected_tag_value_type = "UInt64"
-        else:
-            self.expected_tag_value_type = "String"
+        self.expected_tag_value_type = "String"
 
         indexer.record(
             use_case_id=UseCaseKey.PERFORMANCE, org_id=self.organization.id, string="transaction"
@@ -145,25 +136,14 @@ class MetricBuilderBaseTest(MetricsEnhancedPerformanceTestCase):
         )
 
     def build_transaction_transform(self, alias):
-        if not options.get("sentry-metrics.performance.tags-values-are-strings"):
-            tags_col = "tags"
-            unparam = indexer.resolve(
-                UseCaseKey.PERFORMANCE, self.organization.id, "<< unparameterized >>"
-            )
-            null = 0
-        else:
-            tags_col = "tags_raw"
-            unparam = "<< unparameterized >>"
-            null = ""
-
         return Function(
             "transform",
             [
                 Column(
-                    f"{tags_col}[{indexer.resolve(UseCaseKey.PERFORMANCE, self.organization.id, 'transaction')}]"
+                    f"tags_raw[{indexer.resolve(UseCaseKey.PERFORMANCE, self.organization.id, 'transaction')}]"
                 ),
-                [null],
-                [unparam],
+                [""],
+                ["<< unparameterized >>"],
             ],
             alias,
         )
@@ -435,35 +415,6 @@ class MetricQueryBuilderTest(MetricBuilderBaseTest):
             ],
         )
 
-    def test_missing_transaction_index(self):
-        if options.get("sentry-metrics.performance.tags-values-are-strings"):
-            pytest.skip("test does not apply if tag values are in clickhouse")
-
-        with pytest.raises(
-            IncompatibleMetricsQuery,
-            match=re.compile("Transaction value .* in filter not found"),
-        ):
-            MetricsQueryBuilder(
-                self.params,
-                query="transaction:something_else",
-                dataset=Dataset.PerformanceMetrics,
-                selected_columns=["transaction", "project", "p95(transaction.duration)"],
-            )
-
-    def test_missing_transaction_index_in_filter(self):
-        if options.get("sentry-metrics.performance.tags-values-are-strings"):
-            pytest.skip("test does not apply if tag values are in clickhouse")
-        with pytest.raises(
-            IncompatibleMetricsQuery,
-            match=re.compile("Transaction value .* in filter not found"),
-        ):
-            MetricsQueryBuilder(
-                self.params,
-                query="transaction:[something_else, something_else2]",
-                dataset=Dataset.PerformanceMetrics,
-                selected_columns=["transaction", "project", "p95(transaction.duration)"],
-            )
-
     def test_incorrect_parameter_for_metrics(self):
         with pytest.raises(IncompatibleMetricsQuery):
             MetricsQueryBuilder(
@@ -1498,14 +1449,6 @@ class MetricQueryBuilderTest(MetricBuilderBaseTest):
 
         expected = [mock.call(UseCaseKey.PERFORMANCE, self.organization.id, "transaction")]
 
-        if not options.get("sentry-metrics.performance.tags-values-are-strings"):
-            expected.append(
-                mock.call(UseCaseKey.PERFORMANCE, self.organization.id, "foo_transaction")
-            )
-            expected.append(
-                mock.call(UseCaseKey.PERFORMANCE, self.organization.id, "<< unparameterized >>")
-            )
-
         expected.extend(
             [
                 mock.call(
@@ -1517,9 +1460,6 @@ class MetricQueryBuilderTest(MetricBuilderBaseTest):
             ]
         )
 
-        if not options.get("sentry-metrics.performance.tags-values-are-strings"):
-            expected.append(mock.call(UseCaseKey.PERFORMANCE, self.organization.id, "good"))
-
         self.assertCountEqual(mock_indexer.mock_calls, expected)
 
     def test_custom_measurement_allowed(self):
@@ -1698,36 +1638,6 @@ class TimeseriesMetricQueryBuilderTest(MetricBuilderBaseTest):
             ],
         )
 
-    def test_missing_transaction_index(self):
-        if options.get("sentry-metrics.performance.tags-values-are-strings"):
-            pytest.skip("test does not apply if tag values are in clickhouse")
-        with pytest.raises(
-            IncompatibleMetricsQuery,
-            match=re.compile("Transaction value .* in filter not found"),
-        ):
-            TimeseriesMetricQueryBuilder(
-                self.params,
-                dataset=Dataset.PerformanceMetrics,
-                interval=900,
-                query="transaction:something_else",
-                selected_columns=["project", "p95(transaction.duration)"],
-            )
-
-    def test_missing_transaction_index_in_filter(self):
-        if options.get("sentry-metrics.performance.tags-values-are-strings"):
-            pytest.skip("test does not apply if tag values are in clickhouse")
-        with pytest.raises(
-            IncompatibleMetricsQuery,
-            match=re.compile("Transaction value .* in filter not found"),
-        ):
-            TimeseriesMetricQueryBuilder(
-                self.params,
-                dataset=Dataset.PerformanceMetrics,
-                interval=900,
-                query="transaction:[something_else, something_else2]",
-                selected_columns=["p95(transaction.duration)"],
-            )
-
     def test_project_filter(self):
         query = TimeseriesMetricQueryBuilder(
             self.params,

Some files were not shown because too many files changed in this diff