Просмотр исходного кода

ref(MEP): Add new option to query tag values as strings from clickhouse (#36397)

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
Markus Unterwaditzer 2 лет назад
Родитель
Сommit
5ceaca6389

+ 52 - 0
.github/workflows/backend.yml

@@ -99,6 +99,58 @@ jobs:
       - name: Handle artifacts
         uses: ./.github/actions/artifacts
 
+  backend-test-snuba-contains-metrics-tag-values:
+    if: needs.files-changed.outputs.backend == 'true'
+    needs: files-changed
+    name: backend test (snuba contains metrics tag values)
+    runs-on: ubuntu-20.04
+    timeout-minutes: 20
+    strategy:
+      # This helps not having to run multiple jobs because one fails, thus, reducing resource usage
+      # and reducing the risk that one of many runs would turn red again (read: intermittent tests)
+      fail-fast: false
+      matrix:
+        python-version: [3.8.12]
+        # XXX: When updating this, make sure you also update MATRIX_INSTANCE_TOTAL.
+        instance: [0]
+        pg-version: ['9.6']
+
+    env:
+      # XXX: MATRIX_INSTANCE_TOTAL must be hardcoded to the length of strategy.matrix.instance.
+      MATRIX_INSTANCE_TOTAL: 1
+      MIGRATIONS_TEST_MIGRATE: 1
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          # Avoid codecov error message related to SHA resolution:
+          # https://github.com/codecov/codecov-bash/blob/7100762afbc822b91806a6574658129fe0d23a7d/codecov#L891
+          fetch-depth: '2'
+
+      - name: Setup sentry env (python ${{ matrix.python-version }})
+        uses: ./.github/actions/setup-sentry
+        id: setup
+        with:
+          python-version: ${{ matrix.python-version }}
+          pip-cache-version: ${{ secrets.PIP_CACHE_VERSION }}
+          snuba: true
+          # Right now, we run so few bigtable related tests that the
+          # overhead of running bigtable in all backend tests
+          # is way smaller than the time it would take to run in its own job.
+          bigtable: true
+          pg-version: ${{ matrix.pg-version }}
+
+      - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
+        run: |
+          # Note: `USE_SNUBA` is not used for backend tests because there are a few failing tests with Snuba enabled.
+          unset USE_SNUBA
+          export PYTEST_ADDOPTS="-m 'sentry_metrics and not broken_under_tags_values_as_strings'"
+          export SENTRY_METRICS_SIMULATE_TAG_VALUES_IN_CLICKHOUSE=1
+          make test-python-ci
+
+      - name: Handle artifacts
+        uses: ./.github/actions/artifacts
+
   cli:
     if: needs.files-changed.outputs.backend == 'true'
     needs: files-changed

+ 2 - 0
pyproject.toml

@@ -24,6 +24,8 @@ markers = [
     "snuba: mark a test as requiring snuba",
     "itunes: test requires iTunes interaction, skipped unless --itunes is provided",
     "getsentryllc: test requires credentials for the GetSentry LLC organisation in Apple App Store Connect",
+    "sentry_metrics: test requires access to sentry metrics",
+    "broken_under_tags_values_as_strings: tests that currently fail if tags values are strings in clickhouse. this is mainly because they are not going to the right dataset",
 ]
 selenium_driver = "chrome"
 filterwarnings = [

+ 3 - 0
src/sentry/options/defaults.py

@@ -454,3 +454,6 @@ register("sentry-metrics.last-seen-updater.accept-rate", default=0.0)
 register("api.deprecation.brownout-cron", default="0 12 * * *", type=String)
 # Brownout duration to be stored in ISO8601 format for durations (See https://en.wikipedia.org/wiki/ISO_8601#Durations)
 register("api.deprecation.brownout-duration", default="PT1M")
+
+# switch all metrics usage over to using strings for tag values
+register("sentry-metrics.performance.tags-values-are-strings", default=False)

+ 12 - 2
src/sentry/search/events/builder.py

@@ -16,6 +16,7 @@ from snuba_sdk.function import CurriedFunction, Function
 from snuba_sdk.orderby import Direction, LimitBy, OrderBy
 from snuba_sdk.query import Query
 
+from sentry import options
 from sentry.api.event_search import (
     AggregateFilter,
     ParenExpression,
@@ -1678,7 +1679,12 @@ class MetricsQueryBuilder(QueryBuilder):
         tag_id = self.resolve_metric_index(col)
         if tag_id is None:
             raise InvalidSearchQuery(f"Unknown field: {col}")
-        return f"tags[{tag_id}]"
+        if self.is_performance and options.get(
+            "sentry-metrics.performance.tags-values-are-strings"
+        ):
+            return f"tags_raw[{tag_id}]"
+        else:
+            return f"tags[{tag_id}]"
 
     def column(self, name: str) -> Column:
         """Given an unresolved sentry name and return a snql column.
@@ -1853,7 +1859,11 @@ class MetricsQueryBuilder(QueryBuilder):
 
         return self._indexer_cache[value]
 
-    def _resolve_tag_value(self, value: str) -> int:
+    def _resolve_tag_value(self, value: str) -> Union[int, str]:
+        if self.is_performance and options.get(
+            "sentry-metrics.performance.tags-values-are-strings"
+        ):
+            return value
         if self.dry_run:
             return -1
         result = self.resolve_metric_index(value)

+ 2 - 2
src/sentry/search/events/datasets/field_aliases.py

@@ -8,8 +8,8 @@ from sentry.models import Project, ProjectTeam
 from sentry.search.events import constants, fields
 from sentry.search.events.builder import QueryBuilder
 from sentry.search.events.types import SelectType
-from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
+from sentry.sentry_metrics.utils import resolve_tag_value
 from sentry.utils.numbers import format_grouped_length
 
 
@@ -48,7 +48,7 @@ def resolve_team_key_transaction_alias(
     count = len(team_key_transactions)
     if resolve_metric_index:
         team_key_transactions = [
-            (project, indexer.resolve(org_id, transaction, UseCaseKey.PERFORMANCE))
+            (project, resolve_tag_value(UseCaseKey.PERFORMANCE, org_id, transaction))
             for project, transaction in team_key_transactions
         ]
 

+ 15 - 7
src/sentry/search/events/datasets/metrics.py

@@ -4,6 +4,7 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Union
 
 from snuba_sdk import AliasedExpression, Column, Function
 
+from sentry import options
 from sentry.api.event_search import SearchFilter
 from sentry.exceptions import IncompatibleMetricsQuery, InvalidSearchQuery
 from sentry.search.events import constants, fields
@@ -52,6 +53,13 @@ class MetricsDatasetConfig(DatasetConfig):
         self.builder.metric_ids.add(metric_id)
         return metric_id
 
+    def resolve_tag_value(self, value: str) -> Union[str, int]:
+        if self.builder.is_performance and options.get(
+            "sentry-metrics.performance.tags-values-are-strings"
+        ):
+            return value
+        return self.resolve_value(value)
+
     def resolve_value(self, value: str) -> int:
         if self.builder.dry_run:
             return -1
@@ -306,7 +314,7 @@ class MetricsDatasetConfig(DatasetConfig):
                     calculated_args=[
                         {
                             "name": "resolved_val",
-                            "fn": lambda args: self.resolve_value(args["if_val"]),
+                            "fn": lambda args: self.resolve_tag_value(args["if_val"]),
                         }
                     ],
                     snql_counter=lambda args, alias: Function(
@@ -370,7 +378,7 @@ class MetricsDatasetConfig(DatasetConfig):
                     calculated_args=[
                         {
                             "name": "resolved_val",
-                            "fn": lambda args: self.resolve_value(args["if_val"]),
+                            "fn": lambda args: self.resolve_tag_value(args["if_val"]),
                         }
                     ],
                     snql_set=lambda args, alias: Function(
@@ -588,8 +596,8 @@ class MetricsDatasetConfig(DatasetConfig):
         _: Mapping[str, Union[str, Column, SelectType, int, float]],
         alias: Optional[str] = None,
     ) -> SelectType:
-        metric_satisfied = self.resolve_value(constants.METRIC_SATISFIED_TAG_VALUE)
-        metric_tolerated = self.resolve_value(constants.METRIC_TOLERATED_TAG_VALUE)
+        metric_satisfied = self.resolve_tag_value(constants.METRIC_SATISFIED_TAG_VALUE)
+        metric_tolerated = self.resolve_tag_value(constants.METRIC_TOLERATED_TAG_VALUE)
 
         # Nothing is satisfied or tolerated, the score must be 0
         if metric_satisfied is None and metric_tolerated is None:
@@ -654,7 +662,7 @@ class MetricsDatasetConfig(DatasetConfig):
         args: Mapping[str, Union[str, Column, SelectType, int, float]],
         alias: Optional[str] = None,
     ) -> SelectType:
-        metric_frustrated = self.resolve_value(constants.METRIC_FRUSTRATED_TAG_VALUE)
+        metric_frustrated = self.resolve_tag_value(constants.METRIC_FRUSTRATED_TAG_VALUE)
 
         # Nobody is miserable, we can return 0
         if metric_frustrated is None:
@@ -724,7 +732,7 @@ class MetricsDatasetConfig(DatasetConfig):
         _: Mapping[str, Union[str, Column, SelectType, int, float]],
         alias: Optional[str] = None,
     ) -> SelectType:
-        statuses = [self.resolve_value(status) for status in constants.NON_FAILURE_STATUS]
+        statuses = [self.resolve_tag_value(status) for status in constants.NON_FAILURE_STATUS]
         return self._resolve_count_if(
             Function(
                 "equals",
@@ -812,7 +820,7 @@ class MetricsDatasetConfig(DatasetConfig):
                 alias,
             )
 
-        quality_id = self.resolve_value(quality)
+        quality_id = self.resolve_tag_value(quality)
         if quality_id is None:
             return Function(
                 # This matches the type from doing `select toTypeName(count()) ...` from clickhouse

+ 46 - 2
src/sentry/sentry_metrics/utils.py

@@ -1,5 +1,6 @@
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
+from sentry import options
 from sentry.api.utils import InvalidParams
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
@@ -15,6 +16,21 @@ class MetricIndexNotFound(InvalidParams):  # type: ignore
     pass
 
 
+def reverse_resolve_tag_value(
+    use_case_id: UseCaseKey, index: Union[int, str, None], weak: bool = False
+) -> Optional[str]:
+    # XXX(markus): Normally there would be a check for the option
+    # "sentry-metrics.performance.tags-values-are-strings", but this function
+    # is sometimes called with metric IDs for reasons I haven't figured out.
+    if isinstance(index, str) or index is None:
+        return index
+    else:
+        if weak:
+            return reverse_resolve_weak(use_case_id, index)
+        else:
+            return reverse_resolve(use_case_id, index)
+
+
 def reverse_resolve(use_case_id: UseCaseKey, index: int) -> str:
     assert index > 0
     resolved = indexer.reverse_resolve(index, use_case_id=use_case_id)
@@ -53,7 +69,35 @@ def resolve(
 
 def resolve_tag_key(use_case_id: UseCaseKey, org_id: int, string: str) -> str:
     resolved = resolve(use_case_id, org_id, string)
-    return f"tags[{resolved}]"
+    assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
+    if use_case_id == UseCaseKey.PERFORMANCE and options.get(
+        "sentry-metrics.performance.tags-values-are-strings"
+    ):
+        return f"tags_raw[{resolved}]"
+    else:
+        return f"tags[{resolved}]"
+
+
+def resolve_tag_value(use_case_id: UseCaseKey, org_id: int, string: str) -> Union[str, int]:
+    assert isinstance(string, str)
+    assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
+    if use_case_id == UseCaseKey.PERFORMANCE and options.get(
+        "sentry-metrics.performance.tags-values-are-strings"
+    ):
+        return string
+    return resolve_weak(use_case_id, org_id, string)
+
+
+def resolve_tag_values(
+    use_case_id: UseCaseKey, org_id: int, strings: Sequence[str]
+) -> Sequence[Union[str, int]]:
+    rv = []
+    for string in strings:
+        resolved = resolve_tag_value(use_case_id, org_id, string)
+        if resolved != STRING_NOT_FOUND:
+            rv.append(resolved)
+
+    return rv
 
 
 def resolve_weak(use_case_id: UseCaseKey, org_id: int, string: str) -> int:

+ 10 - 6
src/sentry/snuba/entity_subscription.py

@@ -27,10 +27,10 @@ from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.utils import (
     MetricIndexNotFound,
     resolve,
-    resolve_many_weak,
     resolve_tag_key,
-    resolve_weak,
-    reverse_resolve,
+    resolve_tag_value,
+    resolve_tag_values,
+    reverse_resolve_tag_value,
 )
 from sentry.snuba.dataset import EntityKey
 from sentry.snuba.metrics.naming_layer.mri import SessionMRI
@@ -352,7 +352,7 @@ class BaseMetricsEntitySubscription(BaseEntitySubscription, ABC):
                 Condition(
                     Column(resolve_tag_key(UseCaseKey.RELEASE_HEALTH, self.org_id, "environment")),
                     Op.EQ,
-                    resolve_weak(UseCaseKey.RELEASE_HEALTH, self.org_id, environment.name),
+                    resolve_tag_value(UseCaseKey.RELEASE_HEALTH, self.org_id, environment.name),
                 )
             )
         qb.add_conditions(extra_conditions)
@@ -426,7 +426,11 @@ class BaseCrashRateMetricsEntitySubscription(BaseMetricsEntitySubscription):
             translated_data: Dict[str, Any] = {}
             session_status = resolve_tag_key(UseCaseKey.RELEASE_HEALTH, org_id, "session.status")
             for row in data:
-                tag_value = reverse_resolve(UseCaseKey.RELEASE_HEALTH, row[session_status])
+                tag_value = reverse_resolve_tag_value(
+                    UseCaseKey.RELEASE_HEALTH, row[session_status]
+                )
+                if tag_value is None:
+                    raise MetricIndexNotFound()
                 translated_data[tag_value] = row[value_col_name]
 
             total_session_count = translated_data.get("init", 0)
@@ -533,7 +537,7 @@ class MetricsCountersEntitySubscription(BaseCrashRateMetricsEntitySubscription):
             Condition(
                 Column(self.session_status),
                 Op.IN,
-                resolve_many_weak(UseCaseKey.RELEASE_HEALTH, self.org_id, ["crashed", "init"]),
+                resolve_tag_values(UseCaseKey.RELEASE_HEALTH, self.org_id, ["crashed", "init"]),
             )
         )
         return extra_conditions

+ 15 - 9
src/sentry/snuba/metrics/datasource.py

@@ -22,7 +22,12 @@ from sentry.api.utils import InvalidParams
 from sentry.models import Organization, Project
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
-from sentry.sentry_metrics.utils import resolve_tag_key, reverse_resolve
+from sentry.sentry_metrics.utils import (
+    MetricIndexNotFound,
+    resolve_tag_key,
+    reverse_resolve,
+    reverse_resolve_tag_value,
+)
 from sentry.snuba.dataset import Dataset, EntityKey
 from sentry.snuba.metrics.fields import run_metrics_query
 from sentry.snuba.metrics.fields.base import get_derived_metrics, org_id_from_projects
@@ -344,9 +349,9 @@ def _fetch_tags_or_values_per_ids(
 
         for row in rows:
             metric_id = row["metric_id"]
-            if column.startswith("tags["):
+            if column.startswith(("tags[", "tags_raw[")):
                 value_id = row[column]
-                if value_id > 0:
+                if value_id not in (None, 0):
                     tag_or_value_ids_per_metric_id[metric_id].append(value_id)
             else:
                 tag_or_value_ids_per_metric_id[metric_id].extend(row[column])
@@ -388,12 +393,12 @@ def _fetch_tags_or_values_per_ids(
     else:
         tag_or_value_ids = {tag_id for ids in tag_or_value_id_lists for tag_id in ids}
 
-    if column.startswith("tags["):
-        tag_id = column.split("tags[")[1].split("]")[0]
+    if column.startswith(("tags[", "tags_raw[")):
+        tag_id = column.split("[")[1].split("]")[0]
         tags_or_values = [
             {
                 "key": reverse_resolve(use_case_id, int(tag_id)),
-                "value": reverse_resolve(use_case_id, value_id),
+                "value": reverse_resolve_tag_value(use_case_id, value_id),
             }
             for value_id in tag_or_value_ids
         ]
@@ -477,18 +482,19 @@ def get_tag_values(
     assert projects
 
     org_id = org_id_from_projects(projects)
-    tag_id = indexer.resolve(org_id, tag_name, use_case_id=use_case_id)
 
     if tag_name in UNALLOWED_TAGS:
         raise InvalidParams(f"Tag name {tag_name} is an unallowed tag")
 
-    if tag_id is None:
+    try:
+        tag_id = resolve_tag_key(use_case_id, org_id, tag_name)
+    except MetricIndexNotFound:
         raise InvalidParams(f"Tag {tag_name} is not available in the indexer")
 
     try:
         tags, _ = _fetch_tags_or_values_per_ids(
             projects=projects,
-            column=f"tags[{tag_id}]",
+            column=tag_id,
             metric_names=metric_names,
             referrer="snuba.metrics.meta.get_tag_values",
             use_case_id=use_case_id,

+ 21 - 13
src/sentry/snuba/metrics/fields/snql.py

@@ -3,7 +3,7 @@ from typing import List
 from snuba_sdk import Column, Function
 
 from sentry.sentry_metrics.configuration import UseCaseKey
-from sentry.sentry_metrics.utils import resolve_weak
+from sentry.sentry_metrics.utils import resolve_tag_key, resolve_tag_value, resolve_tag_values
 from sentry.snuba.metrics.naming_layer.public import (
     TransactionSatisfactionTagValue,
     TransactionStatusTagValue,
@@ -24,9 +24,13 @@ def _aggregation_on_session_status_func_factory(aggregate):
                             "equals",
                             [
                                 Column(
-                                    f"tags[{resolve_weak(UseCaseKey.RELEASE_HEALTH, org_id, 'session.status')}]"
+                                    resolve_tag_key(
+                                        UseCaseKey.RELEASE_HEALTH, org_id, "session.status"
+                                    )
+                                ),
+                                resolve_tag_value(
+                                    UseCaseKey.RELEASE_HEALTH, org_id, session_status
                                 ),
-                                resolve_weak(UseCaseKey.RELEASE_HEALTH, org_id, session_status),
                             ],
                         ),
                         Function("in", [Column("metric_id"), list(metric_ids)]),
@@ -63,11 +67,11 @@ def _aggregation_on_tx_status_func_factory(aggregate):
             return metric_match
 
         tx_col = Column(
-            f"tags[{resolve_weak(UseCaseKey.PERFORMANCE, org_id, TransactionTagsKey.TRANSACTION_STATUS.value)}]"
+            resolve_tag_key(
+                UseCaseKey.PERFORMANCE, org_id, TransactionTagsKey.TRANSACTION_STATUS.value
+            )
         )
-        excluded_statuses = [
-            resolve_weak(UseCaseKey.PERFORMANCE, org_id, s) for s in exclude_tx_statuses
-        ]
+        excluded_statuses = resolve_tag_values(UseCaseKey.PERFORMANCE, org_id, exclude_tx_statuses)
         exclude_tx_statuses = Function(
             "notIn",
             [
@@ -115,9 +119,15 @@ def _aggregation_on_tx_satisfaction_func_factory(aggregate):
                             "equals",
                             [
                                 Column(
-                                    f"tags[{resolve_weak(UseCaseKey.PERFORMANCE, org_id, TransactionTagsKey.TRANSACTION_SATISFACTION.value)}]"
+                                    resolve_tag_key(
+                                        UseCaseKey.PERFORMANCE,
+                                        org_id,
+                                        TransactionTagsKey.TRANSACTION_SATISFACTION.value,
+                                    )
+                                ),
+                                resolve_tag_value(
+                                    UseCaseKey.PERFORMANCE, org_id, satisfaction_value
                                 ),
-                                resolve_weak(UseCaseKey.PERFORMANCE, org_id, satisfaction_value),
                             ],
                         ),
                         Function("in", [Column("metric_id"), list(metric_ids)]),
@@ -289,10 +299,8 @@ def session_duration_filters(org_id):
         Function(
             "equals",
             (
-                Column(
-                    f"tags[{resolve_weak(UseCaseKey.RELEASE_HEALTH, org_id, 'session.status')}]"
-                ),
-                resolve_weak(UseCaseKey.RELEASE_HEALTH, org_id, "exited"),
+                Column(resolve_tag_key(UseCaseKey.RELEASE_HEALTH, org_id, "session.status")),
+                resolve_tag_value(UseCaseKey.RELEASE_HEALTH, org_id, "exited"),
             ),
         )
     ]

Некоторые файлы не были показаны из-за большого количества измененных файлов