Browse Source

ref(sentry-metrics): Add MetricsKeyIndexer table (#28914)

ref(sentry-metrics): Add MetricsKeyIndexer table
This table is used to hold the relationship between a metric string (metric name, tag key, or tag value) and its integer representation, which is generated from the sequence on this table (the `id` field)
MeredithAnya 3 years ago
parent
commit
669cccc8b3

+ 1 - 1
.github/workflows/snuba-integration-test.yml

@@ -18,7 +18,7 @@ jobs:
     env:
     env:
       USE_SNUBA: 1
       USE_SNUBA: 1
       MIGRATIONS_TEST_MIGRATE: 1
       MIGRATIONS_TEST_MIGRATE: 1
-      USE_REDIS_INDEXER: 1
+      USE_INDEXER: 1
 
 
     steps:
     steps:
       - uses: actions/checkout@v2
       - uses: actions/checkout@v2

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 will then be regenerated, and you should be able to merge without conflicts.
 
 
 nodestore: 0002_nodestore_no_dictfield
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0234_grouphistory
+sentry: 0235_add_metricskeyindexer_table
 social_auth: 0001_initial
 social_auth: 0001_initial

+ 1 - 0
src/sentry/conf/server.py

@@ -334,6 +334,7 @@ INSTALLED_APPS = (
     "sentry.analytics.events",
     "sentry.analytics.events",
     "sentry.nodestore",
     "sentry.nodestore",
     "sentry.search",
     "sentry.search",
+    "sentry.sentry_metrics.indexer",
     "sentry.snuba",
     "sentry.snuba",
     "sentry.lang.java.apps.Config",
     "sentry.lang.java.apps.Config",
     "sentry.lang.javascript.apps.Config",
     "sentry.lang.javascript.apps.Config",

+ 55 - 0
src/sentry/migrations/0235_add_metricskeyindexer_table.py

@@ -0,0 +1,55 @@
+# Generated by Django 2.2.24 on 2021-10-04 18:19
+
+import django.utils.timezone
+from django.db import migrations, models
+
+import sentry.db.models.fields.bounded
+
+
+class Migration(migrations.Migration):
+    # This flag is used to mark that a migration shouldn't be automatically run in
+    # production. We set this to True for operations that we think are risky and want
+    # someone from ops to run manually and monitor.
+    # General advice is that if in doubt, mark your migration as `is_dangerous`.
+    # Some things you should always mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that
+    #   they can be monitored. Since data migrations will now hold a transaction open
+    #   this is even more important.
+    # - Adding columns to highly active tables, even ones that are NULL.
+    is_dangerous = False
+
+    # This flag is used to decide whether to run this migration in a transaction or not.
+    # By default we prefer to run in a transaction, but for migrations where you want
+    # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll
+    # want to create an index concurrently when adding one to an existing table.
+    # You'll also usually want to set this to `False` if you're writing a data
+    # migration, since we don't want the entire migration to run in one long-running
+    # transaction.
+    atomic = True
+
+    dependencies = [
+        ("sentry", "0234_grouphistory"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="MetricsKeyIndexer",
+            fields=[
+                (
+                    "id",
+                    sentry.db.models.fields.bounded.BoundedBigAutoField(
+                        primary_key=True, serialize=False
+                    ),
+                ),
+                ("string", models.CharField(max_length=200)),
+                ("date_added", models.DateTimeField(default=django.utils.timezone.now)),
+            ],
+            options={
+                "db_table": "sentry_metricskeyindexer",
+            },
+        ),
+        migrations.AddConstraint(
+            model_name="metricskeyindexer",
+            constraint=models.UniqueConstraint(fields=("string",), name="unique_string"),
+        ),
+    ]

+ 4 - 2
src/sentry/new_migrations/monkey/executor.py

@@ -5,7 +5,7 @@ from django.contrib.contenttypes.management import RenameContentType
 from django.db.migrations.executor import MigrationExecutor
 from django.db.migrations.executor import MigrationExecutor
 from django.db.migrations.operations import SeparateDatabaseAndState
 from django.db.migrations.operations import SeparateDatabaseAndState
 from django.db.migrations.operations.fields import FieldOperation
 from django.db.migrations.operations.fields import FieldOperation
-from django.db.migrations.operations.models import ModelOperation
+from django.db.migrations.operations.models import IndexOperation, ModelOperation
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
@@ -37,7 +37,9 @@ class SentryMigrationExecutor(MigrationExecutor):
         def _check_operations(operations):
         def _check_operations(operations):
             failed_ops = []
             failed_ops = []
             for operation in operations:
             for operation in operations:
-                if isinstance(operation, (FieldOperation, ModelOperation, RenameContentType)):
+                if isinstance(
+                    operation, (FieldOperation, ModelOperation, RenameContentType, IndexOperation)
+                ):
                     continue
                     continue
                 elif isinstance(operation, SeparateDatabaseAndState):
                 elif isinstance(operation, SeparateDatabaseAndState):
                     failed_ops.extend(_check_operations(operation.database_operations))
                     failed_ops.extend(_check_operations(operation.database_operations))

+ 6 - 6
src/sentry/release_health/metrics.py

@@ -40,32 +40,32 @@ def get_tag_values_list(org_id: int, values: Sequence[str]) -> Sequence[int]:
 
 
 
 
 def metric_id(org_id: int, name: str) -> int:
 def metric_id(org_id: int, name: str) -> int:
-    index = indexer.resolve(org_id, name)  # type: ignore
+    index = indexer.resolve(name)  # type: ignore
     if index is None:
     if index is None:
         raise MetricIndexNotFound(name)
         raise MetricIndexNotFound(name)
     return index  # type: ignore
     return index  # type: ignore
 
 
 
 
 def tag_key(org_id: int, name: str) -> str:
 def tag_key(org_id: int, name: str) -> str:
-    index = indexer.resolve(org_id, name)  # type: ignore
+    index = indexer.resolve(name)  # type: ignore
     if index is None:
     if index is None:
         raise MetricIndexNotFound(name)
         raise MetricIndexNotFound(name)
     return f"tags[{index}]"
     return f"tags[{index}]"
 
 
 
 
 def tag_value(org_id: int, name: str) -> int:
 def tag_value(org_id: int, name: str) -> int:
-    index = indexer.resolve(org_id, name)  # type: ignore
+    index = indexer.resolve(name)  # type: ignore
     if index is None:
     if index is None:
         raise MetricIndexNotFound(name)
         raise MetricIndexNotFound(name)
     return index  # type: ignore
     return index  # type: ignore
 
 
 
 
 def try_get_string_index(org_id: int, name: str) -> Optional[int]:
 def try_get_string_index(org_id: int, name: str) -> Optional[int]:
-    return indexer.resolve(org_id, name)  # type: ignore
+    return indexer.resolve(name)  # type: ignore
 
 
 
 
 def reverse_tag_value(org_id: int, index: int) -> str:
 def reverse_tag_value(org_id: int, index: int) -> str:
-    str_value = indexer.reverse_resolve(org_id, index)  # type: ignore
+    str_value = indexer.reverse_resolve(index)  # type: ignore
     # If the value can't be reversed it's very likely a real programming bug
     # If the value can't be reversed it's very likely a real programming bug
     # instead of something to be caught down: We probably got back a value from
     # instead of something to be caught down: We probably got back a value from
     # Snuba that's not in the indexer => partial data loss
     # Snuba that's not in the indexer => partial data loss
@@ -338,7 +338,7 @@ class MetricsReleaseHealthBackend(ReleaseHealthBackend):
         rv = {}
         rv = {}
 
 
         for project_id, release in project_releases:
         for project_id, release in project_releases:
-            release_tag_value = indexer.resolve(org_id, release)  # type: ignore
+            release_tag_value = indexer.resolve(release)  # type: ignore
             if release_tag_value is None:
             if release_tag_value is None:
                 # Don't emit empty releases -- for exact compatibility with
                 # Don't emit empty releases -- for exact compatibility with
                 # sessions table backend.
                 # sessions table backend.

+ 4 - 4
src/sentry/sentry_metrics/indexer/base.py

@@ -11,10 +11,10 @@ class StringIndexer(Service):  # type: ignore
 
 
     __all__ = ("record", "resolve", "reverse_resolve", "bulk_record")
     __all__ = ("record", "resolve", "reverse_resolve", "bulk_record")
 
 
-    def bulk_record(self, org_id: int, strings: List[str]) -> Dict[str, int]:
+    def bulk_record(self, strings: List[str]) -> Dict[str, int]:
         raise NotImplementedError()
         raise NotImplementedError()
 
 
-    def record(self, org_id: int, string: str) -> int:
+    def record(self, string: str) -> int:
         """Store a string and return the integer ID generated for it
         """Store a string and return the integer ID generated for it
 
 
         With every call to this method, the lifetime of the entry will be
         With every call to this method, the lifetime of the entry will be
@@ -22,7 +22,7 @@ class StringIndexer(Service):  # type: ignore
         """
         """
         raise NotImplementedError()
         raise NotImplementedError()
 
 
-    def resolve(self, org_id: int, string: str) -> Optional[int]:
+    def resolve(self, string: str) -> Optional[int]:
         """Lookup the integer ID for a string.
         """Lookup the integer ID for a string.
 
 
         Does not affect the lifetime of the entry.
         Does not affect the lifetime of the entry.
@@ -31,7 +31,7 @@ class StringIndexer(Service):  # type: ignore
         """
         """
         raise NotImplementedError()
         raise NotImplementedError()
 
 
-    def reverse_resolve(self, org_id: int, id: int) -> Optional[str]:
+    def reverse_resolve(self, id: int) -> Optional[str]:
         """Lookup the stored string for a given integer ID.
         """Lookup the stored string for a given integer ID.
 
 
         Returns None if the entry cannot be found.
         Returns None if the entry cannot be found.

+ 1 - 2
src/sentry/sentry_metrics/indexer/indexer_consumer.py

@@ -36,7 +36,6 @@ class MetricsIndexerWorker(AbstractBatchWorker):  # type: ignore
     def process_message(self, message: Any) -> MutableMapping[str, Any]:
     def process_message(self, message: Any) -> MutableMapping[str, Any]:
         parsed_message: MutableMapping[str, Any] = json.loads(message.value(), use_rapid_json=True)
         parsed_message: MutableMapping[str, Any] = json.loads(message.value(), use_rapid_json=True)
 
 
-        org_id = parsed_message["org_id"]
         metric_name = parsed_message["name"]
         metric_name = parsed_message["name"]
         tags = parsed_message["tags"]
         tags = parsed_message["tags"]
 
 
@@ -46,7 +45,7 @@ class MetricsIndexerWorker(AbstractBatchWorker):  # type: ignore
             *tags.values(),
             *tags.values(),
         }
         }
 
 
-        mapping = indexer.bulk_record(org_id, list(strings))  # type: ignore
+        mapping = indexer.bulk_record(list(strings))  # type: ignore
 
 
         new_tags = {mapping[k]: mapping[v] for k, v in tags.items()}
         new_tags = {mapping[k]: mapping[v] for k, v in tags.items()}
 
 

+ 3 - 3
src/sentry/sentry_metrics/indexer/mock.py

@@ -31,13 +31,13 @@ class SimpleIndexer(StringIndexer):
         self._strings: DefaultDict[str, int] = defaultdict(self._counter.__next__)
         self._strings: DefaultDict[str, int] = defaultdict(self._counter.__next__)
         self._reverse: Dict[int, str] = {}
         self._reverse: Dict[int, str] = {}
 
 
-    def record(self, org_id: int, string: str) -> int:
+    def record(self, string: str) -> int:
         return self._record(string)
         return self._record(string)
 
 
-    def resolve(self, org_id: int, string: str) -> Optional[int]:
+    def resolve(self, string: str) -> Optional[int]:
         return self._strings.get(string)
         return self._strings.get(string)
 
 
-    def reverse_resolve(self, org_id: int, id: int) -> Optional[str]:
+    def reverse_resolve(self, id: int) -> Optional[str]:
         return self._reverse.get(id)
         return self._reverse.get(id)
 
 
     def _record(self, string: str) -> int:
     def _record(self, string: str) -> int:

+ 30 - 0
src/sentry/sentry_metrics/indexer/models.py

@@ -0,0 +1,30 @@
+from typing import Any
+
+from django.db import connections, models, router
+from django.utils import timezone
+
+from sentry.db.models import Model
+
+
+class MetricsKeyIndexer(Model):  # type: ignore
+    __include_in_export__ = False
+
+    string = models.CharField(max_length=200)
+    date_added = models.DateTimeField(default=timezone.now)
+
+    class Meta:
+        db_table = "sentry_metricskeyindexer"
+        app_label = "sentry"
+        constraints = [
+            models.UniqueConstraint(fields=["string"], name="unique_string"),
+        ]
+
+    @classmethod
+    def get_next_values(cls, num: int) -> Any:
+        using = router.db_for_write(cls)
+        connection = connections[using].cursor()
+
+        connection.execute(
+            "SELECT nextval('sentry_metricskeyindexer_id_seq') from generate_series(1,%s)", [num]
+        )
+        return connection.fetchall()

Some files were not shown because too many files changed in this diff