Browse Source

feat(discover): Add a dataset_source field to saved query (#74102)

We'd like to track the "source" of a split dataset: whether the user
picked the split dataset themselves because the UI allowed them to, or
whether we attempted to split it for them.
Shruthi 8 months ago
parent
commit
139f182a4d

+ 1 - 1
migrations_lockfile.txt

@@ -10,6 +10,6 @@ hybridcloud: 0016_add_control_cacheversion
 nodestore: 0002_nodestore_no_dictfield
 remote_subscriptions: 0003_drop_remote_subscription
 replays: 0004_index_together
-sentry: 0736_rm_reprocessing_step2
+sentry: 0737_add_discover_saved_query_dataset_source
 social_auth: 0002_default_auto_field
 uptime: 0003_drop_remote_subscription

+ 29 - 0
src/sentry/discover/models.py

@@ -1,3 +1,4 @@
+from enum import Enum
 from typing import ClassVar
 
 from django.db import models, router, transaction
@@ -38,6 +39,31 @@ class DiscoverSavedQueryTypes(TypesClass):
     TYPE_NAMES = [t[1] for t in TYPES]
 
 
+class DatasetSourcesTypes(Enum):
+    """
+    Records how the dataset of a saved query was chosen.
+
+    Member values are the integers used as field choices (see ``as_choices``).
+    """
+
+    # Ambiguous queries that haven't been or couldn't be categorized into a
+    # specific dataset.
+    UNKNOWN = 0
+    # Dataset inferred by either running the query or using heuristics.
+    INFERRED = 1
+    # Canonical dataset, user explicitly selected it.
+    USER = 2
+    # Was an ambiguous dataset forced to split (i.e. we picked a default).
+    FORCED = 3
+
+    @classmethod
+    def as_choices(cls) -> tuple[tuple[int, str], ...]:
+        """
+        Return ``(value, lowercased name)`` pairs for every member, in definition order.
+        """
+        return tuple((source.value, source.name.lower()) for source in cls)
+
+
 @region_silo_model
 class DiscoverSavedQueryProject(Model):
     __relocation_scope__ = RelocationScope.Excluded
@@ -73,6 +99,9 @@ class DiscoverSavedQuery(Model):
     dataset = BoundedPositiveIntegerField(
         choices=DiscoverSavedQueryTypes.as_choices(), default=DiscoverSavedQueryTypes.DISCOVER
     )
+    dataset_source = BoundedPositiveIntegerField(
+        choices=DatasetSourcesTypes.as_choices(), default=DatasetSourcesTypes.UNKNOWN.value
+    )
 
     class Meta:
         app_label = "sentry"

+ 49 - 0
src/sentry/migrations/0737_add_discover_saved_query_dataset_source.py

@@ -0,0 +1,49 @@
+# Generated by Django 5.0.6 on 2024-07-11 14:02
+
+from django.db import migrations
+
+import sentry.db.models.fields.bounded
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    """Add the NOT NULL ``dataset_source`` column (default 0) to ``sentry_discoversavedquery``."""
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed, so it should not be used for most operations that alter a table's schema.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   run this outside deployments so that we don't block them. Note that while adding an index
+    #   is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = False
+
+    dependencies = [
+        ("sentry", "0736_rm_reprocessing_step2"),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(  # DB change and model-state change are given separately
+            database_operations=[  # what actually runs against the database
+                migrations.RunSQL(
+                    """
+                    ALTER TABLE "sentry_discoversavedquery" ADD COLUMN "dataset_source" integer NOT NULL DEFAULT 0;
+                    """,
+                    reverse_sql="""
+                    ALTER TABLE "sentry_discoversavedquery" DROP COLUMN "dataset_source";
+                    """,
+                    hints={"tables": ["sentry_discoversavedquery"]},
+                ),
+            ],
+            state_operations=[  # what Django's migration state records; no SQL is run for these
+                migrations.AddField(  # default=0 mirrors the SQL DEFAULT 0 above
+                    model_name="discoversavedquery",
+                    name="dataset_source",
+                    field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(default=0),
+                ),
+            ],
+        )
+    ]