Browse Source

feat(eventuser): Migration to remove EventUser model from Django (1/2) (#61233)

## Objective:

First migration to remove all references to the EventUser model within
Django.
NisanthanNanthakumar 1 year ago
parent
commit
848b8dfd8a

+ 1 - 33
fixtures/backup/model_dependencies/detailed.json

@@ -1791,33 +1791,6 @@
       ]
     ]
   },
-  "sentry.eventuser": {
-    "dangling": false,
-    "foreign_keys": {
-      "project_id": {
-        "kind": "ImplicitForeignKey",
-        "model": "sentry.project",
-        "nullable": false
-      }
-    },
-    "model": "sentry.eventuser",
-    "relocation_dependencies": [],
-    "relocation_scope": "Excluded",
-    "silos": [
-      "Region"
-    ],
-    "table_name": "sentry_eventuser",
-    "uniques": [
-      [
-        "hash",
-        "project_id"
-      ],
-      [
-        "ident",
-        "project_id"
-      ]
-    ]
-  },
   "sentry.exporteddata": {
     "dangling": false,
     "foreign_keys": {
@@ -6037,11 +6010,6 @@
         "model": "sentry.environment",
         "nullable": true
       },
-      "event_user_id": {
-        "kind": "ImplicitForeignKey",
-        "model": "sentry.eventuser",
-        "nullable": true
-      },
       "group_id": {
         "kind": "ImplicitForeignKey",
         "model": "sentry.group",
@@ -6130,4 +6098,4 @@
       ]
     ]
   }
-}
+}

+ 1 - 5
fixtures/backup/model_dependencies/flat.json

@@ -247,9 +247,6 @@
     "sentry.processingissue",
     "sentry.rawevent"
   ],
-  "sentry.eventuser": [
-    "sentry.project"
-  ],
   "sentry.exporteddata": [
     "sentry.file",
     "sentry.organization",
@@ -832,7 +829,6 @@
   ],
   "sentry.userreport": [
     "sentry.environment",
-    "sentry.eventuser",
     "sentry.group",
     "sentry.project"
   ],
@@ -844,4 +840,4 @@
   "social_auth.usersocialauth": [
     "sentry.user"
   ]
-}
+}

+ 1 - 2
fixtures/backup/model_dependencies/sorted.json

@@ -85,7 +85,6 @@
   "sentry.externalissue",
   "sentry.externalactor",
   "sentry.exporteddata",
-  "sentry.eventuser",
   "sentry.eventprocessingissue",
   "sentry.eventattachment",
   "sentry.environment",
@@ -226,4 +225,4 @@
   "sentry.alertruletriggerexclusion",
   "sentry.alertruletriggeraction",
   "sentry.servicehookproject"
-]
+]

+ 1 - 2
fixtures/backup/model_dependencies/truncate.json

@@ -85,7 +85,6 @@
   "sentry_externalissue",
   "sentry_externalactor",
   "sentry_exporteddata",
-  "sentry_eventuser",
   "sentry_eventprocessingissue",
   "sentry_eventattachment",
   "sentry_environment",
@@ -226,4 +225,4 @@
   "sentry_alertruletriggerexclusion",
   "sentry_alertruletriggeraction",
   "sentry_servicehookproject"
-]
+]

+ 1 - 1
migrations_lockfile.txt

@@ -9,5 +9,5 @@ feedback: 0003_feedback_add_env
 hybridcloud: 0009_make_user_id_optional_for_slug_reservation_replica
 nodestore: 0002_nodestore_no_dictfield
 replays: 0003_add_size_to_recording_segment
-sentry: 0612_expand_relocation_model
+sentry: 0613_drop_eventuser_table_part_1
 social_auth: 0002_default_auto_field

+ 30 - 0
src/sentry/migrations/0613_drop_eventuser_table_part_1.py

@@ -0,0 +1,30 @@
+# Generated by Django 3.2.23 on 2023-12-06 05:29
+
+from django.db import migrations
+
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    dependencies = [
+        ("sentry", "0612_expand_relocation_model"),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(
+            state_operations=[migrations.DeleteModel(name="EventUser")],
+            database_operations=[],
+        )
+    ]

+ 0 - 1
src/sentry/models/__init__.py

@@ -36,7 +36,6 @@ from .environment import *  # NOQA
 from .event import *  # NOQA
 from .eventattachment import *  # NOQA
 from .eventerror import *  # NOQA
-from .eventuser import *  # NOQA
 from .featureadoption import *  # NOQA
 from .files import *  # NOQA
 from .group import *  # NOQA

+ 0 - 134
src/sentry/models/eventuser.py

@@ -1,134 +0,0 @@
-from functools import reduce
-from operator import or_
-
-from django.db import models
-from django.utils import timezone
-
-from sentry.backup.scopes import RelocationScope
-from sentry.constants import MAX_EMAIL_FIELD_LENGTH
-from sentry.db.models import BoundedBigIntegerField, Model, region_silo_only_model, sane_repr
-from sentry.utils.datastructures import BidirectionalMapping
-from sentry.utils.hashlib import md5_text
-
-# The order of these keys are significant to also indicate priority
-# when used in hashing and determining uniqueness. If you change the order
-# you will break stuff.
-KEYWORD_MAP = BidirectionalMapping(
-    {
-        "ident": "id",
-        "username": "username",
-        "email": "email",
-        "ip_address": "ip",
-    }
-)
-
-
-@region_silo_only_model
-class EventUser(Model):
-    __relocation_scope__ = RelocationScope.Excluded
-
-    project_id = BoundedBigIntegerField(db_index=True)
-    hash = models.CharField(max_length=32)
-    ident = models.CharField(max_length=128, null=True)
-    email = models.EmailField(null=True, max_length=MAX_EMAIL_FIELD_LENGTH)
-    username = models.CharField(max_length=128, null=True)
-    name = models.CharField(max_length=128, null=True)
-    ip_address = models.GenericIPAddressField(null=True)
-    date_added = models.DateTimeField(default=timezone.now, db_index=True)
-
-    class Meta:
-        app_label = "sentry"
-        db_table = "sentry_eventuser"
-        unique_together = (("project_id", "ident"), ("project_id", "hash"))
-        index_together = (
-            ("project_id", "email"),
-            ("project_id", "ip_address"),
-        )
-
-    __repr__ = sane_repr("project_id", "ident", "email", "username", "ip_address")
-
-    @classmethod
-    def attr_from_keyword(cls, keyword):
-        return KEYWORD_MAP.get_key(keyword)
-
-    @classmethod
-    def hash_from_tag(cls, value):
-        return md5_text(value.split(":", 1)[-1]).hexdigest()
-
-    @classmethod
-    def for_tags(cls, project_id, values):
-        """
-        Finds matching EventUser objects from a list of tag values.
-
-        Return a dictionary of {tag_value: event_user}.
-        """
-        hashes = [cls.hash_from_tag(v) for v in values]
-        return {e.tag_value: e for e in cls.objects.filter(project_id=project_id, hash__in=hashes)}
-
-    def save(self, *args, **kwargs):
-        assert (
-            self.ident or self.username or self.email or self.ip_address
-        ), "No identifying value found for user"
-        if not self.hash:
-            self.set_hash()
-        super().save(*args, **kwargs)
-
-    def set_hash(self):
-        self.hash = self.build_hash()
-
-    def build_hash(self):
-        for key, value in self.iter_attributes():
-            if value:
-                return md5_text(value).hexdigest()
-
-    @property
-    def tag_value(self):
-        """
-        Return the identifier used with tags to link this user.
-        """
-        for key, value in self.iter_attributes():
-            if value:
-                return f"{KEYWORD_MAP[key]}:{value}"
-
-    def iter_attributes(self):
-        """
-        Iterate over key/value pairs for this EventUser in priority order.
-        """
-        for key in KEYWORD_MAP.keys():
-            yield key, getattr(self, key)
-
-    def get_label(self):
-        return self.email or self.username or self.ident or self.ip_address
-
-    def get_display_name(self):
-        return self.name or self.email or self.username
-
-    def find_similar_users(self, user):
-        from sentry.models.organizationmemberteam import OrganizationMemberTeam
-        from sentry.models.project import Project
-
-        # limit to only teams user has opted into
-        project_ids = list(
-            Project.objects.filter(
-                teams__in=OrganizationMemberTeam.objects.filter(
-                    organizationmember__user=user,
-                    organizationmember__organization__project=self.project_id,
-                    is_active=True,
-                ).values("team")
-            ).values_list("id", flat=True)[:1000]
-        )
-        if not project_ids:
-            return type(self).objects.none()
-
-        filters = []
-        if self.email:
-            filters.append(models.Q(email=self.email))
-        if self.ip_address:
-            filters.append(models.Q(ip_address=self.ip_address))
-        if not filters:
-            return type(self).objects.none()
-        return (
-            type(self)
-            .objects.exclude(id=self.id)
-            .filter(reduce(or_, filters), project_id__in=project_ids)
-        )

+ 0 - 73
tests/sentry/models/test_eventuser.py

@@ -1,73 +0,0 @@
-from __future__ import annotations
-
-from hashlib import md5
-
-from sentry.models.eventuser import EventUser
-from sentry.testutils.cases import TestCase
-from sentry.testutils.silo import region_silo_test
-
-
-@region_silo_test
-class EventUserTestCase(TestCase):
-    def test_build_hash(self):
-        cases: list[tuple[dict[str, str], str | None]] = [
-            (
-                {
-                    "ident": "ident",
-                    "username": "username",
-                    "email": "email",
-                    "ip_address": "127.0.0.1",
-                },
-                "67217d8b401cf5e72bbf5103d60f3e97",
-            ),
-            (
-                {"username": "username", "email": "email", "ip_address": "127.0.0.1"},
-                "14c4b06b824ec593239362517f538b29",
-            ),
-            ({"email": "email", "ip_address": "127.0.0.1"}, "0c83f57c786a0b4a39efab23731c7ebc"),
-            ({"ip_address": "127.0.0.1"}, "f528764d624db129b32c21fbca0cb8d6"),
-            ({}, None),
-        ]
-        for kw, value in cases:
-            assert EventUser(**kw).build_hash() == value
-
-    def test_tag_value(self):
-        cases: list[tuple[dict[str, str], str | None]] = [
-            (
-                {
-                    "ident": "ident",
-                    "username": "username",
-                    "email": "email",
-                    "ip_address": "127.0.0.1",
-                },
-                "id:ident",
-            ),
-            (
-                {"username": "username", "email": "email", "ip_address": "127.0.0.1"},
-                "username:username",
-            ),
-            ({"email": "email", "ip_address": "127.0.0.1"}, "email:email"),
-            ({"ip_address": "127.0.0.1"}, "ip:127.0.0.1"),
-            ({}, None),
-        ]
-        for kw, value in cases:
-            assert EventUser(**kw).tag_value == value
-
-    def test_attr_from_keyword(self):
-        cases = [
-            ("id", "ident"),
-            ("username", "username"),
-            ("email", "email"),
-            ("ip", "ip_address"),
-        ]
-        for keyword, attr in cases:
-            assert EventUser.attr_from_keyword(keyword) == attr
-
-    def test_hash_from_tag(self):
-        assert EventUser.hash_from_tag("foo:bar:baz") == md5(b"bar:baz").hexdigest()
-
-    def test_for_tags(self):
-        eu = EventUser.objects.create(project_id=1, ident="matt")
-        assert EventUser.for_tags(1, ["id:matt"]) == {"id:matt": eu}
-        assert EventUser.for_tags(1, ["id:doesnotexist"]) == {}
-        assert EventUser.for_tags(1, ["id:matt", "id:doesnotexist"]) == {"id:matt": eu}