Browse Source

feat(db): Add support for JSON in pickle field (#36350)

Armin Ronacher 2 years ago
parent
commit
0d5531a646

+ 3 - 0
src/sentry/conf/server.py

@@ -523,6 +523,9 @@ CELERY_ALWAYS_EAGER = False
 # this works.
 CELERY_COMPLAIN_ABOUT_BAD_USE_OF_PICKLE = False
 
+# Complain about bad use of pickle in PickledObjectField
+PICKLED_OBJECT_FIELD_COMPLAIN_ABOUT_BAD_USE_OF_PICKLE = False
+
 # We use the old task protocol because during benchmarking we noticed that it's faster
 # than the new protocol. If we ever need to bump this it should be fine, there were no
 # compatibility issues, just need to run benchmarks and do some tests to make sure

+ 38 - 2
src/sentry/db/models/fields/picklefield.py

@@ -1,4 +1,11 @@
+from django.conf import settings
+
 import django_picklefield
+from sentry.db.models.fields import jsonfield
+from sentry.utils import json
+
+PICKLE_WRITE_JSON = False
+PICKLE_READ_JSON = True
 
 
 class PickledObjectField(django_picklefield.PickledObjectField):
@@ -6,15 +13,44 @@ class PickledObjectField(django_picklefield.PickledObjectField):
     changed handling for bytes (we do not allow them toplevel for
     historic reasons) and empty strings handling.
 
-    This field should not be used for new code!
+    This will eventually move over to storing JSON behind the scenes
+    and can already read JSON if it's placed in the database.  In
+    tests it will already fail with an error if code tries to put
+    values in there which cannot be serialized as JSON.
     """
 
     empty_strings_allowed = True
 
+    def __init__(self, *args, **kwargs):
+        self.write_json = kwargs.pop("write_json", PICKLE_WRITE_JSON)
+        self.read_json = kwargs.pop("read_json", PICKLE_READ_JSON)
+        self.disable_pickle_validation = kwargs.pop("disable_pickle_validation", False)
+        super().__init__(*args, **kwargs)
+
     def get_db_prep_value(self, value, *args, **kwargs):
         if isinstance(value, bytes):
             value = value.decode("utf-8")
+        if self.write_json:
+            if value is None and self.null:
+                return None
+            return json.dumps(value, default=jsonfield.default)
+        elif (
+            settings.PICKLED_OBJECT_FIELD_COMPLAIN_ABOUT_BAD_USE_OF_PICKLE
+            and not self.disable_pickle_validation
+        ):
+            try:
+                json.dumps(value, default=jsonfield.default)
+            except Exception as e:
+                raise TypeError(
+                    "Tried to serialize a pickle field with a value that cannot be serialized as JSON: %s"
+                    % (e,)
+                )
         return super().get_db_prep_value(value, *args, **kwargs)
 
     def to_python(self, value):
-        return super().to_python(value)
+        try:
+            if value is None:
+                return None
+            return json.loads(value)
+        except ValueError:
+            return super().to_python(value)

+ 6 - 1
src/sentry/models/authenticator.py

@@ -119,7 +119,12 @@ class Authenticator(BaseModel):
     created_at = models.DateTimeField(_("created at"), default=timezone.now)
     last_used_at = models.DateTimeField(_("last used at"), null=True)
     type = BoundedPositiveIntegerField(choices=AUTHENTICATOR_CHOICES)
-    config = PickledObjectField()
+
+    # This field stores bytes and as such cannot currently
+    # be serialized by our JSON serializer.  This would require
+    # further changes.  As such this validation is currently
+    # disabled to make tests pass.
+    config = PickledObjectField(disable_pickle_validation=True)
 
     objects = AuthenticatorManager()
 

+ 1 - 0
src/sentry/utils/pytest/sentry.py

@@ -111,6 +111,7 @@ def pytest_configure(config):
     settings.BROKER_URL = "memory://"
     settings.CELERY_ALWAYS_EAGER = False
     settings.CELERY_COMPLAIN_ABOUT_BAD_USE_OF_PICKLE = True
+    settings.PICKLED_OBJECT_FIELD_COMPLAIN_ABOUT_BAD_USE_OF_PICKLE = True
     settings.CELERY_EAGER_PROPAGATES_EXCEPTIONS = True
 
     settings.DEBUG_VIEWS = True

+ 68 - 0
tests/sentry/db/models/fields/test_picklefield.py

@@ -0,0 +1,68 @@
+from django.db import connection, models
+
+from sentry.db.models.fields.picklefield import PickledObjectField
+from sentry.testutils import TestCase
+
+
+class JsonReadingPickleModel(models.Model):
+    data = PickledObjectField(write_json=False)
+
+    class Meta:
+        app_label = "fixtures"
+
+
+class JsonWritingPickleModel(models.Model):
+    data = PickledObjectField(write_json=True)
+
+    class Meta:
+        app_label = "fixtures"
+
+
+class PickledObjectFieldTest(TestCase):
+    def test_pickle_by_default(self):
+        obj = JsonReadingPickleModel.objects.create(
+            data={"foo": "bar"},
+        )
+        obj = JsonReadingPickleModel.objects.get(id=obj.id)
+        self.assertEqual(obj.data, {"foo": "bar"})
+
+        with connection.cursor() as cur:
+            cur.execute("select * from fixtures_jsonreadingpicklemodel where id = %s", [obj.id])
+            row = cur.fetchone()
+
+            # pickled, since we still write pickle in this case
+            assert row[1] == "gAJ9cQBYAwAAAGZvb3EBWAMAAABiYXJxAnMu"
+
+            # put some JSON there
+            cur.execute(
+                "update fixtures_jsonreadingpicklemodel set data = %s where id = %s",
+                ['{"foo": "bar2"}', obj.id],
+            )
+
+        # we observe the update as json
+        obj = JsonReadingPickleModel.objects.get(id=obj.id)
+        self.assertEqual(obj.data, {"foo": "bar2"})
+
+    def test_json_by_default(self):
+        obj = JsonWritingPickleModel.objects.create(
+            data={"foo": "bar2"},
+        )
+        obj = JsonWritingPickleModel.objects.get(id=obj.id)
+        self.assertEqual(obj.data, {"foo": "bar2"})
+
+        with connection.cursor() as cur:
+            cur.execute("select * from fixtures_jsonwritingpicklemodel where id = %s", [obj.id])
+            row = cur.fetchone()
+
+            # should be JSON
+            assert row[1] == '{"foo":"bar2"}'
+
+            # put some pickle there
+            cur.execute(
+                "update fixtures_jsonwritingpicklemodel set data = %s where id = %s",
+                ["gAJ9cQBYAwAAAGZvb3EBWAMAAABiYXJxAnMu", obj.id],
+            )
+
+        # we observe the update as pickle
+        obj = JsonWritingPickleModel.objects.get(id=obj.id)
+        self.assertEqual(obj.data, {"foo": "bar"})