Browse Source

ref: add check to dev for roundtrip of json pickle fields (#45602)

this adds a check for the `WRITE_JSON` feature which ensures that the
values round-trip through the database -- it is currently enabled in
development and the plan is to try it out on a small percentage of
traffic in production to gain confidence

I had to make 3 changes:
- `sentry:relay-rev-lastchange`: this was storing a datetime -- it now
stores a string of that datetime (this appears to be fine, it was only
used to serve a json endpoint with exactly the same string
serialization)
- `onboarding:complete`: this stored a datetime, there are no readers of
it so I converted it to the json'd datetime
- `sentry:transaction_name_cluster_rules` -- this was storing a list of
tuples (which gets immediately converted to/from a dict) -- I converted
the value to a list of lists so it roundtrips and also converted the
inner lists back to tuples upon reading so the original types are
preserved
anthony sottile 2 years ago
parent
commit
85a89db1ae

+ 10 - 3
src/sentry/db/models/fields/picklefield.py

@@ -5,7 +5,6 @@ from sentry.db.models.fields import jsonfield
 from sentry.utils import json
 
 PICKLE_WRITE_JSON = False
-PICKLE_READ_JSON = True
 
 
 class PickledObjectField(django_picklefield.PickledObjectField):
@@ -23,7 +22,6 @@ class PickledObjectField(django_picklefield.PickledObjectField):
 
     def __init__(self, *args, **kwargs):
         self.write_json = kwargs.pop("write_json", PICKLE_WRITE_JSON)
-        self.read_json = kwargs.pop("read_json", PICKLE_READ_JSON)
         self.disable_pickle_validation = kwargs.pop("disable_pickle_validation", False)
         super().__init__(*args, **kwargs)
 
@@ -39,12 +37,21 @@ class PickledObjectField(django_picklefield.PickledObjectField):
             and not self.disable_pickle_validation
         ):
             try:
-                json.dumps(value, default=jsonfield.default)
+                s = json.dumps(value, default=jsonfield.default)
             except Exception as e:
                 raise TypeError(
                     "Tried to serialize a pickle field with a value that cannot be serialized as JSON: %s"
                     % (e,)
                 )
+            else:
+                rt = json.loads(s)
+                if value != rt:
+                    raise TypeError(
+                        f"json serialized database value was not the same after deserializing:\n"
+                        f"- {type(value)=}\n"
+                        f"- {type(rt)=}"
+                    )
+
         return super().get_db_prep_value(value, *args, **kwargs)
 
     def to_python(self, value):

+ 3 - 2
src/sentry/ingest/transaction_clusterer/rules.py

@@ -69,8 +69,9 @@ class ProjectOptionRuleStore:
 
     def write(self, project: Project, rules: RuleSet) -> None:
         """Writes the rules to project options, sorted by depth."""
-        sorted_rules = self._sort(rules)
-        project.update_option(self._option_name, sorted_rules)
+        # we make sure the database stores lists such that they are json round trippable
+        converted_rules = [list(tup) for tup in self._sort(rules)]
+        project.update_option(self._option_name, converted_rules)
 
 
 class CompositeRuleStore:

+ 5 - 1
src/sentry/projectoptions/manager.py

@@ -4,6 +4,8 @@ from datetime import datetime
 
 from pytz import utc
 
+from sentry.utils import json
+
 
 class WellKnownProjectOption:
     def __init__(self, key, default=None, epoch_defaults=None):
@@ -74,7 +76,9 @@ class ProjectOptionsManager:
 
         ProjectOption.objects.set_value(project, "sentry:relay-rev", uuid.uuid4().hex)
         ProjectOption.objects.set_value(
-            project, "sentry:relay-rev-lastchange", datetime.utcnow().replace(tzinfo=utc)
+            project,
+            "sentry:relay-rev-lastchange",
+            json.datetime_to_str(datetime.utcnow().replace(tzinfo=utc)),
         )
 
     def register(self, key, default=None, epoch_defaults=None):

+ 2 - 1
src/sentry/receivers/onboarding.py

@@ -36,6 +36,7 @@ from sentry.signals import (
     project_created,
     transaction_processed,
 )
+from sentry.utils import json
 from sentry.utils.event import has_event_minified_stack_trace
 from sentry.utils.javascript import has_sourcemap
 
@@ -66,7 +67,7 @@ def try_mark_onboarding_complete(organization_id):
                 OrganizationOption.objects.create(
                     organization_id=organization_id,
                     key="onboarding:complete",
-                    value={"updated": timezone.now()},
+                    value={"updated": json.datetime_to_str(timezone.now())},
                 )
         except IntegrityError:
             pass

+ 5 - 1
src/sentry/utils/json.py

@@ -23,11 +23,15 @@ TKey = TypeVar("TKey")
 TValue = TypeVar("TValue")
 
 
+def datetime_to_str(o: datetime.datetime) -> str:
+    return o.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+
 def better_default_encoder(o: object) -> object:
     if isinstance(o, uuid.UUID):
         return o.hex
     elif isinstance(o, datetime.datetime):
-        return o.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+        return datetime_to_str(o)
     elif isinstance(o, datetime.date):
         return o.isoformat()
     elif isinstance(o, datetime.time):

+ 2 - 3
tests/sentry/ingest/test_transaction_clusterer.py

@@ -293,9 +293,8 @@ def test_transaction_clusterer_generates_rules(default_project):
     with Feature({feature: True}):
         assert _get_projconfig_tx_rules(default_project) is None
 
-    default_project.update_option(
-        "sentry:transaction_name_cluster_rules", [("/rule/*/0/**", 0), ("/rule/*/1/**", 1)]
-    )
+    rules = {"/rule/*/0/**": 0, "/rule/*/1/**": 1}
+    ProjectOptionRuleStore().write(default_project, rules)
 
     with Feature({feature: False}):
         assert _get_projconfig_tx_rules(default_project) is None