Browse Source

Reapply "perf: move interfaces and integrations to orjson (#71055)"

This reverts commit 191f282684c8ce4f10c82b3232041a1f7934de8c.
Yagiz Nizipli 9 months ago
parent
commit
dacfa8bd89

+ 5 - 5
src/sentry/api/endpoints/source_map_debug_blue_thunder_edition.py

@@ -1,5 +1,6 @@
 from typing import Literal, TypedDict
 
+import orjson
 import sentry_sdk
 from django.db.models import QuerySet
 from django.utils.encoding import force_bytes, force_str
@@ -33,7 +34,6 @@ from sentry.models.releasefile import (
     ReleaseFile,
 )
 from sentry.sdk_updates import get_sdk_index
-from sentry.utils import json
 from sentry.utils.javascript import find_sourcemap
 from sentry.utils.safe import get_path
 from sentry.utils.urls import non_standard_url_join
@@ -416,7 +416,7 @@ class ReleaseLookupData:
             self._get_dist_matched_artifact_index_release_file()
         )
         if dist_matched_artifact_index_release_file is not None:
-            raw_data = json.load(dist_matched_artifact_index_release_file.file.getfile())
+            raw_data = orjson.loads(dist_matched_artifact_index_release_file.file.getfile().read())
             files = raw_data.get("files")
             for potential_source_file_name in self.matching_source_file_names:
                 matching_file = files.get(potential_source_file_name)
@@ -453,7 +453,7 @@ class ReleaseLookupData:
                     return
 
         for artifact_index_file in self._get_artifact_index_release_files():
-            raw_data = json.load(artifact_index_file.file.getfile())
+            raw_data = orjson.loads(artifact_index_file.file.getfile().read())
             files = raw_data.get("files")
             for potential_source_file_name in self.matching_source_file_names:
                 if files.get(potential_source_file_name) is not None:
@@ -534,14 +534,14 @@ class ReleaseLookupData:
             self._get_dist_matched_artifact_index_release_file()
         )
         if dist_matched_artifact_index_release_file is not None:
-            raw_data = json.load(dist_matched_artifact_index_release_file.file.getfile())
+            raw_data = orjson.loads(dist_matched_artifact_index_release_file.file.getfile().read())
             files = raw_data.get("files")
             if files.get(matching_source_map_name) is not None:
                 self.source_map_lookup_result = "found"
                 return
 
         for artifact_index_file in self._get_artifact_index_release_files():
-            raw_data = json.load(artifact_index_file.file.getfile())
+            raw_data = orjson.loads(artifact_index_file.file.getfile().read())
             files = raw_data.get("files")
             if files.get(matching_source_map_name) is not None:
                 self.source_map_lookup_result = "wrong-dist"

+ 1 - 2
src/sentry/backup/crypto.py

@@ -14,7 +14,6 @@ from cryptography.hazmat.primitives.asymmetric import padding
 from google.cloud.kms import KeyManagementServiceClient as KeyManagementServiceClient
 from google_crc32c import value as crc32c
 
-from sentry.utils import json
 from sentry.utils.env import gcp_project_id
 
 
@@ -94,7 +93,7 @@ class GCPKMSEncryptor(Encryptor):
     def get_public_key_pem(self) -> bytes:
         if self.crypto_key_version is None:
             # Read the user supplied configuration into the proper format.
-            gcp_kms_config_json = json.load(self.__fp)
+            gcp_kms_config_json = orjson.loads(self.__fp.read())
             try:
                 self.crypto_key_version = CryptoKeyVersion(**gcp_kms_config_json)
             except TypeError:

+ 5 - 2
src/sentry/backup/imports.py

@@ -38,7 +38,6 @@ from sentry.services.hybrid_cloud.import_export.model import (
 from sentry.services.hybrid_cloud.import_export.service import ImportExportService
 from sentry.silo.base import SiloMode
 from sentry.silo.safety import unguarded_write
-from sentry.utils import json
 from sentry.utils.env import is_split_db
 
 __all__ = (
@@ -289,7 +288,11 @@ def _import(
                 batch = []
                 last_seen_model_name = model_name
             if len(batch) >= MAX_BATCH_SIZE:
-                yield (last_seen_model_name, json.dumps(batch), num_current_model_instances_yielded)
+                yield (
+                    last_seen_model_name,
+                    orjson.dumps(batch).decode(),
+                    num_current_model_instances_yielded,
+                )
                 num_current_model_instances_yielded += len(batch)
                 batch = []
 

+ 10 - 11
src/sentry/backup/sanitize.py

@@ -9,12 +9,11 @@ from typing import Any
 from urllib.parse import urlparse, urlunparse
 from uuid import UUID, uuid4
 
+import orjson
 import petname
 from dateutil.parser import parse as parse_datetime
 from django.utils.text import slugify
 
-from sentry.utils import json
-
 UPPER_CASE_HEX = {"A", "B", "C", "D", "E", "F"}
 UPPER_CASE_NON_HEX = {
     "H",
@@ -89,14 +88,14 @@ class SanitizableField:
     model: NormalizedModelName
     field: str
 
-    def validate_json_model(self, json: Any) -> None:
+    def validate_json_model(self, obj: Any) -> None:
         """
         Validates the JSON model is shaped the way we expect a serialized Django model to be,
         and that we have the right kind of model for this `SanitizableField`. Raises errors if there
         is a validation failure.
         """
 
-        model_name = json.get("model", None)
+        model_name = obj.get("model", None)
         if model_name is None:
             raise InvalidJSONError(
                 "JSON is not properly formatted, must be a serialized Django model"
@@ -106,12 +105,12 @@ class SanitizableField:
         return None
 
 
-def _get_field_value(json: Any, field: SanitizableField) -> Any | None:
-    return json.get("fields", {}).get(field.field, None)
+def _get_field_value(obj: Any, field: SanitizableField) -> Any | None:
+    return obj.get("fields", {}).get(field.field, None)
 
 
-def _set_field_value(json: Any, field: SanitizableField, value: Any) -> Any:
-    json.get("fields", {})[field.field] = value
+def _set_field_value(obj: Any, field: SanitizableField, value: Any) -> Any:
+    obj.get("fields", {})[field.field] = value
     return value
 
 
@@ -291,12 +290,12 @@ class Sanitizer:
         `set_json()` is the preferred method for doing so.
         """
 
-        old_serialized = json.dumps(old_json)
+        old_serialized = orjson.dumps(old_json).decode()
         interned = self.interned_strings.get(old_serialized)
         if interned is not None:
-            return json.loads(interned)
+            return orjson.loads(interned)
 
-        new_serialized = json.dumps(new_json)
+        new_serialized = orjson.dumps(new_json).decode()
         self.interned_strings[old_serialized] = new_serialized
         return new_json
 

+ 9 - 10
src/sentry/buffer/redis.py

@@ -9,6 +9,7 @@ from enum import Enum
 from time import time
 from typing import Any, TypeVar
 
+import orjson
 import rb
 from django.utils.encoding import force_bytes, force_str
 from rediscluster import RedisCluster
@@ -16,7 +17,7 @@ from rediscluster import RedisCluster
 from sentry.buffer.base import Buffer
 from sentry.db import models
 from sentry.tasks.process_buffer import process_incr
-from sentry.utils import json, metrics
+from sentry.utils import metrics
 from sentry.utils.hashlib import md5_text
 from sentry.utils.imports import import_string
 from sentry.utils.redis import (
@@ -47,7 +48,9 @@ def _validate_json_roundtrip(value: dict[str, Any], model: type[models.Model]) -
         _last_validation_log = time()
         try:
             if (
-                RedisBuffer._load_values(json.loads(json.dumps(RedisBuffer._dump_values(value))))
+                RedisBuffer._load_values(
+                    orjson.loads(orjson.dumps(RedisBuffer._dump_values(value)))
+                )
                 != value
             ):
                 logger.error("buffer.corrupted_value", extra={"value": value, "model": model})
@@ -252,10 +255,6 @@ class RedisBuffer(Buffer):
     def push_to_sorted_set(self, key: str, value: list[int] | int) -> None:
         value_dict = {value: time()}
         self._execute_redis_operation(key, RedisOperation.SORTED_SET_ADD, value_dict)
-        logger.info(
-            "redis_buffer.push_to_sorted_set",
-            extra={"key_name": key, "value": json.dumps(value_dict)},
-        )
 
     def get_sorted_set(self, key: str, min: float, max: float) -> list[tuple[int, datetime]]:
         redis_set = self._execute_redis_operation(
@@ -347,7 +346,7 @@ class RedisBuffer(Buffer):
         _validate_json_roundtrip(filters, model)
 
         if is_instance_redis_cluster(self.cluster, self.is_redis_cluster):
-            pipe.hsetnx(key, "f", json.dumps(self._dump_values(filters)))
+            pipe.hsetnx(key, "f", orjson.dumps(self._dump_values(filters)).decode())
         else:
             pipe.hsetnx(key, "f", pickle.dumps(filters))
 
@@ -361,7 +360,7 @@ class RedisBuffer(Buffer):
             _validate_json_roundtrip(extra, model)
             for column, value in extra.items():
                 if is_instance_redis_cluster(self.cluster, self.is_redis_cluster):
-                    pipe.hset(key, "e+" + column, json.dumps(self._dump_value(value)))
+                    pipe.hset(key, "e+" + column, orjson.dumps(self._dump_value(value)).decode())
                 else:
                     pipe.hset(key, "e+" + column, pickle.dumps(value))
 
@@ -474,7 +473,7 @@ class RedisBuffer(Buffer):
             model = import_string(force_str(values.pop("m")))
 
             if values["f"].startswith(b"{" if not self.is_redis_cluster else "{"):
-                filters = self._load_values(json.loads(force_str(values.pop("f"))))
+                filters = self._load_values(orjson.loads(force_str(values.pop("f"))))
             else:
                 # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
                 filters = pickle.loads(force_bytes(values.pop("f")))
@@ -487,7 +486,7 @@ class RedisBuffer(Buffer):
                     incr_values[k[2:]] = int(v)
                 elif k.startswith("e+"):
                     if v.startswith(b"[" if not self.is_redis_cluster else "["):
-                        extra_values[k[2:]] = self._load_value(json.loads(force_str(v)))
+                        extra_values[k[2:]] = self._load_value(orjson.loads(force_str(v)))
                     else:
                         # TODO(dcramer): legacy pickle support - remove in Sentry 9.1
                         extra_values[k[2:]] = pickle.loads(force_bytes(v))

+ 4 - 3
src/sentry/cache/redis.py

@@ -1,4 +1,5 @@
-from sentry.utils import json
+import orjson
+
 from sentry.utils.redis import get_cluster_from_options, get_cluster_routing_client, redis_clusters
 
 from .base import BaseCache
@@ -25,7 +26,7 @@ class CommonRedisCache(BaseCache):
 
     def set(self, key, value, timeout, version=None, raw=False):
         key = self.make_key(key, version=version)
-        v = json.dumps(value) if not raw else value
+        v = orjson.dumps(value).decode() if not raw else value
         if len(v) > self.max_size:
             raise ValueTooLarge(f"Cache key too large: {key!r} {len(v)!r}")
         if timeout:
@@ -45,7 +46,7 @@ class CommonRedisCache(BaseCache):
         key = self.make_key(key, version=version)
         result = self._client(raw=raw).get(key)
         if result is not None and not raw:
-            result = json.loads(result)
+            result = orjson.loads(result)
 
         self._mark_transaction("get")
 

+ 2 - 2
src/sentry/charts/chartcuterie.py

@@ -3,6 +3,7 @@ from typing import Any
 from urllib.parse import urljoin
 from uuid import uuid4
 
+import orjson
 import requests
 import sentry_sdk
 from django.conf import settings
@@ -10,7 +11,6 @@ from django.conf import settings
 from sentry import options
 from sentry.exceptions import InvalidConfiguration
 from sentry.models.file import get_storage
-from sentry.utils import json
 from sentry.utils.http import absolute_uri
 
 from .base import ChartRenderer, logger
@@ -76,7 +76,7 @@ class Chartcuterie(ChartRenderer):
             assert self.service_url is not None
             resp = requests.post(
                 url=urljoin(self.service_url, "render"),
-                data=json.dumps(payload),
+                data=orjson.dumps(payload),
                 headers={"Content-Type": "application/json"},
             )
 

+ 3 - 3
src/sentry/db/models/fields/array.py

@@ -1,9 +1,9 @@
 import ast
 
+import orjson
 from django.db import models
 
 from sentry.db.models.utils import Creator
-from sentry.utils import json
 
 
 # Adapted from django-pgfields
@@ -54,8 +54,8 @@ class ArrayField(models.Field):
             value = []
         if isinstance(value, str):
             try:
-                value = json.loads(value)
-            except json.JSONDecodeError:
+                value = orjson.loads(value)
+            except orjson.JSONDecodeError:
                 # This is to accommodate the erroneous exports pre 21.4.0
                 # See getsentry/sentry#23843 for more details
                 try:

+ 4 - 4
src/sentry/db/models/fields/gzippeddict.py

@@ -3,10 +3,10 @@ from __future__ import annotations
 import logging
 import pickle
 
+import orjson
 from django.db.models import TextField
 
 from sentry.db.models.utils import Creator
-from sentry.utils import json
 from sentry.utils.strings import decompress
 
 __all__ = ("GzippedDictField",)
@@ -32,7 +32,7 @@ class GzippedDictField(TextField):
         try:
             if not value:
                 return {}
-            return json.loads(value)
+            return orjson.loads(value)
         except (ValueError, TypeError):
             if isinstance(value, str) and value:
                 try:
@@ -47,7 +47,7 @@ class GzippedDictField(TextField):
     def from_db_value(self, value, expression, connection):
         return self.to_python(value)
 
-    def get_prep_value(self, value):
+    def get_prep_value(self, value) -> str | None:
         if not value and self.null:
             # save ourselves some storage
             return None
@@ -55,7 +55,7 @@ class GzippedDictField(TextField):
             value = value.decode("utf-8")
         if value is None and self.null:
             return None
-        return json.dumps(value)
+        return orjson.dumps(value).decode()
 
     def value_to_string(self, obj):
         return self.get_prep_value(self.value_from_object(obj))

+ 7 - 6
src/sentry/db/models/fields/jsonfield.py

@@ -25,13 +25,13 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
+import orjson
 from django.core.exceptions import ValidationError
 from django.db import models
 from django.db.models.lookups import Contains, Exact, IContains, IExact, In, Lookup
 from django.utils.translation import gettext_lazy as _
 
 from sentry.db.models.utils import Creator
-from sentry.utils import json
 
 
 class JSONField(models.TextField):
@@ -83,8 +83,8 @@ class JSONField(models.TextField):
             if callable(default):
                 default = default()
             if isinstance(default, str):
-                return json.loads(default)
-            return json.loads(json.dumps(default))
+                return orjson.loads(default)
+            return orjson.loads(orjson.dumps(default))
         return super().get_default()
 
     def get_internal_type(self):
@@ -101,7 +101,7 @@ class JSONField(models.TextField):
                 if self.blank:
                     return ""
             try:
-                value = json.loads(value)
+                value = orjson.loads(value)
             except ValueError:
                 msg = self.error_messages["invalid"] % value
                 raise ValidationError(msg)
@@ -111,12 +111,13 @@ class JSONField(models.TextField):
     def get_db_prep_value(self, value, connection=None, prepared=None):
         return self.get_prep_value(value)
 
-    def get_prep_value(self, value):
+    def get_prep_value(self, value) -> str | None:
         if value is None:
             if not self.null and self.blank:
                 return ""
             return None
-        return json.dumps(value)
+        # TODO(@anonrig): Remove support for non-string keys.
+        return orjson.dumps(value, option=orjson.OPT_NON_STR_KEYS).decode()
 
     def value_to_string(self, obj):
         return self.value_from_object(obj)

Some files were not shown because too many files changed in this diff