fields.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. from base64 import b64decode, b64encode
  2. from copy import deepcopy
  3. from pickle import dumps, loads
  4. from typing import Any
  5. from zlib import compress, decompress
  6. from django.conf import settings
  7. from django.core import checks
  8. from django.db import models
  9. from django.utils.encoding import force_str
  10. from .constants import DEFAULT_PROTOCOL
  def dbsafe_decode(value: Any, compress_object: bool = False) -> Any:
      """
      Turn a base64 text payload back into the Python object it encodes.

      The steps run in a fixed order: ``value`` is encoded to bytes (so it
      must offer ``.encode()``, i.e. be a ``str``), base64-decoded,
      zlib-decompressed when ``compress_object`` is true, and finally
      unpickled.

      NOTE: unpickling can execute arbitrary code; only feed this function
      payloads that come from a trusted source.
      """
      raw = b64decode(value.encode())  # str -> bytes -> binary payload
      payload = decompress(raw) if compress_object else raw
      return loads(payload)
  class PickledObject(str):
      """
      Marker ``str`` subclass for text that is already an encoded pickle.

      An instance of this class must (well, should) contain a pickled,
      encoded payload, which lets the rest of the module tell pre-encoded
      strings apart from ordinary ones.

      It is only really useful for handing pre-encoded values (produced by
      ``dbsafe_encode``) to a field's ``default``, and even that is never
      required: the plain Python object can always be passed instead.
      Removing this class and its references would only take away the
      ability to pass in pre-encoded values.
      """
  class _ObjectWrapper:
      """
      Opaque holder for objects whose attributes may clash with ORM internals.

      Objects exposing a ``prepare_database_save`` attribute (such as
      ``django.db.Model`` subclasses) will not work under certain
      conditions, and the same applies when retrieving any callable, so
      such values are carried inside this wrapper instead.
      """

      # A single slot: the wrapper stores nothing but the wrapped object.
      __slots__ = ("_obj",)

      def __init__(self, obj: Any) -> None:
          # Keep the original object untouched; it is read back via ``_obj``.
          self._obj = obj
  def wrap_conflictual_object(obj: Any) -> Any:
      """
      Return ``obj`` wrapped in ``_ObjectWrapper`` when it is ORM-unsafe.

      An object is wrapped when it has a ``prepare_database_save``
      attribute or is callable; every other object is returned unchanged.
      """
      if not (hasattr(obj, "prepare_database_save") or callable(obj)):
          return obj
      return _ObjectWrapper(obj)
  def get_default_protocol() -> Any:
      """
      Return the pickle protocol used when none is given explicitly.

      The ``PICKLEFIELD_DEFAULT_PROTOCOL`` Django setting wins when it is
      defined; otherwise the module constant ``DEFAULT_PROTOCOL`` is used.
      """
      fallback = DEFAULT_PROTOCOL
      return getattr(settings, "PICKLEFIELD_DEFAULT_PROTOCOL", fallback)
  def dbsafe_encode(
      value: Any, compress_object: bool = False, pickle_protocol: Any = None, copy: bool = True
  ) -> Any:
      """
      Pickle ``value`` and return it as a base64 ``PickledObject`` string.

      ``pickle_protocol`` falls back to ``get_default_protocol()`` when it
      is ``None``. With ``compress_object`` true the pickled bytes are
      zlib-compressed before being base64-encoded. With ``copy`` true the
      value is deep-copied before pickling (see the comment below).
      """
      protocol = get_default_protocol() if pickle_protocol is None else pickle_protocol

      # Pickling a deep copy avoids a problem where dumps can generate
      # different character streams for the same lookup value depending on
      # how it is referenced. Lookups are done as simple string matches, so
      # the streams must be identical for lookups to work properly.
      # Copying can be very expensive, though, so callers that never perform
      # lookups on the value may turn it off.
      source = deepcopy(value) if copy else value

      pickled = dumps(source, protocol=protocol)
      if compress_object:
          pickled = compress(pickled)

      # b64encode yields bytes; decode them so the result is text.
      return PickledObject(b64encode(pickled).decode())
  class PickledObjectField(models.Field):
      """
      A field that will accept *any* python object and store it in the
      database as text.

      Values are pickled and base64-encoded before being stored;
      PickledObjectField will additionally compress them if declared with
      the keyword argument ``compress=True``.

      Does not actually encode and compress ``None`` objects (although you
      can still do lookups using None). This way, it is still possible to
      use the ``isnull`` lookup type correctly.

      Extra keyword arguments (all optional):

      * ``compress`` -- compress the pickled payload (default ``False``).
      * ``protocol`` -- pickle protocol; defaults to ``get_default_protocol()``.
      * ``copy`` -- deep-copy values before pickling (default ``True``).

      ``editable`` defaults to ``False`` for this field.
      """

      empty_strings_allowed: bool = False

      def __init__(self, *args: Any, **kwargs: Any) -> None:
          """Pop the field-specific options and defer the rest to ``models.Field``."""
          self.compress = kwargs.pop("compress", False)
          protocol = kwargs.pop("protocol", None)
          self.protocol = get_default_protocol() if protocol is None else protocol
          self.copy = kwargs.pop("copy", True)
          kwargs.setdefault("editable", False)
          super().__init__(*args, **kwargs)

      def get_default(self) -> Any:
          """
          Return the default value for this field without coercing it.

          The default is handed back as the Python object it is, so
          arbitrary objects can be used as defaults. A callable default is
          called and its result returned; it is *not* pickled or encoded
          here. Without a default, ``models.Field.get_default`` decides.
          """
          if not self.has_default():
              # No default configured: punt to models.Field.
              return super().get_default()
          default = self.default
          return default() if callable(default) else default

      def _check_default(self) -> list[Any]:
          """Warn when the default is a ``list``, ``dict`` or ``set`` instance."""
          if self.has_default() and isinstance(self.default, (list, dict, set)):
              return [
                  checks.Warning(
                      "%s default should be a callable instead of a mutable instance so "
                      "that it's not shared between all field instances."
                      % (self.__class__.__name__,),
                      hint=(
                          "Use a callable instead, e.g., use `%s` instead of "
                          "`%r`."
                          % (
                              type(self.default).__name__,
                              self.default,
                          )
                      ),
                      obj=self,
                      id="picklefield.E001",
                  )
              ]
          return []

      def check(self, **kwargs: Any) -> Any:
          """Run the base field checks and append the mutable-default check."""
          errors = super().check(**kwargs)
          errors.extend(self._check_default())
          return errors

      def deconstruct(self) -> tuple[str, str, Any, Any]:
          """
          Return ``(name, path, args, kwargs)`` sufficient to rebuild this field.

          Only options that differ from their defaults are emitted:
          ``compress`` when enabled, ``protocol`` when it differs from
          ``get_default_protocol()``, and ``copy`` when disabled.
          """
          name, path, args, kwargs = super().deconstruct()
          if self.compress:
              kwargs["compress"] = True
          if self.protocol != get_default_protocol():
              kwargs["protocol"] = self.protocol
          if not self.copy:
              # Without this, a field rebuilt from its deconstructed form
              # would silently fall back to copy=True.
              kwargs["copy"] = False
          return name, path, args, kwargs

      def to_python(self, value: Any) -> Any:
          """
          B64decode and unpickle the object, optionally decompressing it.

          If decoding fails and the value is a ``PickledObject`` (a definite
          pickle), the error propagates. If it fails for any other value,
          the value is assumed not to be a pickle and is returned unchanged.
          A decoded ``_ObjectWrapper`` is unwrapped before being returned.

          NOTE: this unpickles stored data; pickle can execute arbitrary
          code, so the stored values must be trusted.
          """
          if value is not None:
              try:
                  value = dbsafe_decode(value, self.compress)
              except Exception:
                  # Deliberately broad: any failure on a value that is not a
                  # definite pickle means "not encoded", so keep it as is.
                  if isinstance(value, PickledObject):
                      raise
              else:
                  if isinstance(value, _ObjectWrapper):
                      return value._obj
          return value

      def pre_save(self, model_instance: Any, add: Any) -> Any:
          """Return the value to save, wrapped if it could clash with the ORM."""
          value = super().pre_save(model_instance, add)
          return wrap_conflictual_object(value)

      def from_db_value(self, value: Any, expression: Any, connection: Any) -> Any:
          """Convert a value loaded from the database via ``to_python``."""
          return self.to_python(value)

      def get_db_prep_value(self, value: Any, connection: Any = None, prepared: bool = False) -> Any:
          """
          Pickle and b64encode the object, optionally compressing it.

          ``None`` and ``PickledObject`` instances are returned unchanged.

          The pickling protocol is the explicit ``self.protocol`` rather than
          -1 or HIGHEST_PROTOCOL, because we don't want the protocol to
          change over time. If it did, ``exact`` and ``in`` lookups would
          likely fail, since pickle would now be generating a different
          string.
          """
          if value is not None and not isinstance(value, PickledObject):
              # force_str is called explicitly so that the encoded string is
              # not rejected by the database backend (per the original note,
              # the psycopg2 PostgreSQL backend).
              value = force_str(dbsafe_encode(value, self.compress, self.protocol, self.copy))
          return value

      def value_to_string(self, obj: Any) -> Any:
          """Return the field value of ``obj`` in its encoded database form."""
          value = self.value_from_object(obj)
          return self.get_db_prep_value(value)

      def get_internal_type(self) -> str:
          """Report this field's internal type as ``TextField``."""
          return "TextField"

      def get_lookup(self, lookup_name: str) -> Any:
          """
          Return the lookup for ``lookup_name``, limiting the supported types.

          Only ``exact``, ``in`` and ``isnull`` are allowed; any other
          lookup name raises ``TypeError``.
          """
          if lookup_name not in ("exact", "in", "isnull"):
              raise TypeError("Lookup type %s is not supported." % lookup_name)
          return super().get_lookup(lookup_name)