query.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. from collections import OrderedDict
  2. from itertools import chain
  3. from typing import Dict, Iterable, List, Optional, Tuple, Union
  4. from django.core.exceptions import SuspiciousOperation
  5. from django.db import connections, models, router
  6. from django.db.models import Expression, Q
  7. from django.db.models.fields import NOT_PROVIDED
  8. from .sql import PostgresInsertQuery, PostgresQuery
  9. from .types import ConflictAction
  10. ConflictTarget = List[Union[str, Tuple[str]]]
  11. class PostgresQuerySet(models.QuerySet):
  12. """Adds support for PostgreSQL specifics."""
  def __init__(self, model=None, query=None, using=None, hints=None):
      """Initializes a new instance of :see:PostgresQuerySet.

      Arguments:
          model, query, using, hints:
              Passed on, unchanged and in this order, to the
              initializer of the base class.
      """

      super().__init__(model, query, using, hints)

      # any falsy query (not just None) is replaced by a
      # PostgresQuery for this queryset's model
      if not query:
          query = PostgresQuery(self.model)
      self.query = query

      # no conflict handling is configured on a fresh queryset
      self.conflict_target = None
      self.conflict_action = None
      self.conflict_update_condition = None
      self.index_predicate = None
  def annotate(self, **annotations):
      """Variant of the standard annotate function that accepts the name
      of an existing model field as the alias of an annotation.

      The base class refuses aliases that collide with a field
      on the model. To get around that check, each colliding alias
      is handed to the base class under a temporary name ("<alias>_new")
      and renamed back to the requested alias afterwards.

      All annotations, colliding or not, are copied into a fresh
      OrderedDict in the order they were passed, so that the
      temporary renaming does not change their relative order.

      Arguments:
          annotations:
              The annotations to add, mapping alias to expression.

      Returns:
          The annotated query set, with all aliases as requested.
      """

      # only membership matters here, so the names alone suffice
      model_field_names = {
          field.name for field in self.model._meta.get_fields()
      }

      temporary_annotations = OrderedDict()
      restore_names = {}

      for alias, expression in annotations.items():
          if alias not in model_field_names:
              temporary_annotations[alias] = expression
              continue

          # collides with a model field, hide it behind a temporary alias
          temporary_alias = "%s_new" % alias
          temporary_annotations[temporary_alias] = expression
          restore_names[temporary_alias] = alias

      # let the base class do the actual work
      annotated = super().annotate(**temporary_annotations)

      # put the originally requested aliases back
      annotated.rename_annotations(**restore_names)
      return annotated
  def rename_annotations(self, **annotations):
      """Gives the specified annotations a different alias:

          .annotate(myfield=F('somestuf__myfield'))
          .rename_annotations(myfield='field')

      The renaming itself is delegated to the underlying query
      object, which receives the mapping as a single dictionary.

      Arguments:
          annotations:
              The annotations to rename. Mapping the
              old name to the new name.

      Returns:
          This query set, so that calls can be chained.
      """

      query = self.query
      query.rename_annotations(annotations)
      return self
  def on_conflict(
      self,
      fields: ConflictTarget,
      action: ConflictAction,
      index_predicate: Optional[Union[Expression, Q, str]] = None,
      update_condition: Optional[Union[Expression, Q, str]] = None,
  ):
      """Configures what should happen when inserting/creating a new row
      runs into a conflict.

      The settings are stored on this query set itself (no copy is
      made) and are picked up by the insert methods.

      Arguments:
          fields:
              The fields the conflicts can occur in.

          action:
              The action to take when the conflict occurs.

          index_predicate:
              The index predicate to satisfy an arbiter partial index
              (i.e. what partial index to use for checking conflicts)

          update_condition:
              Only update if this SQL expression evaluates to true.

      Returns:
          This query set, so that calls can be chained.
      """

      self.conflict_target, self.conflict_action = fields, action
      self.index_predicate = index_predicate
      self.conflict_update_condition = update_condition
      return self
  def bulk_insert(
      self,
      rows: List[dict],
      return_model: bool = False,
      using: Optional[str] = None,
  ):
      """Creates multiple new records in the database.

      This allows specifying custom conflict behavior using .on_conflict().
      If no special behavior was specified, this uses the normal Django
      bulk_create(..). In that case the result of bulk_create(..) is
      returned as is, and `return_model` and `using` are not applied.

      Arguments:
          rows:
              An iterable of dictionaries, where each dictionary
              describes the fields to insert. One-shot iterables
              (such as generators) are accepted; they are read
              exactly once.

          return_model (default: False):
              If model instances should be returned rather than
              just dicts.

          using:
              Name of the database connection to use for
              this query.

      Returns:
          A list of either the dicts of the rows inserted, including the pk or
          the models of the rows inserted with defaults for any fields not specified.
          An empty list when there are no rows to insert.
      """

      # materialize the rows exactly once: `rows` can be a one-shot
      # iterable (bulk_upsert accepts any Iterable) and it is iterated
      # several times below; checking it for emptiness by iterating
      # it would leave nothing for the actual insert
      rows = list(rows or ())
      if not rows:
          return []

      if not self.conflict_target and not self.conflict_action:
          # no special action required, use the standard Django bulk_create(..)
          return super().bulk_create(
              [self.model(**fields) for fields in rows]
          )

      deduped_rows = rows

      # when we do a ConflictAction.NOTHING, we are actually
      # doing a ON CONFLICT DO UPDATE with a trick to avoid
      # touching conflicting rows... however, ON CONFLICT UPDATE
      # barfs when you specify the exact same row twice:
      #
      # > "cannot affect row a second time"
      #
      # we filter out the duplicates here to make sure we maintain
      # the same behaviour as the real ON CONFLICT DO NOTHING
      #
      # (rows are dicts and thus not hashable, hence the list scan)
      if self.conflict_action == ConflictAction.NOTHING:
          deduped_rows = []
          for row in rows:
              if row not in deduped_rows:
                  deduped_rows.append(row)

      compiler = self._build_insert_compiler(deduped_rows, using=using)
      objs = compiler.execute_sql(return_id=not return_model)

      # merge what the database returned into what was sent, row by row
      if return_model:
          return [
              self._create_model_instance(dict(row, **obj), compiler.using)
              for row, obj in zip(deduped_rows, objs)
          ]

      return [dict(row, **obj) for row, obj in zip(deduped_rows, objs)]
  def insert(self, using: Optional[str] = None, **fields):
      """Creates a new record in the database.

      This allows specifying custom conflict behavior using .on_conflict().
      If no special behavior was specified, this uses the normal Django create(..)

      Arguments:
          fields:
              The fields of the row to create.

          using:
              The name of the database connection
              to use for this query.

      Returns:
          The primary key of the record that was created, or
          None when the insert query did not return any row.
      """

      if not (self.conflict_target or self.conflict_action):
          # no special action required, use the standard Django create(..)
          return super().create(**fields).pk

      compiler = self._build_insert_compiler([fields], using=using)
      rows = compiler.execute_sql(return_id=True)

      # the returned rows are keyed by database column name, which
      # is the second element of (attname, column)
      pk_db_column = self.model._meta.pk.get_attname_column()[1]

      if not rows:
          return None

      return rows[0][pk_db_column]
  def insert_and_get(self, using: Optional[str] = None, **fields):
      """Creates a new record in the database and then gets the entire row.

      This allows specifying custom conflict behavior using .on_conflict().
      If no special behavior was specified, this uses the normal Django create(..)

      Arguments:
          fields:
              The fields of the row to create.

          using:
              The name of the database connection
              to use for this query.

      Returns:
          The model instance representing the row that was created,
          or None when the insert query did not return any row.
      """

      if not (self.conflict_target or self.conflict_action):
          # no special action required, use the standard Django create(..)
          return super().create(**fields)

      compiler = self._build_insert_compiler([fields], using=using)
      rows = compiler.execute_sql(return_id=False)
      if not rows:
          return None

      # the database answers with column names, while the model is
      # initialized with attribute names, and those two can differ
      attname_by_column = {
          field.column: field.attname
          for field in self.model._meta.local_concrete_fields
      }

      # anything the database returned that is not a column
      # of the model is left out
      model_init_fields = {
          attname_by_column[column_name]: column_value
          for column_name, column_value in rows[0].items()
          if column_name in attname_by_column
      }

      return self._create_model_instance(model_init_fields, compiler.using)
  def upsert(
      self,
      conflict_target: ConflictTarget,
      fields: dict,
      index_predicate: Optional[Union[Expression, Q, str]] = None,
      using: Optional[str] = None,
      update_condition: Optional[Union[Expression, Q, str]] = None,
  ) -> int:
      """Inserts a new record, or updates the existing one when the insert
      conflicts, using the specified data.

      Configures this query set for ConflictAction.UPDATE through
      on_conflict(..) and then performs a regular insert(..).

      Arguments:
          conflict_target:
              Fields to pass into the ON CONFLICT clause.

          fields:
              Fields to insert/update.

          index_predicate:
              The index predicate to satisfy an arbiter partial index
              (i.e. what partial index to use for checking conflicts)

          using:
              The name of the database connection to
              use for this query.

          update_condition:
              Only update if this SQL expression evaluates to true.

      Returns:
          The primary key of the row that was created/updated.
      """

      conflict_options = {
          "index_predicate": index_predicate,
          "update_condition": update_condition,
      }
      self.on_conflict(
          conflict_target, ConflictAction.UPDATE, **conflict_options
      )

      return self.insert(using=using, **fields)
  def upsert_and_get(
      self,
      conflict_target: ConflictTarget,
      fields: dict,
      index_predicate: Optional[Union[Expression, Q, str]] = None,
      using: Optional[str] = None,
      update_condition: Optional[Union[Expression, Q, str]] = None,
  ):
      """Inserts a new record, or updates the existing one when the insert
      conflicts, using the specified data, and then gets the row.

      Configures this query set for ConflictAction.UPDATE through
      on_conflict(..) and then performs a regular insert_and_get(..).

      Arguments:
          conflict_target:
              Fields to pass into the ON CONFLICT clause.

          fields:
              Fields to insert/update.

          index_predicate:
              The index predicate to satisfy an arbiter partial index
              (i.e. what partial index to use for checking conflicts)

          using:
              The name of the database connection to
              use for this query.

          update_condition:
              Only update if this SQL expression evaluates to true.

      Returns:
          The model instance representing the row
          that was created/updated.
      """

      conflict_options = {
          "index_predicate": index_predicate,
          "update_condition": update_condition,
      }
      self.on_conflict(
          conflict_target, ConflictAction.UPDATE, **conflict_options
      )

      return self.insert_and_get(using=using, **fields)
  def bulk_upsert(
      self,
      conflict_target: ConflictTarget,
      rows: Iterable[Dict],
      index_predicate: Optional[Union[Expression, Q, str]] = None,
      return_model: bool = False,
      using: Optional[str] = None,
      update_condition: Optional[Union[Expression, Q, str]] = None,
  ):
      """Inserts a set of new records, updating the existing ones for the
      rows that conflict, using the specified data.

      Configures this query set for ConflictAction.UPDATE through
      on_conflict(..) and then performs a regular bulk_insert(..).

      Arguments:
          conflict_target:
              Fields to pass into the ON CONFLICT clause.

          rows:
              Rows to upsert.

          index_predicate:
              The index predicate to satisfy an arbiter partial index
              (i.e. what partial index to use for checking conflicts)

          return_model (default: False):
              If model instances should be returned rather than
              just dicts.

          using:
              The name of the database connection to use
              for this query.

          update_condition:
              Only update if this SQL expression evaluates to true.

      Returns:
          A list of either the dicts of the rows upserted, including the pk or
          the models of the rows upserted
      """

      conflict_options = {
          "index_predicate": index_predicate,
          "update_condition": update_condition,
      }
      self.on_conflict(
          conflict_target, ConflictAction.UPDATE, **conflict_options
      )

      return self.bulk_insert(rows, return_model=return_model, using=using)
  def _create_model_instance(
      self, field_values: dict, using: str, apply_converters: bool = True
  ):
      """Builds an instance of the model from the specified field values.

      Meant to be used after the row was inserted into the database. The
      instance that is returned is marked as "saved" on the specified
      database connection.

      Arguments:
          field_values:
              The values to initialize the model with, keyed by
              attribute name. The dictionary itself is left untouched;
              a shallow copy is what gets converted.

          using:
              The name of the database connection the
              instance is associated with.

          apply_converters (default: True):
              Whether the values of the model's local concrete fields
              should be passed through the database converters first.

      Returns:
          The new model instance.
      """

      values = field_values.copy()

      if apply_converters:
          connection = connections[using]
          db_table = self.model._meta.db_table

          for field in self.model._meta.local_concrete_fields:
              attname = field.attname
              if attname not in values:
                  continue

              # converters can come from the field itself as well as
              # from the database back-end, field converters run first
              column = field.get_col(db_table)
              field_converters = field.get_db_converters(connection)
              backend_converters = connection.ops.get_db_converters(column)

              for convert in field_converters + backend_converters:
                  values[attname] = convert(values[attname], column, connection)

      instance = self.model(**values)

      # make the instance look like it was loaded from/saved to `using`
      instance._state.db = using
      instance._state.adding = False

      return instance
  def _build_insert_compiler(
      self, rows: Iterable[Dict], using: Optional[str] = None
  ):
      """Builds the SQL compiler for an insert query.

      Arguments:
          rows:
              An iterable of dictionaries, where each entry
              describes a record to insert. Must contain at
              least one entry; the first entry is taken with
              next(..), which raises StopIteration when there
              is none.

          using:
              The name of the database connection to use
              for this query.

      Returns:
          The SQL compiler for the insert.

      Raises:
          SuspiciousOperation:
              When a row does not have the same number
              of fields as the first row.
      """

      # an explicit connection wins, then the one this query set is
      # bound to, and only then the db router is asked
      if not using:
          using = self._db or router.db_for_write(self.model, **self._hints)

      # the first row is the reference for all the others, both for
      # the number of fields and for the fields to insert/update
      row_iterator = iter(rows)
      first_row = next(row_iterator)
      expected_field_count = len(first_row)

      # create model objects, and while at it, detect cases such as:
      #   [dict(first_name='swen'), dict(fist_name='swen', last_name='kooij')]
      # each row has to specify the exact same amount of fields/columns
      objs = []
      for index, row in enumerate(chain((first_row,), row_iterator)):
          if len(row) != expected_field_count:
              raise SuspiciousOperation(
                  (
                      "In bulk upserts, you cannot have rows with different field "
                      "configurations. Row {0} has a different field config than "
                      "the first row."
                  ).format(index)
              )

          objs.append(
              self._create_model_instance(row, using, apply_converters=False)
          )

      # get the fields to be used during update/insert
      insert_fields, update_fields = self._get_upsert_fields(first_row)

      # build a normal insert query and hand it the conflict settings
      query = PostgresInsertQuery(self.model)
      query.conflict_target = self.conflict_target
      query.conflict_action = self.conflict_action
      query.conflict_update_condition = self.conflict_update_condition
      query.index_predicate = self.index_predicate
      query.values(objs, insert_fields, update_fields)

      return query.get_compiler(using)
  def _is_magical_field(self, model_instance, field, is_insert: bool):
      """Tells whether the field changes its own value during pre_save.

      "Magical" means that the value of the field on the model
      instance is different after the field's pre_save ran than
      it was before. Note that pre_save is really executed, so
      the model instance can be modified by this check.

      Arguments:
          model_instance:
              The model instance the field is defined on.

          field:
              The field to check.

          is_insert:
              Whether to run pre_save as if this were an
              insert (True) or an update (False).

      Returns:
          True when the value of the field changed.
      """

      attribute = field.name

      value_before = getattr(model_instance, attribute, None)
      field.pre_save(model_instance, is_insert)
      value_after = getattr(model_instance, attribute, None)

      return value_before != value_after
  def _get_upsert_fields(self, kwargs):
      """Works out which fields to use in an upsert.

      The fields are split into a group of "insert fields" and a
      group of "update fields":

          INSERT INTO bla ("val1", "val2") ON CONFLICT DO UPDATE SET val1 = EXCLUDED.val1
                          ^^^^^^^^^^^^^^                             ^^^^^^^^^^^^^^^^^^^^
                          insert_fields                              update_fields

      Often, fields appear in both lists. But, for example,
      a :see:DateTime field with `auto_now_add=True` set, will
      only appear in "insert_fields", since it won't be set
      on existing rows.

      Other than that, the user specifies a list of fields
      in the upsert() call. That might not be all fields. The
      user could decide to leave out optional fields. If we
      end up doing an update, we don't want to overwrite
      those non-specified fields.

      We cannot just take the list of fields the user
      specifies, because as mentioned, some fields
      make modifications to the model on their own.

      We'll have to detect which fields make modifications
      and include them in the list of insert/update fields.

      Arguments:
          kwargs:
              The field values of a single row, as specified
              by the user.

      Returns:
          A tuple of the list of insert fields and the
          list of update fields.
      """

      model_instance = self.model(**kwargs)
      pk_specified = "pk" in kwargs

      insert_fields, update_fields = [], []

      # NOTE: the order of the checks below matters, a field that
      # matches an earlier check never reaches the later ones
      for field in model_instance._meta.local_concrete_fields:
          # explicitly specified, by field name or by column name
          if field.name in kwargs or field.column in kwargs:
              insert_fields.append(field)
              update_fields.append(field)
              continue

          # not specified, but it has a default: insert only
          if field.default != NOT_PROVIDED:
              insert_fields.append(field)
              continue

          # special handling for 'pk' which always refers to
          # the primary key, so if the user specifies `pk`
          # instead of a concrete field, we have to handle that
          if field.primary_key is True and pk_specified:
              insert_fields.append(field)
              update_fields.append(field)
              continue

          # everything else only takes part when the field
          # produces a value on its own during pre_save
          if self._is_magical_field(model_instance, field, is_insert=True):
              insert_fields.append(field)

          if self._is_magical_field(model_instance, field, is_insert=False):
              update_fields.append(field)

      return insert_fields, update_fields