compressionchunker.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /**
  2. * Copyright (c) 2018-present, Gregory Szorc
  3. * All rights reserved.
  4. *
  5. * This software may be modified and distributed under the terms
  6. * of the BSD license. See the LICENSE file for details.
  7. */
  8. #include "python-zstandard.h"
  9. extern PyObject* ZstdError;
  10. PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
  11. "Iterator of output chunks from ZstdCompressionChunker.\n"
  12. );
  13. static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
  14. Py_XDECREF(self->chunker);
  15. PyObject_Del(self);
  16. }
  17. static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
  18. Py_INCREF(self);
  19. return self;
  20. }
  21. static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
  22. size_t zresult;
  23. PyObject* chunk;
  24. ZstdCompressionChunker* chunker = self->chunker;
  25. ZSTD_EndDirective zFlushMode;
  26. if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
  27. PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
  28. return NULL;
  29. }
  30. if (chunker->finished) {
  31. return NULL;
  32. }
  33. /* If we have data left in the input, consume it. */
  34. while (chunker->input.pos < chunker->input.size) {
  35. Py_BEGIN_ALLOW_THREADS
  36. zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
  37. &chunker->input, ZSTD_e_continue);
  38. Py_END_ALLOW_THREADS
  39. /* Input is fully consumed. */
  40. if (chunker->input.pos == chunker->input.size) {
  41. chunker->input.src = NULL;
  42. chunker->input.pos = 0;
  43. chunker->input.size = 0;
  44. PyBuffer_Release(&chunker->inBuffer);
  45. }
  46. if (ZSTD_isError(zresult)) {
  47. PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
  48. return NULL;
  49. }
  50. /* If it produced a full output chunk, emit it. */
  51. if (chunker->output.pos == chunker->output.size) {
  52. chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
  53. if (!chunk) {
  54. return NULL;
  55. }
  56. chunker->output.pos = 0;
  57. return chunk;
  58. }
  59. /* Else continue to compress available input data. */
  60. }
  61. /* We also need this here for the special case of an empty input buffer. */
  62. if (chunker->input.pos == chunker->input.size) {
  63. chunker->input.src = NULL;
  64. chunker->input.pos = 0;
  65. chunker->input.size = 0;
  66. PyBuffer_Release(&chunker->inBuffer);
  67. }
  68. /* No more input data. A partial chunk may be in chunker->output.
  69. * If we're in normal compression mode, we're done. Otherwise if we're in
  70. * flush or finish mode, we need to emit what data remains.
  71. */
  72. if (self->mode == compressionchunker_mode_normal) {
  73. /* We don't need to set StopIteration. */
  74. return NULL;
  75. }
  76. if (self->mode == compressionchunker_mode_flush) {
  77. zFlushMode = ZSTD_e_flush;
  78. }
  79. else if (self->mode == compressionchunker_mode_finish) {
  80. zFlushMode = ZSTD_e_end;
  81. }
  82. else {
  83. PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
  84. return NULL;
  85. }
  86. Py_BEGIN_ALLOW_THREADS
  87. zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
  88. &chunker->input, zFlushMode);
  89. Py_END_ALLOW_THREADS
  90. if (ZSTD_isError(zresult)) {
  91. PyErr_Format(ZstdError, "zstd compress error: %s",
  92. ZSTD_getErrorName(zresult));
  93. return NULL;
  94. }
  95. if (!zresult && chunker->output.pos == 0) {
  96. return NULL;
  97. }
  98. chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
  99. if (!chunk) {
  100. return NULL;
  101. }
  102. chunker->output.pos = 0;
  103. if (!zresult && self->mode == compressionchunker_mode_finish) {
  104. chunker->finished = 1;
  105. }
  106. return chunk;
  107. }
  108. PyTypeObject ZstdCompressionChunkerIteratorType = {
  109. PyVarObject_HEAD_INIT(NULL, 0)
  110. "zstd.ZstdCompressionChunkerIterator", /* tp_name */
  111. sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
  112. 0, /* tp_itemsize */
  113. (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
  114. 0, /* tp_print */
  115. 0, /* tp_getattr */
  116. 0, /* tp_setattr */
  117. 0, /* tp_compare */
  118. 0, /* tp_repr */
  119. 0, /* tp_as_number */
  120. 0, /* tp_as_sequence */
  121. 0, /* tp_as_mapping */
  122. 0, /* tp_hash */
  123. 0, /* tp_call */
  124. 0, /* tp_str */
  125. 0, /* tp_getattro */
  126. 0, /* tp_setattro */
  127. 0, /* tp_as_buffer */
  128. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  129. ZstdCompressionChunkerIterator__doc__, /* tp_doc */
  130. 0, /* tp_traverse */
  131. 0, /* tp_clear */
  132. 0, /* tp_richcompare */
  133. 0, /* tp_weaklistoffset */
  134. ZstdCompressionChunkerIterator_iter, /* tp_iter */
  135. (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
  136. 0, /* tp_methods */
  137. 0, /* tp_members */
  138. 0, /* tp_getset */
  139. 0, /* tp_base */
  140. 0, /* tp_dict */
  141. 0, /* tp_descr_get */
  142. 0, /* tp_descr_set */
  143. 0, /* tp_dictoffset */
  144. 0, /* tp_init */
  145. 0, /* tp_alloc */
  146. PyType_GenericNew, /* tp_new */
  147. };
  148. PyDoc_STRVAR(ZstdCompressionChunker__doc__,
  149. "Compress chunks iteratively into exact chunk sizes.\n"
  150. );
  151. static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
  152. PyBuffer_Release(&self->inBuffer);
  153. self->input.src = NULL;
  154. PyMem_Free(self->output.dst);
  155. self->output.dst = NULL;
  156. Py_XDECREF(self->compressor);
  157. PyObject_Del(self);
  158. }
  159. static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
  160. static char* kwlist[] = {
  161. "data",
  162. NULL
  163. };
  164. ZstdCompressionChunkerIterator* result;
  165. if (self->finished) {
  166. PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
  167. return NULL;
  168. }
  169. if (self->inBuffer.obj) {
  170. PyErr_SetString(ZstdError,
  171. "cannot perform operation before consuming output from previous operation");
  172. return NULL;
  173. }
  174. #if PY_MAJOR_VERSION >= 3
  175. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
  176. #else
  177. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
  178. #endif
  179. kwlist, &self->inBuffer)) {
  180. return NULL;
  181. }
  182. if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
  183. PyErr_SetString(PyExc_ValueError,
  184. "data buffer should be contiguous and have at most one dimension");
  185. PyBuffer_Release(&self->inBuffer);
  186. return NULL;
  187. }
  188. result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
  189. if (!result) {
  190. PyBuffer_Release(&self->inBuffer);
  191. return NULL;
  192. }
  193. self->input.src = self->inBuffer.buf;
  194. self->input.size = self->inBuffer.len;
  195. self->input.pos = 0;
  196. result->chunker = self;
  197. Py_INCREF(result->chunker);
  198. result->mode = compressionchunker_mode_normal;
  199. return result;
  200. }
  201. static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
  202. ZstdCompressionChunkerIterator* result;
  203. if (self->finished) {
  204. PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
  205. return NULL;
  206. }
  207. if (self->inBuffer.obj) {
  208. PyErr_SetString(ZstdError,
  209. "cannot call finish() before consuming output from previous operation");
  210. return NULL;
  211. }
  212. result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
  213. if (!result) {
  214. return NULL;
  215. }
  216. result->chunker = self;
  217. Py_INCREF(result->chunker);
  218. result->mode = compressionchunker_mode_finish;
  219. return result;
  220. }
  221. static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
  222. ZstdCompressionChunkerIterator* result;
  223. if (self->finished) {
  224. PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
  225. return NULL;
  226. }
  227. if (self->inBuffer.obj) {
  228. PyErr_SetString(ZstdError,
  229. "cannot call flush() before consuming output from previous operation");
  230. return NULL;
  231. }
  232. result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
  233. if (!result) {
  234. return NULL;
  235. }
  236. result->chunker = self;
  237. Py_INCREF(result->chunker);
  238. result->mode = compressionchunker_mode_flush;
  239. return result;
  240. }
  241. static PyMethodDef ZstdCompressionChunker_methods[] = {
  242. { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
  243. PyDoc_STR("compress data") },
  244. { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
  245. PyDoc_STR("finish compression operation") },
  246. { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
  247. PyDoc_STR("finish compression operation") },
  248. { NULL, NULL }
  249. };
  250. PyTypeObject ZstdCompressionChunkerType = {
  251. PyVarObject_HEAD_INIT(NULL, 0)
  252. "zstd.ZstdCompressionChunkerType", /* tp_name */
  253. sizeof(ZstdCompressionChunker), /* tp_basicsize */
  254. 0, /* tp_itemsize */
  255. (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
  256. 0, /* tp_print */
  257. 0, /* tp_getattr */
  258. 0, /* tp_setattr */
  259. 0, /* tp_compare */
  260. 0, /* tp_repr */
  261. 0, /* tp_as_number */
  262. 0, /* tp_as_sequence */
  263. 0, /* tp_as_mapping */
  264. 0, /* tp_hash */
  265. 0, /* tp_call */
  266. 0, /* tp_str */
  267. 0, /* tp_getattro */
  268. 0, /* tp_setattro */
  269. 0, /* tp_as_buffer */
  270. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
  271. ZstdCompressionChunker__doc__, /* tp_doc */
  272. 0, /* tp_traverse */
  273. 0, /* tp_clear */
  274. 0, /* tp_richcompare */
  275. 0, /* tp_weaklistoffset */
  276. 0, /* tp_iter */
  277. 0, /* tp_iternext */
  278. ZstdCompressionChunker_methods, /* tp_methods */
  279. 0, /* tp_members */
  280. 0, /* tp_getset */
  281. 0, /* tp_base */
  282. 0, /* tp_dict */
  283. 0, /* tp_descr_get */
  284. 0, /* tp_descr_set */
  285. 0, /* tp_dictoffset */
  286. 0, /* tp_init */
  287. 0, /* tp_alloc */
  288. PyType_GenericNew, /* tp_new */
  289. };
  290. void compressionchunker_module_init(PyObject* module) {
  291. Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
  292. if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
  293. return;
  294. }
  295. Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
  296. if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
  297. return;
  298. }
  299. }