compressionchunker.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. /**
  2. * Copyright (c) 2018-present, Gregory Szorc
  3. * All rights reserved.
  4. *
  5. * This software may be modified and distributed under the terms
  6. * of the BSD license. See the LICENSE file for details.
  7. */
  8. #include "python-zstandard.h"
  9. extern PyObject *ZstdError;
  10. static void
  11. ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator *self) {
  12. Py_XDECREF(self->chunker);
  13. PyObject_Del(self);
  14. }
  15. static PyObject *ZstdCompressionChunkerIterator_iter(PyObject *self) {
  16. Py_INCREF(self);
  17. return self;
  18. }
  19. static PyObject *
  20. ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator *self) {
  21. size_t zresult;
  22. PyObject *chunk;
  23. ZstdCompressionChunker *chunker = self->chunker;
  24. ZSTD_EndDirective zFlushMode;
  25. if (self->mode != compressionchunker_mode_normal &&
  26. chunker->input.pos != chunker->input.size) {
  27. PyErr_SetString(ZstdError, "input should have been fully consumed "
  28. "before calling flush() or finish()");
  29. return NULL;
  30. }
  31. if (chunker->finished) {
  32. return NULL;
  33. }
  34. /* If we have data left in the input, consume it. */
  35. while (chunker->input.pos < chunker->input.size) {
  36. Py_BEGIN_ALLOW_THREADS zresult =
  37. ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
  38. &chunker->input, ZSTD_e_continue);
  39. Py_END_ALLOW_THREADS
  40. /* Input is fully consumed. */
  41. if (chunker->input.pos == chunker->input.size) {
  42. chunker->input.src = NULL;
  43. chunker->input.pos = 0;
  44. chunker->input.size = 0;
  45. PyBuffer_Release(&chunker->inBuffer);
  46. }
  47. if (ZSTD_isError(zresult)) {
  48. PyErr_Format(ZstdError, "zstd compress error: %s",
  49. ZSTD_getErrorName(zresult));
  50. return NULL;
  51. }
  52. /* If it produced a full output chunk, emit it. */
  53. if (chunker->output.pos == chunker->output.size) {
  54. chunk = PyBytes_FromStringAndSize(chunker->output.dst,
  55. chunker->output.pos);
  56. if (!chunk) {
  57. return NULL;
  58. }
  59. chunker->output.pos = 0;
  60. return chunk;
  61. }
  62. /* Else continue to compress available input data. */
  63. }
  64. /* We also need this here for the special case of an empty input buffer. */
  65. if (chunker->input.pos == chunker->input.size) {
  66. chunker->input.src = NULL;
  67. chunker->input.pos = 0;
  68. chunker->input.size = 0;
  69. PyBuffer_Release(&chunker->inBuffer);
  70. }
  71. /* No more input data. A partial chunk may be in chunker->output.
  72. * If we're in normal compression mode, we're done. Otherwise if we're in
  73. * flush or finish mode, we need to emit what data remains.
  74. */
  75. if (self->mode == compressionchunker_mode_normal) {
  76. /* We don't need to set StopIteration. */
  77. return NULL;
  78. }
  79. if (self->mode == compressionchunker_mode_flush) {
  80. zFlushMode = ZSTD_e_flush;
  81. }
  82. else if (self->mode == compressionchunker_mode_finish) {
  83. zFlushMode = ZSTD_e_end;
  84. }
  85. else {
  86. PyErr_SetString(ZstdError,
  87. "unhandled compression mode; this should never happen");
  88. return NULL;
  89. }
  90. Py_BEGIN_ALLOW_THREADS zresult =
  91. ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
  92. &chunker->input, zFlushMode);
  93. Py_END_ALLOW_THREADS
  94. if (ZSTD_isError(zresult)) {
  95. PyErr_Format(ZstdError, "zstd compress error: %s",
  96. ZSTD_getErrorName(zresult));
  97. return NULL;
  98. }
  99. if (!zresult && chunker->output.pos == 0) {
  100. return NULL;
  101. }
  102. chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
  103. if (!chunk) {
  104. return NULL;
  105. }
  106. chunker->output.pos = 0;
  107. if (!zresult && self->mode == compressionchunker_mode_finish) {
  108. chunker->finished = 1;
  109. }
  110. return chunk;
  111. }
  112. PyType_Slot ZstdCompressionChunkerIteratorSlots[] = {
  113. {Py_tp_dealloc, ZstdCompressionChunkerIterator_dealloc},
  114. {Py_tp_iter, ZstdCompressionChunkerIterator_iter},
  115. {Py_tp_iternext, ZstdCompressionChunkerIterator_iternext},
  116. {Py_tp_new, PyType_GenericNew},
  117. {0, NULL},
  118. };
  119. PyType_Spec ZstdCompressionChunkerIteratorSpec = {
  120. "zstd.ZstdCompressionChunkerIterator",
  121. sizeof(ZstdCompressionChunkerIterator),
  122. 0,
  123. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
  124. ZstdCompressionChunkerIteratorSlots,
  125. };
  126. PyTypeObject *ZstdCompressionChunkerIteratorType;
  127. static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker *self) {
  128. PyBuffer_Release(&self->inBuffer);
  129. self->input.src = NULL;
  130. PyMem_Free(self->output.dst);
  131. self->output.dst = NULL;
  132. Py_XDECREF(self->compressor);
  133. PyObject_Del(self);
  134. }
  135. static ZstdCompressionChunkerIterator *
  136. ZstdCompressionChunker_compress(ZstdCompressionChunker *self, PyObject *args,
  137. PyObject *kwargs) {
  138. static char *kwlist[] = {"data", NULL};
  139. ZstdCompressionChunkerIterator *result;
  140. if (self->finished) {
  141. PyErr_SetString(ZstdError,
  142. "cannot call compress() after compression finished");
  143. return NULL;
  144. }
  145. if (self->inBuffer.obj) {
  146. PyErr_SetString(ZstdError, "cannot perform operation before consuming "
  147. "output from previous operation");
  148. return NULL;
  149. }
  150. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", kwlist,
  151. &self->inBuffer)) {
  152. return NULL;
  153. }
  154. result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
  155. (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
  156. if (!result) {
  157. PyBuffer_Release(&self->inBuffer);
  158. return NULL;
  159. }
  160. self->input.src = self->inBuffer.buf;
  161. self->input.size = self->inBuffer.len;
  162. self->input.pos = 0;
  163. result->chunker = self;
  164. Py_INCREF(result->chunker);
  165. result->mode = compressionchunker_mode_normal;
  166. return result;
  167. }
  168. static ZstdCompressionChunkerIterator *
  169. ZstdCompressionChunker_finish(ZstdCompressionChunker *self) {
  170. ZstdCompressionChunkerIterator *result;
  171. if (self->finished) {
  172. PyErr_SetString(ZstdError,
  173. "cannot call finish() after compression finished");
  174. return NULL;
  175. }
  176. if (self->inBuffer.obj) {
  177. PyErr_SetString(ZstdError, "cannot call finish() before consuming "
  178. "output from previous operation");
  179. return NULL;
  180. }
  181. result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
  182. (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
  183. if (!result) {
  184. return NULL;
  185. }
  186. result->chunker = self;
  187. Py_INCREF(result->chunker);
  188. result->mode = compressionchunker_mode_finish;
  189. return result;
  190. }
  191. static ZstdCompressionChunkerIterator *
  192. ZstdCompressionChunker_flush(ZstdCompressionChunker *self, PyObject *args,
  193. PyObject *kwargs) {
  194. ZstdCompressionChunkerIterator *result;
  195. if (self->finished) {
  196. PyErr_SetString(ZstdError,
  197. "cannot call flush() after compression finished");
  198. return NULL;
  199. }
  200. if (self->inBuffer.obj) {
  201. PyErr_SetString(ZstdError, "cannot call flush() before consuming "
  202. "output from previous operation");
  203. return NULL;
  204. }
  205. result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
  206. (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
  207. if (!result) {
  208. return NULL;
  209. }
  210. result->chunker = self;
  211. Py_INCREF(result->chunker);
  212. result->mode = compressionchunker_mode_flush;
  213. return result;
  214. }
  215. static PyMethodDef ZstdCompressionChunker_methods[] = {
  216. {"compress", (PyCFunction)ZstdCompressionChunker_compress,
  217. METH_VARARGS | METH_KEYWORDS, PyDoc_STR("compress data")},
  218. {"finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
  219. PyDoc_STR("finish compression operation")},
  220. {"flush", (PyCFunction)ZstdCompressionChunker_flush,
  221. METH_VARARGS | METH_KEYWORDS, PyDoc_STR("finish compression operation")},
  222. {NULL, NULL}};
  223. PyType_Slot ZstdCompressionChunkerSlots[] = {
  224. {Py_tp_dealloc, ZstdCompressionChunker_dealloc},
  225. {Py_tp_methods, ZstdCompressionChunker_methods},
  226. {Py_tp_new, PyType_GenericNew},
  227. {0, NULL},
  228. };
  229. PyType_Spec ZstdCompressionChunkerSpec = {
  230. "zstd.ZstdCompressionChunkerType",
  231. sizeof(ZstdCompressionChunker),
  232. 0,
  233. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
  234. ZstdCompressionChunkerSlots,
  235. };
  236. PyTypeObject *ZstdCompressionChunkerType;
  237. void compressionchunker_module_init(PyObject *module) {
  238. ZstdCompressionChunkerIteratorType =
  239. (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerIteratorSpec);
  240. if (PyType_Ready(ZstdCompressionChunkerIteratorType) < 0) {
  241. return;
  242. }
  243. ZstdCompressionChunkerType =
  244. (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerSpec);
  245. if (PyType_Ready(ZstdCompressionChunkerType) < 0) {
  246. return;
  247. }
  248. }