zstd.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. /**
  2. * Copyright (c) 2016-present, Gregory Szorc
  3. * All rights reserved.
  4. *
  5. * This software may be modified and distributed under the terms
  6. * of the BSD license. See the LICENSE file for details.
  7. */
  8. /* A Python C extension for Zstandard. */
  9. #if defined(_WIN32)
  10. #define WIN32_LEAN_AND_MEAN
  11. #include <Windows.h>
  12. #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
  13. #include <sys/types.h>
  14. #include <sys/sysctl.h>
  15. #endif
  16. #include "python-zstandard.h"
/* Exception object passed to PyErr_SetString()/PyErr_Format() by the
   functions in this file when a zstd call reports an error. It is not
   assigned anywhere in this part of the file. */
PyObject *ZstdError;
  18. PyDoc_STRVAR(estimate_decompression_context_size__doc__,
  19. "estimate_decompression_context_size()\n"
  20. "\n"
  21. "Estimate the amount of memory allocated to a decompression context.\n"
  22. );
  23. static PyObject* estimate_decompression_context_size(PyObject* self) {
  24. return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
  25. }
  26. PyDoc_STRVAR(frame_content_size__doc__,
  27. "frame_content_size(data)\n"
  28. "\n"
  29. "Obtain the decompressed size of a frame."
  30. );
  31. static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
  32. static char* kwlist[] = {
  33. "source",
  34. NULL
  35. };
  36. Py_buffer source;
  37. PyObject* result = NULL;
  38. unsigned long long size;
  39. #if PY_MAJOR_VERSION >= 3
  40. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
  41. #else
  42. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
  43. #endif
  44. kwlist, &source)) {
  45. return NULL;
  46. }
  47. if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
  48. PyErr_SetString(PyExc_ValueError,
  49. "data buffer should be contiguous and have at most one dimension");
  50. goto finally;
  51. }
  52. size = ZSTD_getFrameContentSize(source.buf, source.len);
  53. if (size == ZSTD_CONTENTSIZE_ERROR) {
  54. PyErr_SetString(ZstdError, "error when determining content size");
  55. }
  56. else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
  57. result = PyLong_FromLong(-1);
  58. }
  59. else {
  60. result = PyLong_FromUnsignedLongLong(size);
  61. }
  62. finally:
  63. PyBuffer_Release(&source);
  64. return result;
  65. }
  66. PyDoc_STRVAR(frame_header_size__doc__,
  67. "frame_header_size(data)\n"
  68. "\n"
  69. "Obtain the size of a frame header.\n"
  70. );
  71. static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
  72. static char* kwlist[] = {
  73. "source",
  74. NULL
  75. };
  76. Py_buffer source;
  77. PyObject* result = NULL;
  78. size_t zresult;
  79. #if PY_MAJOR_VERSION >= 3
  80. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
  81. #else
  82. if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
  83. #endif
  84. kwlist, &source)) {
  85. return NULL;
  86. }
  87. if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
  88. PyErr_SetString(PyExc_ValueError,
  89. "data buffer should be contiguous and have at most one dimension");
  90. goto finally;
  91. }
  92. zresult = ZSTD_frameHeaderSize(source.buf, source.len);
  93. if (ZSTD_isError(zresult)) {
  94. PyErr_Format(ZstdError, "could not determine frame header size: %s",
  95. ZSTD_getErrorName(zresult));
  96. }
  97. else {
  98. result = PyLong_FromSize_t(zresult);
  99. }
  100. finally:
  101. PyBuffer_Release(&source);
  102. return result;
  103. }
  104. PyDoc_STRVAR(get_frame_parameters__doc__,
  105. "get_frame_parameters(data)\n"
  106. "\n"
  107. "Obtains a ``FrameParameters`` instance by parsing data.\n");
  108. PyDoc_STRVAR(train_dictionary__doc__,
  109. "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
  110. " threads=None,notifications=0, dict_id=0, level=0)\n"
  111. "\n"
  112. "Train a dictionary from sample data using the COVER algorithm.\n"
  113. "\n"
  114. "A compression dictionary of size ``dict_size`` will be created from the\n"
  115. "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
  116. "\n"
  117. "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
  118. "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
  119. "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
  120. "``d`` must be less than or equal to ``k``.\n"
  121. "\n"
  122. "``steps`` can be specified to control the number of steps through potential\n"
  123. "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
  124. "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
  125. "will be varied in this mode.\n"
  126. "\n"
  127. "``threads`` can specify how many threads to use to test various ``k`` and\n"
  128. "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
  129. "a single thread is used.\n"
  130. "\n"
  131. "When ``k`` and ``d`` are not defined, default values are used and the\n"
  132. "algorithm will perform multiple iterations - or steps - to try to find\n"
  133. "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
  134. "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
  135. "multiple ``k`` and ``d`` variations.\n"
  136. );
  137. static char zstd_doc[] = "Interface to zstandard";
  138. static PyMethodDef zstd_methods[] = {
  139. { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
  140. METH_NOARGS, estimate_decompression_context_size__doc__ },
  141. { "frame_content_size", (PyCFunction)frame_content_size,
  142. METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
  143. { "frame_header_size", (PyCFunction)frame_header_size,
  144. METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
  145. { "get_frame_parameters", (PyCFunction)get_frame_parameters,
  146. METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
  147. { "train_dictionary", (PyCFunction)train_dictionary,
  148. METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
  149. { NULL, NULL }
  150. };
  151. void bufferutil_module_init(PyObject* mod);
  152. void compressobj_module_init(PyObject* mod);
  153. void compressor_module_init(PyObject* mod);
  154. void compressionparams_module_init(PyObject* mod);
  155. void constants_module_init(PyObject* mod);
  156. void compressionchunker_module_init(PyObject* mod);
  157. void compressiondict_module_init(PyObject* mod);
  158. void compressionreader_module_init(PyObject* mod);
  159. void compressionwriter_module_init(PyObject* mod);
  160. void compressoriterator_module_init(PyObject* mod);
  161. void decompressor_module_init(PyObject* mod);
  162. void decompressobj_module_init(PyObject* mod);
  163. void decompressionreader_module_init(PyObject *mod);
  164. void decompressionwriter_module_init(PyObject* mod);
  165. void decompressoriterator_module_init(PyObject* mod);
  166. void frameparams_module_init(PyObject* mod);
  167. void zstd_module_init(PyObject* m) {
  168. /* python-zstandard relies on unstable zstd C API features. This means
  169. that changes in zstd may break expectations in python-zstandard.
  170. python-zstandard is distributed with a copy of the zstd sources.
  171. python-zstandard is only guaranteed to work with the bundled version
  172. of zstd.
  173. However, downstream redistributors or packagers may unbundle zstd
  174. from python-zstandard. This can result in a mismatch between zstd
  175. versions and API semantics. This essentially "voids the warranty"
  176. of python-zstandard and may cause undefined behavior.
  177. We detect this mismatch here and refuse to load the module if this
  178. scenario is detected.
  179. */
  180. if (ZSTD_VERSION_NUMBER != 10506 || ZSTD_versionNumber() != 10506) {
  181. PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
  182. return;
  183. }
  184. bufferutil_module_init(m);
  185. compressionparams_module_init(m);
  186. compressiondict_module_init(m);
  187. compressobj_module_init(m);
  188. compressor_module_init(m);
  189. compressionchunker_module_init(m);
  190. compressionreader_module_init(m);
  191. compressionwriter_module_init(m);
  192. compressoriterator_module_init(m);
  193. constants_module_init(m);
  194. decompressor_module_init(m);
  195. decompressobj_module_init(m);
  196. decompressionreader_module_init(m);
  197. decompressionwriter_module_init(m);
  198. decompressoriterator_module_init(m);
  199. frameparams_module_init(m);
  200. }
  201. #if defined(__GNUC__) && (__GNUC__ >= 4)
  202. # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
  203. #else
  204. # define PYTHON_ZSTD_VISIBILITY
  205. #endif
  206. #if PY_MAJOR_VERSION >= 3
  207. static struct PyModuleDef zstd_module = {
  208. PyModuleDef_HEAD_INIT,
  209. "zstd",
  210. zstd_doc,
  211. -1,
  212. zstd_methods
  213. };
  214. PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
  215. PyObject *m = PyModule_Create(&zstd_module);
  216. if (m) {
  217. zstd_module_init(m);
  218. if (PyErr_Occurred()) {
  219. Py_DECREF(m);
  220. m = NULL;
  221. }
  222. }
  223. return m;
  224. }
  225. #else
  226. PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
  227. PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
  228. if (m) {
  229. zstd_module_init(m);
  230. }
  231. }
  232. #endif
  233. /* Attempt to resolve the number of CPUs in the system. */
  234. int cpu_count() {
  235. int count = 0;
  236. #if defined(_WIN32)
  237. SYSTEM_INFO si;
  238. si.dwNumberOfProcessors = 0;
  239. GetSystemInfo(&si);
  240. count = si.dwNumberOfProcessors;
  241. #elif defined(__APPLE__)
  242. int num;
  243. size_t size = sizeof(int);
  244. if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
  245. count = num;
  246. }
  247. #elif defined(__linux__)
  248. count = sysconf(_SC_NPROCESSORS_ONLN);
  249. #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
  250. int mib[2];
  251. size_t len = sizeof(count);
  252. mib[0] = CTL_HW;
  253. mib[1] = HW_NCPU;
  254. if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
  255. count = 0;
  256. }
  257. #elif defined(__hpux)
  258. count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
  259. #endif
  260. return count;
  261. }
  262. size_t roundpow2(size_t i) {
  263. i--;
  264. i |= i >> 1;
  265. i |= i >> 2;
  266. i |= i >> 4;
  267. i |= i >> 8;
  268. i |= i >> 16;
  269. i++;
  270. return i;
  271. }
  272. /* Safer version of _PyBytes_Resize().
  273. *
  274. * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
  275. * we can get an object with a refcount > 1, even if it was just created
  276. * with PyBytes_FromStringAndSize()! That's because (at least) CPython
  277. * pre-allocates PyBytes instances of size 1 for every possible byte value.
  278. *
  279. * If non-0 is returned, obj may or may not be NULL.
  280. */
  281. int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
  282. PyObject* tmp;
  283. if ((*obj)->ob_refcnt == 1) {
  284. return _PyBytes_Resize(obj, size);
  285. }
  286. tmp = PyBytes_FromStringAndSize(NULL, size);
  287. if (!tmp) {
  288. return -1;
  289. }
  290. memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
  291. PyBytes_GET_SIZE(*obj));
  292. Py_DECREF(*obj);
  293. *obj = tmp;
  294. return 0;
  295. }