123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344 |
- /**
- * Copyright (c) 2016-present, Gregory Szorc
- * All rights reserved.
- *
- * This software may be modified and distributed under the terms
- * of the BSD license. See the LICENSE file for details.
- */
- /* A Python C extension for Zstandard. */
- #if defined(_WIN32)
- #define WIN32_LEAN_AND_MEAN
- #include <Windows.h>
- #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
- #include <sys/types.h>
- #include <sys/sysctl.h>
- #endif
- #include "python-zstandard.h"
- PyObject *ZstdError;
- PyDoc_STRVAR(estimate_decompression_context_size__doc__,
- "estimate_decompression_context_size()\n"
- "\n"
- "Estimate the amount of memory allocated to a decompression context.\n"
- );
- static PyObject* estimate_decompression_context_size(PyObject* self) {
- return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
- }
- PyDoc_STRVAR(frame_content_size__doc__,
- "frame_content_size(data)\n"
- "\n"
- "Obtain the decompressed size of a frame."
- );
- static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
- static char* kwlist[] = {
- "source",
- NULL
- };
- Py_buffer source;
- PyObject* result = NULL;
- unsigned long long size;
- #if PY_MAJOR_VERSION >= 3
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
- #else
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
- #endif
- kwlist, &source)) {
- return NULL;
- }
- if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
- PyErr_SetString(PyExc_ValueError,
- "data buffer should be contiguous and have at most one dimension");
- goto finally;
- }
- size = ZSTD_getFrameContentSize(source.buf, source.len);
- if (size == ZSTD_CONTENTSIZE_ERROR) {
- PyErr_SetString(ZstdError, "error when determining content size");
- }
- else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
- result = PyLong_FromLong(-1);
- }
- else {
- result = PyLong_FromUnsignedLongLong(size);
- }
- finally:
- PyBuffer_Release(&source);
- return result;
- }
- PyDoc_STRVAR(frame_header_size__doc__,
- "frame_header_size(data)\n"
- "\n"
- "Obtain the size of a frame header.\n"
- );
- static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
- static char* kwlist[] = {
- "source",
- NULL
- };
- Py_buffer source;
- PyObject* result = NULL;
- size_t zresult;
- #if PY_MAJOR_VERSION >= 3
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
- #else
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
- #endif
- kwlist, &source)) {
- return NULL;
- }
- if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
- PyErr_SetString(PyExc_ValueError,
- "data buffer should be contiguous and have at most one dimension");
- goto finally;
- }
- zresult = ZSTD_frameHeaderSize(source.buf, source.len);
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "could not determine frame header size: %s",
- ZSTD_getErrorName(zresult));
- }
- else {
- result = PyLong_FromSize_t(zresult);
- }
- finally:
- PyBuffer_Release(&source);
- return result;
- }
- PyDoc_STRVAR(get_frame_parameters__doc__,
- "get_frame_parameters(data)\n"
- "\n"
- "Obtains a ``FrameParameters`` instance by parsing data.\n");
- PyDoc_STRVAR(train_dictionary__doc__,
- "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
- " threads=None,notifications=0, dict_id=0, level=0)\n"
- "\n"
- "Train a dictionary from sample data using the COVER algorithm.\n"
- "\n"
- "A compression dictionary of size ``dict_size`` will be created from the\n"
- "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
- "\n"
- "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
- "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
- "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
- "``d`` must be less than or equal to ``k``.\n"
- "\n"
- "``steps`` can be specified to control the number of steps through potential\n"
- "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
- "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
- "will be varied in this mode.\n"
- "\n"
- "``threads`` can specify how many threads to use to test various ``k`` and\n"
- "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
- "a single thread is used.\n"
- "\n"
- "When ``k`` and ``d`` are not defined, default values are used and the\n"
- "algorithm will perform multiple iterations - or steps - to try to find\n"
- "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
- "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
- "multiple ``k`` and ``d`` variations.\n"
- );
- static char zstd_doc[] = "Interface to zstandard";
- static PyMethodDef zstd_methods[] = {
- { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
- METH_NOARGS, estimate_decompression_context_size__doc__ },
- { "frame_content_size", (PyCFunction)frame_content_size,
- METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
- { "frame_header_size", (PyCFunction)frame_header_size,
- METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
- { "get_frame_parameters", (PyCFunction)get_frame_parameters,
- METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
- { "train_dictionary", (PyCFunction)train_dictionary,
- METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
- { NULL, NULL }
- };
- void bufferutil_module_init(PyObject* mod);
- void compressobj_module_init(PyObject* mod);
- void compressor_module_init(PyObject* mod);
- void compressionparams_module_init(PyObject* mod);
- void constants_module_init(PyObject* mod);
- void compressionchunker_module_init(PyObject* mod);
- void compressiondict_module_init(PyObject* mod);
- void compressionreader_module_init(PyObject* mod);
- void compressionwriter_module_init(PyObject* mod);
- void compressoriterator_module_init(PyObject* mod);
- void decompressor_module_init(PyObject* mod);
- void decompressobj_module_init(PyObject* mod);
- void decompressionreader_module_init(PyObject *mod);
- void decompressionwriter_module_init(PyObject* mod);
- void decompressoriterator_module_init(PyObject* mod);
- void frameparams_module_init(PyObject* mod);
- void zstd_module_init(PyObject* m) {
- /* python-zstandard relies on unstable zstd C API features. This means
- that changes in zstd may break expectations in python-zstandard.
- python-zstandard is distributed with a copy of the zstd sources.
- python-zstandard is only guaranteed to work with the bundled version
- of zstd.
- However, downstream redistributors or packagers may unbundle zstd
- from python-zstandard. This can result in a mismatch between zstd
- versions and API semantics. This essentially "voids the warranty"
- of python-zstandard and may cause undefined behavior.
- We detect this mismatch here and refuse to load the module if this
- scenario is detected.
- */
- if (ZSTD_VERSION_NUMBER != 10506 || ZSTD_versionNumber() != 10506) {
- PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
- return;
- }
- bufferutil_module_init(m);
- compressionparams_module_init(m);
- compressiondict_module_init(m);
- compressobj_module_init(m);
- compressor_module_init(m);
- compressionchunker_module_init(m);
- compressionreader_module_init(m);
- compressionwriter_module_init(m);
- compressoriterator_module_init(m);
- constants_module_init(m);
- decompressor_module_init(m);
- decompressobj_module_init(m);
- decompressionreader_module_init(m);
- decompressionwriter_module_init(m);
- decompressoriterator_module_init(m);
- frameparams_module_init(m);
- }
- #if defined(__GNUC__) && (__GNUC__ >= 4)
- # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
- #else
- # define PYTHON_ZSTD_VISIBILITY
- #endif
- #if PY_MAJOR_VERSION >= 3
- static struct PyModuleDef zstd_module = {
- PyModuleDef_HEAD_INIT,
- "zstd",
- zstd_doc,
- -1,
- zstd_methods
- };
- PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
- PyObject *m = PyModule_Create(&zstd_module);
- if (m) {
- zstd_module_init(m);
- if (PyErr_Occurred()) {
- Py_DECREF(m);
- m = NULL;
- }
- }
- return m;
- }
- #else
- PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
- PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
- if (m) {
- zstd_module_init(m);
- }
- }
- #endif
/*
 * Attempt to resolve the number of CPUs in the system.
 *
 * Returns the count reported by the platform facility selected at compile
 * time, or 0 when the platform is not covered by any branch below or the
 * query fails. The result is never negative: sysconf() and mpctl() signal
 * failure with -1, which is mapped to 0 so callers only have to treat 0 as
 * "unknown".
 */
int cpu_count(void) {
    int count = 0;

#if defined(_WIN32)
    SYSTEM_INFO si;
    /* Pre-zeroed so the field read below is defined regardless of what
     * GetSystemInfo() writes. */
    si.dwNumberOfProcessors = 0;
    GetSystemInfo(&si);
    count = si.dwNumberOfProcessors;
#elif defined(__APPLE__)
    int num;
    size_t size = sizeof(int);

    if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
        count = num;
    }
#elif defined(__linux__)
    count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
    int mib[2];
    size_t len = sizeof(count);

    mib[0] = CTL_HW;
    mib[1] = HW_NCPU;
    if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
        count = 0;
    }
#elif defined(__hpux)
    count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
#endif

    /* Normalize any error sentinel (-1) to the documented "unknown" value. */
    if (count < 0) {
        count = 0;
    }

    return count;
}
/*
 * Round `i` up to the nearest power of two.
 *
 * A value that already is a power of two is returned unchanged. 0 yields 0,
 * and so does any value larger than the biggest power of two representable
 * in size_t (the final increment wraps around).
 *
 * Works by decrementing, copying the highest set bit into every lower bit
 * position, then incrementing again.
 */
size_t roundpow2(size_t i) {
    i--;
    i |= i >> 1;
    i |= i >> 2;
    i |= i >> 4;
    i |= i >> 8;
    i |= i >> 16;
    /* Cover the upper half of a 64-bit size_t. Expressed as two 16-bit
     * shifts so that it is a well-defined no-op when size_t is 32 bits wide
     * (a single shift by 32 would be undefined there). */
    i |= (i >> 16) >> 16;
    i++;

    return i;
}
- /* Safer version of _PyBytes_Resize().
- *
- * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
- * we can get an object with a refcount > 1, even if it was just created
- * with PyBytes_FromStringAndSize()! That's because (at least) CPython
- * pre-allocates PyBytes instances of size 1 for every possible byte value.
- *
- * If non-0 is returned, obj may or may not be NULL.
- */
- int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
- PyObject* tmp;
- if ((*obj)->ob_refcnt == 1) {
- return _PyBytes_Resize(obj, size);
- }
- tmp = PyBytes_FromStringAndSize(NULL, size);
- if (!tmp) {
- return -1;
- }
- memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
- PyBytes_GET_SIZE(*obj));
- Py_DECREF(*obj);
- *obj = tmp;
- return 0;
- }
|