123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818 |
- /**
- * Copyright (c) 2017-present, Gregory Szorc
- * All rights reserved.
- *
- * This software may be modified and distributed under the terms
- * of the BSD license. See the LICENSE file for details.
- */
- #include "python-zstandard.h"
- extern PyObject* ZstdError;
- static void set_unsupported_operation(void) {
- PyObject* iomod;
- PyObject* exc;
- iomod = PyImport_ImportModule("io");
- if (NULL == iomod) {
- return;
- }
- exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
- if (NULL == exc) {
- Py_DECREF(iomod);
- return;
- }
- PyErr_SetNone(exc);
- Py_DECREF(exc);
- Py_DECREF(iomod);
- }
- static void reader_dealloc(ZstdCompressionReader* self) {
- Py_XDECREF(self->compressor);
- Py_XDECREF(self->reader);
- if (self->buffer.buf) {
- PyBuffer_Release(&self->buffer);
- memset(&self->buffer, 0, sizeof(self->buffer));
- }
- PyObject_Del(self);
- }
- static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
- if (self->entered) {
- PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
- return NULL;
- }
- self->entered = 1;
- Py_INCREF(self);
- return self;
- }
- static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
- PyObject* exc_type;
- PyObject* exc_value;
- PyObject* exc_tb;
- if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
- return NULL;
- }
- self->entered = 0;
- self->closed = 1;
- /* Release resources associated with source. */
- Py_CLEAR(self->reader);
- if (self->buffer.buf) {
- PyBuffer_Release(&self->buffer);
- memset(&self->buffer, 0, sizeof(self->buffer));
- }
- Py_CLEAR(self->compressor);
- Py_RETURN_FALSE;
- }
- static PyObject* reader_readable(ZstdCompressionReader* self) {
- Py_RETURN_TRUE;
- }
- static PyObject* reader_writable(ZstdCompressionReader* self) {
- Py_RETURN_FALSE;
- }
- static PyObject* reader_seekable(ZstdCompressionReader* self) {
- Py_RETURN_FALSE;
- }
- static PyObject* reader_readline(PyObject* self, PyObject* args) {
- set_unsupported_operation();
- return NULL;
- }
- static PyObject* reader_readlines(PyObject* self, PyObject* args) {
- set_unsupported_operation();
- return NULL;
- }
- static PyObject* reader_write(PyObject* self, PyObject* args) {
- PyErr_SetString(PyExc_OSError, "stream is not writable");
- return NULL;
- }
- static PyObject* reader_writelines(PyObject* self, PyObject* args) {
- PyErr_SetString(PyExc_OSError, "stream is not writable");
- return NULL;
- }
- static PyObject* reader_isatty(PyObject* self) {
- Py_RETURN_FALSE;
- }
- static PyObject* reader_flush(PyObject* self) {
- Py_RETURN_NONE;
- }
- static PyObject* reader_close(ZstdCompressionReader* self) {
- self->closed = 1;
- Py_RETURN_NONE;
- }
- static PyObject* reader_tell(ZstdCompressionReader* self) {
- /* TODO should this raise OSError since stream isn't seekable? */
- return PyLong_FromUnsignedLongLong(self->bytesCompressed);
- }
- int read_compressor_input(ZstdCompressionReader* self) {
- if (self->finishedInput) {
- return 0;
- }
- if (self->input.pos != self->input.size) {
- return 0;
- }
- if (self->reader) {
- Py_buffer buffer;
- assert(self->readResult == NULL);
- self->readResult = PyObject_CallMethod(self->reader, "read",
- "k", self->readSize);
- if (NULL == self->readResult) {
- return -1;
- }
- memset(&buffer, 0, sizeof(buffer));
- if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
- return -1;
- }
- /* EOF */
- if (0 == buffer.len) {
- self->finishedInput = 1;
- Py_CLEAR(self->readResult);
- }
- else {
- self->input.src = buffer.buf;
- self->input.size = buffer.len;
- self->input.pos = 0;
- }
- PyBuffer_Release(&buffer);
- }
- else {
- assert(self->buffer.buf);
- self->input.src = self->buffer.buf;
- self->input.size = self->buffer.len;
- self->input.pos = 0;
- }
- return 1;
- }
- int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
- size_t oldPos;
- size_t zresult;
- /* If we have data left over, consume it. */
- if (self->input.pos < self->input.size) {
- oldPos = output->pos;
- Py_BEGIN_ALLOW_THREADS
- zresult = ZSTD_compressStream2(self->compressor->cctx,
- output, &self->input, ZSTD_e_continue);
- Py_END_ALLOW_THREADS
- self->bytesCompressed += output->pos - oldPos;
- /* Input exhausted. Clear out state tracking. */
- if (self->input.pos == self->input.size) {
- memset(&self->input, 0, sizeof(self->input));
- Py_CLEAR(self->readResult);
- if (self->buffer.buf) {
- self->finishedInput = 1;
- }
- }
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
- return -1;
- }
- }
- if (output->pos && output->pos == output->size) {
- return 1;
- }
- else {
- return 0;
- }
- }
- static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
- static char* kwlist[] = {
- "size",
- NULL
- };
- Py_ssize_t size = -1;
- PyObject* result = NULL;
- char* resultBuffer;
- Py_ssize_t resultSize;
- size_t zresult;
- size_t oldPos;
- int readResult, compressResult;
- if (self->closed) {
- PyErr_SetString(PyExc_ValueError, "stream is closed");
- return NULL;
- }
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
- return NULL;
- }
- if (size < -1) {
- PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
- return NULL;
- }
- if (size == -1) {
- return PyObject_CallMethod((PyObject*)self, "readall", NULL);
- }
- if (self->finishedOutput || size == 0) {
- return PyBytes_FromStringAndSize("", 0);
- }
- result = PyBytes_FromStringAndSize(NULL, size);
- if (NULL == result) {
- return NULL;
- }
- PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
- self->output.dst = resultBuffer;
- self->output.size = resultSize;
- self->output.pos = 0;
- readinput:
- compressResult = compress_input(self, &self->output);
- if (-1 == compressResult) {
- Py_XDECREF(result);
- return NULL;
- }
- else if (0 == compressResult) {
- /* There is room in the output. We fall through to below, which will
- * either get more input for us or will attempt to end the stream.
- */
- }
- else if (1 == compressResult) {
- memset(&self->output, 0, sizeof(self->output));
- return result;
- }
- else {
- assert(0);
- }
- readResult = read_compressor_input(self);
- if (-1 == readResult) {
- return NULL;
- }
- else if (0 == readResult) { }
- else if (1 == readResult) { }
- else {
- assert(0);
- }
- if (self->input.size) {
- goto readinput;
- }
- /* Else EOF */
- oldPos = self->output.pos;
- zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
- &self->input, ZSTD_e_end);
- self->bytesCompressed += self->output.pos - oldPos;
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "error ending compression stream: %s",
- ZSTD_getErrorName(zresult));
- Py_XDECREF(result);
- return NULL;
- }
- assert(self->output.pos);
- if (0 == zresult) {
- self->finishedOutput = 1;
- }
- if (safe_pybytes_resize(&result, self->output.pos)) {
- Py_XDECREF(result);
- return NULL;
- }
- memset(&self->output, 0, sizeof(self->output));
- return result;
- }
- static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
- static char* kwlist[] = {
- "size",
- NULL
- };
- Py_ssize_t size = -1;
- PyObject* result = NULL;
- char* resultBuffer;
- Py_ssize_t resultSize;
- ZSTD_outBuffer output;
- int compressResult;
- size_t oldPos;
- size_t zresult;
- if (self->closed) {
- PyErr_SetString(PyExc_ValueError, "stream is closed");
- return NULL;
- }
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
- return NULL;
- }
- if (size < -1) {
- PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
- return NULL;
- }
- if (self->finishedOutput || size == 0) {
- return PyBytes_FromStringAndSize("", 0);
- }
- if (size == -1) {
- size = ZSTD_CStreamOutSize();
- }
- result = PyBytes_FromStringAndSize(NULL, size);
- if (NULL == result) {
- return NULL;
- }
- PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
- output.dst = resultBuffer;
- output.size = resultSize;
- output.pos = 0;
- /* read1() is supposed to use at most 1 read() from the underlying stream.
- However, we can't satisfy this requirement with compression because
- not every input will generate output. We /could/ flush the compressor,
- but this may not be desirable. We allow multiple read() from the
- underlying stream. But unlike read(), we return as soon as output data
- is available.
- */
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- Py_XDECREF(result);
- return NULL;
- }
- else if (0 == compressResult || 1 == compressResult) { }
- else {
- assert(0);
- }
- if (output.pos) {
- goto finally;
- }
- while (!self->finishedInput) {
- int readResult = read_compressor_input(self);
- if (-1 == readResult) {
- Py_XDECREF(result);
- return NULL;
- }
- else if (0 == readResult || 1 == readResult) { }
- else {
- assert(0);
- }
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- Py_XDECREF(result);
- return NULL;
- }
- else if (0 == compressResult || 1 == compressResult) { }
- else {
- assert(0);
- }
- if (output.pos) {
- goto finally;
- }
- }
- /* EOF */
- oldPos = output.pos;
- zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
- ZSTD_e_end);
- self->bytesCompressed += output.pos - oldPos;
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "error ending compression stream: %s",
- ZSTD_getErrorName(zresult));
- Py_XDECREF(result);
- return NULL;
- }
- if (zresult == 0) {
- self->finishedOutput = 1;
- }
- finally:
- if (result) {
- if (safe_pybytes_resize(&result, output.pos)) {
- Py_XDECREF(result);
- return NULL;
- }
- }
- return result;
- }
- static PyObject* reader_readall(PyObject* self) {
- PyObject* chunks = NULL;
- PyObject* empty = NULL;
- PyObject* result = NULL;
- /* Our strategy is to collect chunks into a list then join all the
- * chunks at the end. We could potentially use e.g. an io.BytesIO. But
- * this feels simple enough to implement and avoids potentially expensive
- * reallocations of large buffers.
- */
- chunks = PyList_New(0);
- if (NULL == chunks) {
- return NULL;
- }
- while (1) {
- PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
- if (NULL == chunk) {
- Py_DECREF(chunks);
- return NULL;
- }
- if (!PyBytes_Size(chunk)) {
- Py_DECREF(chunk);
- break;
- }
- if (PyList_Append(chunks, chunk)) {
- Py_DECREF(chunk);
- Py_DECREF(chunks);
- return NULL;
- }
- Py_DECREF(chunk);
- }
- empty = PyBytes_FromStringAndSize("", 0);
- if (NULL == empty) {
- Py_DECREF(chunks);
- return NULL;
- }
- result = PyObject_CallMethod(empty, "join", "O", chunks);
- Py_DECREF(empty);
- Py_DECREF(chunks);
- return result;
- }
- static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
- Py_buffer dest;
- ZSTD_outBuffer output;
- int readResult, compressResult;
- PyObject* result = NULL;
- size_t zresult;
- size_t oldPos;
- if (self->closed) {
- PyErr_SetString(PyExc_ValueError, "stream is closed");
- return NULL;
- }
- if (self->finishedOutput) {
- return PyLong_FromLong(0);
- }
- if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
- return NULL;
- }
- if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
- PyErr_SetString(PyExc_ValueError,
- "destination buffer should be contiguous and have at most one dimension");
- goto finally;
- }
- output.dst = dest.buf;
- output.size = dest.len;
- output.pos = 0;
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- goto finally;
- }
- else if (0 == compressResult) { }
- else if (1 == compressResult) {
- result = PyLong_FromSize_t(output.pos);
- goto finally;
- }
- else {
- assert(0);
- }
- while (!self->finishedInput) {
- readResult = read_compressor_input(self);
- if (-1 == readResult) {
- goto finally;
- }
- else if (0 == readResult || 1 == readResult) {}
- else {
- assert(0);
- }
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- goto finally;
- }
- else if (0 == compressResult) { }
- else if (1 == compressResult) {
- result = PyLong_FromSize_t(output.pos);
- goto finally;
- }
- else {
- assert(0);
- }
- }
- /* EOF */
- oldPos = output.pos;
- zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
- ZSTD_e_end);
- self->bytesCompressed += self->output.pos - oldPos;
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "error ending compression stream: %s",
- ZSTD_getErrorName(zresult));
- goto finally;
- }
- assert(output.pos);
- if (0 == zresult) {
- self->finishedOutput = 1;
- }
- result = PyLong_FromSize_t(output.pos);
- finally:
- PyBuffer_Release(&dest);
- return result;
- }
- static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
- Py_buffer dest;
- PyObject* result = NULL;
- ZSTD_outBuffer output;
- int compressResult;
- size_t oldPos;
- size_t zresult;
- if (self->closed) {
- PyErr_SetString(PyExc_ValueError, "stream is closed");
- return NULL;
- }
- if (self->finishedOutput) {
- return PyLong_FromLong(0);
- }
- if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
- return NULL;
- }
- if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
- PyErr_SetString(PyExc_ValueError,
- "destination buffer should be contiguous and have at most one dimension");
- goto finally;
- }
- output.dst = dest.buf;
- output.size = dest.len;
- output.pos = 0;
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- goto finally;
- }
- else if (0 == compressResult || 1 == compressResult) { }
- else {
- assert(0);
- }
- if (output.pos) {
- result = PyLong_FromSize_t(output.pos);
- goto finally;
- }
- while (!self->finishedInput) {
- int readResult = read_compressor_input(self);
- if (-1 == readResult) {
- goto finally;
- }
- else if (0 == readResult || 1 == readResult) { }
- else {
- assert(0);
- }
- compressResult = compress_input(self, &output);
- if (-1 == compressResult) {
- goto finally;
- }
- else if (0 == compressResult) { }
- else if (1 == compressResult) {
- result = PyLong_FromSize_t(output.pos);
- goto finally;
- }
- else {
- assert(0);
- }
- /* If we produced output and we're not done with input, emit
- * that output now, as we've hit restrictions of read1().
- */
- if (output.pos && !self->finishedInput) {
- result = PyLong_FromSize_t(output.pos);
- goto finally;
- }
- /* Otherwise we either have no output or we've exhausted the
- * input. Either we try to get more input or we fall through
- * to EOF below */
- }
- /* EOF */
- oldPos = output.pos;
- zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
- ZSTD_e_end);
- self->bytesCompressed += self->output.pos - oldPos;
- if (ZSTD_isError(zresult)) {
- PyErr_Format(ZstdError, "error ending compression stream: %s",
- ZSTD_getErrorName(zresult));
- goto finally;
- }
- assert(output.pos);
- if (0 == zresult) {
- self->finishedOutput = 1;
- }
- result = PyLong_FromSize_t(output.pos);
- finally:
- PyBuffer_Release(&dest);
- return result;
- }
- static PyObject* reader_iter(PyObject* self) {
- set_unsupported_operation();
- return NULL;
- }
- static PyObject* reader_iternext(PyObject* self) {
- set_unsupported_operation();
- return NULL;
- }
- static PyMethodDef reader_methods[] = {
- { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
- PyDoc_STR("Enter a compression context") },
- { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
- PyDoc_STR("Exit a compression context") },
- { "close", (PyCFunction)reader_close, METH_NOARGS,
- PyDoc_STR("Close the stream so it cannot perform any more operations") },
- { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
- { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
- { "readable", (PyCFunction)reader_readable, METH_NOARGS,
- PyDoc_STR("Returns True") },
- { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
- { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
- { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
- { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
- { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
- { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
- { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
- { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
- PyDoc_STR("Returns False") },
- { "tell", (PyCFunction)reader_tell, METH_NOARGS,
- PyDoc_STR("Returns current number of bytes compressed") },
- { "writable", (PyCFunction)reader_writable, METH_NOARGS,
- PyDoc_STR("Returns False") },
- { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
- { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
- { NULL, NULL }
- };
- static PyMemberDef reader_members[] = {
- { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
- READONLY, "whether stream is closed" },
- { NULL }
- };
- PyTypeObject ZstdCompressionReaderType = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "zstd.ZstdCompressionReader", /* tp_name */
- sizeof(ZstdCompressionReader), /* tp_basicsize */
- 0, /* tp_itemsize */
- (destructor)reader_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- reader_iter, /* tp_iter */
- reader_iternext, /* tp_iternext */
- reader_methods, /* tp_methods */
- reader_members, /* tp_members */
- 0, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- 0, /* tp_init */
- 0, /* tp_alloc */
- PyType_GenericNew, /* tp_new */
- };
- void compressionreader_module_init(PyObject* mod) {
- /* TODO make reader a sub-class of io.RawIOBase */
- Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
- if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
- return;
- }
- }
|