123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- /* stringlib: bytes joining implementation */
- #if STRINGLIB_IS_UNICODE
- #error join.h only compatible with byte-wise strings
- #endif
- Py_LOCAL_INLINE(PyObject *)
- STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
- {
- const char *sepstr = STRINGLIB_STR(sep);
- Py_ssize_t seplen = STRINGLIB_LEN(sep);
- PyObject *res = NULL;
- char *p;
- Py_ssize_t seqlen = 0;
- Py_ssize_t sz = 0;
- Py_ssize_t i, nbufs;
- PyObject *seq, *item;
- Py_buffer *buffers = NULL;
- #define NB_STATIC_BUFFERS 10
- Py_buffer static_buffers[NB_STATIC_BUFFERS];
- #define GIL_THRESHOLD 1048576
- int drop_gil = 1;
- PyThreadState *save = NULL;
- seq = PySequence_Fast(iterable, "can only join an iterable");
- if (seq == NULL) {
- return NULL;
- }
- seqlen = PySequence_Fast_GET_SIZE(seq);
- if (seqlen == 0) {
- Py_DECREF(seq);
- return STRINGLIB_NEW(NULL, 0);
- }
- #if !STRINGLIB_MUTABLE
- if (seqlen == 1) {
- item = PySequence_Fast_GET_ITEM(seq, 0);
- if (STRINGLIB_CHECK_EXACT(item)) {
- Py_INCREF(item);
- Py_DECREF(seq);
- return item;
- }
- }
- #endif
- if (seqlen > NB_STATIC_BUFFERS) {
- buffers = PyMem_NEW(Py_buffer, seqlen);
- if (buffers == NULL) {
- Py_DECREF(seq);
- PyErr_NoMemory();
- return NULL;
- }
- }
- else {
- buffers = static_buffers;
- }
- /* Here is the general case. Do a pre-pass to figure out the total
- * amount of space we'll need (sz), and see whether all arguments are
- * bytes-like.
- */
- for (i = 0, nbufs = 0; i < seqlen; i++) {
- Py_ssize_t itemlen;
- item = PySequence_Fast_GET_ITEM(seq, i);
- if (PyBytes_CheckExact(item)) {
- /* Fast path. */
- buffers[i].obj = Py_NewRef(item);
- buffers[i].buf = PyBytes_AS_STRING(item);
- buffers[i].len = PyBytes_GET_SIZE(item);
- }
- else {
- if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected a bytes-like object, "
- "%.80s found",
- i, Py_TYPE(item)->tp_name);
- goto error;
- }
- /* If the backing objects are mutable, then dropping the GIL
- * opens up race conditions where another thread tries to modify
- * the object which we hold a buffer on it. Such code has data
- * races anyway, but this is a conservative approach that avoids
- * changing the behaviour of that data race.
- */
- drop_gil = 0;
- }
- nbufs = i + 1; /* for error cleanup */
- itemlen = buffers[i].len;
- if (itemlen > PY_SSIZE_T_MAX - sz) {
- PyErr_SetString(PyExc_OverflowError,
- "join() result is too long");
- goto error;
- }
- sz += itemlen;
- if (i != 0) {
- if (seplen > PY_SSIZE_T_MAX - sz) {
- PyErr_SetString(PyExc_OverflowError,
- "join() result is too long");
- goto error;
- }
- sz += seplen;
- }
- if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
- PyErr_SetString(PyExc_RuntimeError,
- "sequence changed size during iteration");
- goto error;
- }
- }
- /* Allocate result space. */
- res = STRINGLIB_NEW(NULL, sz);
- if (res == NULL)
- goto error;
- /* Catenate everything. */
- p = STRINGLIB_STR(res);
- if (sz < GIL_THRESHOLD) {
- drop_gil = 0; /* Benefits are likely outweighed by the overheads */
- }
- if (drop_gil) {
- save = PyEval_SaveThread();
- }
- if (!seplen) {
- /* fast path */
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n = buffers[i].len;
- char *q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
- }
- }
- else {
- for (i = 0; i < nbufs; i++) {
- Py_ssize_t n;
- char *q;
- if (i) {
- memcpy(p, sepstr, seplen);
- p += seplen;
- }
- n = buffers[i].len;
- q = buffers[i].buf;
- memcpy(p, q, n);
- p += n;
- }
- }
- if (drop_gil) {
- PyEval_RestoreThread(save);
- }
- goto done;
- error:
- res = NULL;
- done:
- Py_DECREF(seq);
- for (i = 0; i < nbufs; i++)
- PyBuffer_Release(&buffers[i]);
- if (buffers != static_buffers)
- PyMem_Free(buffers);
- return res;
- }
- #undef NB_STATIC_BUFFERS
- #undef GIL_THRESHOLD
|