join.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /* stringlib: bytes joining implementation */
  2. #if STRINGLIB_IS_UNICODE
  3. #error join.h only compatible with byte-wise strings
  4. #endif
  5. Py_LOCAL_INLINE(PyObject *)
  6. STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
  7. {
  8. const char *sepstr = STRINGLIB_STR(sep);
  9. Py_ssize_t seplen = STRINGLIB_LEN(sep);
  10. PyObject *res = NULL;
  11. char *p;
  12. Py_ssize_t seqlen = 0;
  13. Py_ssize_t sz = 0;
  14. Py_ssize_t i, nbufs;
  15. PyObject *seq, *item;
  16. Py_buffer *buffers = NULL;
  17. #define NB_STATIC_BUFFERS 10
  18. Py_buffer static_buffers[NB_STATIC_BUFFERS];
  19. #define GIL_THRESHOLD 1048576
  20. int drop_gil = 1;
  21. PyThreadState *save = NULL;
  22. seq = PySequence_Fast(iterable, "can only join an iterable");
  23. if (seq == NULL) {
  24. return NULL;
  25. }
  26. seqlen = PySequence_Fast_GET_SIZE(seq);
  27. if (seqlen == 0) {
  28. Py_DECREF(seq);
  29. return STRINGLIB_NEW(NULL, 0);
  30. }
  31. #if !STRINGLIB_MUTABLE
  32. if (seqlen == 1) {
  33. item = PySequence_Fast_GET_ITEM(seq, 0);
  34. if (STRINGLIB_CHECK_EXACT(item)) {
  35. Py_INCREF(item);
  36. Py_DECREF(seq);
  37. return item;
  38. }
  39. }
  40. #endif
  41. if (seqlen > NB_STATIC_BUFFERS) {
  42. buffers = PyMem_NEW(Py_buffer, seqlen);
  43. if (buffers == NULL) {
  44. Py_DECREF(seq);
  45. PyErr_NoMemory();
  46. return NULL;
  47. }
  48. }
  49. else {
  50. buffers = static_buffers;
  51. }
  52. /* Here is the general case. Do a pre-pass to figure out the total
  53. * amount of space we'll need (sz), and see whether all arguments are
  54. * bytes-like.
  55. */
  56. for (i = 0, nbufs = 0; i < seqlen; i++) {
  57. Py_ssize_t itemlen;
  58. item = PySequence_Fast_GET_ITEM(seq, i);
  59. if (PyBytes_CheckExact(item)) {
  60. /* Fast path. */
  61. buffers[i].obj = Py_NewRef(item);
  62. buffers[i].buf = PyBytes_AS_STRING(item);
  63. buffers[i].len = PyBytes_GET_SIZE(item);
  64. }
  65. else {
  66. if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
  67. PyErr_Format(PyExc_TypeError,
  68. "sequence item %zd: expected a bytes-like object, "
  69. "%.80s found",
  70. i, Py_TYPE(item)->tp_name);
  71. goto error;
  72. }
  73. /* If the backing objects are mutable, then dropping the GIL
  74. * opens up race conditions where another thread tries to modify
  75. * the object which we hold a buffer on it. Such code has data
  76. * races anyway, but this is a conservative approach that avoids
  77. * changing the behaviour of that data race.
  78. */
  79. drop_gil = 0;
  80. }
  81. nbufs = i + 1; /* for error cleanup */
  82. itemlen = buffers[i].len;
  83. if (itemlen > PY_SSIZE_T_MAX - sz) {
  84. PyErr_SetString(PyExc_OverflowError,
  85. "join() result is too long");
  86. goto error;
  87. }
  88. sz += itemlen;
  89. if (i != 0) {
  90. if (seplen > PY_SSIZE_T_MAX - sz) {
  91. PyErr_SetString(PyExc_OverflowError,
  92. "join() result is too long");
  93. goto error;
  94. }
  95. sz += seplen;
  96. }
  97. if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
  98. PyErr_SetString(PyExc_RuntimeError,
  99. "sequence changed size during iteration");
  100. goto error;
  101. }
  102. }
  103. /* Allocate result space. */
  104. res = STRINGLIB_NEW(NULL, sz);
  105. if (res == NULL)
  106. goto error;
  107. /* Catenate everything. */
  108. p = STRINGLIB_STR(res);
  109. if (sz < GIL_THRESHOLD) {
  110. drop_gil = 0; /* Benefits are likely outweighed by the overheads */
  111. }
  112. if (drop_gil) {
  113. save = PyEval_SaveThread();
  114. }
  115. if (!seplen) {
  116. /* fast path */
  117. for (i = 0; i < nbufs; i++) {
  118. Py_ssize_t n = buffers[i].len;
  119. char *q = buffers[i].buf;
  120. memcpy(p, q, n);
  121. p += n;
  122. }
  123. }
  124. else {
  125. for (i = 0; i < nbufs; i++) {
  126. Py_ssize_t n;
  127. char *q;
  128. if (i) {
  129. memcpy(p, sepstr, seplen);
  130. p += seplen;
  131. }
  132. n = buffers[i].len;
  133. q = buffers[i].buf;
  134. memcpy(p, q, n);
  135. p += n;
  136. }
  137. }
  138. if (drop_gil) {
  139. PyEval_RestoreThread(save);
  140. }
  141. goto done;
  142. error:
  143. res = NULL;
  144. done:
  145. Py_DECREF(seq);
  146. for (i = 0; i < nbufs; i++)
  147. PyBuffer_Release(&buffers[i]);
  148. if (buffers != static_buffers)
  149. PyMem_Free(buffers);
  150. return res;
  151. }
  152. #undef NB_STATIC_BUFFERS
  153. #undef GIL_THRESHOLD