/* _speedups.c: C source of the "markupsafe._speedups" extension module. */

  1. #include <Python.h>
  2. #define GET_DELTA(inp, inp_end, delta) \
  3. while (inp < inp_end) { \
  4. switch (*inp++) { \
  5. case '"': \
  6. case '\'': \
  7. case '&': \
  8. delta += 4; \
  9. break; \
  10. case '<': \
  11. case '>': \
  12. delta += 3; \
  13. break; \
  14. } \
  15. }
  16. #define DO_ESCAPE(inp, inp_end, outp) \
  17. { \
  18. Py_ssize_t ncopy = 0; \
  19. while (inp < inp_end) { \
  20. switch (*inp) { \
  21. case '"': \
  22. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  23. outp += ncopy; ncopy = 0; \
  24. *outp++ = '&'; \
  25. *outp++ = '#'; \
  26. *outp++ = '3'; \
  27. *outp++ = '4'; \
  28. *outp++ = ';'; \
  29. break; \
  30. case '\'': \
  31. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  32. outp += ncopy; ncopy = 0; \
  33. *outp++ = '&'; \
  34. *outp++ = '#'; \
  35. *outp++ = '3'; \
  36. *outp++ = '9'; \
  37. *outp++ = ';'; \
  38. break; \
  39. case '&': \
  40. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  41. outp += ncopy; ncopy = 0; \
  42. *outp++ = '&'; \
  43. *outp++ = 'a'; \
  44. *outp++ = 'm'; \
  45. *outp++ = 'p'; \
  46. *outp++ = ';'; \
  47. break; \
  48. case '<': \
  49. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  50. outp += ncopy; ncopy = 0; \
  51. *outp++ = '&'; \
  52. *outp++ = 'l'; \
  53. *outp++ = 't'; \
  54. *outp++ = ';'; \
  55. break; \
  56. case '>': \
  57. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  58. outp += ncopy; ncopy = 0; \
  59. *outp++ = '&'; \
  60. *outp++ = 'g'; \
  61. *outp++ = 't'; \
  62. *outp++ = ';'; \
  63. break; \
  64. default: \
  65. ncopy++; \
  66. } \
  67. inp++; \
  68. } \
  69. memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
  70. }
  71. static PyObject*
  72. escape_unicode_kind1(PyUnicodeObject *in)
  73. {
  74. Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
  75. Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  76. Py_UCS1 *outp;
  77. PyObject *out;
  78. Py_ssize_t delta = 0;
  79. GET_DELTA(inp, inp_end, delta);
  80. if (!delta) {
  81. Py_INCREF(in);
  82. return (PyObject*)in;
  83. }
  84. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
  85. PyUnicode_IS_ASCII(in) ? 127 : 255);
  86. if (!out)
  87. return NULL;
  88. inp = PyUnicode_1BYTE_DATA(in);
  89. outp = PyUnicode_1BYTE_DATA(out);
  90. DO_ESCAPE(inp, inp_end, outp);
  91. return out;
  92. }
  93. static PyObject*
  94. escape_unicode_kind2(PyUnicodeObject *in)
  95. {
  96. Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
  97. Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  98. Py_UCS2 *outp;
  99. PyObject *out;
  100. Py_ssize_t delta = 0;
  101. GET_DELTA(inp, inp_end, delta);
  102. if (!delta) {
  103. Py_INCREF(in);
  104. return (PyObject*)in;
  105. }
  106. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
  107. if (!out)
  108. return NULL;
  109. inp = PyUnicode_2BYTE_DATA(in);
  110. outp = PyUnicode_2BYTE_DATA(out);
  111. DO_ESCAPE(inp, inp_end, outp);
  112. return out;
  113. }
  114. static PyObject*
  115. escape_unicode_kind4(PyUnicodeObject *in)
  116. {
  117. Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
  118. Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  119. Py_UCS4 *outp;
  120. PyObject *out;
  121. Py_ssize_t delta = 0;
  122. GET_DELTA(inp, inp_end, delta);
  123. if (!delta) {
  124. Py_INCREF(in);
  125. return (PyObject*)in;
  126. }
  127. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
  128. if (!out)
  129. return NULL;
  130. inp = PyUnicode_4BYTE_DATA(in);
  131. outp = PyUnicode_4BYTE_DATA(out);
  132. DO_ESCAPE(inp, inp_end, outp);
  133. return out;
  134. }
  135. static PyObject*
  136. escape_unicode(PyObject *self, PyObject *s)
  137. {
  138. if (!PyUnicode_Check(s))
  139. return NULL;
  140. // This check is no longer needed in Python 3.12.
  141. if (PyUnicode_READY(s))
  142. return NULL;
  143. switch (PyUnicode_KIND(s)) {
  144. case PyUnicode_1BYTE_KIND:
  145. return escape_unicode_kind1((PyUnicodeObject*) s);
  146. case PyUnicode_2BYTE_KIND:
  147. return escape_unicode_kind2((PyUnicodeObject*) s);
  148. case PyUnicode_4BYTE_KIND:
  149. return escape_unicode_kind4((PyUnicodeObject*) s);
  150. }
  151. assert(0); /* shouldn't happen */
  152. return NULL;
  153. }
  154. static PyMethodDef module_methods[] = {
  155. {"_escape_inner", (PyCFunction)escape_unicode, METH_O, NULL},
  156. {NULL, NULL, 0, NULL} /* Sentinel */
  157. };
  158. static struct PyModuleDef module_definition = {
  159. PyModuleDef_HEAD_INIT,
  160. "markupsafe._speedups",
  161. NULL,
  162. -1,
  163. module_methods,
  164. NULL,
  165. NULL,
  166. NULL,
  167. NULL
  168. };
  169. PyMODINIT_FUNC
  170. PyInit__speedups(void)
  171. {
  172. PyObject *m = PyModule_Create(&module_definition);
  173. if (m == NULL) {
  174. return NULL;
  175. }
  176. #ifdef Py_GIL_DISABLED
  177. PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
  178. #endif
  179. return m;
  180. }