_speedups.c 9.7 KB


  1. /**
  2. * markupsafe._speedups
  3. * ~~~~~~~~~~~~~~~~~~~~
  4. *
  5. * C implementation of escaping for better performance. Used instead of
  6. * the native Python implementation when compiled.
  7. *
  8. * :copyright: 2010 Pallets
  9. * :license: BSD-3-Clause
  10. */
  11. #include <Python.h>
  12. #if PY_MAJOR_VERSION < 3
  13. #define ESCAPED_CHARS_TABLE_SIZE 63
  14. #define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
  15. static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
  16. static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
  17. #endif
  18. static PyObject* markup;
  19. static int
  20. init_constants(void)
  21. {
  22. PyObject *module;
  23. #if PY_MAJOR_VERSION < 3
  24. /* mapping of characters to replace */
  25. escaped_chars_repl['"'] = UNICHR("&#34;");
  26. escaped_chars_repl['\''] = UNICHR("&#39;");
  27. escaped_chars_repl['&'] = UNICHR("&amp;");
  28. escaped_chars_repl['<'] = UNICHR("&lt;");
  29. escaped_chars_repl['>'] = UNICHR("&gt;");
  30. /* lengths of those characters when replaced - 1 */
  31. memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
  32. escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
  33. escaped_chars_delta_len['&'] = 4;
  34. escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
  35. #endif
  36. /* import markup type so that we can mark the return value */
  37. module = PyImport_ImportModule("markupsafe");
  38. if (!module)
  39. return 0;
  40. markup = PyObject_GetAttrString(module, "Markup");
  41. Py_DECREF(module);
  42. return 1;
  43. }
  44. #if PY_MAJOR_VERSION < 3
  45. static PyObject*
  46. escape_unicode(PyUnicodeObject *in)
  47. {
  48. PyUnicodeObject *out;
  49. Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
  50. const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
  51. Py_UNICODE *next_escp;
  52. Py_UNICODE *outp;
  53. Py_ssize_t delta=0, erepl=0, delta_len=0;
  54. /* First we need to figure out how long the escaped string will be */
  55. while (*(inp) || inp < inp_end) {
  56. if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
  57. delta += escaped_chars_delta_len[*inp];
  58. erepl += !!escaped_chars_delta_len[*inp];
  59. }
  60. ++inp;
  61. }
  62. /* Do we need to escape anything at all? */
  63. if (!erepl) {
  64. Py_INCREF(in);
  65. return (PyObject*)in;
  66. }
  67. out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
  68. if (!out)
  69. return NULL;
  70. outp = PyUnicode_AS_UNICODE(out);
  71. inp = PyUnicode_AS_UNICODE(in);
  72. while (erepl-- > 0) {
  73. /* look for the next substitution */
  74. next_escp = inp;
  75. while (next_escp < inp_end) {
  76. if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
  77. (delta_len = escaped_chars_delta_len[*next_escp])) {
  78. ++delta_len;
  79. break;
  80. }
  81. ++next_escp;
  82. }
  83. if (next_escp > inp) {
  84. /* copy unescaped chars between inp and next_escp */
  85. Py_UNICODE_COPY(outp, inp, next_escp-inp);
  86. outp += next_escp - inp;
  87. }
  88. /* escape 'next_escp' */
  89. Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
  90. outp += delta_len;
  91. inp = next_escp + 1;
  92. }
  93. if (inp < inp_end)
  94. Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in)));
  95. return (PyObject*)out;
  96. }
  97. #else /* PY_MAJOR_VERSION < 3 */
  /*
   * GET_DELTA(inp, inp_end, delta)
   *
   * Scan the characters in [inp, inp_end) and add to `delta` the number of
   * extra characters the escaped form needs: 4 for each of " ' & and 3 for
   * each of < >.  `inp` and `delta` must be modifiable lvalues; `inp` is
   * advanced to `inp_end`.
   *
   * Wrapped in do { } while (0) so the invocation plus its trailing
   * semicolon forms exactly one statement (safe inside if/else).
   */
  #define GET_DELTA(inp, inp_end, delta) \
      do { \
          while (inp < inp_end) { \
              switch (*inp++) { \
              case '"': \
              case '\'': \
              case '&': \
                  delta += 4; \
                  break; \
              case '<': \
              case '>': \
                  delta += 3; \
                  break; \
              default: \
                  break; \
              } \
          } \
      } while (0)
  /*
   * DO_ESCAPE(inp, inp_end, outp)
   *
   * Copy the characters in [inp, inp_end) to `outp`, replacing
   *   "  with &#34;   '  with &#39;   &  with &amp;
   *   <  with &lt;    >  with &gt;
   * Runs of ordinary characters are counted in `ncopy` and copied with one
   * memcpy when the next escaped character (or the end) is reached.
   *
   * `inp` and `outp` must be modifiable lvalues of the same element type.
   * `inp` is advanced to `inp_end`.  `outp` is advanced past everything
   * written except the final run of ordinary characters.  The caller must
   * provide an output buffer that is large enough.
   *
   * Wrapped in do { } while (0) so the invocation plus its trailing
   * semicolon forms exactly one statement (safe inside if/else).
   */
  #define DO_ESCAPE(inp, inp_end, outp) \
      do { \
          Py_ssize_t ncopy = 0; \
          while (inp < inp_end) { \
              switch (*inp) { \
              case '"': \
                  memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
                  outp += ncopy; ncopy = 0; \
                  *outp++ = '&'; \
                  *outp++ = '#'; \
                  *outp++ = '3'; \
                  *outp++ = '4'; \
                  *outp++ = ';'; \
                  break; \
              case '\'': \
                  memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
                  outp += ncopy; ncopy = 0; \
                  *outp++ = '&'; \
                  *outp++ = '#'; \
                  *outp++ = '3'; \
                  *outp++ = '9'; \
                  *outp++ = ';'; \
                  break; \
              case '&': \
                  memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
                  outp += ncopy; ncopy = 0; \
                  *outp++ = '&'; \
                  *outp++ = 'a'; \
                  *outp++ = 'm'; \
                  *outp++ = 'p'; \
                  *outp++ = ';'; \
                  break; \
              case '<': \
                  memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
                  outp += ncopy; ncopy = 0; \
                  *outp++ = '&'; \
                  *outp++ = 'l'; \
                  *outp++ = 't'; \
                  *outp++ = ';'; \
                  break; \
              case '>': \
                  memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
                  outp += ncopy; ncopy = 0; \
                  *outp++ = '&'; \
                  *outp++ = 'g'; \
                  *outp++ = 't'; \
                  *outp++ = ';'; \
                  break; \
              default: \
                  ncopy++; \
              } \
              inp++; \
          } \
          memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
      } while (0)
  167. static PyObject*
  168. escape_unicode_kind1(PyUnicodeObject *in)
  169. {
  170. Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
  171. Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  172. Py_UCS1 *outp;
  173. PyObject *out;
  174. Py_ssize_t delta = 0;
  175. GET_DELTA(inp, inp_end, delta);
  176. if (!delta) {
  177. Py_INCREF(in);
  178. return (PyObject*)in;
  179. }
  180. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
  181. PyUnicode_IS_ASCII(in) ? 127 : 255);
  182. if (!out)
  183. return NULL;
  184. inp = PyUnicode_1BYTE_DATA(in);
  185. outp = PyUnicode_1BYTE_DATA(out);
  186. DO_ESCAPE(inp, inp_end, outp);
  187. return out;
  188. }
  189. static PyObject*
  190. escape_unicode_kind2(PyUnicodeObject *in)
  191. {
  192. Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
  193. Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  194. Py_UCS2 *outp;
  195. PyObject *out;
  196. Py_ssize_t delta = 0;
  197. GET_DELTA(inp, inp_end, delta);
  198. if (!delta) {
  199. Py_INCREF(in);
  200. return (PyObject*)in;
  201. }
  202. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
  203. if (!out)
  204. return NULL;
  205. inp = PyUnicode_2BYTE_DATA(in);
  206. outp = PyUnicode_2BYTE_DATA(out);
  207. DO_ESCAPE(inp, inp_end, outp);
  208. return out;
  209. }
  210. static PyObject*
  211. escape_unicode_kind4(PyUnicodeObject *in)
  212. {
  213. Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
  214. Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
  215. Py_UCS4 *outp;
  216. PyObject *out;
  217. Py_ssize_t delta = 0;
  218. GET_DELTA(inp, inp_end, delta);
  219. if (!delta) {
  220. Py_INCREF(in);
  221. return (PyObject*)in;
  222. }
  223. out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
  224. if (!out)
  225. return NULL;
  226. inp = PyUnicode_4BYTE_DATA(in);
  227. outp = PyUnicode_4BYTE_DATA(out);
  228. DO_ESCAPE(inp, inp_end, outp);
  229. return out;
  230. }
  231. static PyObject*
  232. escape_unicode(PyUnicodeObject *in)
  233. {
  234. if (PyUnicode_READY(in))
  235. return NULL;
  236. switch (PyUnicode_KIND(in)) {
  237. case PyUnicode_1BYTE_KIND:
  238. return escape_unicode_kind1(in);
  239. case PyUnicode_2BYTE_KIND:
  240. return escape_unicode_kind2(in);
  241. case PyUnicode_4BYTE_KIND:
  242. return escape_unicode_kind4(in);
  243. }
  244. assert(0); /* shouldn't happen */
  245. return NULL;
  246. }
  247. #endif /* PY_MAJOR_VERSION < 3 */
  248. static PyObject*
  249. escape(PyObject *self, PyObject *text)
  250. {
  251. static PyObject *id_html;
  252. PyObject *s = NULL, *rv = NULL, *html;
  253. if (id_html == NULL) {
  254. #if PY_MAJOR_VERSION < 3
  255. id_html = PyString_InternFromString("__html__");
  256. #else
  257. id_html = PyUnicode_InternFromString("__html__");
  258. #endif
  259. if (id_html == NULL) {
  260. return NULL;
  261. }
  262. }
  263. /* we don't have to escape integers, bools or floats */
  264. if (PyLong_CheckExact(text) ||
  265. #if PY_MAJOR_VERSION < 3
  266. PyInt_CheckExact(text) ||
  267. #endif
  268. PyFloat_CheckExact(text) || PyBool_Check(text) ||
  269. text == Py_None)
  270. return PyObject_CallFunctionObjArgs(markup, text, NULL);
  271. /* if the object has an __html__ method that performs the escaping */
  272. html = PyObject_GetAttr(text ,id_html);
  273. if (html) {
  274. s = PyObject_CallObject(html, NULL);
  275. Py_DECREF(html);
  276. if (s == NULL) {
  277. return NULL;
  278. }
  279. /* Convert to Markup object */
  280. rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
  281. Py_DECREF(s);
  282. return rv;
  283. }
  284. /* otherwise make the object unicode if it isn't, then escape */
  285. PyErr_Clear();
  286. if (!PyUnicode_Check(text)) {
  287. #if PY_MAJOR_VERSION < 3
  288. PyObject *unicode = PyObject_Unicode(text);
  289. #else
  290. PyObject *unicode = PyObject_Str(text);
  291. #endif
  292. if (!unicode)
  293. return NULL;
  294. s = escape_unicode((PyUnicodeObject*)unicode);
  295. Py_DECREF(unicode);
  296. }
  297. else
  298. s = escape_unicode((PyUnicodeObject*)text);
  299. /* convert the unicode string into a markup object. */
  300. rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
  301. Py_DECREF(s);
  302. return rv;
  303. }
  304. static PyObject*
  305. escape_silent(PyObject *self, PyObject *text)
  306. {
  307. if (text != Py_None)
  308. return escape(self, text);
  309. return PyObject_CallFunctionObjArgs(markup, NULL);
  310. }
  311. static PyObject*
  312. soft_unicode(PyObject *self, PyObject *s)
  313. {
  314. if (!PyUnicode_Check(s))
  315. #if PY_MAJOR_VERSION < 3
  316. return PyObject_Unicode(s);
  317. #else
  318. return PyObject_Str(s);
  319. #endif
  320. Py_INCREF(s);
  321. return s;
  322. }
  323. static PyMethodDef module_methods[] = {
  324. {"escape", (PyCFunction)escape, METH_O,
  325. "escape(s) -> markup\n\n"
  326. "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
  327. "sequences. Use this if you need to display text that might contain\n"
  328. "such characters in HTML. Marks return value as markup string."},
  329. {"escape_silent", (PyCFunction)escape_silent, METH_O,
  330. "escape_silent(s) -> markup\n\n"
  331. "Like escape but converts None to an empty string."},
  332. {"soft_unicode", (PyCFunction)soft_unicode, METH_O,
  333. "soft_unicode(object) -> string\n\n"
  334. "Make a string unicode if it isn't already. That way a markup\n"
  335. "string is not converted back to unicode."},
  336. {NULL, NULL, 0, NULL} /* Sentinel */
  337. };
  338. #if PY_MAJOR_VERSION < 3
  339. #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
  340. #define PyMODINIT_FUNC void
  341. #endif
  342. PyMODINIT_FUNC
  343. init10markupsafe9_speedups(void)
  344. {
  345. if (!init_constants())
  346. return;
  347. Py_InitModule3("markupsafe._speedups", module_methods, "");
  348. }
  349. #else /* Python 3.x module initialization */
  350. static struct PyModuleDef module_definition = {
  351. PyModuleDef_HEAD_INIT,
  352. "markupsafe._speedups",
  353. NULL,
  354. -1,
  355. module_methods,
  356. NULL,
  357. NULL,
  358. NULL,
  359. NULL
  360. };
  361. PyMODINIT_FUNC
  362. PyInit_10markupsafe9_speedups(void)
  363. {
  364. if (!init_constants())
  365. return NULL;
  366. return PyModule_Create(&module_definition);
  367. }
  368. #endif