123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
- /**
- * markupsafe._speedups
- * ~~~~~~~~~~~~~~~~~~~~
- *
- * C implementation of escaping for better performance. Used instead of
- * the native Python implementation when compiled.
- *
- * :copyright: 2010 Pallets
- * :license: BSD-3-Clause
- */
- #include <Python.h>
- #if PY_MAJOR_VERSION < 3
- #define ESCAPED_CHARS_TABLE_SIZE 63
- #define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
- static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
- static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
- #endif
- static PyObject* markup;
- static int
- init_constants(void)
- {
- PyObject *module;
- #if PY_MAJOR_VERSION < 3
- /* mapping of characters to replace */
- escaped_chars_repl['"'] = UNICHR(""");
- escaped_chars_repl['\''] = UNICHR("'");
- escaped_chars_repl['&'] = UNICHR("&");
- escaped_chars_repl['<'] = UNICHR("<");
- escaped_chars_repl['>'] = UNICHR(">");
- /* lengths of those characters when replaced - 1 */
- memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
- escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
- escaped_chars_delta_len['&'] = 4;
- escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
- #endif
- /* import markup type so that we can mark the return value */
- module = PyImport_ImportModule("markupsafe");
- if (!module)
- return 0;
- markup = PyObject_GetAttrString(module, "Markup");
- Py_DECREF(module);
- return 1;
- }
- #if PY_MAJOR_VERSION < 3
- static PyObject*
- escape_unicode(PyUnicodeObject *in)
- {
- PyUnicodeObject *out;
- Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
- const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
- Py_UNICODE *next_escp;
- Py_UNICODE *outp;
- Py_ssize_t delta=0, erepl=0, delta_len=0;
- /* First we need to figure out how long the escaped string will be */
- while (*(inp) || inp < inp_end) {
- if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
- delta += escaped_chars_delta_len[*inp];
- erepl += !!escaped_chars_delta_len[*inp];
- }
- ++inp;
- }
- /* Do we need to escape anything at all? */
- if (!erepl) {
- Py_INCREF(in);
- return (PyObject*)in;
- }
- out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
- if (!out)
- return NULL;
- outp = PyUnicode_AS_UNICODE(out);
- inp = PyUnicode_AS_UNICODE(in);
- while (erepl-- > 0) {
- /* look for the next substitution */
- next_escp = inp;
- while (next_escp < inp_end) {
- if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
- (delta_len = escaped_chars_delta_len[*next_escp])) {
- ++delta_len;
- break;
- }
- ++next_escp;
- }
- if (next_escp > inp) {
- /* copy unescaped chars between inp and next_escp */
- Py_UNICODE_COPY(outp, inp, next_escp-inp);
- outp += next_escp - inp;
- }
- /* escape 'next_escp' */
- Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
- outp += delta_len;
- inp = next_escp + 1;
- }
- if (inp < inp_end)
- Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in)));
- return (PyObject*)out;
- }
- #else /* PY_MAJOR_VERSION < 3 */
/*
 * Walk the characters in [inp, inp_end) and add to `delta` the number of
 * extra characters their escaped form needs: 4 for ", ' and &, 3 for
 * < and >.  `inp` is advanced and equals `inp_end` afterwards; `inp` and
 * `delta` must be modifiable lvalues.
 *
 * Wrapped in do/while(0) so that `GET_DELTA(...);` is a single statement
 * wherever a statement is allowed.
 */
#define GET_DELTA(inp, inp_end, delta) \
    do { \
        while ((inp) < (inp_end)) { \
            switch (*(inp)++) { \
            case '"': \
            case '\'': \
            case '&': \
                (delta) += 4; \
                break; \
            case '<': \
            case '>': \
                (delta) += 3; \
                break; \
            default: \
                break; \
            } \
        } \
    } while (0)
/*
 * Copy the characters in [inp, inp_end) to `outp`, writing &#34; &#39;
 * &amp; &lt; &gt; in place of " ' & < >.
 *
 * Runs of ordinary characters are counted in `ncopy` and copied with a
 * single memcpy when the next special character (or the end) is reached.
 * `inp` and `outp` must be modifiable pointers to elements of the same
 * width, and the output buffer must already have room for the escaped
 * text.  Afterwards `inp` equals `inp_end`; `outp` is left at the start
 * of the final copied run, not at the end of the output.
 *
 * Wrapped in do/while(0) so that `DO_ESCAPE(...);` is a single statement.
 */
#define DO_ESCAPE(inp, inp_end, outp) \
    do { \
        Py_ssize_t ncopy = 0; \
        while ((inp) < (inp_end)) { \
            const char *esc_text = NULL; \
            switch (*(inp)) { \
            case '"': \
                esc_text = "&#34;"; \
                break; \
            case '\'': \
                esc_text = "&#39;"; \
                break; \
            case '&': \
                esc_text = "&amp;"; \
                break; \
            case '<': \
                esc_text = "&lt;"; \
                break; \
            case '>': \
                esc_text = "&gt;"; \
                break; \
            default: \
                ncopy++; \
                break; \
            } \
            if (esc_text != NULL) { \
                /* flush the pending run, then emit the replacement */ \
                memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
                (outp) += ncopy; \
                ncopy = 0; \
                while (*esc_text != '\0') \
                    *(outp)++ = *esc_text++; \
            } \
            (inp)++; \
        } \
        /* trailing run after the last special character */ \
        memcpy((outp), (inp) - ncopy, sizeof(*(outp)) * ncopy); \
    } while (0)
- static PyObject*
- escape_unicode_kind1(PyUnicodeObject *in)
- {
- Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
- Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
- Py_UCS1 *outp;
- PyObject *out;
- Py_ssize_t delta = 0;
- GET_DELTA(inp, inp_end, delta);
- if (!delta) {
- Py_INCREF(in);
- return (PyObject*)in;
- }
- out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
- PyUnicode_IS_ASCII(in) ? 127 : 255);
- if (!out)
- return NULL;
- inp = PyUnicode_1BYTE_DATA(in);
- outp = PyUnicode_1BYTE_DATA(out);
- DO_ESCAPE(inp, inp_end, outp);
- return out;
- }
- static PyObject*
- escape_unicode_kind2(PyUnicodeObject *in)
- {
- Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
- Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
- Py_UCS2 *outp;
- PyObject *out;
- Py_ssize_t delta = 0;
- GET_DELTA(inp, inp_end, delta);
- if (!delta) {
- Py_INCREF(in);
- return (PyObject*)in;
- }
- out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
- if (!out)
- return NULL;
- inp = PyUnicode_2BYTE_DATA(in);
- outp = PyUnicode_2BYTE_DATA(out);
- DO_ESCAPE(inp, inp_end, outp);
- return out;
- }
- static PyObject*
- escape_unicode_kind4(PyUnicodeObject *in)
- {
- Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
- Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
- Py_UCS4 *outp;
- PyObject *out;
- Py_ssize_t delta = 0;
- GET_DELTA(inp, inp_end, delta);
- if (!delta) {
- Py_INCREF(in);
- return (PyObject*)in;
- }
- out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
- if (!out)
- return NULL;
- inp = PyUnicode_4BYTE_DATA(in);
- outp = PyUnicode_4BYTE_DATA(out);
- DO_ESCAPE(inp, inp_end, outp);
- return out;
- }
- static PyObject*
- escape_unicode(PyUnicodeObject *in)
- {
- if (PyUnicode_READY(in))
- return NULL;
- switch (PyUnicode_KIND(in)) {
- case PyUnicode_1BYTE_KIND:
- return escape_unicode_kind1(in);
- case PyUnicode_2BYTE_KIND:
- return escape_unicode_kind2(in);
- case PyUnicode_4BYTE_KIND:
- return escape_unicode_kind4(in);
- }
- assert(0); /* shouldn't happen */
- return NULL;
- }
- #endif /* PY_MAJOR_VERSION < 3 */
- static PyObject*
- escape(PyObject *self, PyObject *text)
- {
- static PyObject *id_html;
- PyObject *s = NULL, *rv = NULL, *html;
- if (id_html == NULL) {
- #if PY_MAJOR_VERSION < 3
- id_html = PyString_InternFromString("__html__");
- #else
- id_html = PyUnicode_InternFromString("__html__");
- #endif
- if (id_html == NULL) {
- return NULL;
- }
- }
- /* we don't have to escape integers, bools or floats */
- if (PyLong_CheckExact(text) ||
- #if PY_MAJOR_VERSION < 3
- PyInt_CheckExact(text) ||
- #endif
- PyFloat_CheckExact(text) || PyBool_Check(text) ||
- text == Py_None)
- return PyObject_CallFunctionObjArgs(markup, text, NULL);
- /* if the object has an __html__ method that performs the escaping */
- html = PyObject_GetAttr(text ,id_html);
- if (html) {
- s = PyObject_CallObject(html, NULL);
- Py_DECREF(html);
- if (s == NULL) {
- return NULL;
- }
- /* Convert to Markup object */
- rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
- Py_DECREF(s);
- return rv;
- }
- /* otherwise make the object unicode if it isn't, then escape */
- PyErr_Clear();
- if (!PyUnicode_Check(text)) {
- #if PY_MAJOR_VERSION < 3
- PyObject *unicode = PyObject_Unicode(text);
- #else
- PyObject *unicode = PyObject_Str(text);
- #endif
- if (!unicode)
- return NULL;
- s = escape_unicode((PyUnicodeObject*)unicode);
- Py_DECREF(unicode);
- }
- else
- s = escape_unicode((PyUnicodeObject*)text);
- /* convert the unicode string into a markup object. */
- rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
- Py_DECREF(s);
- return rv;
- }
- static PyObject*
- escape_silent(PyObject *self, PyObject *text)
- {
- if (text != Py_None)
- return escape(self, text);
- return PyObject_CallFunctionObjArgs(markup, NULL);
- }
- static PyObject*
- soft_unicode(PyObject *self, PyObject *s)
- {
- if (!PyUnicode_Check(s))
- #if PY_MAJOR_VERSION < 3
- return PyObject_Unicode(s);
- #else
- return PyObject_Str(s);
- #endif
- Py_INCREF(s);
- return s;
- }
- static PyMethodDef module_methods[] = {
- {"escape", (PyCFunction)escape, METH_O,
- "escape(s) -> markup\n\n"
- "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
- "sequences. Use this if you need to display text that might contain\n"
- "such characters in HTML. Marks return value as markup string."},
- {"escape_silent", (PyCFunction)escape_silent, METH_O,
- "escape_silent(s) -> markup\n\n"
- "Like escape but converts None to an empty string."},
- {"soft_unicode", (PyCFunction)soft_unicode, METH_O,
- "soft_unicode(object) -> string\n\n"
- "Make a string unicode if it isn't already. That way a markup\n"
- "string is not converted back to unicode."},
- {NULL, NULL, 0, NULL} /* Sentinel */
- };
- #if PY_MAJOR_VERSION < 3
- #ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
- #define PyMODINIT_FUNC void
- #endif
- PyMODINIT_FUNC
- init10markupsafe9_speedups(void)
- {
- if (!init_constants())
- return;
- Py_InitModule3("markupsafe._speedups", module_methods, "");
- }
- #else /* Python 3.x module initialization */
- static struct PyModuleDef module_definition = {
- PyModuleDef_HEAD_INIT,
- "markupsafe._speedups",
- NULL,
- -1,
- module_methods,
- NULL,
- NULL,
- NULL,
- NULL
- };
- PyMODINIT_FUNC
- PyInit_10markupsafe9_speedups(void)
- {
- if (!init_constants())
- return NULL;
- return PyModule_Create(&module_definition);
- }
- #endif
|