/*
 * wchar_t helpers
 *
 * Conversions between fixed-width char16_t / char32_t arrays and Python
 * unicode objects.  The implementations depend on Py_UNICODE_SIZE, i.e. on
 * whether a Py_UNICODE element is 2 or 4 bytes wide.
 */

/* Fixed-width code unit types used by all helpers below. */
typedef uint16_t cffi_char16_t;
typedef uint32_t cffi_char32_t;


#if Py_UNICODE_SIZE == 2

/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert
   wchar_t values greater than 65535 into two-unicode-characters surrogates.
   But even the Python 2.7 version doesn't detect wchar_t values that are
   out of range(1114112), and just returns nonsense.

   From cffi 1.11 we can't use it anyway, because we need a version
   with char32_t input types.
*/
/* Build a unicode object from 'size' char32_t values starting at 'w'.
 *
 * Every value above 0xFFFF is written as two Py_UNICODE elements (a high
 * surrogate followed by a low surrogate), so the result can be longer than
 * 'size'.  Returns a new reference, or NULL with an exception set: the
 * allocation failed, or a value above 0x10FFFF was found (ValueError).
 */
static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
    PyObject *unicode;
    Py_ssize_t i;
    Py_ssize_t alloc;
    const cffi_char32_t *orig_w;

    /* First pass: one extra output element per value that needs a pair. */
    alloc = size;
    orig_w = w;
    for (i = size; i > 0; i--) {
        if (*w > 0xFFFF)
            alloc++;
        w++;
    }
    w = orig_w;   /* rewind for the copying pass */
    unicode = PyUnicode_FromUnicode(NULL, alloc);
    if (!unicode)
        return NULL;

    /* Copy the wchar_t data into the new object */
    {
        Py_UNICODE *u;
        u = PyUnicode_AS_UNICODE(unicode);
        for (i = size; i > 0; i--) {
            if (*w > 0xFFFF) {
                cffi_char32_t ordinal;
                if (*w > 0x10FFFF) {
                    PyErr_Format(PyExc_ValueError,
                                 "char32_t out of range for "
                                 "conversion to unicode: 0x%x", (int)*w);
                    Py_DECREF(unicode);
                    return NULL;
                }
                /* Split the value into a surrogate pair: subtract 0x10000,
                   upper bits go into 0xD800.., low 10 bits into 0xDC00.. */
                ordinal = *w++;
                ordinal -= 0x10000;
                *u++ = 0xD800 | (ordinal >> 10);
                *u++ = 0xDC00 | (ordinal & 0x3FF);
            }
            else
                *u++ = *w++;
        }
    }
    return unicode;
}

/* Build a unicode object from 'size' char16_t values starting at 'w'.
 * In this configuration the array is handed to PyUnicode_FromUnicode()
 * as-is, through a pointer cast to Py_UNICODE.
 */
static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
    return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
}

#else   /* Py_UNICODE_SIZE == 4 */

/* Build a unicode object from 'size' char32_t values starting at 'w'.
 * In this configuration the array is handed to PyUnicode_FromUnicode()
 * as-is, through a pointer cast to Py_UNICODE; no range check is done here.
 */
static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
    return PyUnicode_FromUnicode((const Py_UNICODE *)w, size);
}

/* Build a unicode object from 'size' char16_t values starting at 'w'.
 *
 * A unit in 0xD800..0xDBFF immediately followed by a unit in 0xDC00..0xDFFF
 * is merged into one element above 0xFFFF; any other unit (including an
 * unpaired surrogate) is copied unchanged.  The object is allocated with
 * 'size' elements and then resized to the number actually written.
 * Returns a new reference, or NULL if the allocation or the resize failed.
 */
static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
    /* 'size' is the length of the 'w' array */
    PyObject *result = PyUnicode_FromUnicode(NULL, size);

    if (result != NULL) {
        Py_UNICODE *u_base = PyUnicode_AS_UNICODE(result);
        Py_UNICODE *u = u_base;

        if (size == 1) {      /* performance only */
            /* A single unit cannot form a pair: copy it, no resize needed. */
            *u = (cffi_char32_t)*w;
        }
        else {
            while (size > 0) {
                cffi_char32_t ch = *w++;
                size--;
                /* 'size > 0' guarantees there is a following unit to
                   inspect before it is read. */
                if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
                    cffi_char32_t ch2 = *w;
                    if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                        ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
                        /* consume the low surrogate as well */
                        w++;
                        size--;
                    }
                }
                *u++ = ch;
            }
            /* Shrink to the number of elements written (pairs collapse). */
            if (PyUnicode_Resize(&result, u - u_base) < 0) {
                Py_DECREF(result);
                return NULL;
            }
        }
    }
    return result;
}

#endif


/* IS_SURROGATE(u): true when u[0] is in 0xD800..0xDBFF and u[1] is in
   0xDC00..0xDFFF.  Reads both u[0] and u[1], and evaluates 'u' several
   times. */
#define IS_SURROGATE(u)   (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF &&   \
                           0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF)
/* AS_SURROGATE(u): combines the pair u[0], u[1] into a single value
   starting at 0x10000.  Does not itself check the ranges of u[0]/u[1]. */
#define AS_SURROGATE(u)   (0x10000 + (((u)[0] - 0xD800) << 10) +     \
                                     ((u)[1] - 0xDC00))

/* Extract the single char16_t held by 'unicode' into '*result'.
 *
 * Returns 0 on success.  Returns -1 and writes a short description of what
 * was found into 'err_got' when the string length is not 1 or, with 4-byte
 * Py_UNICODE, when the character is above 0xFFFF.  No Python exception is
 * set by this function.  'err_got' is written with sprintf(), so its
 * capacity is not checked here.
 */
static int _my_PyUnicode_AsSingleChar16(PyObject *unicode,
                                        cffi_char16_t *result,
                                        char *err_got)
{
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    if (PyUnicode_GET_SIZE(unicode) != 1) {
        sprintf(err_got, "unicode string of length %zd",
                PyUnicode_GET_SIZE(unicode));
        return -1;
    }
#if Py_UNICODE_SIZE == 4
    /* Only wide builds can hold a single element that does not fit. */
    if (((unsigned int)u[0]) > 0xFFFF)
    {
        sprintf(err_got, "larger-than-0xFFFF character");
        return -1;
    }
#endif
    *result = (cffi_char16_t)u[0];
    return 0;
}

/* Extract the single char32_t held by 'unicode' into '*result'.
 *
 * A string of length 1 always succeeds.  With 2-byte Py_UNICODE, a string
 * of length 2 that satisfies IS_SURROGATE() is also accepted and combined
 * with AS_SURROGATE().  Otherwise returns -1 and writes a description into
 * 'err_got' with sprintf() (capacity not checked here); no Python exception
 * is set by this function.
 */
static int _my_PyUnicode_AsSingleChar32(PyObject *unicode,
                                        cffi_char32_t *result,
                                        char *err_got)
{
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    if (PyUnicode_GET_SIZE(unicode) == 1) {
        *result = (cffi_char32_t)u[0];
        return 0;
    }
#if Py_UNICODE_SIZE == 2
    if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) {
        *result = AS_SURROGATE(u);
        return 0;
    }
#endif
    sprintf(err_got, "unicode string of length %zd",
            PyUnicode_GET_SIZE(unicode));
    return -1;
}

/* Returns a Py_ssize_t count that starts at PyUnicode_GET_SIZE(unicode);
 * with 4-byte Py_UNICODE it is incremented inside the loop below.
 *
 * NOTE(review): the loop header is damaged in this copy -- text appears to
 * be missing between `for (i=0; i` and `0xFFFF)`.  Presumably the loop
 * counts elements above 0xFFFF (each needing two char16_t units); confirm
 * against a pristine copy.  The fragment is reproduced as found and does
 * not compile.
 */
static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
{
    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
    Py_ssize_t result = length;

#if Py_UNICODE_SIZE == 4
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t i;

    for (i=0; i 0xFFFF)
            result++;
    }
#endif
    return result;
}

/* NOTE(review): this definition is damaged in this copy.  After
 * `for (i=0; i` a span of text appears to be missing; what follows uses
 * 'ordinal' and writes through 'result' as a pointer, neither of which
 * matches the declarations visible in this function.  Judging by the error
 * message ("conversion to char16_t"), the remainder looks like the tail of
 * a separate routine that writes a unicode string out as char16_t units,
 * splitting values above 0xFFFF into surrogate pairs and raising ValueError
 * above 0x10FFFF -- TODO restore both functions from a pristine copy.
 * The text is reproduced as found and does not compile.
 */
static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
{
    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
    Py_ssize_t result = length;

#if Py_UNICODE_SIZE == 2
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t i;

    for (i=0; i 0xFFFF) {
            if (ordinal > 0x10FFFF) {
                PyErr_Format(PyExc_ValueError,
                             "unicode character out of range for "
                             "conversion to char16_t: 0x%x", (int)ordinal);
                return -1;
            }
            ordinal -= 0x10000;
            *result++ = 0xD800 | (ordinal >> 10);
            *result++ = 0xDC00 | (ordinal & 0x3FF);
            continue;
        }
#endif
        *result++ = ordinal;
    }
    return 0;
}

/* NOTE(review): this definition is cut off at the end of the visible text
 * (it stops inside the `for` header), so its behavior cannot be described
 * from here.  It is reproduced untouched.
 */
static int _my_PyUnicode_AsChar32(PyObject *unicode,
                                  cffi_char32_t *result,
                                  Py_ssize_t resultlen)
{
    Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t i;
    for (i=0; i