123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
/*
 * wchar_t helpers for CPython >= 3.3.
 *
 * CPython 3.3 added support for sys.maxunicode == 0x10FFFF on all
 * platforms, even those where wchar_t is limited to 2 bytes.  As a
 * result, this file presents the same external interface as
 * wchar_helper.h does when Py_UNICODE_SIZE == 4, but the implementation
 * is very different.
 */
/* One UTF-32 code unit: an unsigned 32-bit integer. */
typedef uint32_t cffi_char32_t;

/* One UTF-16 code unit: an unsigned 16-bit integer. */
typedef uint16_t cffi_char16_t;
- static PyObject *
- _my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
- {
- return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, w, size);
- }
- static PyObject *
- _my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
- {
- /* are there any surrogate pairs, and if so, how many? */
- Py_ssize_t i, count_surrogates = 0;
- for (i = 0; i < size - 1; i++) {
- if (0xD800 <= w[i] && w[i] <= 0xDBFF &&
- 0xDC00 <= w[i+1] && w[i+1] <= 0xDFFF)
- count_surrogates++;
- }
- if (count_surrogates == 0) {
- /* no, fast path */
- return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, w, size);
- }
- else
- {
- PyObject *result = PyUnicode_New(size - count_surrogates, 0x10FFFF);
- Py_UCS4 *data;
- assert(PyUnicode_KIND(result) == PyUnicode_4BYTE_KIND);
- data = PyUnicode_4BYTE_DATA(result);
- for (i = 0; i < size; i++)
- {
- cffi_char32_t ch = w[i];
- if (0xD800 <= ch && ch <= 0xDBFF && i < size - 1) {
- cffi_char32_t ch2 = w[i + 1];
- if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
- ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
- i++;
- }
- }
- *data++ = ch;
- }
- return result;
- }
- }
- static int
- _my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
- char *err_got)
- {
- cffi_char32_t ch;
- if (PyUnicode_GET_LENGTH(unicode) != 1) {
- sprintf(err_got, "unicode string of length %zd",
- PyUnicode_GET_LENGTH(unicode));
- return -1;
- }
- ch = PyUnicode_READ_CHAR(unicode, 0);
- if (ch > 0xFFFF)
- {
- sprintf(err_got, "larger-than-0xFFFF character");
- return -1;
- }
- *result = (cffi_char16_t)ch;
- return 0;
- }
- static int
- _my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
- char *err_got)
- {
- if (PyUnicode_GET_LENGTH(unicode) != 1) {
- sprintf(err_got, "unicode string of length %zd",
- PyUnicode_GET_LENGTH(unicode));
- return -1;
- }
- *result = PyUnicode_READ_CHAR(unicode, 0);
- return 0;
- }
- static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
- {
- Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
- Py_ssize_t result = length;
- unsigned int kind = PyUnicode_KIND(unicode);
- if (kind == PyUnicode_4BYTE_KIND)
- {
- Py_UCS4 *data = PyUnicode_4BYTE_DATA(unicode);
- Py_ssize_t i;
- for (i = 0; i < length; i++) {
- if (data[i] > 0xFFFF)
- result++;
- }
- }
- return result;
- }
- static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
- {
- return PyUnicode_GET_LENGTH(unicode);
- }
- static int _my_PyUnicode_AsChar16(PyObject *unicode,
- cffi_char16_t *result,
- Py_ssize_t resultlen)
- {
- Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
- unsigned int kind = PyUnicode_KIND(unicode);
- void *data = PyUnicode_DATA(unicode);
- Py_ssize_t i;
- for (i = 0; i < len; i++) {
- cffi_char32_t ordinal = PyUnicode_READ(kind, data, i);
- if (ordinal > 0xFFFF) {
- if (ordinal > 0x10FFFF) {
- PyErr_Format(PyExc_ValueError,
- "unicode character out of range for "
- "conversion to char16_t: 0x%x", (int)ordinal);
- return -1;
- }
- ordinal -= 0x10000;
- *result++ = 0xD800 | (ordinal >> 10);
- *result++ = 0xDC00 | (ordinal & 0x3FF);
- }
- else
- *result++ = ordinal;
- }
- return 0;
- }
- static int _my_PyUnicode_AsChar32(PyObject *unicode,
- cffi_char32_t *result,
- Py_ssize_t resultlen)
- {
- if (PyUnicode_AsUCS4(unicode, (Py_UCS4 *)result, resultlen, 0) == NULL)
- return -1;
- return 0;
- }
|