SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
/*
 * wchar_t / char16_t / char32_t helpers
 */

/* Fixed-width unsigned code-unit types used by the helpers below:
   16 bits for char16_t data and 32 bits for char32_t data. */
typedef uint16_t cffi_char16_t;
typedef uint32_t cffi_char32_t;


#if Py_UNICODE_SIZE == 2

/* Before Python 2.7, PyUnicode_FromWideChar is not able to convert
   wchar_t values greater than 65535 into surrogate pairs (two 16-bit
   unicode characters).  But even the Python 2.7 version doesn't detect
   wchar_t values that are out of range(1114112), and just returns
   nonsense.

   From cffi 1.11 we can't use it anyway, because we need a version
   that takes char32_t input.
*/
static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
    /* Convert 'size' char32_t values from 'w' into a unicode object whose
       Py_UNICODE units are 16 bits wide.  Every value above 0xFFFF is
       written as a surrogate pair; a value above 0x10FFFF raises
       ValueError.  Returns the new object, or NULL on failure. */
    PyObject *unicode;
    Py_UNICODE *out;
    Py_ssize_t k;
    Py_ssize_t units = size;

    /* First pass: each value above 0xFFFF needs one extra output unit. */
    for (k = 0; k < size; k++) {
        if (w[k] > 0xFFFF)
            units++;
    }

    unicode = PyUnicode_FromUnicode(NULL, units);
    if (unicode == NULL)
        return NULL;

    /* Second pass: fill the buffer of the freshly created object. */
    out = PyUnicode_AS_UNICODE(unicode);
    for (k = 0; k < size; k++) {
        cffi_char32_t ordinal = w[k];

        if (ordinal <= 0xFFFF) {
            *out++ = ordinal;
            continue;
        }
        if (ordinal > 0x10FFFF) {
            PyErr_Format(PyExc_ValueError,
                         "char32_t out of range for "
                         "conversion to unicode: 0x%x", (int)ordinal);
            Py_DECREF(unicode);
            return NULL;
        }
        /* Split the 20 remaining bits into high and low surrogates. */
        ordinal -= 0x10000;
        *out++ = 0xD800 | (ordinal >> 10);
        *out++ = 0xDC00 | (ordinal & 0x3FF);
    }
    return unicode;
}

static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
    /* Py_UNICODE is 16 bits wide in this configuration, so the char16_t
       array is handed to PyUnicode_FromUnicode() unchanged. */
    const Py_UNICODE *units = (const Py_UNICODE *)w;

    return PyUnicode_FromUnicode(units, size);
}

#else   /* Py_UNICODE_SIZE == 4 */

static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
    /* Py_UNICODE is 32 bits wide in this configuration, so the char32_t
       array is handed to PyUnicode_FromUnicode() unchanged. */
    const Py_UNICODE *units = (const Py_UNICODE *)w;

    return PyUnicode_FromUnicode(units, size);
}

static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
    /* Convert 'size' char16_t units from 'w' into a unicode object whose
       Py_UNICODE units are 32 bits wide.  A high surrogate immediately
       followed by a low surrogate is merged into one character; any other
       unit (including an unpaired surrogate) is copied as is.
       'size' is the length of the 'w' array.
       Returns the new object, or NULL on failure. */
    PyObject *result = PyUnicode_FromUnicode(NULL, size);
    Py_UNICODE *u_base;
    Py_UNICODE *u;
    Py_ssize_t remaining = size;

    if (result == NULL)
        return NULL;

    u_base = PyUnicode_AS_UNICODE(result);
    u = u_base;

    if (size == 1) {      /* performance only */
        *u = (cffi_char32_t)*w;
        return result;
    }

    while (remaining > 0) {
        cffi_char32_t ch = *w++;
        remaining--;
        /* Only look ahead when there is still a unit left to pair with. */
        if (0xD800 <= ch && ch <= 0xDBFF && remaining > 0) {
            cffi_char32_t ch2 = *w;
            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
                w++;
                remaining--;
            }
        }
        *u++ = ch;
    }

    /* The object was allocated for 'size' characters; it only needs to
       shrink if at least one surrogate pair was merged.  When nothing was
       merged the length is already final and the resize is skipped.
       NOTE(review): for size == 0 the object returned above may be
       CPython's shared empty string, which PyUnicode_Resize() may refuse
       to touch; skipping the no-op resize avoids that case -- confirm
       against the targeted CPython versions. */
    if (u - u_base != size) {
        if (PyUnicode_Resize(&result, u - u_base) < 0) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

#endif


/* IS_SURROGATE(u): true when u[0] is a high surrogate (0xD800..0xDBFF)
   and u[1] is a low surrogate (0xDC00..0xDFFF).
   AS_SURROGATE(u): the code point encoded by the pair u[0], u[1]; only
   meaningful when IS_SURROGATE(u) is true.
   Both macros evaluate 'u' more than once. */
#define IS_SURROGATE(u)   ((u)[0] >= 0xD800 && (u)[0] <= 0xDBFF &&   \
                           (u)[1] >= 0xDC00 && (u)[1] <= 0xDFFF)
#define AS_SURROGATE(u)   ((((u)[0] - 0xD800) << 10) +               \
                           ((u)[1] - 0xDC00) + 0x10000)

/* Extract the single character of 'unicode' as a char16_t.
   On success stores it in '*result' and returns 0.  On failure writes a
   short description of what was received into 'err_got' and returns -1;
   no Python exception is set here. */
static int
_my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
                             char *err_got)
{
    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
    Py_UNICODE ch;

    if (length != 1) {
        sprintf(err_got, "unicode string of length %zd", length);
        return -1;
    }

    ch = PyUnicode_AS_UNICODE(unicode)[0];
#if Py_UNICODE_SIZE == 4
    /* A 32-bit Py_UNICODE may hold a value that does not fit 16 bits. */
    if ((unsigned int)ch > 0xFFFF) {
        sprintf(err_got, "larger-than-0xFFFF character");
        return -1;
    }
#endif
    *result = (cffi_char16_t)ch;
    return 0;
}

/* Extract the single character of 'unicode' as a char32_t.
   A string of length 1 is always accepted; when Py_UNICODE is 16 bits
   wide, a string of length 2 forming a surrogate pair is accepted too.
   On success stores the character in '*result' and returns 0.  Otherwise
   writes a short description into 'err_got' and returns -1; no Python
   exception is set here. */
static int
_my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
                             char *err_got)
{
    Py_UNICODE *units = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
    int status = 0;

    if (length == 1) {
        *result = (cffi_char32_t)units[0];
    }
#if Py_UNICODE_SIZE == 2
    else if (length == 2 && IS_SURROGATE(units)) {
        *result = AS_SURROGATE(units);
    }
#endif
    else {
        sprintf(err_got, "unicode string of length %zd", length);
        status = -1;
    }
    return status;
}

/* Number of char16_t units needed to hold the contents of 'unicode':
   its length in Py_UNICODE units, plus one extra unit for every
   character above 0xFFFF (possible only when Py_UNICODE is 32 bits). */
static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
{
    Py_ssize_t count = PyUnicode_GET_SIZE(unicode);

#if Py_UNICODE_SIZE == 4
    const Py_UNICODE *p = PyUnicode_AS_UNICODE(unicode);
    const Py_UNICODE *stop = p + count;   /* fixed before 'count' grows */

    while (p < stop) {
        if (*p++ > 0xFFFF)
            count++;
    }
#endif
    return count;
}

/* Number of char32_t units needed to hold the contents of 'unicode':
   its length in Py_UNICODE units, minus one for every surrogate pair
   found (pairs are only looked for when Py_UNICODE is 16 bits). */
static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
{
    Py_ssize_t total = PyUnicode_GET_SIZE(unicode);
    Py_ssize_t pairs = 0;

#if Py_UNICODE_SIZE == 2
    Py_UNICODE *units = PyUnicode_AS_UNICODE(unicode);
    Py_ssize_t pos;

    /* 'pos' is the index of the would-be low surrogate, so the pair
       examined is units[pos-1], units[pos]. */
    for (pos = 1; pos < total; pos++) {
        if (IS_SURROGATE(units + pos - 1))
            pairs++;
    }
#endif
    return total - pairs;
}

/* Copy the contents of 'unicode' into 'result' as char16_t units.
   When Py_UNICODE is 32 bits wide, characters above 0xFFFF are written
   as surrogate pairs and a character above 0x10FFFF raises ValueError
   (returning -1; units already written stay in 'result').
   Returns 0 on success.  'resultlen' is not consulted: the number of
   units written is driven by the length of 'unicode' alone. */
static int _my_PyUnicode_AsChar16(PyObject *unicode,
                                  cffi_char16_t *result,
                                  Py_ssize_t resultlen)
{
    const Py_UNICODE *src = PyUnicode_AS_UNICODE(unicode);
    const Py_UNICODE *src_end = src + PyUnicode_GET_SIZE(unicode);
    cffi_char16_t *dst = result;

    while (src < src_end) {
#if Py_UNICODE_SIZE == 2
        *dst++ = *src++;
#else
        cffi_char32_t ordinal = *src++;

        if (ordinal <= 0xFFFF) {
            *dst++ = ordinal;
        }
        else if (ordinal <= 0x10FFFF) {
            ordinal -= 0x10000;
            *dst++ = 0xD800 | (ordinal >> 10);
            *dst++ = 0xDC00 | (ordinal & 0x3FF);
        }
        else {
            PyErr_Format(PyExc_ValueError,
                         "unicode character out of range for "
                         "conversion to char16_t: 0x%x", (int)ordinal);
            return -1;
        }
#endif
    }
    return 0;
}

/* Fill 'result' with exactly 'resultlen' char32_t values read from
   'unicode'.  When Py_UNICODE is 16 bits wide, a surrogate pair in the
   source is merged into one output value.  The length of 'unicode' is
   not checked here; the loop is driven by 'resultlen' only.
   Always returns 0. */
static int _my_PyUnicode_AsChar32(PyObject *unicode,
                                  cffi_char32_t *result,
                                  Py_ssize_t resultlen)
{
    const Py_UNICODE *src = PyUnicode_AS_UNICODE(unicode);
    cffi_char32_t *dst = result;
    Py_ssize_t remaining;

    for (remaining = resultlen; remaining > 0; remaining--) {
        Py_ssize_t consumed = 1;
        cffi_char32_t ordinal = src[0];
#if Py_UNICODE_SIZE == 2
        if (IS_SURROGATE(src)) {
            ordinal = AS_SURROGATE(src);
            consumed = 2;      /* the pair uses two source units */
        }
#endif
        *dst++ = ordinal;
        src += consumed;
    }
    return 0;
}