SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
							/*
 * wchar_t helpers, version CPython >= 3.3.
 *
 * CPython 3.3 added support for sys.maxunicode == 0x10FFFF on all
 * platforms, even ones with wchar_t limited to 2 bytes.  As such,
 * this code here works from the outside like wchar_helper.h in the
 * case Py_UNICODE_SIZE == 4, but the implementation is very different.
 */

typedef uint16_t cffi_char16_t;
typedef uint32_t cffi_char32_t;


static PyObject *
_my_PyUnicode_FromChar32(const cffi_char32_t *w, Py_ssize_t size)
{
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, w, size);
}

static PyObject *
_my_PyUnicode_FromChar16(const cffi_char16_t *w, Py_ssize_t size)
{
    /* are there any surrogate pairs, and if so, how many? */
    Py_ssize_t i, count_surrogates = 0;
    for (i = 0; i < size - 1; i++) {
        if (0xD800 <= w[i] && w[i] <= 0xDBFF &&
                0xDC00 <= w[i+1] && w[i+1] <= 0xDFFF)
            count_surrogates++;
    }
    if (count_surrogates == 0) {
        /* no, fast path */
        return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, w, size);
    }
    else
    {
        PyObject *result = PyUnicode_New(size - count_surrogates, 0x10FFFF);
        Py_UCS4 *data;
        assert(PyUnicode_KIND(result) == PyUnicode_4BYTE_KIND);
        data = PyUnicode_4BYTE_DATA(result);

        for (i = 0; i < size; i++)
        {
            cffi_char32_t ch = w[i];
            if (0xD800 <= ch && ch <= 0xDBFF && i < size - 1) {
                cffi_char32_t ch2 = w[i + 1];
                if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
                    ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
                    i++;
                }
            }
            *data++ = ch;
        }
        return result;
    }
}

static int
_my_PyUnicode_AsSingleChar16(PyObject *unicode, cffi_char16_t *result,
                             char *err_got)
{
    cffi_char32_t ch;
    if (PyUnicode_GET_LENGTH(unicode) != 1) {
        sprintf(err_got, "unicode string of length %zd",
                PyUnicode_GET_LENGTH(unicode));
        return -1;
    }
    ch = PyUnicode_READ_CHAR(unicode, 0);

    if (ch > 0xFFFF)
    {
        sprintf(err_got, "larger-than-0xFFFF character");
        return -1;
    }
    *result = (cffi_char16_t)ch;
    return 0;
}

static int
_my_PyUnicode_AsSingleChar32(PyObject *unicode, cffi_char32_t *result,
                             char *err_got)
{
    if (PyUnicode_GET_LENGTH(unicode) != 1) {
        sprintf(err_got, "unicode string of length %zd",
                PyUnicode_GET_LENGTH(unicode));
        return -1;
    }
    *result = PyUnicode_READ_CHAR(unicode, 0);
    return 0;
}

static Py_ssize_t _my_PyUnicode_SizeAsChar16(PyObject *unicode)
{
    Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
    Py_ssize_t result = length;
    unsigned int kind = PyUnicode_KIND(unicode);

    if (kind == PyUnicode_4BYTE_KIND)
    {
        Py_UCS4 *data = PyUnicode_4BYTE_DATA(unicode);
        Py_ssize_t i;
        for (i = 0; i < length; i++) {
            if (data[i] > 0xFFFF)
                result++;
        }
    }
    return result;
}

static Py_ssize_t _my_PyUnicode_SizeAsChar32(PyObject *unicode)
{
    return PyUnicode_GET_LENGTH(unicode);
}

static int _my_PyUnicode_AsChar16(PyObject *unicode,
                                  cffi_char16_t *result,
                                  Py_ssize_t resultlen)
{
    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
    unsigned int kind = PyUnicode_KIND(unicode);
    void *data = PyUnicode_DATA(unicode);
    Py_ssize_t i;

    for (i = 0; i < len; i++) {
        cffi_char32_t ordinal = PyUnicode_READ(kind, data, i);
        if (ordinal > 0xFFFF) {
            if (ordinal > 0x10FFFF) {
                PyErr_Format(PyExc_ValueError,
                             "unicode character out of range for "
                             "conversion to char16_t: 0x%x", (int)ordinal);
                return -1;
            }
            ordinal -= 0x10000;
            *result++ = 0xD800 | (ordinal >> 10);
            *result++ = 0xDC00 | (ordinal & 0x3FF);
        }
        else
            *result++ = ordinal;
    }
    return 0;
}

static int _my_PyUnicode_AsChar32(PyObject *unicode,
                                  cffi_char32_t *result,
                                  Py_ssize_t resultlen)
{
    if (PyUnicode_AsUCS4(unicode, (Py_UCS4 *)result, resultlen, 0) == NULL)
        return -1;
    return 0;
}