12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072 |
- /* ------------------------------------------------------------------------
- _codecs -- Provides access to the codec registry and the builtin
- codecs.
- This module should never be imported directly. The standard library
- module "codecs" wraps this builtin module for use within Python.
- The codec registry is accessible via:
- register(search_function) -> None
- lookup(encoding) -> CodecInfo object
- The builtin Unicode codecs use the following interface:
- <encoding>_encode(Unicode_object[,errors='strict']) ->
- (bytes object, characters consumed)
- <encoding>_decode(char_buffer_obj[,errors='strict']) ->
- (Unicode object, bytes consumed)
- These <encoding>s are available: utf_7, utf_8, utf_16 and utf_32 (each
- also with _le and _be variants), unicode_escape, raw_unicode_escape,
- latin_1, ascii (7-bit), charmap, and mbcs, oem, code_page (on win32).
- Written by Marc-Andre Lemburg (mal@lemburg.com).
- Copyright (c) Corporation for National Research Initiatives.
- ------------------------------------------------------------------------ */
- #define PY_SSIZE_T_CLEAN
- #include "Python.h"
- #ifdef MS_WINDOWS
- #include <windows.h>
- #endif
- /*[clinic input]
- module _codecs
- [clinic start generated code]*/
- /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
- #include "pycore_runtime.h"
- #include "clinic/_codecsmodule.c.h"
- /* --- Registry ----------------------------------------------------------- */
- /*[clinic input]
- _codecs.register
- search_function: object
- /
- Register a codec search function.
- Search functions are expected to take one argument, the encoding name in
- all lower case letters, and either return None, or a tuple of functions
- (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
- [clinic start generated code]*/
- static PyObject *
- _codecs_register(PyObject *module, PyObject *search_function)
- /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
- {
-     /* Forward the search function to the registry.  Any non-zero status
-        is treated as failure and reported to the caller as NULL. */
-     const int status = PyCodec_Register(search_function);
-     if (status != 0) {
-         return NULL;
-     }
-     Py_RETURN_NONE;
- }
- /*[clinic input]
- _codecs.unregister
- search_function: object
- /
- Unregister a codec search function and clear the registry's cache.
- If the search function is not registered, do nothing.
- [clinic start generated code]*/
- static PyObject *
- _codecs_unregister(PyObject *module, PyObject *search_function)
- /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
- {
-     /* Only a negative status counts as failure here; everything else
-        yields None. */
-     const int status = PyCodec_Unregister(search_function);
-     if (status >= 0) {
-         Py_RETURN_NONE;
-     }
-     return NULL;
- }
- /*[clinic input]
- _codecs.lookup
- encoding: str
- /
- Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
- [clinic start generated code]*/
- static PyObject *
- _codecs_lookup_impl(PyObject *module, const char *encoding)
- /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
- {
-     /* Pure delegation: whatever the registry lookup yields (including
-        NULL) is returned unchanged. */
-     PyObject *codec_info = _PyCodec_Lookup(encoding);
-     return codec_info;
- }
- /*[clinic input]
- _codecs.encode
- obj: object
- encoding: str(c_default="NULL") = "utf-8"
- errors: str(c_default="NULL") = "strict"
- Encodes obj using the codec registered for encoding.
- The default encoding is 'utf-8'. errors may be given to set a
- different error handling scheme. Default is 'strict' meaning that encoding
- errors raise a ValueError. Other possible values are 'ignore', 'replace'
- and 'backslashreplace' as well as any other name registered with
- codecs.register_error that can handle ValueErrors.
- [clinic start generated code]*/
- static PyObject *
- _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
-                     const char *errors)
- /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
- {
-     /* A NULL encoding is replaced by the interpreter's default encoding
-        name; `errors` is forwarded untouched (it may be NULL as well). */
-     const char *codec_name = (encoding != NULL)
-         ? encoding
-         : PyUnicode_GetDefaultEncoding();
-     /* The actual work is done by the codec registry. */
-     return PyCodec_Encode(obj, codec_name, errors);
- }
- /*[clinic input]
- _codecs.decode
- obj: object
- encoding: str(c_default="NULL") = "utf-8"
- errors: str(c_default="NULL") = "strict"
- Decodes obj using the codec registered for encoding.
- Default encoding is 'utf-8'. errors may be given to set a
- different error handling scheme. Default is 'strict' meaning that encoding
- errors raise a ValueError. Other possible values are 'ignore', 'replace'
- and 'backslashreplace' as well as any other name registered with
- codecs.register_error that can handle ValueErrors.
- [clinic start generated code]*/
- static PyObject *
- _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
-                     const char *errors)
- /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
- {
-     /* A NULL encoding is replaced by the interpreter's default encoding
-        name before the registry is asked to decode. */
-     const char *codec_name = encoding;
-     if (codec_name == NULL) {
-         codec_name = PyUnicode_GetDefaultEncoding();
-     }
-     return PyCodec_Decode(obj, codec_name, errors);
- }
- /* --- Helpers ------------------------------------------------------------ */
- /* Pair a codec result with a length as a 2-tuple built from "Nn".
-    A NULL `decoded` is passed straight through as NULL, so callers can
-    wrap a codec call without checking its result first. */
- static PyObject *
- codec_tuple(PyObject *decoded, Py_ssize_t len)
- {
-     return (decoded != NULL) ? Py_BuildValue("Nn", decoded, len) : NULL;
- }
- /* --- String codecs ------------------------------------------------------ */
- /*[clinic input]
- _codecs.escape_decode
- data: Py_buffer(accept={str, buffer})
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
-                            const char *errors)
- /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
- {
-     /* The whole input length is always reported as the second tuple item. */
-     const Py_ssize_t input_len = data->len;
-     PyObject *unescaped = PyBytes_DecodeEscape(data->buf, input_len,
-                                                errors, 0, NULL);
-     return codec_tuple(unescaped, input_len);
- }
- /*[clinic input]
- _codecs.escape_encode
- data: object(subclass_of='&PyBytes_Type')
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_escape_encode_impl(PyObject *module, PyObject *data,
-                            const char *errors)
- /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
- {
-     /* Backslash-escapes quote, backslash, TAB, LF and CR, writes every
-        other byte below ' ' or at/above 0x7f as a \xNN hex escape, and
-        copies the remaining bytes verbatim.  Returns the tuple
-        (escaped bytes, input size).  `errors` is not consulted. */
-     const Py_ssize_t size = PyBytes_GET_SIZE(data);
-     Py_ssize_t newsize;
-     PyObject *v;
-     char *out;
-     /* The scratch buffer holds 4 output bytes per input byte, so inputs
-        for which 4*size would overflow Py_ssize_t are rejected up front. */
-     if (size > PY_SSIZE_T_MAX / 4) {
-         PyErr_SetString(PyExc_OverflowError,
-                         "string is too large to encode");
-         return NULL;
-     }
-     newsize = 4*size;
-     v = PyBytes_FromStringAndSize(NULL, newsize);
-     if (v == NULL) {
-         return NULL;
-     }
-     out = PyBytes_AS_STRING(v);
-     for (Py_ssize_t pos = 0; pos < size; pos++) {
-         char ch;
-         /* Space for one more hex escape must still be available. */
-         assert(newsize - (out - PyBytes_AS_STRING(v)) >= 4);
-         ch = PyBytes_AS_STRING(data)[pos];
-         switch (ch) {
-         case '\'':
-         case '\\':
-             *out++ = '\\';
-             *out++ = ch;
-             break;
-         case '\t':
-             *out++ = '\\';
-             *out++ = 't';
-             break;
-         case '\n':
-             *out++ = '\\';
-             *out++ = 'n';
-             break;
-         case '\r':
-             *out++ = '\\';
-             *out++ = 'r';
-             break;
-         default:
-             if (ch < ' ' || ch >= 0x7f) {
-                 *out++ = '\\';
-                 *out++ = 'x';
-                 *out++ = Py_hexdigits[(ch & 0xf0) >> 4];
-                 *out++ = Py_hexdigits[ch & 0xf];
-             }
-             else {
-                 *out++ = ch;
-             }
-             break;
-         }
-     }
-     *out = '\0';
-     /* Trim the over-allocated buffer to the number of bytes written. */
-     if (_PyBytes_Resize(&v, (out - PyBytes_AS_STRING(v)))) {
-         return NULL;
-     }
-     return codec_tuple(v, size);
- }
- /* --- Decoder ------------------------------------------------------------ */
- /*[clinic input]
- _codecs.utf_7_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
-                           const char *errors, int final)
- /*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
- {
-     /* With `final` set the decoder gets NULL and the reported count stays
-        at data->len; otherwise it receives &nbytes and may update it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
-                                                   errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_8_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
-                           const char *errors, int final)
- /*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
- {
-     /* With `final` set the decoder gets NULL and the reported count stays
-        at data->len; otherwise it receives &nbytes and may update it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
-                                                   errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_16_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
-                            const char *errors, int final)
- /*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
- {
-     /* The decoder is started with byte order 0. */
-     int order = 0;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_16_le_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int final)
- /*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
- {
-     /* The decoder is started with byte order -1. */
-     int order = -1;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_16_be_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int final)
- /*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
- {
-     /* The decoder is started with byte order 1. */
-     int order = 1;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /* This non-standard version also provides access to the byteorder
- parameter of the builtin UTF-16 codec.
- It returns a tuple (unicode, bytesread, byteorder) with byteorder
- being the value in effect at the end of data.
- */
- /*[clinic input]
- _codecs.utf_16_ex_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- byteorder: int = 0
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int byteorder, int final)
- /*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
- {
-     /* `byteorder` is passed by address, so the value placed in the result
-        is whatever the decoder left in it.  `nbytes` keeps data->len when
-        `final` is set, since the decoder then receives NULL. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
-                                                    errors, &byteorder,
-                                                    nbytes_out);
-     if (text != NULL) {
-         /* 3-tuple: (decoded object, byte count, byte order). */
-         return Py_BuildValue("Nni", text, nbytes, byteorder);
-     }
-     return NULL;
- }
- /*[clinic input]
- _codecs.utf_32_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
-                            const char *errors, int final)
- /*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
- {
-     /* The decoder is started with byte order 0. */
-     int order = 0;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_32_le_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int final)
- /*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
- {
-     /* The decoder is started with byte order -1. */
-     int order = -1;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.utf_32_be_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int final)
- /*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
- {
-     /* The decoder is started with byte order 1. */
-     int order = 1;
-     /* Stays at data->len when `final` is set, because the decoder is then
-        given NULL instead of a pointer to it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
-                                                    errors, &order,
-                                                    nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /* This non-standard version also provides access to the byteorder
- parameter of the builtin UTF-32 codec.
- It returns a tuple (unicode, bytesread, byteorder) with byteorder
- being the value in effect at the end of data.
- */
- /*[clinic input]
- _codecs.utf_32_ex_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- byteorder: int = 0
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
-                               const char *errors, int byteorder, int final)
- /*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
- {
-     /* `byteorder` is passed by address, so the value placed in the result
-        is whatever the decoder left in it.  `nbytes` keeps data->len when
-        `final` is set, since the decoder then receives NULL. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = final ? NULL : &nbytes;
-     PyObject *text = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
-                                                    errors, &byteorder,
-                                                    nbytes_out);
-     if (text != NULL) {
-         /* 3-tuple: (decoded object, byte count, byte order). */
-         return Py_BuildValue("Nni", text, nbytes, byteorder);
-     }
-     return NULL;
- }
- /*[clinic input]
- _codecs.unicode_escape_decode
- data: Py_buffer(accept={str, buffer})
- errors: str(accept={str, NoneType}) = None
- final: bool = True
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
-                                    const char *errors, int final)
- /*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
- {
-     /* With `final` set the decoder gets NULL and the reported count stays
-        at data->len; otherwise it receives &nbytes and may update it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = _PyUnicode_DecodeUnicodeEscapeStateful(
-         data->buf, data->len, errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.raw_unicode_escape_decode
- data: Py_buffer(accept={str, buffer})
- errors: str(accept={str, NoneType}) = None
- final: bool = True
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
-                                        const char *errors, int final)
- /*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
- {
-     /* With `final` set the decoder gets NULL and the reported count stays
-        at data->len; otherwise it receives &nbytes and may update it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = _PyUnicode_DecodeRawUnicodeEscapeStateful(
-         data->buf, data->len, errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.latin_1_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
-                             const char *errors)
- /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
- {
-     /* The full buffer length is always reported as the byte count. */
-     const Py_ssize_t input_len = data->len;
-     PyObject *text = PyUnicode_DecodeLatin1(data->buf, input_len, errors);
-     return codec_tuple(text, input_len);
- }
- /*[clinic input]
- _codecs.ascii_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
-                           const char *errors)
- /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
- {
-     /* The full buffer length is always reported as the byte count. */
-     const Py_ssize_t input_len = data->len;
-     PyObject *text = PyUnicode_DecodeASCII(data->buf, input_len, errors);
-     return codec_tuple(text, input_len);
- }
- /*[clinic input]
- _codecs.charmap_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- mapping: object = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
-                             const char *errors, PyObject *mapping)
- /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
- {
-     /* Py_None is translated to NULL before the decoder is called. */
-     PyObject *table = (mapping == Py_None) ? NULL : mapping;
-     PyObject *text = PyUnicode_DecodeCharmap(data->buf, data->len,
-                                              table, errors);
-     return codec_tuple(text, data->len);
- }
- #ifdef MS_WINDOWS
- /*[clinic input]
- _codecs.mbcs_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
-                          const char *errors, int final)
- /*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
- {
-     /* With `final` set the decoder gets NULL and the reported count stays
-        at data->len; otherwise it receives &nbytes and may update it. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
-                                                   errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.oem_decode
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
-                         const char *errors, int final)
- /*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
- {
-     /* Same call as the generic code-page decoder, fixed to CP_OEMCP.
-        `nbytes` keeps data->len when `final` is set (NULL is passed). */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
-                                                       data->buf, data->len,
-                                                       errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- /*[clinic input]
- _codecs.code_page_decode
- codepage: int
- data: Py_buffer
- errors: str(accept={str, NoneType}) = None
- final: bool = False
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_code_page_decode_impl(PyObject *module, int codepage,
-                               Py_buffer *data, const char *errors, int final)
- /*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
- {
-     /* `codepage` is forwarded as given.  `nbytes` keeps data->len when
-        `final` is set, because the decoder then receives NULL. */
-     Py_ssize_t nbytes = data->len;
-     Py_ssize_t *nbytes_out = NULL;
-     if (!final) {
-         nbytes_out = &nbytes;
-     }
-     PyObject *text = PyUnicode_DecodeCodePageStateful(codepage,
-                                                       data->buf, data->len,
-                                                       errors, nbytes_out);
-     return codec_tuple(text, nbytes);
- }
- #endif /* MS_WINDOWS */
- /* --- Encoder ------------------------------------------------------------ */
- /*[clinic input]
- _codecs.readbuffer_encode
- data: Py_buffer(accept={str, buffer})
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
-                                const char *errors)
- /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
- {
-     /* Copies the buffer contents into a new bytes object; `errors` is
-        not consulted. */
-     const Py_ssize_t input_len = data->len;
-     PyObject *copy = PyBytes_FromStringAndSize(data->buf, input_len);
-     return codec_tuple(copy, input_len);
- }
- /*[clinic input]
- _codecs.utf_7_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
-                           const char *errors)
- /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
- {
-     /* The second tuple item is the length of `str`, not of the output. */
-     PyObject *encoded = _PyUnicode_EncodeUTF7(str, 0, 0, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.utf_8_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
-                           const char *errors)
- /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
- {
-     /* The second tuple item is the length of `str`, not of the output. */
-     PyObject *encoded = _PyUnicode_AsUTF8String(str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /* This version provides access to the byteorder parameter of the
- builtin UTF-16 codecs as optional third argument. It defaults to 0
- which means: use the native byte order and prepend the data with a
- BOM.
- */
- /*[clinic input]
- _codecs.utf_16_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- byteorder: int = 0
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
-                            const char *errors, int byteorder)
- /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
- {
-     /* `byteorder` is forwarded as given; the second tuple item is the
-        length of `str`. */
-     PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, byteorder);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.utf_16_le_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
-                               const char *errors)
- /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
- {
-     /* Byte order is fixed to -1 for this variant. */
-     PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, -1);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.utf_16_be_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
-                               const char *errors)
- /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
- {
-     /* Byte order is fixed to +1 for this variant. */
-     PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, +1);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /* This version provides access to the byteorder parameter of the
- builtin UTF-32 codecs as optional third argument. It defaults to 0
- which means: use the native byte order and prepend the data with a
- BOM.
- */
- /*[clinic input]
- _codecs.utf_32_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- byteorder: int = 0
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
-                            const char *errors, int byteorder)
- /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
- {
-     /* `byteorder` is forwarded as given; the second tuple item is the
-        length of `str`. */
-     PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, byteorder);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.utf_32_le_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
-                               const char *errors)
- /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
- {
-     /* Byte order is fixed to -1 for this variant. */
-     PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, -1);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.utf_32_be_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
-                               const char *errors)
- /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
- {
-     /* Byte order is fixed to +1 for this variant. */
-     PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, +1);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.unicode_escape_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
-                                    const char *errors)
- /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
- {
-     /* `errors` is accepted but not passed on to the encoder. */
-     PyObject *encoded = PyUnicode_AsUnicodeEscapeString(str);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.raw_unicode_escape_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
-                                        const char *errors)
- /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
- {
-     /* `errors` is accepted but not passed on to the encoder. */
-     PyObject *encoded = PyUnicode_AsRawUnicodeEscapeString(str);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.latin_1_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
-                             const char *errors)
- /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
- {
-     /* The second tuple item is the length of `str`, not of the output. */
-     PyObject *encoded = _PyUnicode_AsLatin1String(str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.ascii_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
-                           const char *errors)
- /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
- {
-     /* The second tuple item is the length of `str`, not of the output. */
-     PyObject *encoded = _PyUnicode_AsASCIIString(str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.charmap_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- mapping: object = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
-                             const char *errors, PyObject *mapping)
- /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
- {
-     /* Py_None is translated to NULL before the encoder is called. */
-     PyObject *table = (mapping == Py_None) ? NULL : mapping;
-     PyObject *encoded = _PyUnicode_EncodeCharmap(str, table, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.charmap_build
- map: unicode
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_charmap_build_impl(PyObject *module, PyObject *map)
- /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
- {
-     /* Pure delegation; the builder's result (or NULL) is returned as is. */
-     PyObject *encoding_map = PyUnicode_BuildEncodingMap(map);
-     return encoding_map;
- }
- #ifdef MS_WINDOWS
- /*[clinic input]
- _codecs.mbcs_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
- /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
- {
-     /* Code-page encoder fixed to CP_ACP. */
-     PyObject *encoded = PyUnicode_EncodeCodePage(CP_ACP, str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.oem_encode
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
- /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
- {
-     /* Code-page encoder fixed to CP_OEMCP. */
-     PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- /*[clinic input]
- _codecs.code_page_encode
- code_page: int
- str: unicode
- errors: str(accept={str, NoneType}) = None
- /
- [clinic start generated code]*/
- static PyObject *
- _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
-                               const char *errors)
- /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
- {
-     /* `code_page` is forwarded as given; the second tuple item is the
-        length of `str`. */
-     PyObject *encoded = PyUnicode_EncodeCodePage(code_page, str, errors);
-     return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
- }
- #endif /* MS_WINDOWS */
- /* --- Error handler registry --------------------------------------------- */
- /*[clinic input]
- _codecs.register_error
- errors: str
- handler: object
- /
- Register the specified error handler under the name errors.
- handler must be a callable object, that will be called with an exception
- instance containing information about the location of the encoding/decoding
- error and must return a (replacement, new position) tuple.
- [clinic start generated code]*/
- static PyObject *
- _codecs_register_error_impl(PyObject *module, const char *errors,
-                             PyObject *handler)
- /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
- {
-     /* Any non-zero status from the registry is treated as failure and
-        reported to the caller as NULL. */
-     const int status = PyCodec_RegisterError(errors, handler);
-     if (status != 0) {
-         return NULL;
-     }
-     Py_RETURN_NONE;
- }
- /*[clinic input]
- _codecs.lookup_error
- name: str
- /
- lookup_error(errors) -> handler
- Return the error handler for the specified error handling name or raise a
- LookupError, if no handler exists under this name.
- [clinic start generated code]*/
- static PyObject *
- _codecs_lookup_error_impl(PyObject *module, const char *name)
- /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
- {
-     /* Pure delegation; the lookup result (or NULL) is returned as is. */
-     PyObject *handler = PyCodec_LookupError(name);
-     return handler;
- }
- /* --- Module API --------------------------------------------------------- */
- /* Method table of the _codecs module, referenced from `codecsmodule`.
-    Each *_METHODDEF entry is a macro rather than a literal initializer;
-    presumably they are supplied by the generated header
-    "clinic/_codecsmodule.c.h" included near the top of the file -- TODO
-    confirm.
-    NOTE(review): the MBCS/OEM/CODE_PAGE entries are listed unconditionally
-    although their implementations sit inside #ifdef MS_WINDOWS; this
-    presumably relies on those macros being defined as empty on other
-    platforms -- confirm against the generated header. */
- static PyMethodDef _codecs_functions[] = {
- _CODECS_REGISTER_METHODDEF
- _CODECS_UNREGISTER_METHODDEF
- _CODECS_LOOKUP_METHODDEF
- _CODECS_ENCODE_METHODDEF
- _CODECS_DECODE_METHODDEF
- _CODECS_ESCAPE_ENCODE_METHODDEF
- _CODECS_ESCAPE_DECODE_METHODDEF
- _CODECS_UTF_8_ENCODE_METHODDEF
- _CODECS_UTF_8_DECODE_METHODDEF
- _CODECS_UTF_7_ENCODE_METHODDEF
- _CODECS_UTF_7_DECODE_METHODDEF
- _CODECS_UTF_16_ENCODE_METHODDEF
- _CODECS_UTF_16_LE_ENCODE_METHODDEF
- _CODECS_UTF_16_BE_ENCODE_METHODDEF
- _CODECS_UTF_16_DECODE_METHODDEF
- _CODECS_UTF_16_LE_DECODE_METHODDEF
- _CODECS_UTF_16_BE_DECODE_METHODDEF
- _CODECS_UTF_16_EX_DECODE_METHODDEF
- _CODECS_UTF_32_ENCODE_METHODDEF
- _CODECS_UTF_32_LE_ENCODE_METHODDEF
- _CODECS_UTF_32_BE_ENCODE_METHODDEF
- _CODECS_UTF_32_DECODE_METHODDEF
- _CODECS_UTF_32_LE_DECODE_METHODDEF
- _CODECS_UTF_32_BE_DECODE_METHODDEF
- _CODECS_UTF_32_EX_DECODE_METHODDEF
- _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
- _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
- _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
- _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
- _CODECS_LATIN_1_ENCODE_METHODDEF
- _CODECS_LATIN_1_DECODE_METHODDEF
- _CODECS_ASCII_ENCODE_METHODDEF
- _CODECS_ASCII_DECODE_METHODDEF
- _CODECS_CHARMAP_ENCODE_METHODDEF
- _CODECS_CHARMAP_DECODE_METHODDEF
- _CODECS_CHARMAP_BUILD_METHODDEF
- _CODECS_READBUFFER_ENCODE_METHODDEF
- _CODECS_MBCS_ENCODE_METHODDEF
- _CODECS_MBCS_DECODE_METHODDEF
- _CODECS_OEM_ENCODE_METHODDEF
- _CODECS_OEM_DECODE_METHODDEF
- _CODECS_CODE_PAGE_ENCODE_METHODDEF
- _CODECS_CODE_PAGE_DECODE_METHODDEF
- _CODECS_REGISTER_ERROR_METHODDEF
- _CODECS_LOOKUP_ERROR_METHODDEF
- {NULL, NULL} /* sentinel */
- };
- /* Module slots: a single multiple-interpreters declaration followed by
-    the zero terminator entry. */
- static PyModuleDef_Slot _codecs_slots[] = {
-     {
-         .slot = Py_mod_multiple_interpreters,
-         .value = Py_MOD_PER_INTERPRETER_GIL_SUPPORTED,
-     },
-     {
-         .slot = 0,
-         .value = NULL,
-     },
- };
- /* Module definition for "_codecs": no docstring, zero per-module state
-    size, the method table and slot array defined above, and no
-    traverse/clear/free hooks. */
- static struct PyModuleDef codecsmodule = {
-     .m_base = PyModuleDef_HEAD_INIT,
-     .m_name = "_codecs",
-     .m_doc = NULL,
-     .m_size = 0,
-     .m_methods = _codecs_functions,
-     .m_slots = _codecs_slots,
-     .m_traverse = NULL,
-     .m_clear = NULL,
-     .m_free = NULL,
- };
- /* Module entry point: hands the static module definition to
-    PyModuleDef_Init and returns its result unchanged. */
- PyMODINIT_FUNC
- PyInit__codecs(void)
- {
-     PyObject *module_def = PyModuleDef_Init(&codecsmodule);
-     return module_def;
- }
|