12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520 |
- /* ------------------------------------------------------------------------
- Python Codec Registry and support functions
- Written by Marc-Andre Lemburg (mal@lemburg.com).
- Copyright (c) Corporation for National Research Initiatives.
- ------------------------------------------------------------------------ */
- #include "Python.h"
- #include "pycore_call.h" // _PyObject_CallNoArgs()
- #include "pycore_interp.h" // PyInterpreterState.codec_search_path
- #include "pycore_pyerrors.h" // _PyErr_FormatNote()
- #include "pycore_pystate.h" // _PyInterpreterState_GET()
- #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
- #include <ctype.h>
- const char *Py_hexdigits = "0123456789abcdef"; /* lowercase hex digits; the error handlers in this file index it with a 4-bit value */
- /* --- Codec Registry ----------------------------------------------------- */
- /* Import the standard encodings package which will register the first
- codec search function.
- This is done in a lazy way so that the Unicode implementation does
- not downgrade startup time of scripts not needing it.
- ImportErrors are silently ignored by this function. Only one try is
- made.
- */
- static int _PyCodecRegistry_Init(void); /* Forward */
- int PyCodec_Register(PyObject *search_function)
- {
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- goto onError;
- if (search_function == NULL) {
- PyErr_BadArgument();
- goto onError;
- }
- if (!PyCallable_Check(search_function)) {
- PyErr_SetString(PyExc_TypeError, "argument must be callable");
- goto onError;
- }
- return PyList_Append(interp->codec_search_path, search_function);
- onError:
- return -1;
- }
- int
- PyCodec_Unregister(PyObject *search_function)
- {
- PyInterpreterState *interp = PyInterpreterState_Get();
- PyObject *codec_search_path = interp->codec_search_path;
- /* Do nothing if codec_search_path is not created yet or was cleared. */
- if (codec_search_path == NULL) {
- return 0;
- }
- assert(PyList_CheckExact(codec_search_path));
- Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
- for (Py_ssize_t i = 0; i < n; i++) {
- PyObject *item = PyList_GET_ITEM(codec_search_path, i);
- if (item == search_function) {
- if (interp->codec_search_cache != NULL) {
- assert(PyDict_CheckExact(interp->codec_search_cache));
- PyDict_Clear(interp->codec_search_cache);
- }
- return PyList_SetSlice(codec_search_path, i, i+1, NULL);
- }
- }
- return 0;
- }
- extern int _Py_normalize_encoding(const char *, char *, size_t);
- /* Convert a string to a normalized Python string(decoded from UTF-8): all characters are
- converted to lower case, spaces and hyphens are replaced with underscores. */
- static
- PyObject *normalizestring(const char *string)
- {
- size_t len = strlen(string);
- char *encoding;
- PyObject *v;
- if (len > PY_SSIZE_T_MAX) {
- PyErr_SetString(PyExc_OverflowError, "string is too large");
- return NULL;
- }
- encoding = PyMem_Malloc(len + 1);
- if (encoding == NULL)
- return PyErr_NoMemory();
- if (!_Py_normalize_encoding(string, encoding, len + 1))
- {
- PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
- PyMem_Free(encoding);
- return NULL;
- }
- v = PyUnicode_FromString(encoding);
- PyMem_Free(encoding);
- return v;
- }
- /* Lookup the given encoding and return a tuple providing the codec
- facilities.
- The encoding string is looked up converted to all lower-case
- characters. This makes encodings looked up through this mechanism
- effectively case-insensitive.
- If no codec is found, a LookupError is set and NULL returned.
- As side effect, this tries to load the encodings package, if not
- yet done. This is part of the lazy load strategy for the encodings
- package.
- */
- PyObject *_PyCodec_Lookup(const char *encoding)
- {
- if (encoding == NULL) {
- PyErr_BadArgument();
- return NULL;
- }
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
- return NULL;
- }
- /* Convert the encoding to a normalized Python string: all
- characters are converted to lower case, spaces and hyphens are
- replaced with underscores. */
- PyObject *v = normalizestring(encoding);
- if (v == NULL) {
- return NULL;
- }
- /* Intern the string. We'll make it immortal later if lookup succeeds. */
- _PyUnicode_InternMortal(interp, &v);
- /* First, try to lookup the name in the registry dictionary */
- PyObject *result = PyDict_GetItemWithError(interp->codec_search_cache, v);
- if (result != NULL) {
- Py_INCREF(result);
- Py_DECREF(v);
- return result;
- }
- else if (PyErr_Occurred()) {
- goto onError;
- }
- /* Next, scan the search functions in order of registration */
- const Py_ssize_t len = PyList_Size(interp->codec_search_path);
- if (len < 0)
- goto onError;
- if (len == 0) {
- PyErr_SetString(PyExc_LookupError,
- "no codec search functions registered: "
- "can't find encoding");
- goto onError;
- }
- Py_ssize_t i;
- for (i = 0; i < len; i++) {
- PyObject *func;
- func = PyList_GetItem(interp->codec_search_path, i);
- if (func == NULL)
- goto onError;
- result = PyObject_CallOneArg(func, v);
- if (result == NULL)
- goto onError;
- if (result == Py_None) {
- Py_DECREF(result);
- continue;
- }
- if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
- PyErr_SetString(PyExc_TypeError,
- "codec search functions must return 4-tuples");
- Py_DECREF(result);
- goto onError;
- }
- break;
- }
- if (i == len) {
- /* XXX Perhaps we should cache misses too ? */
- PyErr_Format(PyExc_LookupError,
- "unknown encoding: %s", encoding);
- goto onError;
- }
- _PyUnicode_InternImmortal(interp, &v);
- /* Cache and return the result */
- if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
- Py_DECREF(result);
- goto onError;
- }
- Py_DECREF(v);
- return result;
- onError:
- Py_DECREF(v);
- return NULL;
- }
- /* Codec registry encoding check API. */
- int PyCodec_KnownEncoding(const char *encoding)
- {
- PyObject *codecs;
- codecs = _PyCodec_Lookup(encoding);
- if (!codecs) {
- PyErr_Clear();
- return 0;
- }
- else {
- Py_DECREF(codecs);
- return 1;
- }
- }
- static
- PyObject *args_tuple(PyObject *object,
- const char *errors)
- {
- PyObject *args;
- args = PyTuple_New(1 + (errors != NULL));
- if (args == NULL)
- return NULL;
- PyTuple_SET_ITEM(args, 0, Py_NewRef(object));
- if (errors) {
- PyObject *v;
- v = PyUnicode_FromString(errors);
- if (v == NULL) {
- Py_DECREF(args);
- return NULL;
- }
- PyTuple_SET_ITEM(args, 1, v);
- }
- return args;
- }
- /* Helper function to get a codec item */
- static
- PyObject *codec_getitem(const char *encoding, int index)
- {
- PyObject *codecs;
- PyObject *v;
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- return NULL;
- v = PyTuple_GET_ITEM(codecs, index);
- Py_DECREF(codecs);
- return Py_NewRef(v);
- }
- /* Helper functions to create an incremental codec. */
- static
- PyObject *codec_makeincrementalcodec(PyObject *codec_info,
- const char *errors,
- const char *attrname)
- {
- PyObject *ret, *inccodec;
- inccodec = PyObject_GetAttrString(codec_info, attrname);
- if (inccodec == NULL)
- return NULL;
- if (errors)
- ret = PyObject_CallFunction(inccodec, "s", errors);
- else
- ret = _PyObject_CallNoArgs(inccodec);
- Py_DECREF(inccodec);
- return ret;
- }
- static
- PyObject *codec_getincrementalcodec(const char *encoding,
- const char *errors,
- const char *attrname)
- {
- PyObject *codec_info, *ret;
- codec_info = _PyCodec_Lookup(encoding);
- if (codec_info == NULL)
- return NULL;
- ret = codec_makeincrementalcodec(codec_info, errors, attrname);
- Py_DECREF(codec_info);
- return ret;
- }
- /* Helper function to create a stream codec. */
- static
- PyObject *codec_getstreamcodec(const char *encoding,
- PyObject *stream,
- const char *errors,
- const int index)
- {
- PyObject *codecs, *streamcodec, *codeccls;
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- return NULL;
- codeccls = PyTuple_GET_ITEM(codecs, index);
- if (errors != NULL)
- streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
- else
- streamcodec = PyObject_CallOneArg(codeccls, stream);
- Py_DECREF(codecs);
- return streamcodec;
- }
- /* Helpers to work with the result of _PyCodec_Lookup
- */
- PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
- const char *errors)
- {
- return codec_makeincrementalcodec(codec_info, errors,
- "incrementaldecoder");
- }
- PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
- const char *errors)
- {
- return codec_makeincrementalcodec(codec_info, errors,
- "incrementalencoder");
- }
- /* Convenience APIs to query the Codec registry.
- All APIs return a codec object with incremented refcount.
- */
- PyObject *PyCodec_Encoder(const char *encoding)
- {
- return codec_getitem(encoding, 0);
- }
- PyObject *PyCodec_Decoder(const char *encoding)
- {
- return codec_getitem(encoding, 1);
- }
- PyObject *PyCodec_IncrementalEncoder(const char *encoding,
- const char *errors)
- {
- return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
- }
- PyObject *PyCodec_IncrementalDecoder(const char *encoding,
- const char *errors)
- {
- return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
- }
- PyObject *PyCodec_StreamReader(const char *encoding,
- PyObject *stream,
- const char *errors)
- {
- return codec_getstreamcodec(encoding, stream, errors, 2);
- }
- PyObject *PyCodec_StreamWriter(const char *encoding,
- PyObject *stream,
- const char *errors)
- {
- return codec_getstreamcodec(encoding, stream, errors, 3);
- }
- /* Encode an object (e.g. a Unicode object) using the given encoding
- and return the resulting encoded object (usually a Python string).
- errors is passed to the encoder factory as argument if non-NULL. */
- static PyObject *
- _PyCodec_EncodeInternal(PyObject *object,
- PyObject *encoder,
- const char *encoding,
- const char *errors)
- {
- PyObject *args = NULL, *result = NULL;
- PyObject *v = NULL;
- args = args_tuple(object, errors);
- if (args == NULL)
- goto onError;
- result = PyObject_Call(encoder, args, NULL);
- if (result == NULL) {
- _PyErr_FormatNote("%s with '%s' codec failed", "encoding", encoding);
- goto onError;
- }
- if (!PyTuple_Check(result) ||
- PyTuple_GET_SIZE(result) != 2) {
- PyErr_SetString(PyExc_TypeError,
- "encoder must return a tuple (object, integer)");
- goto onError;
- }
- v = Py_NewRef(PyTuple_GET_ITEM(result,0));
- /* We don't check or use the second (integer) entry. */
- Py_DECREF(args);
- Py_DECREF(encoder);
- Py_DECREF(result);
- return v;
- onError:
- Py_XDECREF(result);
- Py_XDECREF(args);
- Py_XDECREF(encoder);
- return NULL;
- }
- /* Decode an object (usually a Python string) using the given encoding
- and return an equivalent object (e.g. a Unicode object).
- errors is passed to the decoder factory as argument if non-NULL. */
- static PyObject *
- _PyCodec_DecodeInternal(PyObject *object,
- PyObject *decoder,
- const char *encoding,
- const char *errors)
- {
- PyObject *args = NULL, *result = NULL;
- PyObject *v;
- args = args_tuple(object, errors);
- if (args == NULL)
- goto onError;
- result = PyObject_Call(decoder, args, NULL);
- if (result == NULL) {
- _PyErr_FormatNote("%s with '%s' codec failed", "decoding", encoding);
- goto onError;
- }
- if (!PyTuple_Check(result) ||
- PyTuple_GET_SIZE(result) != 2) {
- PyErr_SetString(PyExc_TypeError,
- "decoder must return a tuple (object,integer)");
- goto onError;
- }
- v = Py_NewRef(PyTuple_GET_ITEM(result,0));
- /* We don't check or use the second (integer) entry. */
- Py_DECREF(args);
- Py_DECREF(decoder);
- Py_DECREF(result);
- return v;
- onError:
- Py_XDECREF(args);
- Py_XDECREF(decoder);
- Py_XDECREF(result);
- return NULL;
- }
- /* Generic encoding/decoding API */
- PyObject *PyCodec_Encode(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *encoder;
- encoder = PyCodec_Encoder(encoding);
- if (encoder == NULL)
- return NULL;
- return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
- }
- PyObject *PyCodec_Decode(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *decoder;
- decoder = PyCodec_Decoder(encoding);
- if (decoder == NULL)
- return NULL;
- return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
- }
- /* Text encoding/decoding API */
- PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
- const char *alternate_command)
- {
- PyObject *codec;
- PyObject *attr;
- int is_text_codec;
- codec = _PyCodec_Lookup(encoding);
- if (codec == NULL)
- return NULL;
- /* Backwards compatibility: assume any raw tuple describes a text
- * encoding, and the same for anything lacking the private
- * attribute.
- */
- if (!PyTuple_CheckExact(codec)) {
- if (_PyObject_LookupAttr(codec, &_Py_ID(_is_text_encoding), &attr) < 0) {
- Py_DECREF(codec);
- return NULL;
- }
- if (attr != NULL) {
- is_text_codec = PyObject_IsTrue(attr);
- Py_DECREF(attr);
- if (is_text_codec <= 0) {
- Py_DECREF(codec);
- if (!is_text_codec)
- PyErr_Format(PyExc_LookupError,
- "'%.400s' is not a text encoding; "
- "use %s to handle arbitrary codecs",
- encoding, alternate_command);
- return NULL;
- }
- }
- }
- /* This appears to be a valid text encoding */
- return codec;
- }
- static
- PyObject *codec_getitem_checked(const char *encoding,
- const char *alternate_command,
- int index)
- {
- PyObject *codec;
- PyObject *v;
- codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
- if (codec == NULL)
- return NULL;
- v = Py_NewRef(PyTuple_GET_ITEM(codec, index));
- Py_DECREF(codec);
- return v;
- }
- static PyObject * _PyCodec_TextEncoder(const char *encoding)
- {
- return codec_getitem_checked(encoding, "codecs.encode()", 0);
- }
- static PyObject * _PyCodec_TextDecoder(const char *encoding)
- {
- return codec_getitem_checked(encoding, "codecs.decode()", 1);
- }
- PyObject *_PyCodec_EncodeText(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *encoder;
- encoder = _PyCodec_TextEncoder(encoding);
- if (encoder == NULL)
- return NULL;
- return _PyCodec_EncodeInternal(object, encoder, encoding, errors);
- }
- PyObject *_PyCodec_DecodeText(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *decoder;
- decoder = _PyCodec_TextDecoder(encoding);
- if (decoder == NULL)
- return NULL;
- return _PyCodec_DecodeInternal(object, decoder, encoding, errors);
- }
- /* Register the error handling callback function error under the name
- name. This function will be called by the codec when it encounters
- an unencodable characters/undecodable bytes and doesn't know the
- callback name, when name is specified as the error parameter
- in the call to the encode/decode function.
- Return 0 on success, -1 on error */
- int PyCodec_RegisterError(const char *name, PyObject *error)
- {
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- return -1;
- if (!PyCallable_Check(error)) {
- PyErr_SetString(PyExc_TypeError, "handler must be callable");
- return -1;
- }
- return PyDict_SetItemString(interp->codec_error_registry,
- name, error);
- }
- /* Lookup the error handling callback function registered under the
- name error. As a special case NULL can be passed, in which case
- the error handling callback for strict encoding will be returned. */
- PyObject *PyCodec_LookupError(const char *name)
- {
- PyObject *handler = NULL;
- PyInterpreterState *interp = _PyInterpreterState_GET();
- if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
- return NULL;
- if (name==NULL)
- name = "strict";
- handler = _PyDict_GetItemStringWithError(interp->codec_error_registry, name);
- if (handler) {
- Py_INCREF(handler);
- }
- else if (!PyErr_Occurred()) {
- PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
- }
- return handler;
- }
- static void wrong_exception_type(PyObject *exc)
- {
- PyErr_Format(PyExc_TypeError,
- "don't know how to handle %.200s in error callback",
- Py_TYPE(exc)->tp_name);
- }
- PyObject *PyCodec_StrictErrors(PyObject *exc)
- {
- if (PyExceptionInstance_Check(exc))
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- else
- PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
- return NULL;
- }
- PyObject *PyCodec_IgnoreErrors(PyObject *exc)
- {
- Py_ssize_t end;
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
- if (PyUnicodeTranslateError_GetEnd(exc, &end))
- return NULL;
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- return Py_BuildValue("(Nn)", PyUnicode_New(0, 0), end);
- }
- PyObject *PyCodec_ReplaceErrors(PyObject *exc)
- {
- Py_ssize_t start, end, i, len;
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- PyObject *res;
- Py_UCS1 *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- len = end - start;
- res = PyUnicode_New(len, '?');
- if (res == NULL)
- return NULL;
- assert(PyUnicode_KIND(res) == PyUnicode_1BYTE_KIND);
- outp = PyUnicode_1BYTE_DATA(res);
- for (i = 0; i < len; ++i)
- outp[i] = '?';
- assert(_PyUnicode_CheckConsistency(res, 1));
- return Py_BuildValue("(Nn)", res, end);
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- return Py_BuildValue("(Cn)",
- (int)Py_UNICODE_REPLACEMENT_CHARACTER,
- end);
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
- PyObject *res;
- Py_UCS2 *outp;
- if (PyUnicodeTranslateError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeTranslateError_GetEnd(exc, &end))
- return NULL;
- len = end - start;
- res = PyUnicode_New(len, Py_UNICODE_REPLACEMENT_CHARACTER);
- if (res == NULL)
- return NULL;
- assert(PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND);
- outp = PyUnicode_2BYTE_DATA(res);
- for (i = 0; i < len; i++)
- outp[i] = Py_UNICODE_REPLACEMENT_CHARACTER;
- assert(_PyUnicode_CheckConsistency(res, 1));
- return Py_BuildValue("(Nn)", res, end);
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- }
- PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
- {
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- PyObject *restuple;
- PyObject *object;
- Py_ssize_t i;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- Py_UCS1 *outp;
- Py_ssize_t ressize;
- Py_UCS4 ch;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- if (end - start > PY_SSIZE_T_MAX / (2+7+1))
- end = start + PY_SSIZE_T_MAX / (2+7+1);
- for (i = start, ressize = 0; i < end; ++i) {
- /* object is guaranteed to be "ready" */
- ch = PyUnicode_READ_CHAR(object, i);
- if (ch<10)
- ressize += 2+1+1;
- else if (ch<100)
- ressize += 2+2+1;
- else if (ch<1000)
- ressize += 2+3+1;
- else if (ch<10000)
- ressize += 2+4+1;
- else if (ch<100000)
- ressize += 2+5+1;
- else if (ch<1000000)
- ressize += 2+6+1;
- else
- ressize += 2+7+1;
- }
- /* allocate replacement */
- res = PyUnicode_New(ressize, 127);
- if (res == NULL) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyUnicode_1BYTE_DATA(res);
- /* generate replacement */
- for (i = start; i < end; ++i) {
- int digits;
- int base;
- ch = PyUnicode_READ_CHAR(object, i);
- *outp++ = '&';
- *outp++ = '#';
- if (ch<10) {
- digits = 1;
- base = 1;
- }
- else if (ch<100) {
- digits = 2;
- base = 10;
- }
- else if (ch<1000) {
- digits = 3;
- base = 100;
- }
- else if (ch<10000) {
- digits = 4;
- base = 1000;
- }
- else if (ch<100000) {
- digits = 5;
- base = 10000;
- }
- else if (ch<1000000) {
- digits = 6;
- base = 100000;
- }
- else {
- digits = 7;
- base = 1000000;
- }
- while (digits-->0) {
- *outp++ = '0' + ch/base;
- ch %= base;
- base /= 10;
- }
- *outp++ = ';';
- }
- assert(_PyUnicode_CheckConsistency(res, 1));
- restuple = Py_BuildValue("(Nn)", res, end);
- Py_DECREF(object);
- return restuple;
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- }
- PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
- {
- PyObject *object;
- Py_ssize_t i;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- Py_UCS1 *outp;
- int ressize;
- Py_UCS4 c;
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- const unsigned char *p;
- if (PyUnicodeDecodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeDecodeError_GetObject(exc)))
- return NULL;
- p = (const unsigned char*)PyBytes_AS_STRING(object);
- res = PyUnicode_New(4 * (end - start), 127);
- if (res == NULL) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyUnicode_1BYTE_DATA(res);
- for (i = start; i < end; i++, outp += 4) {
- unsigned char c = p[i];
- outp[0] = '\\';
- outp[1] = 'x';
- outp[2] = Py_hexdigits[(c>>4)&0xf];
- outp[3] = Py_hexdigits[c&0xf];
- }
- assert(_PyUnicode_CheckConsistency(res, 1));
- Py_DECREF(object);
- return Py_BuildValue("(Nn)", res, end);
- }
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) {
- if (PyUnicodeTranslateError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeTranslateError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeTranslateError_GetObject(exc)))
- return NULL;
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- if (end - start > PY_SSIZE_T_MAX / (1+1+8))
- end = start + PY_SSIZE_T_MAX / (1+1+8);
- for (i = start, ressize = 0; i < end; ++i) {
- /* object is guaranteed to be "ready" */
- c = PyUnicode_READ_CHAR(object, i);
- if (c >= 0x10000) {
- ressize += 1+1+8;
- }
- else if (c >= 0x100) {
- ressize += 1+1+4;
- }
- else
- ressize += 1+1+2;
- }
- res = PyUnicode_New(ressize, 127);
- if (res == NULL) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyUnicode_1BYTE_DATA(res);
- for (i = start; i < end; ++i) {
- c = PyUnicode_READ_CHAR(object, i);
- *outp++ = '\\';
- if (c >= 0x00010000) {
- *outp++ = 'U';
- *outp++ = Py_hexdigits[(c>>28)&0xf];
- *outp++ = Py_hexdigits[(c>>24)&0xf];
- *outp++ = Py_hexdigits[(c>>20)&0xf];
- *outp++ = Py_hexdigits[(c>>16)&0xf];
- *outp++ = Py_hexdigits[(c>>12)&0xf];
- *outp++ = Py_hexdigits[(c>>8)&0xf];
- }
- else if (c >= 0x100) {
- *outp++ = 'u';
- *outp++ = Py_hexdigits[(c>>12)&0xf];
- *outp++ = Py_hexdigits[(c>>8)&0xf];
- }
- else
- *outp++ = 'x';
- *outp++ = Py_hexdigits[(c>>4)&0xf];
- *outp++ = Py_hexdigits[c&0xf];
- }
- assert(_PyUnicode_CheckConsistency(res, 1));
- Py_DECREF(object);
- return Py_BuildValue("(Nn)", res, end);
- }
- static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
- PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
- {
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- PyObject *restuple;
- PyObject *object;
- Py_ssize_t i;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- Py_UCS1 *outp;
- Py_ssize_t ressize;
- int replsize;
- Py_UCS4 c;
- char buffer[256]; /* NAME_MAXLEN */
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- if (!ucnhash_capi) {
- /* load the unicode data module */
- ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
- PyUnicodeData_CAPSULE_NAME, 1);
- if (!ucnhash_capi) {
- return NULL;
- }
- }
- for (i = start, ressize = 0; i < end; ++i) {
- /* object is guaranteed to be "ready" */
- c = PyUnicode_READ_CHAR(object, i);
- if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
- replsize = 1+1+1+(int)strlen(buffer)+1;
- }
- else if (c >= 0x10000) {
- replsize = 1+1+8;
- }
- else if (c >= 0x100) {
- replsize = 1+1+4;
- }
- else
- replsize = 1+1+2;
- if (ressize > PY_SSIZE_T_MAX - replsize)
- break;
- ressize += replsize;
- }
- end = i;
- res = PyUnicode_New(ressize, 127);
- if (res==NULL)
- return NULL;
- for (i = start, outp = PyUnicode_1BYTE_DATA(res);
- i < end; ++i) {
- c = PyUnicode_READ_CHAR(object, i);
- *outp++ = '\\';
- if (ucnhash_capi->getname(c, buffer, sizeof(buffer), 1)) {
- *outp++ = 'N';
- *outp++ = '{';
- strcpy((char *)outp, buffer);
- outp += strlen(buffer);
- *outp++ = '}';
- continue;
- }
- if (c >= 0x00010000) {
- *outp++ = 'U';
- *outp++ = Py_hexdigits[(c>>28)&0xf];
- *outp++ = Py_hexdigits[(c>>24)&0xf];
- *outp++ = Py_hexdigits[(c>>20)&0xf];
- *outp++ = Py_hexdigits[(c>>16)&0xf];
- *outp++ = Py_hexdigits[(c>>12)&0xf];
- *outp++ = Py_hexdigits[(c>>8)&0xf];
- }
- else if (c >= 0x100) {
- *outp++ = 'u';
- *outp++ = Py_hexdigits[(c>>12)&0xf];
- *outp++ = Py_hexdigits[(c>>8)&0xf];
- }
- else
- *outp++ = 'x';
- *outp++ = Py_hexdigits[(c>>4)&0xf];
- *outp++ = Py_hexdigits[c&0xf];
- }
- assert(outp == PyUnicode_1BYTE_DATA(res) + ressize);
- assert(_PyUnicode_CheckConsistency(res, 1));
- restuple = Py_BuildValue("(Nn)", res, end);
- Py_DECREF(object);
- return restuple;
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- }
#define ENC_UNKNOWN     -1
#define ENC_UTF8        0
#define ENC_UTF16BE     1
#define ENC_UTF16LE     2
#define ENC_UTF32BE     3
#define ENC_UTF32LE     4

/* Classify an encoding name as one of the ENC_* constants.

   Recognized spellings are "utf" (any case), an optional '-' or '_',
   then "8", "16" or "32"; for 16 and 32 an optional '-' or '_' followed
   by "be" or "le" (any case) selects the byte order, and no suffix means
   native byte order.  The exact string "CP_UTF8" is also accepted as
   UTF-8.

   *bytelength is set to 3 for UTF-8, 2 for UTF-16 and 4 for UTF-32.  It
   may also have been written when ENC_UNKNOWN is returned, so callers
   must only use it for a recognized encoding. */
static int
get_standard_encoding(const char *encoding, int *bytelength)
{
    if (Py_TOLOWER(encoding[0]) == 'u' &&
        Py_TOLOWER(encoding[1]) == 't' &&
        Py_TOLOWER(encoding[2]) == 'f') {
        encoding += 3;
        if (*encoding == '-' || *encoding == '_' )
            encoding++;
        if (encoding[0] == '8' && encoding[1] == '\0') {
            *bytelength = 3;
            return ENC_UTF8;
        }
        else if (encoding[0] == '1' && encoding[1] == '6') {
            encoding += 2;
            *bytelength = 2;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF16BE;
#else
                return ENC_UTF16LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Check encoding[0] first: for names such as "utf-16-" it is
               the terminating NUL and encoding[1] lies past the string. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF16BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF16LE;
            }
        }
        else if (encoding[0] == '3' && encoding[1] == '2') {
            encoding += 2;
            *bytelength = 4;
            if (*encoding == '\0') {
#ifdef WORDS_BIGENDIAN
                return ENC_UTF32BE;
#else
                return ENC_UTF32LE;
#endif
            }
            if (*encoding == '-' || *encoding == '_' )
                encoding++;
            /* Same guard as for UTF-16 above. */
            if (encoding[0] != '\0' &&
                Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                if (Py_TOLOWER(encoding[0]) == 'b')
                    return ENC_UTF32BE;
                if (Py_TOLOWER(encoding[0]) == 'l')
                    return ENC_UTF32LE;
            }
        }
    }
    else if (strcmp(encoding, "CP_UTF8") == 0) {
        *bytelength = 3;
        return ENC_UTF8;
    }
    return ENC_UNKNOWN;
}
- /* This handler is declared static until someone demonstrates
- a need to call it directly. */
- static PyObject *
- PyCodec_SurrogatePassErrors(PyObject *exc)
- {
- PyObject *restuple;
- PyObject *object;
- PyObject *encode;
- const char *encoding;
- int code;
- int bytelength;
- Py_ssize_t i;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- unsigned char *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
- Py_DECREF(object);
- return NULL;
- }
- if (!(encoding = PyUnicode_AsUTF8(encode))) {
- Py_DECREF(object);
- Py_DECREF(encode);
- return NULL;
- }
- code = get_standard_encoding(encoding, &bytelength);
- Py_DECREF(encode);
- if (code == ENC_UNKNOWN) {
- /* Not supported, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(object);
- return NULL;
- }
- if (end - start > PY_SSIZE_T_MAX / bytelength)
- end = start + PY_SSIZE_T_MAX / bytelength;
- res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
- if (!res) {
- Py_DECREF(object);
- return NULL;
- }
- outp = (unsigned char*)PyBytes_AsString(res);
- for (i = start; i < end; i++) {
- /* object is guaranteed to be "ready" */
- Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
- if (!Py_UNICODE_IS_SURROGATE(ch)) {
- /* Not a surrogate, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(res);
- Py_DECREF(object);
- return NULL;
- }
- switch (code) {
- case ENC_UTF8:
- *outp++ = (unsigned char)(0xe0 | (ch >> 12));
- *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
- *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
- break;
- case ENC_UTF16LE:
- *outp++ = (unsigned char) ch;
- *outp++ = (unsigned char)(ch >> 8);
- break;
- case ENC_UTF16BE:
- *outp++ = (unsigned char)(ch >> 8);
- *outp++ = (unsigned char) ch;
- break;
- case ENC_UTF32LE:
- *outp++ = (unsigned char) ch;
- *outp++ = (unsigned char)(ch >> 8);
- *outp++ = (unsigned char)(ch >> 16);
- *outp++ = (unsigned char)(ch >> 24);
- break;
- case ENC_UTF32BE:
- *outp++ = (unsigned char)(ch >> 24);
- *outp++ = (unsigned char)(ch >> 16);
- *outp++ = (unsigned char)(ch >> 8);
- *outp++ = (unsigned char) ch;
- break;
- }
- }
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- Py_DECREF(object);
- return restuple;
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- const unsigned char *p;
- Py_UCS4 ch = 0;
- if (PyUnicodeDecodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeDecodeError_GetObject(exc)))
- return NULL;
- p = (const unsigned char*)PyBytes_AS_STRING(object);
- if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
- Py_DECREF(object);
- return NULL;
- }
- if (!(encoding = PyUnicode_AsUTF8(encode))) {
- Py_DECREF(object);
- Py_DECREF(encode);
- return NULL;
- }
- code = get_standard_encoding(encoding, &bytelength);
- Py_DECREF(encode);
- if (code == ENC_UNKNOWN) {
- /* Not supported, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(object);
- return NULL;
- }
- /* Try decoding a single surrogate character. If
- there are more, let the codec call us again. */
- p += start;
- if (PyBytes_GET_SIZE(object) - start >= bytelength) {
- switch (code) {
- case ENC_UTF8:
- if ((p[0] & 0xf0) == 0xe0 &&
- (p[1] & 0xc0) == 0x80 &&
- (p[2] & 0xc0) == 0x80) {
- /* it's a three-byte code */
- ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
- }
- break;
- case ENC_UTF16LE:
- ch = p[1] << 8 | p[0];
- break;
- case ENC_UTF16BE:
- ch = p[0] << 8 | p[1];
- break;
- case ENC_UTF32LE:
- ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
- break;
- case ENC_UTF32BE:
- ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
- break;
- }
- }
- Py_DECREF(object);
- if (!Py_UNICODE_IS_SURROGATE(ch)) {
- /* it's not a surrogate - fail */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- return NULL;
- }
- res = PyUnicode_FromOrdinal(ch);
- if (res == NULL)
- return NULL;
- return Py_BuildValue("(Nn)", res, start + bytelength);
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- }
- static PyObject *
- PyCodec_SurrogateEscapeErrors(PyObject *exc)
- {
- PyObject *restuple;
- PyObject *object;
- Py_ssize_t i;
- Py_ssize_t start;
- Py_ssize_t end;
- PyObject *res;
- if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
- char *outp;
- if (PyUnicodeEncodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeEncodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeEncodeError_GetObject(exc)))
- return NULL;
- res = PyBytes_FromStringAndSize(NULL, end-start);
- if (!res) {
- Py_DECREF(object);
- return NULL;
- }
- outp = PyBytes_AsString(res);
- for (i = start; i < end; i++) {
- /* object is guaranteed to be "ready" */
- Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
- if (ch < 0xdc80 || ch > 0xdcff) {
- /* Not a UTF-8b surrogate, fail with original exception */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- Py_DECREF(res);
- Py_DECREF(object);
- return NULL;
- }
- *outp++ = ch - 0xdc00;
- }
- restuple = Py_BuildValue("(On)", res, end);
- Py_DECREF(res);
- Py_DECREF(object);
- return restuple;
- }
- else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) {
- PyObject *str;
- const unsigned char *p;
- Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */
- int consumed = 0;
- if (PyUnicodeDecodeError_GetStart(exc, &start))
- return NULL;
- if (PyUnicodeDecodeError_GetEnd(exc, &end))
- return NULL;
- if (!(object = PyUnicodeDecodeError_GetObject(exc)))
- return NULL;
- p = (const unsigned char*)PyBytes_AS_STRING(object);
- while (consumed < 4 && consumed < end-start) {
- /* Refuse to escape ASCII bytes. */
- if (p[start+consumed] < 128)
- break;
- ch[consumed] = 0xdc00 + p[start+consumed];
- consumed++;
- }
- Py_DECREF(object);
- if (!consumed) {
- /* codec complained about ASCII byte. */
- PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
- return NULL;
- }
- str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed);
- if (str == NULL)
- return NULL;
- return Py_BuildValue("(Nn)", str, start+consumed);
- }
- else {
- wrong_exception_type(exc);
- return NULL;
- }
- }
- static PyObject *strict_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_StrictErrors(exc);
- }
- static PyObject *ignore_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_IgnoreErrors(exc);
- }
- static PyObject *replace_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_ReplaceErrors(exc);
- }
- static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_XMLCharRefReplaceErrors(exc);
- }
- static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_BackslashReplaceErrors(exc);
- }
- static PyObject *namereplace_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_NameReplaceErrors(exc);
- }
- static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_SurrogatePassErrors(exc);
- }
- static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
- {
- return PyCodec_SurrogateEscapeErrors(exc);
- }
- static int _PyCodecRegistry_Init(void)
- {
- static struct {
- const char *name;
- PyMethodDef def;
- } methods[] =
- {
- {
- "strict",
- {
- "strict_errors",
- strict_errors,
- METH_O,
- PyDoc_STR("Implements the 'strict' error handling, which "
- "raises a UnicodeError on coding errors.")
- }
- },
- {
- "ignore",
- {
- "ignore_errors",
- ignore_errors,
- METH_O,
- PyDoc_STR("Implements the 'ignore' error handling, which "
- "ignores malformed data and continues.")
- }
- },
- {
- "replace",
- {
- "replace_errors",
- replace_errors,
- METH_O,
- PyDoc_STR("Implements the 'replace' error handling, which "
- "replaces malformed data with a replacement marker.")
- }
- },
- {
- "xmlcharrefreplace",
- {
- "xmlcharrefreplace_errors",
- xmlcharrefreplace_errors,
- METH_O,
- PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
- "which replaces an unencodable character with the "
- "appropriate XML character reference.")
- }
- },
- {
- "backslashreplace",
- {
- "backslashreplace_errors",
- backslashreplace_errors,
- METH_O,
- PyDoc_STR("Implements the 'backslashreplace' error handling, "
- "which replaces malformed data with a backslashed "
- "escape sequence.")
- }
- },
- {
- "namereplace",
- {
- "namereplace_errors",
- namereplace_errors,
- METH_O,
- PyDoc_STR("Implements the 'namereplace' error handling, "
- "which replaces an unencodable character with a "
- "\\N{...} escape sequence.")
- }
- },
- {
- "surrogatepass",
- {
- "surrogatepass",
- surrogatepass_errors,
- METH_O
- }
- },
- {
- "surrogateescape",
- {
- "surrogateescape",
- surrogateescape_errors,
- METH_O
- }
- }
- };
- PyInterpreterState *interp = _PyInterpreterState_GET();
- PyObject *mod;
- if (interp->codec_search_path != NULL)
- return 0;
- interp->codec_search_path = PyList_New(0);
- if (interp->codec_search_path == NULL) {
- return -1;
- }
- interp->codec_search_cache = PyDict_New();
- if (interp->codec_search_cache == NULL) {
- return -1;
- }
- interp->codec_error_registry = PyDict_New();
- if (interp->codec_error_registry == NULL) {
- return -1;
- }
- for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
- PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
- if (!func) {
- return -1;
- }
- int res = PyCodec_RegisterError(methods[i].name, func);
- Py_DECREF(func);
- if (res) {
- return -1;
- }
- }
- mod = PyImport_ImportModule("encodings");
- if (mod == NULL) {
- return -1;
- }
- Py_DECREF(mod);
- interp->codecs_initialized = 1;
- return 0;
- }
|