12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820 |
- /* JSON accelerator C extension: _json module.
- *
- * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
- * and as an extension module (Py_BUILD_CORE_MODULE define) on other
- * platforms. */
- #ifndef Py_BUILD_CORE_BUILTIN
- # define Py_BUILD_CORE_MODULE 1
- #endif
- #include "Python.h"
- #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
- #include "pycore_runtime.h" // _PyRuntime
- #include "structmember.h" // PyMemberDef
- #include "pycore_global_objects.h" // _Py_ID()
- #include <stdbool.h> // bool
- typedef struct _PyScannerObject { /* Instance layout of the scanner object used by the JSON parsing functions in this file. */
- PyObject_HEAD
- signed char strict; /* forwarded to scanstring_unicode(); non-zero rejects literal control characters inside strings */
- PyObject *object_hook; /* if not Py_None, called with each parsed object and its result is returned instead */
- PyObject *object_pairs_hook; /* if not Py_None, objects are collected as a list of (key, value) tuples and passed to this callable */
- PyObject *parse_float; /* callable used for float literals unless it is &PyFloat_Type (built-in fast path) */
- PyObject *parse_int; /* callable used for integer literals unless it is &PyLong_Type (built-in fast path) */
- PyObject *parse_constant; /* called by _parse_constant() with the constant's name as a str */
- PyObject *memo; /* dict consulted with PyDict_SetDefault() so equal object keys share one str object; not listed in scanner_members */
- } PyScannerObject;
- static PyMemberDef scanner_members[] = {
- {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
- {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
- {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
- {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
- {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
- {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
- {NULL}
- };
- typedef struct _PyEncoderObject { /* Instance layout of the encoder object; the code that uses these fields is outside this part of the file. */
- PyObject_HEAD
- PyObject *markers; /* exposed read-only as "markers"; NOTE(review): presumably the container used to detect circular references - confirm */
- PyObject *defaultfn; /* exposed read-only under the attribute name "default" */
- PyObject *encoder; /* exposed read-only as "encoder"; NOTE(review): presumably the string-encoding callable - confirm */
- PyObject *indent; /* exposed read-only as "indent" */
- PyObject *key_separator; /* exposed read-only as "key_separator" */
- PyObject *item_separator; /* exposed read-only as "item_separator" */
- char sort_keys; /* exposed read-only as a T_BOOL member */
- char skipkeys; /* exposed read-only as a T_BOOL member */
- int allow_nan; /* not listed in encoder_members, so not reachable through that table */
- PyCFunction fast_encode; /* C function pointer, not listed in encoder_members; NOTE(review): presumably a C shortcut for `encoder` - confirm */
- } PyEncoderObject;
- static PyMemberDef encoder_members[] = {
- {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
- {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
- {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
- {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
- {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
- {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
- {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
- {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
- {NULL}
- };
- /* Forward decls */
- static PyObject *
- ascii_escape_unicode(PyObject *pystr);
- static PyObject *
- py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
- static PyObject *
- scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
- static PyObject *
- _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
- static PyObject *
- scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
- static void
- scanner_dealloc(PyObject *self);
- static int
- scanner_clear(PyScannerObject *self);
- static PyObject *
- encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
- static void
- encoder_dealloc(PyObject *self);
- static int
- encoder_clear(PyEncoderObject *self);
- static int
- encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
- static int
- encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
- static int
- encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
- static PyObject *
- _encoded_const(PyObject *obj);
- static void
- raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
- static PyObject *
- encoder_encode_string(PyEncoderObject *s, PyObject *obj);
- static PyObject *
- encoder_encode_float(PyEncoderObject *s, PyObject *obj);
/* S_CHAR(c): non-zero when c is a printable ASCII character (' ' .. '~')
   other than backslash and double quote, i.e. a character the escaping
   routines copy through unchanged.  The argument is fully parenthesized so
   that an expression argument expands correctly; it is still evaluated
   several times, so it must not have side effects. */
#define S_CHAR(c) (((c) >= ' ') && ((c) <= '~') && ((c) != '\\') && ((c) != '"'))
/* IS_WHITESPACE(c): non-zero for space, tab, line feed and carriage return.
   The argument is evaluated several times. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
- static Py_ssize_t
- ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
- {
- /* Escape unicode code point c to ASCII escape sequences
- in char *output. output must have at least 12 bytes unused to
- accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
- output[chars++] = '\\';
- switch (c) {
- case '\\': output[chars++] = c; break;
- case '"': output[chars++] = c; break;
- case '\b': output[chars++] = 'b'; break;
- case '\f': output[chars++] = 'f'; break;
- case '\n': output[chars++] = 'n'; break;
- case '\r': output[chars++] = 'r'; break;
- case '\t': output[chars++] = 't'; break;
- default:
- if (c >= 0x10000) {
- /* UTF-16 surrogate pair */
- Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
- output[chars++] = 'u';
- output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
- output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
- output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
- output[chars++] = Py_hexdigits[(v ) & 0xf];
- c = Py_UNICODE_LOW_SURROGATE(c);
- output[chars++] = '\\';
- }
- output[chars++] = 'u';
- output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
- output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
- output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
- output[chars++] = Py_hexdigits[(c ) & 0xf];
- }
- return chars;
- }
- static PyObject *
- ascii_escape_unicode(PyObject *pystr)
- {
- /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
- Py_ssize_t i;
- Py_ssize_t input_chars;
- Py_ssize_t output_size;
- Py_ssize_t chars;
- PyObject *rval;
- const void *input;
- Py_UCS1 *output;
- int kind;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- input_chars = PyUnicode_GET_LENGTH(pystr);
- input = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- /* Compute the output size */
- for (i = 0, output_size = 2; i < input_chars; i++) {
- Py_UCS4 c = PyUnicode_READ(kind, input, i);
- Py_ssize_t d;
- if (S_CHAR(c)) {
- d = 1;
- }
- else {
- switch(c) {
- case '\\': case '"': case '\b': case '\f':
- case '\n': case '\r': case '\t':
- d = 2; break;
- default:
- d = c >= 0x10000 ? 12 : 6;
- }
- }
- if (output_size > PY_SSIZE_T_MAX - d) {
- PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
- return NULL;
- }
- output_size += d;
- }
- rval = PyUnicode_New(output_size, 127);
- if (rval == NULL) {
- return NULL;
- }
- output = PyUnicode_1BYTE_DATA(rval);
- chars = 0;
- output[chars++] = '"';
- for (i = 0; i < input_chars; i++) {
- Py_UCS4 c = PyUnicode_READ(kind, input, i);
- if (S_CHAR(c)) {
- output[chars++] = c;
- }
- else {
- chars = ascii_escape_unichar(c, output, chars);
- }
- }
- output[chars++] = '"';
- #ifdef Py_DEBUG
- assert(_PyUnicode_CheckConsistency(rval, 1));
- #endif
- return rval;
- }
- static PyObject *
- escape_unicode(PyObject *pystr)
- {
- /* Take a PyUnicode pystr and return a new escaped PyUnicode.
- The result is pystr wrapped in double quotes. Backslash, double quote,
- backspace, form feed, line feed, carriage return and tab become
- two-character escapes; any other code point <= 0x1f becomes \u00XX;
- every other character (non-ASCII included) is copied unchanged.
- Returns NULL on failure; OverflowError is raised when the escaped
- length would exceed PY_SSIZE_T_MAX. */
- Py_ssize_t i;
- Py_ssize_t input_chars;
- Py_ssize_t output_size;
- Py_ssize_t chars;
- PyObject *rval;
- const void *input;
- int kind;
- Py_UCS4 maxchar;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
- input_chars = PyUnicode_GET_LENGTH(pystr);
- input = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- /* Compute the output size: 2 for the surrounding quotes, plus 1, 2 or 6
- per input character depending on how it is escaped */
- for (i = 0, output_size = 2; i < input_chars; i++) {
- Py_UCS4 c = PyUnicode_READ(kind, input, i);
- Py_ssize_t d;
- switch (c) {
- case '\\': case '"': case '\b': case '\f':
- case '\n': case '\r': case '\t':
- d = 2;
- break;
- default:
- if (c <= 0x1f)
- d = 6;
- else
- d = 1;
- }
- if (output_size > PY_SSIZE_T_MAX - d) {
- PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
- return NULL;
- }
- output_size += d;
- }
- rval = PyUnicode_New(output_size, maxchar);
- if (rval == NULL)
- return NULL;
- kind = PyUnicode_KIND(rval); /* NOTE(review): kind now describes rval, yet ENCODE_OUTPUT also uses it to read `input`; presumably equal to pystr's kind because rval was allocated with pystr's maxchar - confirm */
- #define ENCODE_OUTPUT do { \
- chars = 0; \
- output[chars++] = '"'; \
- for (i = 0; i < input_chars; i++) { \
- Py_UCS4 c = PyUnicode_READ(kind, input, i); \
- switch (c) { \
- case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
- case '"': output[chars++] = '\\'; output[chars++] = c; break; \
- case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
- case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
- case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
- case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
- case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
- default: \
- if (c <= 0x1f) { \
- output[chars++] = '\\'; \
- output[chars++] = 'u'; \
- output[chars++] = '0'; \
- output[chars++] = '0'; \
- output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
- output[chars++] = Py_hexdigits[(c ) & 0xf]; \
- } else { \
- output[chars++] = c; \
- } \
- } \
- } \
- output[chars++] = '"'; \
- } while (0)
- if (kind == PyUnicode_1BYTE_KIND) { /* the same fill loop is expanded once per storage width; only the type of `output` differs */
- Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
- ENCODE_OUTPUT;
- } else if (kind == PyUnicode_2BYTE_KIND) {
- Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
- ENCODE_OUTPUT;
- } else {
- Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
- assert(kind == PyUnicode_4BYTE_KIND);
- ENCODE_OUTPUT;
- }
- #undef ENCODE_OUTPUT
- #ifdef Py_DEBUG
- assert(_PyUnicode_CheckConsistency(rval, 1));
- #endif
- return rval;
- }
- static void
- raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
- {
- /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
- _Py_DECLARE_STR(json_decoder, "json.decoder");
- PyObject *JSONDecodeError =
- _PyImport_GetModuleAttr(&_Py_STR(json_decoder), &_Py_ID(JSONDecodeError));
- if (JSONDecodeError == NULL) {
- return;
- }
- PyObject *exc;
- exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
- Py_DECREF(JSONDecodeError);
- if (exc) {
- PyErr_SetObject(JSONDecodeError, exc);
- Py_DECREF(exc);
- }
- }
- static void
- raise_stop_iteration(Py_ssize_t idx)
- {
- PyObject *value = PyLong_FromSsize_t(idx);
- if (value != NULL) {
- PyErr_SetObject(PyExc_StopIteration, value);
- Py_DECREF(value);
- }
- }
- static PyObject *
- _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
- /* return (rval, idx) tuple, stealing reference to rval */
- PyObject *tpl;
- PyObject *pyidx;
- /*
- steal a reference to rval, returns (rval, idx)
- */
- if (rval == NULL) {
- return NULL;
- }
- pyidx = PyLong_FromSsize_t(idx);
- if (pyidx == NULL) {
- Py_DECREF(rval);
- return NULL;
- }
- tpl = PyTuple_New(2);
- if (tpl == NULL) {
- Py_DECREF(pyidx);
- Py_DECREF(rval);
- return NULL;
- }
- PyTuple_SET_ITEM(tpl, 0, rval);
- PyTuple_SET_ITEM(tpl, 1, pyidx);
- return tpl;
- }
- static PyObject *
- scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
- {
- /* Read the JSON string from PyUnicode pystr.
- end is the index of the first character after the quote.
- if strict is zero then literal control characters are allowed
- *next_end_ptr is a return-by-reference index of the character
- after the end quote
- Return value is a new PyUnicode
- */
- PyObject *rval = NULL;
- Py_ssize_t len;
- Py_ssize_t begin = end - 1;
- Py_ssize_t next /* = begin */;
- const void *buf;
- int kind;
- if (PyUnicode_READY(pystr) == -1)
- return 0;
- _PyUnicodeWriter writer;
- _PyUnicodeWriter_Init(&writer);
- writer.overallocate = 1;
- len = PyUnicode_GET_LENGTH(pystr);
- buf = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- if (end < 0 || len < end) {
- PyErr_SetString(PyExc_ValueError, "end is out of bounds");
- goto bail;
- }
- while (1) {
- /* Find the end of the string or the next escape */
- Py_UCS4 c;
- {
- // Use tight scope variable to help register allocation.
- Py_UCS4 d = 0;
- for (next = end; next < len; next++) {
- d = PyUnicode_READ(kind, buf, next);
- if (d == '"' || d == '\\') {
- break;
- }
- if (d <= 0x1f && strict) {
- raise_errmsg("Invalid control character at", pystr, next);
- goto bail;
- }
- }
- c = d;
- }
- if (c == '"') {
- // Fast path for simple case.
- if (writer.buffer == NULL) {
- PyObject *ret = PyUnicode_Substring(pystr, end, next);
- if (ret == NULL) {
- goto bail;
- }
- *next_end_ptr = next + 1;;
- return ret;
- }
- }
- else if (c != '\\') {
- raise_errmsg("Unterminated string starting at", pystr, begin);
- goto bail;
- }
- /* Pick up this chunk if it's not zero length */
- if (next != end) {
- if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
- goto bail;
- }
- }
- next++;
- if (c == '"') {
- end = next;
- break;
- }
- if (next == len) {
- raise_errmsg("Unterminated string starting at", pystr, begin);
- goto bail;
- }
- c = PyUnicode_READ(kind, buf, next);
- if (c != 'u') {
- /* Non-unicode backslash escapes */
- end = next + 1;
- switch (c) {
- case '"': break;
- case '\\': break;
- case '/': break;
- case 'b': c = '\b'; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- default: c = 0;
- }
- if (c == 0) {
- raise_errmsg("Invalid \\escape", pystr, end - 2);
- goto bail;
- }
- }
- else {
- c = 0;
- next++;
- end = next + 4;
- if (end >= len) {
- raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
- goto bail;
- }
- /* Decode 4 hex digits */
- for (; next < end; next++) {
- Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
- c <<= 4;
- switch (digit) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- c |= (digit - '0'); break;
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f':
- c |= (digit - 'a' + 10); break;
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F':
- c |= (digit - 'A' + 10); break;
- default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
- goto bail;
- }
- }
- /* Surrogate pair */
- if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
- PyUnicode_READ(kind, buf, next++) == '\\' &&
- PyUnicode_READ(kind, buf, next++) == 'u') {
- Py_UCS4 c2 = 0;
- end += 6;
- /* Decode 4 hex digits */
- for (; next < end; next++) {
- Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
- c2 <<= 4;
- switch (digit) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- c2 |= (digit - '0'); break;
- case 'a': case 'b': case 'c': case 'd': case 'e':
- case 'f':
- c2 |= (digit - 'a' + 10); break;
- case 'A': case 'B': case 'C': case 'D': case 'E':
- case 'F':
- c2 |= (digit - 'A' + 10); break;
- default:
- raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
- goto bail;
- }
- }
- if (Py_UNICODE_IS_LOW_SURROGATE(c2))
- c = Py_UNICODE_JOIN_SURROGATES(c, c2);
- else
- end -= 6;
- }
- }
- if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
- goto bail;
- }
- }
- rval = _PyUnicodeWriter_Finish(&writer);
- *next_end_ptr = end;
- return rval;
- bail:
- *next_end_ptr = -1;
- _PyUnicodeWriter_Dealloc(&writer);
- return NULL;
- }
- PyDoc_STRVAR(pydoc_scanstring,
- "scanstring(string, end, strict=True) -> (string, end)\n"
- "\n"
- "Scan the string s for a JSON string. End is the index of the\n"
- "character in s after the quote that started the JSON string.\n"
- "Unescapes all valid JSON string escape sequences and raises ValueError\n"
- "on attempt to decode an invalid string. If strict is False then literal\n"
- "control characters are allowed in the string.\n"
- "\n"
- "Returns a tuple of the decoded string and the index of the character in s\n"
- "after the end quote."
- );
- static PyObject *
- py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
- {
- PyObject *pystr;
- PyObject *rval;
- Py_ssize_t end;
- Py_ssize_t next_end = -1;
- int strict = 1;
- if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) {
- return NULL;
- }
- if (PyUnicode_Check(pystr)) {
- rval = scanstring_unicode(pystr, end, strict, &next_end);
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "first argument must be a string, not %.80s",
- Py_TYPE(pystr)->tp_name);
- return NULL;
- }
- return _build_rval_index_tuple(rval, next_end);
- }
- PyDoc_STRVAR(pydoc_encode_basestring_ascii,
- "encode_basestring_ascii(string) -> string\n"
- "\n"
- "Return an ASCII-only JSON representation of a Python string"
- );
- static PyObject *
- py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
- {
- PyObject *rval;
- /* Return an ASCII-only JSON representation of a Python string */
- /* METH_O */
- if (PyUnicode_Check(pystr)) {
- rval = ascii_escape_unicode(pystr);
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "first argument must be a string, not %.80s",
- Py_TYPE(pystr)->tp_name);
- return NULL;
- }
- return rval;
- }
- PyDoc_STRVAR(pydoc_encode_basestring,
- "encode_basestring(string) -> string\n"
- "\n"
- "Return a JSON representation of a Python string"
- );
- static PyObject *
- py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
- {
- PyObject *rval;
- /* Return a JSON representation of a Python string */
- /* METH_O */
- if (PyUnicode_Check(pystr)) {
- rval = escape_unicode(pystr);
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "first argument must be a string, not %.80s",
- Py_TYPE(pystr)->tp_name);
- return NULL;
- }
- return rval;
- }
- static void
- scanner_dealloc(PyObject *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- /* bpo-31095: UnTrack is needed before calling any callbacks */
- PyObject_GC_UnTrack(self);
- scanner_clear((PyScannerObject *)self);
- tp->tp_free(self);
- Py_DECREF(tp);
- }
- static int
- scanner_traverse(PyScannerObject *self, visitproc visit, void *arg) /* GC traversal: hand every object reference held by the scanner to `visit` */
- {
- Py_VISIT(Py_TYPE(self)); /* the instance's type is reported as well; NOTE(review): presumably because the type is heap-allocated - confirm */
- Py_VISIT(self->object_hook);
- Py_VISIT(self->object_pairs_hook);
- Py_VISIT(self->parse_float);
- Py_VISIT(self->parse_int);
- Py_VISIT(self->parse_constant);
- Py_VISIT(self->memo);
- return 0; /* reached only when no Py_VISIT returned early */
- }
- static int
- scanner_clear(PyScannerObject *self) /* Release every object reference held by the scanner; also called from scanner_dealloc() */
- {
- Py_CLEAR(self->object_hook); /* the same six fields that scanner_traverse() visits */
- Py_CLEAR(self->object_pairs_hook);
- Py_CLEAR(self->parse_float);
- Py_CLEAR(self->parse_int);
- Py_CLEAR(self->parse_constant);
- Py_CLEAR(self->memo);
- return 0; /* always succeeds */
- }
- static PyObject *
- _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
- {
- /* Read a JSON object from PyUnicode pystr.
- idx is the index of the first character after the opening curly brace.
- *next_idx_ptr is a return-by-reference index to the first character after
- the closing curly brace; it is written only on the success path.
- Returns a new PyObject (usually a dict, but object_hook can change that).
- When s->object_pairs_hook is not None the members are collected as a list
- of (key, value) tuples and the hook's result is returned instead; in that
- case object_hook is not consulted.
- Returns NULL with an exception set on error.
- */
- const void *str;
- int kind;
- Py_ssize_t end_idx;
- PyObject *val = NULL;
- PyObject *rval = NULL;
- PyObject *key = NULL;
- int has_pairs_hook = (s->object_pairs_hook != Py_None);
- Py_ssize_t next_idx;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- str = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- end_idx = PyUnicode_GET_LENGTH(pystr) - 1; /* index of the last character, so bounds checks read idx <= end_idx */
- if (has_pairs_hook)
- rval = PyList_New(0);
- else
- rval = PyDict_New();
- if (rval == NULL)
- return NULL;
- /* skip whitespace after { */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
- /* only loop if the object is non-empty */
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
- while (1) {
- PyObject *memokey;
- /* read key */
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
- raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
- goto bail;
- }
- key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
- if (key == NULL)
- goto bail;
- memokey = PyDict_SetDefault(s->memo, key, key); /* returns the key object already stored in memo, or stores and returns this one */
- if (memokey == NULL) {
- goto bail;
- }
- Py_SETREF(key, Py_NewRef(memokey)); /* swap our key for the memoized one, taking a new reference to it */
- idx = next_idx;
- /* skip whitespace between key and : delimiter, read :, skip whitespace */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
- raise_errmsg("Expecting ':' delimiter", pystr, idx);
- goto bail;
- }
- idx++;
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- /* read any JSON term */
- val = scan_once_unicode(s, pystr, idx, &next_idx);
- if (val == NULL)
- goto bail;
- if (has_pairs_hook) {
- PyObject *item = PyTuple_Pack(2, key, val);
- if (item == NULL)
- goto bail;
- Py_CLEAR(key); /* key and val are reset to NULL so the bail path does not release them a second time */
- Py_CLEAR(val);
- if (PyList_Append(rval, item) == -1) {
- Py_DECREF(item);
- goto bail;
- }
- Py_DECREF(item);
- }
- else {
- if (PyDict_SetItem(rval, key, val) < 0)
- goto bail;
- Py_CLEAR(key);
- Py_CLEAR(val);
- }
- idx = next_idx;
- /* skip whitespace before } or , */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- /* bail if the object is closed or we didn't get the , delimiter */
- if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
- break;
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
- goto bail;
- }
- idx++;
- /* skip whitespace after , delimiter */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- }
- }
- *next_idx_ptr = idx + 1; /* idx is on the closing '}' here, both for an empty object and after the loop's break */
- if (has_pairs_hook) {
- val = PyObject_CallOneArg(s->object_pairs_hook, rval);
- Py_DECREF(rval);
- return val;
- }
- /* if object_hook is not None: rval = object_hook(rval) */
- if (s->object_hook != Py_None) {
- val = PyObject_CallOneArg(s->object_hook, rval);
- Py_DECREF(rval);
- return val;
- }
- return rval;
- bail:
- Py_XDECREF(key);
- Py_XDECREF(val);
- Py_XDECREF(rval);
- return NULL;
- }
- static PyObject *
- _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
- /* Read a JSON array from PyUnicode pystr.
- idx is the index of the first character after the opening brace.
- *next_idx_ptr is a return-by-reference index to the first character after
- the closing brace.
- Returns a new PyList
- */
- const void *str;
- int kind;
- Py_ssize_t end_idx;
- PyObject *val = NULL;
- PyObject *rval;
- Py_ssize_t next_idx;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- rval = PyList_New(0);
- if (rval == NULL)
- return NULL;
- str = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
- /* skip whitespace after [ */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- /* only loop if the array is non-empty */
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
- while (1) {
- /* read any JSON term */
- val = scan_once_unicode(s, pystr, idx, &next_idx);
- if (val == NULL)
- goto bail;
- if (PyList_Append(rval, val) == -1)
- goto bail;
- Py_CLEAR(val);
- idx = next_idx;
- /* skip whitespace between term and , */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- /* bail if the array is closed or we didn't get the , delimiter */
- if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
- break;
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
- raise_errmsg("Expecting ',' delimiter", pystr, idx);
- goto bail;
- }
- idx++;
- /* skip whitespace after , */
- while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
- }
- }
- /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
- if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
- raise_errmsg("Expecting value", pystr, end_idx);
- goto bail;
- }
- *next_idx_ptr = idx + 1;
- return rval;
- bail:
- Py_XDECREF(val);
- Py_DECREF(rval);
- return NULL;
- }
- static PyObject *
- _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
- /* Read a JSON constant.
- constant is the constant string that was found
- ("NaN", "Infinity", "-Infinity").
- idx is the index of the first character of the constant
- *next_idx_ptr is a return-by-reference index to the first character after
- the constant.
- Returns the result of parse_constant
- */
- PyObject *cstr;
- PyObject *rval;
- /* constant is "NaN", "Infinity", or "-Infinity" */
- cstr = PyUnicode_InternFromString(constant);
- if (cstr == NULL)
- return NULL;
- /* rval = parse_constant(constant) */
- rval = PyObject_CallOneArg(s->parse_constant, cstr);
- idx += PyUnicode_GET_LENGTH(cstr);
- Py_DECREF(cstr);
- *next_idx_ptr = idx;
- return rval;
- }
- static PyObject *
- _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
- /* Read a JSON number from PyUnicode pystr.
- start is the index of the first character of the number
- (NOTE(review): it is read without a bounds check, so the caller is
- presumably expected to pass a valid index - confirm).
- *next_idx_ptr is a return-by-reference index to the first character after
- the number.
- Returns a new PyObject representation of that number:
- PyLong, or PyFloat.
- May return other types if parse_int or parse_float are set.
- If no number starts at start, StopIteration(start) is raised and NULL
- is returned.
- */
- const void *str;
- int kind;
- Py_ssize_t end_idx;
- Py_ssize_t idx = start;
- int is_float = 0;
- PyObject *rval;
- PyObject *numstr = NULL;
- PyObject *custom_func;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- str = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
- /* read a sign if it's there, make sure it's not the end of the string */
- if (PyUnicode_READ(kind, str, idx) == '-') {
- idx++;
- if (idx > end_idx) {
- raise_stop_iteration(start);
- return NULL;
- }
- }
- /* read as many integer digits as we find as long as it doesn't start with 0 */
- if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
- idx++;
- while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
- }
- /* if it starts with 0 we only expect one integer digit */
- else if (PyUnicode_READ(kind, str, idx) == '0') {
- idx++;
- }
- /* no integer digits, error */
- else {
- raise_stop_iteration(start);
- return NULL;
- }
- /* if the next char is '.' followed by a digit then read all float digits */
- if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
- is_float = 1;
- idx += 2;
- while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
- }
- /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
- if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
- Py_ssize_t e_start = idx;
- idx++;
- /* read an exponent sign if present */
- if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
- /* read all digits */
- while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
- /* if we got a digit, then parse as float. if not, backtrack */
- if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
- is_float = 1;
- }
- else {
- idx = e_start;
- }
- }
- if (is_float && s->parse_float != (PyObject *)&PyFloat_Type) /* a user-supplied parser is used only when it differs from the built-in type */
- custom_func = s->parse_float;
- else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
- custom_func = s->parse_int;
- else
- custom_func = NULL;
- if (custom_func) {
- /* copy the section we determined to be a number */
- numstr = PyUnicode_FromKindAndData(kind,
- (char*)str + kind * start,
- idx - start);
- if (numstr == NULL)
- return NULL;
- rval = PyObject_CallOneArg(custom_func, numstr);
- }
- else {
- Py_ssize_t i, n;
- char *buf;
- /* Straight conversion to ASCII, to avoid costly conversion of
- decimal unicode digits (which cannot appear here) */
- n = idx - start;
- numstr = PyBytes_FromStringAndSize(NULL, n);
- if (numstr == NULL)
- return NULL;
- buf = PyBytes_AS_STRING(numstr);
- for (i = 0; i < n; i++) {
- buf[i] = (char) PyUnicode_READ(kind, str, i + start);
- }
- if (is_float)
- rval = PyFloat_FromString(numstr);
- else
- rval = PyLong_FromString(buf, NULL, 10);
- }
- Py_DECREF(numstr);
- *next_idx_ptr = idx; /* written even when rval is NULL */
- return rval;
- }
- static PyObject *
- scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
- {
- /* Read one JSON term (of any kind) from PyUnicode pystr.
- idx is the index of the first character of the term
- *next_idx_ptr is a return-by-reference index to the first character after
- the number.
- Returns a new PyObject representation of the term.
- */
- PyObject *res;
- const void *str;
- int kind;
- Py_ssize_t length;
- if (PyUnicode_READY(pystr) == -1)
- return NULL;
- str = PyUnicode_DATA(pystr);
- kind = PyUnicode_KIND(pystr);
- length = PyUnicode_GET_LENGTH(pystr);
- if (idx < 0) {
- PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
- return NULL;
- }
- if (idx >= length) {
- raise_stop_iteration(idx);
- return NULL;
- }
- switch (PyUnicode_READ(kind, str, idx)) {
- case '"':
- /* string */
- return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
- case '{':
- /* object */
- if (_Py_EnterRecursiveCall(" while decoding a JSON object "
- "from a unicode string"))
- return NULL;
- res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
- _Py_LeaveRecursiveCall();
- return res;
- case '[':
- /* array */
- if (_Py_EnterRecursiveCall(" while decoding a JSON array "
- "from a unicode string"))
- return NULL;
- res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
- _Py_LeaveRecursiveCall();
- return res;
- case 'n':
- /* null */
- if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
- *next_idx_ptr = idx + 4;
- Py_RETURN_NONE;
- }
- break;
- case 't':
- /* true */
- if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
- *next_idx_ptr = idx + 4;
- Py_RETURN_TRUE;
- }
- break;
- case 'f':
- /* false */
- if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
- PyUnicode_READ(kind, str, idx + 2) == 'l' &&
- PyUnicode_READ(kind, str, idx + 3) == 's' &&
- PyUnicode_READ(kind, str, idx + 4) == 'e') {
- *next_idx_ptr = idx + 5;
- Py_RETURN_FALSE;
- }
- break;
- case 'N':
- /* NaN */
- if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
- PyUnicode_READ(kind, str, idx + 2) == 'N') {
- return _parse_constant(s, "NaN", idx, next_idx_ptr);
- }
- break;
- case 'I':
- /* Infinity */
- if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
- PyUnicode_READ(kind, str, idx + 2) == 'f' &&
- PyUnicode_READ(kind, str, idx + 3) == 'i' &&
- PyUnicode_READ(kind, str, idx + 4) == 'n' &&
- PyUnicode_READ(kind, str, idx + 5) == 'i' &&
- PyUnicode_READ(kind, str, idx + 6) == 't' &&
- PyUnicode_READ(kind, str, idx + 7) == 'y') {
- return _parse_constant(s, "Infinity", idx, next_idx_ptr);
- }
- break;
- case '-':
- /* -Infinity */
- if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
- PyUnicode_READ(kind, str, idx + 2) == 'n' &&
- PyUnicode_READ(kind, str, idx + 3) == 'f' &&
- PyUnicode_READ(kind, str, idx + 4) == 'i' &&
- PyUnicode_READ(kind, str, idx + 5) == 'n' &&
- PyUnicode_READ(kind, str, idx + 6) == 'i' &&
- PyUnicode_READ(kind, str, idx + 7) == 't' &&
- PyUnicode_READ(kind, str, idx + 8) == 'y') {
- return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
- }
- break;
- }
- /* Didn't find a string, object, array, or named constant. Look for a number. */
- return _match_number_unicode(s, pystr, idx, next_idx_ptr);
- }
- static PyObject *
- scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
- {
- /* Python callable interface to scan_once_{str,unicode} */
- PyObject *pystr;
- PyObject *rval;
- Py_ssize_t idx;
- Py_ssize_t next_idx = -1;
- static char *kwlist[] = {"string", "idx", NULL};
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
- return NULL;
- if (PyUnicode_Check(pystr)) {
- rval = scan_once_unicode(self, pystr, idx, &next_idx);
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "first argument must be a string, not %.80s",
- Py_TYPE(pystr)->tp_name);
- return NULL;
- }
- PyDict_Clear(self->memo);
- if (rval == NULL)
- return NULL;
- return _build_rval_index_tuple(rval, next_idx);
- }
- static PyObject *
- scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
- {
- PyScannerObject *s;
- PyObject *ctx;
- PyObject *strict;
- static char *kwlist[] = {"context", NULL};
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
- return NULL;
- s = (PyScannerObject *)type->tp_alloc(type, 0);
- if (s == NULL) {
- return NULL;
- }
- s->memo = PyDict_New();
- if (s->memo == NULL)
- goto bail;
- /* All of these will fail "gracefully" so we don't need to verify them */
- strict = PyObject_GetAttrString(ctx, "strict");
- if (strict == NULL)
- goto bail;
- s->strict = PyObject_IsTrue(strict);
- Py_DECREF(strict);
- if (s->strict < 0)
- goto bail;
- s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
- if (s->object_hook == NULL)
- goto bail;
- s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
- if (s->object_pairs_hook == NULL)
- goto bail;
- s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
- if (s->parse_float == NULL)
- goto bail;
- s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
- if (s->parse_int == NULL)
- goto bail;
- s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
- if (s->parse_constant == NULL)
- goto bail;
- return (PyObject *)s;
- bail:
- Py_DECREF(s);
- return NULL;
- }
- PyDoc_STRVAR(scanner_doc, "JSON scanner object");
- static PyType_Slot PyScannerType_slots[] = {
- {Py_tp_doc, (void *)scanner_doc},
- {Py_tp_dealloc, scanner_dealloc},
- {Py_tp_call, scanner_call},
- {Py_tp_traverse, scanner_traverse},
- {Py_tp_clear, scanner_clear},
- {Py_tp_members, scanner_members},
- {Py_tp_new, scanner_new},
- {0, 0}
- };
- static PyType_Spec PyScannerType_spec = {
- .name = "_json.Scanner",
- .basicsize = sizeof(PyScannerObject),
- .itemsize = 0,
- .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
- .slots = PyScannerType_slots,
- };
- static PyObject *
- encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
- {
- static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
- PyEncoderObject *s;
- PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
- PyObject *item_separator;
- int sort_keys, skipkeys, allow_nan;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
- &markers, &defaultfn, &encoder, &indent,
- &key_separator, &item_separator,
- &sort_keys, &skipkeys, &allow_nan))
- return NULL;
- if (markers != Py_None && !PyDict_Check(markers)) {
- PyErr_Format(PyExc_TypeError,
- "make_encoder() argument 1 must be dict or None, "
- "not %.200s", Py_TYPE(markers)->tp_name);
- return NULL;
- }
- s = (PyEncoderObject *)type->tp_alloc(type, 0);
- if (s == NULL)
- return NULL;
- s->markers = Py_NewRef(markers);
- s->defaultfn = Py_NewRef(defaultfn);
- s->encoder = Py_NewRef(encoder);
- s->indent = Py_NewRef(indent);
- s->key_separator = Py_NewRef(key_separator);
- s->item_separator = Py_NewRef(item_separator);
- s->sort_keys = sort_keys;
- s->skipkeys = skipkeys;
- s->allow_nan = allow_nan;
- s->fast_encode = NULL;
- if (PyCFunction_Check(s->encoder)) {
- PyCFunction f = PyCFunction_GetFunction(s->encoder);
- if (f == (PyCFunction)py_encode_basestring_ascii ||
- f == (PyCFunction)py_encode_basestring) {
- s->fast_encode = f;
- }
- }
- return (PyObject *)s;
- }
- static PyObject *
- encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
- {
- /* Python callable interface to encode_listencode_obj */
- static char *kwlist[] = {"obj", "_current_indent_level", NULL};
- PyObject *obj, *result;
- Py_ssize_t indent_level;
- _PyUnicodeWriter writer;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
- &obj, &indent_level))
- return NULL;
- _PyUnicodeWriter_Init(&writer);
- writer.overallocate = 1;
- if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
- _PyUnicodeWriter_Dealloc(&writer);
- return NULL;
- }
- result = PyTuple_New(1);
- if (result == NULL ||
- PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
- Py_XDECREF(result);
- return NULL;
- }
- return result;
- }
- static PyObject *
- _encoded_const(PyObject *obj)
- {
- /* Return the JSON string representation of None, True, False */
- if (obj == Py_None) {
- return Py_NewRef(&_Py_ID(null));
- }
- else if (obj == Py_True) {
- return Py_NewRef(&_Py_ID(true));
- }
- else if (obj == Py_False) {
- return Py_NewRef(&_Py_ID(false));
- }
- else {
- PyErr_SetString(PyExc_ValueError, "not a const");
- return NULL;
- }
- }
- static PyObject *
- encoder_encode_float(PyEncoderObject *s, PyObject *obj)
- {
- /* Return the JSON representation of a PyFloat. */
- double i = PyFloat_AS_DOUBLE(obj);
- if (!Py_IS_FINITE(i)) {
- if (!s->allow_nan) {
- PyErr_Format(
- PyExc_ValueError,
- "Out of range float values are not JSON compliant: %R",
- obj
- );
- return NULL;
- }
- if (i > 0) {
- return PyUnicode_FromString("Infinity");
- }
- else if (i < 0) {
- return PyUnicode_FromString("-Infinity");
- }
- else {
- return PyUnicode_FromString("NaN");
- }
- }
- return PyFloat_Type.tp_repr(obj);
- }
- static PyObject *
- encoder_encode_string(PyEncoderObject *s, PyObject *obj)
- {
- /* Return the JSON representation of a string */
- PyObject *encoded;
- if (s->fast_encode) {
- return s->fast_encode(NULL, obj);
- }
- encoded = PyObject_CallOneArg(s->encoder, obj);
- if (encoded != NULL && !PyUnicode_Check(encoded)) {
- PyErr_Format(PyExc_TypeError,
- "encoder() must return a string, not %.80s",
- Py_TYPE(encoded)->tp_name);
- Py_DECREF(encoded);
- return NULL;
- }
- return encoded;
- }
- static int
- _steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
- {
- /* Append stolen and then decrement its reference count */
- int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
- Py_DECREF(stolen);
- return rval;
- }
- static int
- encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *obj, Py_ssize_t indent_level)
- {
- /* Encode Python object obj to a JSON term */
- PyObject *newobj;
- int rv;
- if (obj == Py_None) {
- return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
- }
- else if (obj == Py_True) {
- return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
- }
- else if (obj == Py_False) {
- return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
- }
- else if (PyUnicode_Check(obj)) {
- PyObject *encoded = encoder_encode_string(s, obj);
- if (encoded == NULL)
- return -1;
- return _steal_accumulate(writer, encoded);
- }
- else if (PyLong_Check(obj)) {
- PyObject *encoded = PyLong_Type.tp_repr(obj);
- if (encoded == NULL)
- return -1;
- return _steal_accumulate(writer, encoded);
- }
- else if (PyFloat_Check(obj)) {
- PyObject *encoded = encoder_encode_float(s, obj);
- if (encoded == NULL)
- return -1;
- return _steal_accumulate(writer, encoded);
- }
- else if (PyList_Check(obj) || PyTuple_Check(obj)) {
- if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
- return -1;
- rv = encoder_listencode_list(s, writer, obj, indent_level);
- _Py_LeaveRecursiveCall();
- return rv;
- }
- else if (PyDict_Check(obj)) {
- if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
- return -1;
- rv = encoder_listencode_dict(s, writer, obj, indent_level);
- _Py_LeaveRecursiveCall();
- return rv;
- }
- else {
- PyObject *ident = NULL;
- if (s->markers != Py_None) {
- int has_key;
- ident = PyLong_FromVoidPtr(obj);
- if (ident == NULL)
- return -1;
- has_key = PyDict_Contains(s->markers, ident);
- if (has_key) {
- if (has_key != -1)
- PyErr_SetString(PyExc_ValueError, "Circular reference detected");
- Py_DECREF(ident);
- return -1;
- }
- if (PyDict_SetItem(s->markers, ident, obj)) {
- Py_DECREF(ident);
- return -1;
- }
- }
- newobj = PyObject_CallOneArg(s->defaultfn, obj);
- if (newobj == NULL) {
- Py_XDECREF(ident);
- return -1;
- }
- if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
- Py_DECREF(newobj);
- Py_XDECREF(ident);
- return -1;
- }
- rv = encoder_listencode_obj(s, writer, newobj, indent_level);
- _Py_LeaveRecursiveCall();
- Py_DECREF(newobj);
- if (rv) {
- Py_XDECREF(ident);
- return -1;
- }
- if (ident != NULL) {
- if (PyDict_DelItem(s->markers, ident)) {
- Py_XDECREF(ident);
- return -1;
- }
- Py_XDECREF(ident);
- }
- return rv;
- }
- }
- static int
- encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
- PyObject *key, PyObject *value, Py_ssize_t indent_level)
- {
- PyObject *keystr = NULL;
- PyObject *encoded;
- if (PyUnicode_Check(key)) {
- keystr = Py_NewRef(key);
- }
- else if (PyFloat_Check(key)) {
- keystr = encoder_encode_float(s, key);
- }
- else if (key == Py_True || key == Py_False || key == Py_None) {
- /* This must come before the PyLong_Check because
- True and False are also 1 and 0.*/
- keystr = _encoded_const(key);
- }
- else if (PyLong_Check(key)) {
- keystr = PyLong_Type.tp_repr(key);
- }
- else if (s->skipkeys) {
- return 0;
- }
- else {
- PyErr_Format(PyExc_TypeError,
- "keys must be str, int, float, bool or None, "
- "not %.100s", Py_TYPE(key)->tp_name);
- return -1;
- }
- if (keystr == NULL) {
- return -1;
- }
- if (*first) {
- *first = false;
- }
- else {
- if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
- Py_DECREF(keystr);
- return -1;
- }
- }
- encoded = encoder_encode_string(s, keystr);
- Py_DECREF(keystr);
- if (encoded == NULL) {
- return -1;
- }
- if (_steal_accumulate(writer, encoded) < 0) {
- return -1;
- }
- if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
- return -1;
- }
- if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
- return -1;
- }
- return 0;
- }
- static int
- encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *dct, Py_ssize_t indent_level)
- {
- /* Encode Python dict dct a JSON term */
- PyObject *ident = NULL;
- PyObject *items = NULL;
- PyObject *key, *value;
- bool first = true;
- if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
- return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
- if (s->markers != Py_None) {
- int has_key;
- ident = PyLong_FromVoidPtr(dct);
- if (ident == NULL)
- goto bail;
- has_key = PyDict_Contains(s->markers, ident);
- if (has_key) {
- if (has_key != -1)
- PyErr_SetString(PyExc_ValueError, "Circular reference detected");
- goto bail;
- }
- if (PyDict_SetItem(s->markers, ident, dct)) {
- goto bail;
- }
- }
- if (_PyUnicodeWriter_WriteChar(writer, '{'))
- goto bail;
- if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
- indent_level += 1;
- /*
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
- buf += newline_indent
- */
- }
- if (s->sort_keys || !PyDict_CheckExact(dct)) {
- items = PyMapping_Items(dct);
- if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
- goto bail;
- for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
- PyObject *item = PyList_GET_ITEM(items, i);
- if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
- PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
- goto bail;
- }
- key = PyTuple_GET_ITEM(item, 0);
- value = PyTuple_GET_ITEM(item, 1);
- if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
- goto bail;
- }
- Py_CLEAR(items);
- } else {
- Py_ssize_t pos = 0;
- while (PyDict_Next(dct, &pos, &key, &value)) {
- if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
- goto bail;
- }
- }
- if (ident != NULL) {
- if (PyDict_DelItem(s->markers, ident))
- goto bail;
- Py_CLEAR(ident);
- }
- /* TODO DOES NOT RUN; dead code
- if (s->indent != Py_None) {
- indent_level -= 1;
- yield '\n' + (' ' * (_indent * _current_indent_level))
- }*/
- if (_PyUnicodeWriter_WriteChar(writer, '}'))
- goto bail;
- return 0;
- bail:
- Py_XDECREF(items);
- Py_XDECREF(ident);
- return -1;
- }
- static int
- encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
- PyObject *seq, Py_ssize_t indent_level)
- {
- PyObject *ident = NULL;
- PyObject *s_fast = NULL;
- Py_ssize_t i;
- ident = NULL;
- s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
- if (s_fast == NULL)
- return -1;
- if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
- Py_DECREF(s_fast);
- return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
- }
- if (s->markers != Py_None) {
- int has_key;
- ident = PyLong_FromVoidPtr(seq);
- if (ident == NULL)
- goto bail;
- has_key = PyDict_Contains(s->markers, ident);
- if (has_key) {
- if (has_key != -1)
- PyErr_SetString(PyExc_ValueError, "Circular reference detected");
- goto bail;
- }
- if (PyDict_SetItem(s->markers, ident, seq)) {
- goto bail;
- }
- }
- if (_PyUnicodeWriter_WriteChar(writer, '['))
- goto bail;
- if (s->indent != Py_None) {
- /* TODO: DOES NOT RUN */
- indent_level += 1;
- /*
- newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
- separator = _item_separator + newline_indent
- buf += newline_indent
- */
- }
- for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
- PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
- if (i) {
- if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
- goto bail;
- }
- if (encoder_listencode_obj(s, writer, obj, indent_level))
- goto bail;
- }
- if (ident != NULL) {
- if (PyDict_DelItem(s->markers, ident))
- goto bail;
- Py_CLEAR(ident);
- }
- /* TODO: DOES NOT RUN
- if (s->indent != Py_None) {
- indent_level -= 1;
- yield '\n' + (' ' * (_indent * _current_indent_level))
- }*/
- if (_PyUnicodeWriter_WriteChar(writer, ']'))
- goto bail;
- Py_DECREF(s_fast);
- return 0;
- bail:
- Py_XDECREF(ident);
- Py_DECREF(s_fast);
- return -1;
- }
- static void
- encoder_dealloc(PyObject *self)
- {
- PyTypeObject *tp = Py_TYPE(self);
- /* bpo-31095: UnTrack is needed before calling any callbacks */
- PyObject_GC_UnTrack(self);
- encoder_clear((PyEncoderObject *)self);
- tp->tp_free(self);
- Py_DECREF(tp);
- }
- static int
- encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
- {
- Py_VISIT(Py_TYPE(self));
- Py_VISIT(self->markers);
- Py_VISIT(self->defaultfn);
- Py_VISIT(self->encoder);
- Py_VISIT(self->indent);
- Py_VISIT(self->key_separator);
- Py_VISIT(self->item_separator);
- return 0;
- }
- static int
- encoder_clear(PyEncoderObject *self)
- {
- /* Deallocate Encoder */
- Py_CLEAR(self->markers);
- Py_CLEAR(self->defaultfn);
- Py_CLEAR(self->encoder);
- Py_CLEAR(self->indent);
- Py_CLEAR(self->key_separator);
- Py_CLEAR(self->item_separator);
- return 0;
- }
- PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
- static PyType_Slot PyEncoderType_slots[] = {
- {Py_tp_doc, (void *)encoder_doc},
- {Py_tp_dealloc, encoder_dealloc},
- {Py_tp_call, encoder_call},
- {Py_tp_traverse, encoder_traverse},
- {Py_tp_clear, encoder_clear},
- {Py_tp_members, encoder_members},
- {Py_tp_new, encoder_new},
- {0, 0}
- };
- static PyType_Spec PyEncoderType_spec = {
- .name = "_json.Encoder",
- .basicsize = sizeof(PyEncoderObject),
- .itemsize = 0,
- .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
- .slots = PyEncoderType_slots
- };
- static PyMethodDef speedups_methods[] = {
- {"encode_basestring_ascii",
- (PyCFunction)py_encode_basestring_ascii,
- METH_O,
- pydoc_encode_basestring_ascii},
- {"encode_basestring",
- (PyCFunction)py_encode_basestring,
- METH_O,
- pydoc_encode_basestring},
- {"scanstring",
- (PyCFunction)py_scanstring,
- METH_VARARGS,
- pydoc_scanstring},
- {NULL, NULL, 0, NULL}
- };
- PyDoc_STRVAR(module_doc,
- "json speedups\n");
- static int
- _json_exec(PyObject *module)
- {
- PyObject *PyScannerType = PyType_FromSpec(&PyScannerType_spec);
- if (PyScannerType == NULL) {
- return -1;
- }
- int rc = PyModule_AddObjectRef(module, "make_scanner", PyScannerType);
- Py_DECREF(PyScannerType);
- if (rc < 0) {
- return -1;
- }
- PyObject *PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
- if (PyEncoderType == NULL) {
- return -1;
- }
- rc = PyModule_AddObjectRef(module, "make_encoder", PyEncoderType);
- Py_DECREF(PyEncoderType);
- if (rc < 0) {
- return -1;
- }
- return 0;
- }
- static PyModuleDef_Slot _json_slots[] = {
- {Py_mod_exec, _json_exec},
- {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
- {0, NULL}
- };
- static struct PyModuleDef jsonmodule = {
- .m_base = PyModuleDef_HEAD_INIT,
- .m_name = "_json",
- .m_doc = module_doc,
- .m_methods = speedups_methods,
- .m_slots = _json_slots,
- };
- PyMODINIT_FUNC
- PyInit__json(void)
- {
- return PyModuleDef_Init(&jsonmodule);
- }
|