_speedups.c 106 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408
  1. /* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
  2. #include "Python.h"
  3. #include "structmember.h"
  /*
   * Python 2 / Python 3 compatibility shims.
   *
   * Each branch defines the names that only the other major version provides
   * natively, so the remainder of the file can use a single spelling.
   */
  #if PY_MAJOR_VERSION >= 3
  /* Python 3: route the PyInt_* and PyString_* spellings to PyLong_* / PyUnicode_*. */
  #  define PyInt_FromSsize_t PyLong_FromSsize_t
  #  define PyInt_AsSsize_t PyLong_AsSsize_t
  /* The PyInt checks are hard-wired to 0 in this branch. */
  #  define PyInt_Check(obj) 0
  #  define PyInt_CheckExact(obj) 0
  #  define JSON_UNICHR Py_UCS4
  #  define JSON_InternFromString PyUnicode_InternFromString
  #  define PyString_GET_SIZE PyUnicode_GET_LENGTH
  /* PY2_UNUSED / PY3_UNUSED expand to UNUSED only on the matching major version. */
  #  define PY2_UNUSED
  #  define PY3_UNUSED UNUSED
  #else /* PY_MAJOR_VERSION < 3 */
  #  define PY2_UNUSED UNUSED
  #  define PY3_UNUSED
  #  define PyBytes_Check PyString_Check
  /* Python 2: emulate the PyUnicode_READY/KIND/DATA/READ accessors on top of Py_UNICODE. */
  #  define PyUnicode_READY(obj) 0
  #  define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
  #  define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
  #  define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
  #  define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE
  #  define JSON_UNICHR Py_UNICODE
  #  define JSON_InternFromString PyString_InternFromString
  #endif /* PY_MAJOR_VERSION >= 3 */
  /*
   * Fallback definitions of PyObject_CallNoArgs / PyObject_CallOneArg for
   * interpreters whose PY_VERSION_HEX is below 0x03090000 and that do not
   * already define them. Both forward to PyObject_CallFunctionObjArgs with a
   * NULL terminator.
   *
   * The expansions deliberately carry no trailing semicolon so the macros
   * behave like ordinary function calls and can be used inside expressions
   * (conditions, return statements, nested calls).
   */
  #if PY_VERSION_HEX < 0x03090000
  #if !defined(PyObject_CallNoArgs)
  #define PyObject_CallNoArgs(callable) \
      PyObject_CallFunctionObjArgs((callable), NULL)
  #endif
  #if !defined(PyObject_CallOneArg)
  #define PyObject_CallOneArg(callable, arg) \
      PyObject_CallFunctionObjArgs((callable), (arg), NULL)
  #endif
  #endif /* PY_VERSION_HEX < 0x03090000 */
  34. #if PY_VERSION_HEX < 0x02070000
  35. #if !defined(PyOS_string_to_double)
  36. #define PyOS_string_to_double json_PyOS_string_to_double
  37. static double
  38. json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
  39. static double
  40. json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
  41. {
  42. double x;
  43. assert(endptr == NULL);
  44. assert(overflow_exception == NULL);
  45. PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
  46. x = PyOS_ascii_atof(s);
  47. PyFPE_END_PROTECT(x)
  48. return x;
  49. }
  50. #endif
  51. #endif /* PY_VERSION_HEX < 0x02070000 */
  /*
   * Object-header accessor fallbacks, defined only when PY_VERSION_HEX is
   * below 0x02060000 and the interpreter headers have not already supplied
   * them.
   */
  #if PY_VERSION_HEX < 0x02060000
  #  if !defined(Py_TYPE)
  #    define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
  #  endif
  #  if !defined(Py_SIZE)
  #    define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
  #  endif
  #  if !defined(PyVarObject_HEAD_INIT)
  /* Expands to the object-head initializer followed by the size field and a comma. */
  #    define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
  #  endif
  #endif /* PY_VERSION_HEX < 0x02060000 */
  /* UNUSED expands to the GNU "unused" attribute when available, otherwise to nothing. */
  #if defined(__GNUC__)
  #  define UNUSED __attribute__((__unused__))
  #else
  #  define UNUSED
  #endif

  /* Name of the default text encoding. */
  #define DEFAULT_ENCODING "utf-8"

  /* Type checks for the two extension types declared in this file. */
  #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
  #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
  #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
  #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)

  /* Values stored in PyEncoderObject.allow_or_ignore_nan (0 means neither). */
  #define JSON_ALLOW_NAN 1
  #define JSON_IGNORE_NAN 2
  75. static PyObject *JSON_Infinity = NULL;
  76. static PyObject *JSON_NegInfinity = NULL;
  77. static PyObject *JSON_NaN = NULL;
  78. static PyObject *JSON_EmptyUnicode = NULL;
  79. #if PY_MAJOR_VERSION < 3
  80. static PyObject *JSON_EmptyStr = NULL;
  81. #endif
  82. static PyTypeObject PyScannerType;
  83. static PyTypeObject PyEncoderType;
  84. typedef struct {
  85. PyObject *large_strings; /* A list of previously accumulated large strings */
  86. PyObject *small_strings; /* Pending small strings */
  87. } JSON_Accu;
  88. static int
  89. JSON_Accu_Init(JSON_Accu *acc);
  90. static int
  91. JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
  92. static PyObject *
  93. JSON_Accu_FinishAsList(JSON_Accu *acc);
  94. static void
  95. JSON_Accu_Destroy(JSON_Accu *acc);
  /* Decoder error message texts. */
  #define ERR_EXPECTING_VALUE           "Expecting value"
  #define ERR_ARRAY_DELIMITER           "Expecting ',' delimiter or ']'"
  #define ERR_ARRAY_VALUE_FIRST         "Expecting value or ']'"
  #define ERR_OBJECT_DELIMITER          "Expecting ',' delimiter or '}'"
  #define ERR_OBJECT_PROPERTY           "Expecting property name enclosed in double quotes"
  #define ERR_OBJECT_PROPERTY_FIRST     "Expecting property name enclosed in double quotes or '}'"
  #define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
  #define ERR_STRING_UNTERMINATED       "Unterminated string starting at"
  #define ERR_STRING_CONTROL            "Invalid control character %r at"
  #define ERR_STRING_ESC1               "Invalid \\X escape sequence %r"
  #define ERR_STRING_ESC4               "Invalid \\uXXXX escape sequence"

  /* Names of the optional methods looked up on objects being encoded. */
  #define FOR_JSON_METHOD_NAME "for_json"
  #define ASDICT_METHOD_NAME   "_asdict"
  109. typedef struct _PyScannerObject {
  110. PyObject_HEAD
  111. PyObject *encoding;
  112. PyObject *strict_bool;
  113. int strict;
  114. PyObject *object_hook;
  115. PyObject *pairs_hook;
  116. PyObject *parse_float;
  117. PyObject *parse_int;
  118. PyObject *parse_constant;
  119. PyObject *memo;
  120. } PyScannerObject;
  121. static PyMemberDef scanner_members[] = {
  122. {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
  123. {"strict", T_OBJECT, offsetof(PyScannerObject, strict_bool), READONLY, "strict"},
  124. {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
  125. {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
  126. {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
  127. {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
  128. {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
  129. {NULL}
  130. };
  131. typedef struct _PyEncoderObject {
  132. PyObject_HEAD
  133. PyObject *markers;
  134. PyObject *defaultfn;
  135. PyObject *encoder;
  136. PyObject *indent;
  137. PyObject *key_separator;
  138. PyObject *item_separator;
  139. PyObject *sort_keys;
  140. PyObject *key_memo;
  141. PyObject *encoding;
  142. PyObject *Decimal;
  143. PyObject *skipkeys_bool;
  144. int skipkeys;
  145. int fast_encode;
  146. /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
  147. int allow_or_ignore_nan;
  148. int use_decimal;
  149. int namedtuple_as_object;
  150. int tuple_as_array;
  151. int iterable_as_array;
  152. PyObject *max_long_size;
  153. PyObject *min_long_size;
  154. PyObject *item_sort_key;
  155. PyObject *item_sort_kw;
  156. int for_json;
  157. } PyEncoderObject;
  158. static PyMemberDef encoder_members[] = {
  159. {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
  160. {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
  161. {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
  162. {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"},
  163. {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
  164. {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
  165. {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
  166. {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
  167. /* Python 2.5 does not support T_BOOl */
  168. {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
  169. {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
  170. {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
  171. {"max_long_size", T_OBJECT, offsetof(PyEncoderObject, max_long_size), READONLY, "max_long_size"},
  172. {"min_long_size", T_OBJECT, offsetof(PyEncoderObject, min_long_size), READONLY, "min_long_size"},
  173. {NULL}
  174. };
  175. static PyObject *
  176. join_list_unicode(PyObject *lst);
  177. static PyObject *
  178. JSON_ParseEncoding(PyObject *encoding);
  179. static PyObject *
  180. maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj);
  181. static Py_ssize_t
  182. ascii_char_size(JSON_UNICHR c);
  183. static Py_ssize_t
  184. ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
  185. static PyObject *
  186. ascii_escape_unicode(PyObject *pystr);
  187. static PyObject *
  188. ascii_escape_str(PyObject *pystr);
  189. static PyObject *
  190. py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
  191. #if PY_MAJOR_VERSION < 3
  192. static PyObject *
  193. join_list_string(PyObject *lst);
  194. static PyObject *
  195. scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
  196. static PyObject *
  197. scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
  198. static PyObject *
  199. _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
  200. #endif
  201. static PyObject *
  202. scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
  203. static PyObject *
  204. scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
  205. static PyObject *
  206. _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
  207. static PyObject *
  208. scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
  209. static void
  210. scanner_dealloc(PyObject *self);
  211. static int
  212. scanner_clear(PyObject *self);
  213. static PyObject *
  214. encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
  215. static void
  216. encoder_dealloc(PyObject *self);
  217. static int
  218. encoder_clear(PyObject *self);
  219. static int
  220. is_raw_json(PyObject *obj);
  221. static PyObject *
  222. encoder_stringify_key(PyEncoderObject *s, PyObject *key);
  223. static int
  224. encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
  225. static int
  226. encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
  227. static int
  228. encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level);
  229. static PyObject *
  230. _encoded_const(PyObject *obj);
  231. static void
  232. raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
  233. static PyObject *
  234. encoder_encode_string(PyEncoderObject *s, PyObject *obj);
  235. static int
  236. _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
  237. static PyObject *
  238. _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
  239. static int
  240. _call_json_method(PyObject *obj, const char *method_name, PyObject **result);
  241. static PyObject *
  242. encoder_encode_float(PyEncoderObject *s, PyObject *obj);
  243. static PyObject *
  244. moduleinit(void);
  /*
   * S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   * that needs no escaping, i.e. anything in that range except backslash and
   * double quote. The argument is parenthesised at every use so that
   * compound expressions (conditionals, bitwise operations) are evaluated as
   * a unit; note that it is still evaluated more than once, so it must be
   * free of side effects.
   */
  #define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
  /* IS_WHITESPACE(c): true for space, tab, line feed and carriage return. */
  #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
  /* Output size of one "\uXXXX" escape; ascii_char_size returns this (or twice this). */
  #define MIN_EXPANSION 6
  248. static PyObject* RawJSONType = NULL;
  249. static int
  250. is_raw_json(PyObject *obj)
  251. {
  252. return PyObject_IsInstance(obj, RawJSONType) ? 1 : 0;
  253. }
  254. static int
  255. JSON_Accu_Init(JSON_Accu *acc)
  256. {
  257. /* Lazily allocated */
  258. acc->large_strings = NULL;
  259. acc->small_strings = PyList_New(0);
  260. if (acc->small_strings == NULL)
  261. return -1;
  262. return 0;
  263. }
  264. static int
  265. flush_accumulator(JSON_Accu *acc)
  266. {
  267. Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings);
  268. if (nsmall) {
  269. int ret;
  270. PyObject *joined;
  271. if (acc->large_strings == NULL) {
  272. acc->large_strings = PyList_New(0);
  273. if (acc->large_strings == NULL)
  274. return -1;
  275. }
  276. #if PY_MAJOR_VERSION >= 3
  277. joined = join_list_unicode(acc->small_strings);
  278. #else /* PY_MAJOR_VERSION >= 3 */
  279. joined = join_list_string(acc->small_strings);
  280. #endif /* PY_MAJOR_VERSION < 3 */
  281. if (joined == NULL)
  282. return -1;
  283. if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) {
  284. Py_DECREF(joined);
  285. return -1;
  286. }
  287. ret = PyList_Append(acc->large_strings, joined);
  288. Py_DECREF(joined);
  289. return ret;
  290. }
  291. return 0;
  292. }
  293. static int
  294. JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
  295. {
  296. Py_ssize_t nsmall;
  297. #if PY_MAJOR_VERSION >= 3
  298. assert(PyUnicode_Check(unicode));
  299. #else /* PY_MAJOR_VERSION >= 3 */
  300. assert(PyString_Check(unicode) || PyUnicode_Check(unicode));
  301. #endif /* PY_MAJOR_VERSION < 3 */
  302. if (PyList_Append(acc->small_strings, unicode))
  303. return -1;
  304. nsmall = PyList_GET_SIZE(acc->small_strings);
  305. /* Each item in a list of unicode objects has an overhead (in 64-bit
  306. * builds) of:
  307. * - 8 bytes for the list slot
  308. * - 56 bytes for the header of the unicode object
  309. * that is, 64 bytes. 100000 such objects waste more than 6MB
  310. * compared to a single concatenated string.
  311. */
  312. if (nsmall < 100000)
  313. return 0;
  314. return flush_accumulator(acc);
  315. }
  316. static PyObject *
  317. JSON_Accu_FinishAsList(JSON_Accu *acc)
  318. {
  319. int ret;
  320. PyObject *res;
  321. ret = flush_accumulator(acc);
  322. Py_CLEAR(acc->small_strings);
  323. if (ret) {
  324. Py_CLEAR(acc->large_strings);
  325. return NULL;
  326. }
  327. res = acc->large_strings;
  328. acc->large_strings = NULL;
  329. if (res == NULL)
  330. return PyList_New(0);
  331. return res;
  332. }
  333. static void
  334. JSON_Accu_Destroy(JSON_Accu *acc)
  335. {
  336. Py_CLEAR(acc->small_strings);
  337. Py_CLEAR(acc->large_strings);
  338. }
  339. static int
  340. IS_DIGIT(JSON_UNICHR c)
  341. {
  342. return c >= '0' && c <= '9';
  343. }
  344. static PyObject *
  345. maybe_quote_bigint(PyEncoderObject* s, PyObject *encoded, PyObject *obj)
  346. {
  347. if (s->max_long_size != Py_None && s->min_long_size != Py_None) {
  348. if (PyObject_RichCompareBool(obj, s->max_long_size, Py_GE) ||
  349. PyObject_RichCompareBool(obj, s->min_long_size, Py_LE)) {
  350. #if PY_MAJOR_VERSION >= 3
  351. PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
  352. #else
  353. PyObject* quoted = PyString_FromFormat("\"%s\"",
  354. PyString_AsString(encoded));
  355. #endif
  356. Py_DECREF(encoded);
  357. encoded = quoted;
  358. }
  359. }
  360. return encoded;
  361. }
  362. static int
  363. _call_json_method(PyObject *obj, const char *method_name, PyObject **result)
  364. {
  365. int rval = 0;
  366. PyObject *method = PyObject_GetAttrString(obj, method_name);
  367. if (method == NULL) {
  368. PyErr_Clear();
  369. return 0;
  370. }
  371. if (PyCallable_Check(method)) {
  372. PyObject *tmp = PyObject_CallNoArgs(method);
  373. if (tmp == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
  374. PyErr_Clear();
  375. } else {
  376. // This will set result to NULL if a TypeError occurred,
  377. // which must be checked by the caller
  378. *result = tmp;
  379. rval = 1;
  380. }
  381. }
  382. Py_DECREF(method);
  383. return rval;
  384. }
  385. static int
  386. _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
  387. {
  388. /* PyObject to Py_ssize_t converter */
  389. *size_ptr = PyInt_AsSsize_t(o);
  390. if (*size_ptr == -1 && PyErr_Occurred())
  391. return 0;
  392. return 1;
  393. }
  394. static PyObject *
  395. _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
  396. {
  397. /* Py_ssize_t to PyObject converter */
  398. return PyInt_FromSsize_t(*size_ptr);
  399. }
  400. static Py_ssize_t
  401. ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
  402. {
  403. /* Escape unicode code point c to ASCII escape sequences
  404. in char *output. output must have at least 12 bytes unused to
  405. accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
  406. if (S_CHAR(c)) {
  407. output[chars++] = (char)c;
  408. }
  409. else {
  410. output[chars++] = '\\';
  411. switch (c) {
  412. case '\\': output[chars++] = (char)c; break;
  413. case '"': output[chars++] = (char)c; break;
  414. case '\b': output[chars++] = 'b'; break;
  415. case '\f': output[chars++] = 'f'; break;
  416. case '\n': output[chars++] = 'n'; break;
  417. case '\r': output[chars++] = 'r'; break;
  418. case '\t': output[chars++] = 't'; break;
  419. default:
  420. #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
  421. if (c >= 0x10000) {
  422. /* UTF-16 surrogate pair */
  423. JSON_UNICHR v = c - 0x10000;
  424. c = 0xd800 | ((v >> 10) & 0x3ff);
  425. output[chars++] = 'u';
  426. output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
  427. output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
  428. output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
  429. output[chars++] = "0123456789abcdef"[(c ) & 0xf];
  430. c = 0xdc00 | (v & 0x3ff);
  431. output[chars++] = '\\';
  432. }
  433. #endif
  434. output[chars++] = 'u';
  435. output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
  436. output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
  437. output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
  438. output[chars++] = "0123456789abcdef"[(c ) & 0xf];
  439. }
  440. }
  441. return chars;
  442. }
  443. static Py_ssize_t
  444. ascii_char_size(JSON_UNICHR c)
  445. {
  446. if (S_CHAR(c)) {
  447. return 1;
  448. }
  449. else if (c == '\\' ||
  450. c == '"' ||
  451. c == '\b' ||
  452. c == '\f' ||
  453. c == '\n' ||
  454. c == '\r' ||
  455. c == '\t') {
  456. return 2;
  457. }
  458. #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
  459. else if (c >= 0x10000U) {
  460. return 2 * MIN_EXPANSION;
  461. }
  462. #endif
  463. else {
  464. return MIN_EXPANSION;
  465. }
  466. }
  467. static PyObject *
  468. ascii_escape_unicode(PyObject *pystr)
  469. {
  470. /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
  471. Py_ssize_t i;
  472. Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
  473. Py_ssize_t output_size = 2;
  474. Py_ssize_t chars;
  475. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  476. void *data = PyUnicode_DATA(pystr);
  477. PyObject *rval;
  478. char *output;
  479. output_size = 2;
  480. for (i = 0; i < input_chars; i++) {
  481. output_size += ascii_char_size(PyUnicode_READ(kind, data, i));
  482. }
  483. #if PY_MAJOR_VERSION >= 3
  484. rval = PyUnicode_New(output_size, 127);
  485. if (rval == NULL) {
  486. return NULL;
  487. }
  488. assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND);
  489. output = (char *)PyUnicode_DATA(rval);
  490. #else
  491. rval = PyString_FromStringAndSize(NULL, output_size);
  492. if (rval == NULL) {
  493. return NULL;
  494. }
  495. output = PyString_AS_STRING(rval);
  496. #endif
  497. chars = 0;
  498. output[chars++] = '"';
  499. for (i = 0; i < input_chars; i++) {
  500. chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars);
  501. }
  502. output[chars++] = '"';
  503. assert(chars == output_size);
  504. return rval;
  505. }
  506. #if PY_MAJOR_VERSION >= 3
  507. static PyObject *
  508. ascii_escape_str(PyObject *pystr)
  509. {
  510. PyObject *rval;
  511. PyObject *input = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(pystr), PyBytes_GET_SIZE(pystr), NULL);
  512. if (input == NULL)
  513. return NULL;
  514. rval = ascii_escape_unicode(input);
  515. Py_DECREF(input);
  516. return rval;
  517. }
  518. #else /* PY_MAJOR_VERSION >= 3 */
  519. static PyObject *
  520. ascii_escape_str(PyObject *pystr)
  521. {
  522. /* Take a PyString pystr and return a new ASCII-only escaped PyString */
  523. Py_ssize_t i;
  524. Py_ssize_t input_chars;
  525. Py_ssize_t output_size;
  526. Py_ssize_t chars;
  527. PyObject *rval;
  528. char *output;
  529. char *input_str;
  530. input_chars = PyString_GET_SIZE(pystr);
  531. input_str = PyString_AS_STRING(pystr);
  532. output_size = 2;
  533. /* Fast path for a string that's already ASCII */
  534. for (i = 0; i < input_chars; i++) {
  535. JSON_UNICHR c = (JSON_UNICHR)input_str[i];
  536. if (c > 0x7f) {
  537. /* We hit a non-ASCII character, bail to unicode mode */
  538. PyObject *uni;
  539. uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
  540. if (uni == NULL) {
  541. return NULL;
  542. }
  543. rval = ascii_escape_unicode(uni);
  544. Py_DECREF(uni);
  545. return rval;
  546. }
  547. output_size += ascii_char_size(c);
  548. }
  549. rval = PyString_FromStringAndSize(NULL, output_size);
  550. if (rval == NULL) {
  551. return NULL;
  552. }
  553. chars = 0;
  554. output = PyString_AS_STRING(rval);
  555. output[chars++] = '"';
  556. for (i = 0; i < input_chars; i++) {
  557. chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars);
  558. }
  559. output[chars++] = '"';
  560. assert(chars == output_size);
  561. return rval;
  562. }
  563. #endif /* PY_MAJOR_VERSION < 3 */
  564. static PyObject *
  565. encoder_stringify_key(PyEncoderObject *s, PyObject *key)
  566. {
  567. if (PyUnicode_Check(key)) {
  568. Py_INCREF(key);
  569. return key;
  570. }
  571. #if PY_MAJOR_VERSION >= 3
  572. else if (PyBytes_Check(key) && s->encoding != NULL) {
  573. const char *encoding = PyUnicode_AsUTF8(s->encoding);
  574. if (encoding == NULL)
  575. return NULL;
  576. return PyUnicode_Decode(
  577. PyBytes_AS_STRING(key),
  578. PyBytes_GET_SIZE(key),
  579. encoding,
  580. NULL);
  581. }
  582. #else /* PY_MAJOR_VERSION >= 3 */
  583. else if (PyString_Check(key)) {
  584. Py_INCREF(key);
  585. return key;
  586. }
  587. #endif /* PY_MAJOR_VERSION < 3 */
  588. else if (PyFloat_Check(key)) {
  589. return encoder_encode_float(s, key);
  590. }
  591. else if (key == Py_True || key == Py_False || key == Py_None) {
  592. /* This must come before the PyInt_Check because
  593. True and False are also 1 and 0.*/
  594. return _encoded_const(key);
  595. }
  596. else if (PyInt_Check(key) || PyLong_Check(key)) {
  597. if (!(PyInt_CheckExact(key) || PyLong_CheckExact(key))) {
  598. /* See #118, do not trust custom str/repr */
  599. PyObject *res;
  600. PyObject *tmp = PyObject_CallOneArg((PyObject *)&PyLong_Type, key);
  601. if (tmp == NULL) {
  602. return NULL;
  603. }
  604. res = PyObject_Str(tmp);
  605. Py_DECREF(tmp);
  606. return res;
  607. }
  608. else {
  609. return PyObject_Str(key);
  610. }
  611. }
  612. else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
  613. return PyObject_Str(key);
  614. }
  615. if (s->skipkeys) {
  616. Py_INCREF(Py_None);
  617. return Py_None;
  618. }
  619. PyErr_Format(PyExc_TypeError,
  620. "keys must be str, int, float, bool or None, "
  621. "not %.100s", key->ob_type->tp_name);
  622. return NULL;
  623. }
  624. static PyObject *
  625. encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct)
  626. {
  627. PyObject *items;
  628. PyObject *iter = NULL;
  629. PyObject *lst = NULL;
  630. PyObject *item = NULL;
  631. PyObject *kstr = NULL;
  632. PyObject *sortfun = NULL;
  633. PyObject *sortres;
  634. static PyObject *sortargs = NULL;
  635. if (sortargs == NULL) {
  636. sortargs = PyTuple_New(0);
  637. if (sortargs == NULL)
  638. return NULL;
  639. }
  640. if (PyDict_CheckExact(dct))
  641. items = PyDict_Items(dct);
  642. else
  643. items = PyMapping_Items(dct);
  644. if (items == NULL)
  645. return NULL;
  646. iter = PyObject_GetIter(items);
  647. Py_DECREF(items);
  648. if (iter == NULL)
  649. return NULL;
  650. if (s->item_sort_kw == Py_None)
  651. return iter;
  652. lst = PyList_New(0);
  653. if (lst == NULL)
  654. goto bail;
  655. while ((item = PyIter_Next(iter))) {
  656. PyObject *key, *value;
  657. if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
  658. PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
  659. goto bail;
  660. }
  661. key = PyTuple_GET_ITEM(item, 0);
  662. if (key == NULL)
  663. goto bail;
  664. #if PY_MAJOR_VERSION < 3
  665. else if (PyString_Check(key)) {
  666. /* item can be added as-is */
  667. }
  668. #endif /* PY_MAJOR_VERSION < 3 */
  669. else if (PyUnicode_Check(key)) {
  670. /* item can be added as-is */
  671. }
  672. else {
  673. PyObject *tpl;
  674. kstr = encoder_stringify_key(s, key);
  675. if (kstr == NULL)
  676. goto bail;
  677. else if (kstr == Py_None) {
  678. /* skipkeys */
  679. Py_DECREF(kstr);
  680. continue;
  681. }
  682. value = PyTuple_GET_ITEM(item, 1);
  683. if (value == NULL)
  684. goto bail;
  685. tpl = PyTuple_Pack(2, kstr, value);
  686. if (tpl == NULL)
  687. goto bail;
  688. Py_CLEAR(kstr);
  689. Py_DECREF(item);
  690. item = tpl;
  691. }
  692. if (PyList_Append(lst, item))
  693. goto bail;
  694. Py_DECREF(item);
  695. }
  696. Py_CLEAR(iter);
  697. if (PyErr_Occurred())
  698. goto bail;
  699. sortfun = PyObject_GetAttrString(lst, "sort");
  700. if (sortfun == NULL)
  701. goto bail;
  702. sortres = PyObject_Call(sortfun, sortargs, s->item_sort_kw);
  703. if (!sortres)
  704. goto bail;
  705. Py_DECREF(sortres);
  706. Py_CLEAR(sortfun);
  707. iter = PyObject_GetIter(lst);
  708. Py_CLEAR(lst);
  709. return iter;
  710. bail:
  711. Py_XDECREF(sortfun);
  712. Py_XDECREF(kstr);
  713. Py_XDECREF(item);
  714. Py_XDECREF(lst);
  715. Py_XDECREF(iter);
  716. return NULL;
  717. }
  718. /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
  719. static PyObject *JSONDecodeError = NULL;
  720. static void
  721. raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
  722. {
  723. PyObject *exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
  724. if (exc) {
  725. PyErr_SetObject(JSONDecodeError, exc);
  726. Py_DECREF(exc);
  727. }
  728. }
  729. static PyObject *
  730. join_list_unicode(PyObject *lst)
  731. {
  732. /* return u''.join(lst) */
  733. return PyUnicode_Join(JSON_EmptyUnicode, lst);
  734. }
  735. #if PY_MAJOR_VERSION >= 3
  736. #define join_list_string join_list_unicode
  737. #else /* PY_MAJOR_VERSION >= 3 */
  738. static PyObject *
  739. join_list_string(PyObject *lst)
  740. {
  741. /* return ''.join(lst) */
  742. static PyObject *joinfn = NULL;
  743. if (joinfn == NULL) {
  744. joinfn = PyObject_GetAttrString(JSON_EmptyStr, "join");
  745. if (joinfn == NULL)
  746. return NULL;
  747. }
  748. return PyObject_CallOneArg(joinfn, lst);
  749. }
  750. #endif /* PY_MAJOR_VERSION < 3 */
  751. static PyObject *
  752. _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
  753. {
  754. /* return (rval, idx) tuple, stealing reference to rval */
  755. PyObject *tpl;
  756. PyObject *pyidx;
  757. /*
  758. steal a reference to rval, returns (rval, idx)
  759. */
  760. if (rval == NULL) {
  761. assert(PyErr_Occurred());
  762. return NULL;
  763. }
  764. pyidx = PyInt_FromSsize_t(idx);
  765. if (pyidx == NULL) {
  766. Py_DECREF(rval);
  767. return NULL;
  768. }
  769. tpl = PyTuple_New(2);
  770. if (tpl == NULL) {
  771. Py_DECREF(pyidx);
  772. Py_DECREF(rval);
  773. return NULL;
  774. }
  775. PyTuple_SET_ITEM(tpl, 0, rval);
  776. PyTuple_SET_ITEM(tpl, 1, pyidx);
  777. return tpl;
  778. }
  779. #define APPEND_OLD_CHUNK \
  780. if (chunk != NULL) { \
  781. if (chunks == NULL) { \
  782. chunks = PyList_New(0); \
  783. if (chunks == NULL) { \
  784. goto bail; \
  785. } \
  786. } \
  787. if (PyList_Append(chunks, chunk)) { \
  788. goto bail; \
  789. } \
  790. Py_CLEAR(chunk); \
  791. }
  792. #if PY_MAJOR_VERSION < 3
  793. static PyObject *
  794. scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
  795. {
  796. /* Read the JSON string from PyString pystr.
  797. end is the index of the first character after the quote.
  798. encoding is the encoding of pystr (must be an ASCII superset)
  799. if strict is zero then literal control characters are allowed
  800. *next_end_ptr is a return-by-reference index of the character
  801. after the end quote
  802. Return value is a new PyString (if ASCII-only) or PyUnicode
  803. */
  804. PyObject *rval;
  805. Py_ssize_t len = PyString_GET_SIZE(pystr);
  806. Py_ssize_t begin = end - 1;
  807. Py_ssize_t next = begin;
  808. int has_unicode = 0;
  809. char *buf = PyString_AS_STRING(pystr);
  810. PyObject *chunks = NULL;
  811. PyObject *chunk = NULL;
  812. PyObject *strchunk = NULL;
  813. if (len == end) {
  814. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  815. goto bail;
  816. }
  817. else if (end < 0 || len < end) {
  818. PyErr_SetString(PyExc_ValueError, "end is out of bounds");
  819. goto bail;
  820. }
  821. while (1) {
  822. /* Find the end of the string or the next escape */
  823. Py_UNICODE c = 0;
  824. for (next = end; next < len; next++) {
  825. c = (unsigned char)buf[next];
  826. if (c == '"' || c == '\\') {
  827. break;
  828. }
  829. else if (strict && c <= 0x1f) {
  830. raise_errmsg(ERR_STRING_CONTROL, pystr, next);
  831. goto bail;
  832. }
  833. else if (c > 0x7f) {
  834. has_unicode = 1;
  835. }
  836. }
  837. if (!(c == '"' || c == '\\')) {
  838. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  839. goto bail;
  840. }
  841. /* Pick up this chunk if it's not zero length */
  842. if (next != end) {
  843. APPEND_OLD_CHUNK
  844. strchunk = PyString_FromStringAndSize(&buf[end], next - end);
  845. if (strchunk == NULL) {
  846. goto bail;
  847. }
  848. if (has_unicode) {
  849. chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
  850. Py_DECREF(strchunk);
  851. if (chunk == NULL) {
  852. goto bail;
  853. }
  854. }
  855. else {
  856. chunk = strchunk;
  857. }
  858. }
  859. next++;
  860. if (c == '"') {
  861. end = next;
  862. break;
  863. }
  864. if (next == len) {
  865. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  866. goto bail;
  867. }
  868. c = buf[next];
  869. if (c != 'u') {
  870. /* Non-unicode backslash escapes */
  871. end = next + 1;
  872. switch (c) {
  873. case '"': break;
  874. case '\\': break;
  875. case '/': break;
  876. case 'b': c = '\b'; break;
  877. case 'f': c = '\f'; break;
  878. case 'n': c = '\n'; break;
  879. case 'r': c = '\r'; break;
  880. case 't': c = '\t'; break;
  881. default: c = 0;
  882. }
  883. if (c == 0) {
  884. raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
  885. goto bail;
  886. }
  887. }
  888. else {
  889. c = 0;
  890. next++;
  891. end = next + 4;
  892. if (end >= len) {
  893. raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
  894. goto bail;
  895. }
  896. /* Decode 4 hex digits */
  897. for (; next < end; next++) {
  898. JSON_UNICHR digit = (JSON_UNICHR)buf[next];
  899. c <<= 4;
  900. switch (digit) {
  901. case '0': case '1': case '2': case '3': case '4':
  902. case '5': case '6': case '7': case '8': case '9':
  903. c |= (digit - '0'); break;
  904. case 'a': case 'b': case 'c': case 'd': case 'e':
  905. case 'f':
  906. c |= (digit - 'a' + 10); break;
  907. case 'A': case 'B': case 'C': case 'D': case 'E':
  908. case 'F':
  909. c |= (digit - 'A' + 10); break;
  910. default:
  911. raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
  912. goto bail;
  913. }
  914. }
  915. #if defined(Py_UNICODE_WIDE)
  916. /* Surrogate pair */
  917. if ((c & 0xfc00) == 0xd800) {
  918. if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
  919. JSON_UNICHR c2 = 0;
  920. end += 6;
  921. /* Decode 4 hex digits */
  922. for (next += 2; next < end; next++) {
  923. c2 <<= 4;
  924. JSON_UNICHR digit = buf[next];
  925. switch (digit) {
  926. case '0': case '1': case '2': case '3': case '4':
  927. case '5': case '6': case '7': case '8': case '9':
  928. c2 |= (digit - '0'); break;
  929. case 'a': case 'b': case 'c': case 'd': case 'e':
  930. case 'f':
  931. c2 |= (digit - 'a' + 10); break;
  932. case 'A': case 'B': case 'C': case 'D': case 'E':
  933. case 'F':
  934. c2 |= (digit - 'A' + 10); break;
  935. default:
  936. raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
  937. goto bail;
  938. }
  939. }
  940. if ((c2 & 0xfc00) != 0xdc00) {
  941. /* not a low surrogate, rewind */
  942. end -= 6;
  943. next = end;
  944. }
  945. else {
  946. c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
  947. }
  948. }
  949. }
  950. #endif /* Py_UNICODE_WIDE */
  951. }
  952. if (c > 0x7f) {
  953. has_unicode = 1;
  954. }
  955. APPEND_OLD_CHUNK
  956. if (has_unicode) {
  957. chunk = PyUnicode_FromOrdinal(c);
  958. if (chunk == NULL) {
  959. goto bail;
  960. }
  961. }
  962. else {
  963. char c_char = Py_CHARMASK(c);
  964. chunk = PyString_FromStringAndSize(&c_char, 1);
  965. if (chunk == NULL) {
  966. goto bail;
  967. }
  968. }
  969. }
  970. if (chunks == NULL) {
  971. if (chunk != NULL)
  972. rval = chunk;
  973. else {
  974. rval = JSON_EmptyStr;
  975. Py_INCREF(rval);
  976. }
  977. }
  978. else {
  979. APPEND_OLD_CHUNK
  980. rval = join_list_string(chunks);
  981. if (rval == NULL) {
  982. goto bail;
  983. }
  984. Py_CLEAR(chunks);
  985. }
  986. *next_end_ptr = end;
  987. return rval;
  988. bail:
  989. *next_end_ptr = -1;
  990. Py_XDECREF(chunk);
  991. Py_XDECREF(chunks);
  992. return NULL;
  993. }
  994. #endif /* PY_MAJOR_VERSION < 3 */
  995. static PyObject *
  996. scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
  997. {
  998. /* Read the JSON string from PyUnicode pystr.
  999. end is the index of the first character after the quote.
  1000. if strict is zero then literal control characters are allowed
  1001. *next_end_ptr is a return-by-reference index of the character
  1002. after the end quote
  1003. Return value is a new PyUnicode
  1004. */
  1005. PyObject *rval;
  1006. Py_ssize_t begin = end - 1;
  1007. Py_ssize_t next = begin;
  1008. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  1009. Py_ssize_t len = PyUnicode_GET_LENGTH(pystr);
  1010. void *buf = PyUnicode_DATA(pystr);
  1011. PyObject *chunks = NULL;
  1012. PyObject *chunk = NULL;
  1013. if (len == end) {
  1014. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  1015. goto bail;
  1016. }
  1017. else if (end < 0 || len < end) {
  1018. PyErr_SetString(PyExc_ValueError, "end is out of bounds");
  1019. goto bail;
  1020. }
  1021. while (1) {
  1022. /* Find the end of the string or the next escape */
  1023. JSON_UNICHR c = 0;
  1024. for (next = end; next < len; next++) {
  1025. c = PyUnicode_READ(kind, buf, next);
  1026. if (c == '"' || c == '\\') {
  1027. break;
  1028. }
  1029. else if (strict && c <= 0x1f) {
  1030. raise_errmsg(ERR_STRING_CONTROL, pystr, next);
  1031. goto bail;
  1032. }
  1033. }
  1034. if (!(c == '"' || c == '\\')) {
  1035. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  1036. goto bail;
  1037. }
  1038. /* Pick up this chunk if it's not zero length */
  1039. if (next != end) {
  1040. APPEND_OLD_CHUNK
  1041. #if PY_MAJOR_VERSION < 3
  1042. chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
  1043. #else
  1044. chunk = PyUnicode_Substring(pystr, end, next);
  1045. #endif
  1046. if (chunk == NULL) {
  1047. goto bail;
  1048. }
  1049. }
  1050. next++;
  1051. if (c == '"') {
  1052. end = next;
  1053. break;
  1054. }
  1055. if (next == len) {
  1056. raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
  1057. goto bail;
  1058. }
  1059. c = PyUnicode_READ(kind, buf, next);
  1060. if (c != 'u') {
  1061. /* Non-unicode backslash escapes */
  1062. end = next + 1;
  1063. switch (c) {
  1064. case '"': break;
  1065. case '\\': break;
  1066. case '/': break;
  1067. case 'b': c = '\b'; break;
  1068. case 'f': c = '\f'; break;
  1069. case 'n': c = '\n'; break;
  1070. case 'r': c = '\r'; break;
  1071. case 't': c = '\t'; break;
  1072. default: c = 0;
  1073. }
  1074. if (c == 0) {
  1075. raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
  1076. goto bail;
  1077. }
  1078. }
  1079. else {
  1080. c = 0;
  1081. next++;
  1082. end = next + 4;
  1083. if (end >= len) {
  1084. raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
  1085. goto bail;
  1086. }
  1087. /* Decode 4 hex digits */
  1088. for (; next < end; next++) {
  1089. JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
  1090. c <<= 4;
  1091. switch (digit) {
  1092. case '0': case '1': case '2': case '3': case '4':
  1093. case '5': case '6': case '7': case '8': case '9':
  1094. c |= (digit - '0'); break;
  1095. case 'a': case 'b': case 'c': case 'd': case 'e':
  1096. case 'f':
  1097. c |= (digit - 'a' + 10); break;
  1098. case 'A': case 'B': case 'C': case 'D': case 'E':
  1099. case 'F':
  1100. c |= (digit - 'A' + 10); break;
  1101. default:
  1102. raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
  1103. goto bail;
  1104. }
  1105. }
  1106. #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
  1107. /* Surrogate pair */
  1108. if ((c & 0xfc00) == 0xd800) {
  1109. JSON_UNICHR c2 = 0;
  1110. if (end + 6 < len &&
  1111. PyUnicode_READ(kind, buf, next) == '\\' &&
  1112. PyUnicode_READ(kind, buf, next + 1) == 'u') {
  1113. end += 6;
  1114. /* Decode 4 hex digits */
  1115. for (next += 2; next < end; next++) {
  1116. JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
  1117. c2 <<= 4;
  1118. switch (digit) {
  1119. case '0': case '1': case '2': case '3': case '4':
  1120. case '5': case '6': case '7': case '8': case '9':
  1121. c2 |= (digit - '0'); break;
  1122. case 'a': case 'b': case 'c': case 'd': case 'e':
  1123. case 'f':
  1124. c2 |= (digit - 'a' + 10); break;
  1125. case 'A': case 'B': case 'C': case 'D': case 'E':
  1126. case 'F':
  1127. c2 |= (digit - 'A' + 10); break;
  1128. default:
  1129. raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
  1130. goto bail;
  1131. }
  1132. }
  1133. if ((c2 & 0xfc00) != 0xdc00) {
  1134. /* not a low surrogate, rewind */
  1135. end -= 6;
  1136. next = end;
  1137. }
  1138. else {
  1139. c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
  1140. }
  1141. }
  1142. }
  1143. #endif
  1144. }
  1145. APPEND_OLD_CHUNK
  1146. chunk = PyUnicode_FromOrdinal(c);
  1147. if (chunk == NULL) {
  1148. goto bail;
  1149. }
  1150. }
  1151. if (chunks == NULL) {
  1152. if (chunk != NULL)
  1153. rval = chunk;
  1154. else {
  1155. rval = JSON_EmptyUnicode;
  1156. Py_INCREF(rval);
  1157. }
  1158. }
  1159. else {
  1160. APPEND_OLD_CHUNK
  1161. rval = join_list_unicode(chunks);
  1162. if (rval == NULL) {
  1163. goto bail;
  1164. }
  1165. Py_CLEAR(chunks);
  1166. }
  1167. *next_end_ptr = end;
  1168. return rval;
  1169. bail:
  1170. *next_end_ptr = -1;
  1171. Py_XDECREF(chunk);
  1172. Py_XDECREF(chunks);
  1173. return NULL;
  1174. }
  1175. PyDoc_STRVAR(pydoc_scanstring,
  1176. "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
  1177. "\n"
  1178. "Scan the string s for a JSON string. End is the index of the\n"
  1179. "character in s after the quote that started the JSON string.\n"
  1180. "Unescapes all valid JSON string escape sequences and raises ValueError\n"
  1181. "on attempt to decode an invalid string. If strict is False then literal\n"
  1182. "control characters are allowed in the string.\n"
  1183. "\n"
  1184. "Returns a tuple of the decoded string and the index of the character in s\n"
  1185. "after the end quote."
  1186. );
  1187. static PyObject *
  1188. py_scanstring(PyObject* self UNUSED, PyObject *args)
  1189. {
  1190. PyObject *pystr;
  1191. PyObject *rval;
  1192. Py_ssize_t end;
  1193. Py_ssize_t next_end = -1;
  1194. char *encoding = NULL;
  1195. int strict = 1;
  1196. if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
  1197. return NULL;
  1198. }
  1199. if (encoding == NULL) {
  1200. encoding = DEFAULT_ENCODING;
  1201. }
  1202. if (PyUnicode_Check(pystr)) {
  1203. if (PyUnicode_READY(pystr))
  1204. return NULL;
  1205. rval = scanstring_unicode(pystr, end, strict, &next_end);
  1206. }
  1207. #if PY_MAJOR_VERSION < 3
  1208. /* Using a bytes input is unsupported for scanning in Python 3.
  1209. It is coerced to str in the decoder before it gets here. */
  1210. else if (PyString_Check(pystr)) {
  1211. rval = scanstring_str(pystr, end, encoding, strict, &next_end);
  1212. }
  1213. #endif
  1214. else {
  1215. PyErr_Format(PyExc_TypeError,
  1216. "first argument must be a string, not %.80s",
  1217. Py_TYPE(pystr)->tp_name);
  1218. return NULL;
  1219. }
  1220. return _build_rval_index_tuple(rval, next_end);
  1221. }
  1222. PyDoc_STRVAR(pydoc_encode_basestring_ascii,
  1223. "encode_basestring_ascii(basestring) -> str\n"
  1224. "\n"
  1225. "Return an ASCII-only JSON representation of a Python string"
  1226. );
  1227. static PyObject *
  1228. py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
  1229. {
  1230. /* Return an ASCII-only JSON representation of a Python string */
  1231. /* METH_O */
  1232. if (PyBytes_Check(pystr)) {
  1233. return ascii_escape_str(pystr);
  1234. }
  1235. else if (PyUnicode_Check(pystr)) {
  1236. if (PyUnicode_READY(pystr))
  1237. return NULL;
  1238. return ascii_escape_unicode(pystr);
  1239. }
  1240. else {
  1241. PyErr_Format(PyExc_TypeError,
  1242. "first argument must be a string, not %.80s",
  1243. Py_TYPE(pystr)->tp_name);
  1244. return NULL;
  1245. }
  1246. }
  1247. static void
  1248. scanner_dealloc(PyObject *self)
  1249. {
  1250. /* bpo-31095: UnTrack is needed before calling any callbacks */
  1251. PyObject_GC_UnTrack(self);
  1252. scanner_clear(self);
  1253. Py_TYPE(self)->tp_free(self);
  1254. }
  1255. static int
  1256. scanner_traverse(PyObject *self, visitproc visit, void *arg)
  1257. {
  1258. PyScannerObject *s;
  1259. assert(PyScanner_Check(self));
  1260. s = (PyScannerObject *)self;
  1261. Py_VISIT(s->encoding);
  1262. Py_VISIT(s->strict_bool);
  1263. Py_VISIT(s->object_hook);
  1264. Py_VISIT(s->pairs_hook);
  1265. Py_VISIT(s->parse_float);
  1266. Py_VISIT(s->parse_int);
  1267. Py_VISIT(s->parse_constant);
  1268. Py_VISIT(s->memo);
  1269. return 0;
  1270. }
  1271. static int
  1272. scanner_clear(PyObject *self)
  1273. {
  1274. PyScannerObject *s;
  1275. assert(PyScanner_Check(self));
  1276. s = (PyScannerObject *)self;
  1277. Py_CLEAR(s->encoding);
  1278. Py_CLEAR(s->strict_bool);
  1279. Py_CLEAR(s->object_hook);
  1280. Py_CLEAR(s->pairs_hook);
  1281. Py_CLEAR(s->parse_float);
  1282. Py_CLEAR(s->parse_int);
  1283. Py_CLEAR(s->parse_constant);
  1284. Py_CLEAR(s->memo);
  1285. return 0;
  1286. }
  1287. #if PY_MAJOR_VERSION < 3
  1288. static PyObject *
  1289. _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1290. {
  1291. /* Read a JSON object from PyString pystr.
  1292. idx is the index of the first character after the opening curly brace.
  1293. *next_idx_ptr is a return-by-reference index to the first character after
  1294. the closing curly brace.
  1295. Returns a new PyObject (usually a dict, but object_hook or
  1296. object_pairs_hook can change that)
  1297. */
  1298. char *str = PyString_AS_STRING(pystr);
  1299. Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
  1300. PyObject *rval = NULL;
  1301. PyObject *pairs = NULL;
  1302. PyObject *item;
  1303. PyObject *key = NULL;
  1304. PyObject *val = NULL;
  1305. char *encoding = PyString_AS_STRING(s->encoding);
  1306. int has_pairs_hook = (s->pairs_hook != Py_None);
  1307. int did_parse = 0;
  1308. Py_ssize_t next_idx;
  1309. if (has_pairs_hook) {
  1310. pairs = PyList_New(0);
  1311. if (pairs == NULL)
  1312. return NULL;
  1313. }
  1314. else {
  1315. rval = PyDict_New();
  1316. if (rval == NULL)
  1317. return NULL;
  1318. }
  1319. /* skip whitespace after { */
  1320. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1321. /* only loop if the object is non-empty */
  1322. if (idx <= end_idx && str[idx] != '}') {
  1323. int trailing_delimiter = 0;
  1324. while (idx <= end_idx) {
  1325. PyObject *memokey;
  1326. trailing_delimiter = 0;
  1327. /* read key */
  1328. if (str[idx] != '"') {
  1329. raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
  1330. goto bail;
  1331. }
  1332. key = scanstring_str(pystr, idx + 1, encoding, s->strict, &next_idx);
  1333. if (key == NULL)
  1334. goto bail;
  1335. memokey = PyDict_GetItem(s->memo, key);
  1336. if (memokey != NULL) {
  1337. Py_INCREF(memokey);
  1338. Py_DECREF(key);
  1339. key = memokey;
  1340. }
  1341. else {
  1342. if (PyDict_SetItem(s->memo, key, key) < 0)
  1343. goto bail;
  1344. }
  1345. idx = next_idx;
  1346. /* skip whitespace between key and : delimiter, read :, skip whitespace */
  1347. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1348. if (idx > end_idx || str[idx] != ':') {
  1349. raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
  1350. goto bail;
  1351. }
  1352. idx++;
  1353. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1354. /* read any JSON data type */
  1355. val = scan_once_str(s, pystr, idx, &next_idx);
  1356. if (val == NULL)
  1357. goto bail;
  1358. if (has_pairs_hook) {
  1359. item = PyTuple_Pack(2, key, val);
  1360. if (item == NULL)
  1361. goto bail;
  1362. Py_CLEAR(key);
  1363. Py_CLEAR(val);
  1364. if (PyList_Append(pairs, item) == -1) {
  1365. Py_DECREF(item);
  1366. goto bail;
  1367. }
  1368. Py_DECREF(item);
  1369. }
  1370. else {
  1371. if (PyDict_SetItem(rval, key, val) < 0)
  1372. goto bail;
  1373. Py_CLEAR(key);
  1374. Py_CLEAR(val);
  1375. }
  1376. idx = next_idx;
  1377. /* skip whitespace before } or , */
  1378. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1379. /* bail if the object is closed or we didn't get the , delimiter */
  1380. did_parse = 1;
  1381. if (idx > end_idx) break;
  1382. if (str[idx] == '}') {
  1383. break;
  1384. }
  1385. else if (str[idx] != ',') {
  1386. raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
  1387. goto bail;
  1388. }
  1389. idx++;
  1390. /* skip whitespace after , delimiter */
  1391. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1392. trailing_delimiter = 1;
  1393. }
  1394. if (trailing_delimiter) {
  1395. raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
  1396. goto bail;
  1397. }
  1398. }
  1399. /* verify that idx < end_idx, str[idx] should be '}' */
  1400. if (idx > end_idx || str[idx] != '}') {
  1401. if (did_parse) {
  1402. raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
  1403. } else {
  1404. raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
  1405. }
  1406. goto bail;
  1407. }
  1408. /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
  1409. if (s->pairs_hook != Py_None) {
  1410. val = PyObject_CallOneArg(s->pairs_hook, pairs);
  1411. if (val == NULL)
  1412. goto bail;
  1413. Py_DECREF(pairs);
  1414. *next_idx_ptr = idx + 1;
  1415. return val;
  1416. }
  1417. /* if object_hook is not None: rval = object_hook(rval) */
  1418. if (s->object_hook != Py_None) {
  1419. val = PyObject_CallOneArg(s->object_hook, rval);
  1420. if (val == NULL)
  1421. goto bail;
  1422. Py_DECREF(rval);
  1423. rval = val;
  1424. val = NULL;
  1425. }
  1426. *next_idx_ptr = idx + 1;
  1427. return rval;
  1428. bail:
  1429. Py_XDECREF(rval);
  1430. Py_XDECREF(key);
  1431. Py_XDECREF(val);
  1432. Py_XDECREF(pairs);
  1433. return NULL;
  1434. }
  1435. #endif /* PY_MAJOR_VERSION < 3 */
  1436. static PyObject *
  1437. _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1438. {
  1439. /* Read a JSON object from PyUnicode pystr.
  1440. idx is the index of the first character after the opening curly brace.
  1441. *next_idx_ptr is a return-by-reference index to the first character after
  1442. the closing curly brace.
  1443. Returns a new PyObject (usually a dict, but object_hook can change that)
  1444. */
  1445. void *str = PyUnicode_DATA(pystr);
  1446. Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
  1447. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  1448. PyObject *rval = NULL;
  1449. PyObject *pairs = NULL;
  1450. PyObject *item;
  1451. PyObject *key = NULL;
  1452. PyObject *val = NULL;
  1453. int has_pairs_hook = (s->pairs_hook != Py_None);
  1454. int did_parse = 0;
  1455. Py_ssize_t next_idx;
  1456. if (has_pairs_hook) {
  1457. pairs = PyList_New(0);
  1458. if (pairs == NULL)
  1459. return NULL;
  1460. }
  1461. else {
  1462. rval = PyDict_New();
  1463. if (rval == NULL)
  1464. return NULL;
  1465. }
  1466. /* skip whitespace after { */
  1467. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1468. /* only loop if the object is non-empty */
  1469. if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
  1470. int trailing_delimiter = 0;
  1471. while (idx <= end_idx) {
  1472. PyObject *memokey;
  1473. trailing_delimiter = 0;
  1474. /* read key */
  1475. if (PyUnicode_READ(kind, str, idx) != '"') {
  1476. raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
  1477. goto bail;
  1478. }
  1479. key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
  1480. if (key == NULL)
  1481. goto bail;
  1482. memokey = PyDict_GetItem(s->memo, key);
  1483. if (memokey != NULL) {
  1484. Py_INCREF(memokey);
  1485. Py_DECREF(key);
  1486. key = memokey;
  1487. }
  1488. else {
  1489. if (PyDict_SetItem(s->memo, key, key) < 0)
  1490. goto bail;
  1491. }
  1492. idx = next_idx;
  1493. /* skip whitespace between key and : delimiter, read :, skip
  1494. whitespace */
  1495. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1496. if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
  1497. raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
  1498. goto bail;
  1499. }
  1500. idx++;
  1501. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1502. /* read any JSON term */
  1503. val = scan_once_unicode(s, pystr, idx, &next_idx);
  1504. if (val == NULL)
  1505. goto bail;
  1506. if (has_pairs_hook) {
  1507. item = PyTuple_Pack(2, key, val);
  1508. if (item == NULL)
  1509. goto bail;
  1510. Py_CLEAR(key);
  1511. Py_CLEAR(val);
  1512. if (PyList_Append(pairs, item) == -1) {
  1513. Py_DECREF(item);
  1514. goto bail;
  1515. }
  1516. Py_DECREF(item);
  1517. }
  1518. else {
  1519. if (PyDict_SetItem(rval, key, val) < 0)
  1520. goto bail;
  1521. Py_CLEAR(key);
  1522. Py_CLEAR(val);
  1523. }
  1524. idx = next_idx;
  1525. /* skip whitespace before } or , */
  1526. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1527. /* bail if the object is closed or we didn't get the ,
  1528. delimiter */
  1529. did_parse = 1;
  1530. if (idx > end_idx) break;
  1531. if (PyUnicode_READ(kind, str, idx) == '}') {
  1532. break;
  1533. }
  1534. else if (PyUnicode_READ(kind, str, idx) != ',') {
  1535. raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
  1536. goto bail;
  1537. }
  1538. idx++;
  1539. /* skip whitespace after , delimiter */
  1540. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1541. trailing_delimiter = 1;
  1542. }
  1543. if (trailing_delimiter) {
  1544. raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
  1545. goto bail;
  1546. }
  1547. }
  1548. /* verify that idx < end_idx, str[idx] should be '}' */
  1549. if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
  1550. if (did_parse) {
  1551. raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
  1552. } else {
  1553. raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
  1554. }
  1555. goto bail;
  1556. }
  1557. /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
  1558. if (s->pairs_hook != Py_None) {
  1559. val = PyObject_CallOneArg(s->pairs_hook, pairs);
  1560. if (val == NULL)
  1561. goto bail;
  1562. Py_DECREF(pairs);
  1563. *next_idx_ptr = idx + 1;
  1564. return val;
  1565. }
  1566. /* if object_hook is not None: rval = object_hook(rval) */
  1567. if (s->object_hook != Py_None) {
  1568. val = PyObject_CallOneArg(s->object_hook, rval);
  1569. if (val == NULL)
  1570. goto bail;
  1571. Py_DECREF(rval);
  1572. rval = val;
  1573. val = NULL;
  1574. }
  1575. *next_idx_ptr = idx + 1;
  1576. return rval;
  1577. bail:
  1578. Py_XDECREF(rval);
  1579. Py_XDECREF(key);
  1580. Py_XDECREF(val);
  1581. Py_XDECREF(pairs);
  1582. return NULL;
  1583. }
  1584. #if PY_MAJOR_VERSION < 3
  1585. static PyObject *
  1586. _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1587. {
  1588. /* Read a JSON array from PyString pystr.
  1589. idx is the index of the first character after the opening brace.
  1590. *next_idx_ptr is a return-by-reference index to the first character after
  1591. the closing brace.
  1592. Returns a new PyList
  1593. */
  1594. char *str = PyString_AS_STRING(pystr);
  1595. Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
  1596. PyObject *val = NULL;
  1597. PyObject *rval = PyList_New(0);
  1598. Py_ssize_t next_idx;
  1599. if (rval == NULL)
  1600. return NULL;
  1601. /* skip whitespace after [ */
  1602. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1603. /* only loop if the array is non-empty */
  1604. if (idx <= end_idx && str[idx] != ']') {
  1605. int trailing_delimiter = 0;
  1606. while (idx <= end_idx) {
  1607. trailing_delimiter = 0;
  1608. /* read any JSON term and de-tuplefy the (rval, idx) */
  1609. val = scan_once_str(s, pystr, idx, &next_idx);
  1610. if (val == NULL) {
  1611. goto bail;
  1612. }
  1613. if (PyList_Append(rval, val) == -1)
  1614. goto bail;
  1615. Py_CLEAR(val);
  1616. idx = next_idx;
  1617. /* skip whitespace between term and , */
  1618. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1619. /* bail if the array is closed or we didn't get the , delimiter */
  1620. if (idx > end_idx) break;
  1621. if (str[idx] == ']') {
  1622. break;
  1623. }
  1624. else if (str[idx] != ',') {
  1625. raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
  1626. goto bail;
  1627. }
  1628. idx++;
  1629. /* skip whitespace after , */
  1630. while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
  1631. trailing_delimiter = 1;
  1632. }
  1633. if (trailing_delimiter) {
  1634. raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
  1635. goto bail;
  1636. }
  1637. }
  1638. /* verify that idx < end_idx, str[idx] should be ']' */
  1639. if (idx > end_idx || str[idx] != ']') {
  1640. if (PyList_GET_SIZE(rval)) {
  1641. raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
  1642. } else {
  1643. raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
  1644. }
  1645. goto bail;
  1646. }
  1647. *next_idx_ptr = idx + 1;
  1648. return rval;
  1649. bail:
  1650. Py_XDECREF(val);
  1651. Py_DECREF(rval);
  1652. return NULL;
  1653. }
  1654. #endif /* PY_MAJOR_VERSION < 3 */
  1655. static PyObject *
  1656. _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1657. {
  1658. /* Read a JSON array from PyString pystr.
  1659. idx is the index of the first character after the opening brace.
  1660. *next_idx_ptr is a return-by-reference index to the first character after
  1661. the closing brace.
  1662. Returns a new PyList
  1663. */
  1664. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  1665. void *str = PyUnicode_DATA(pystr);
  1666. Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
  1667. PyObject *val = NULL;
  1668. PyObject *rval = PyList_New(0);
  1669. Py_ssize_t next_idx;
  1670. if (rval == NULL)
  1671. return NULL;
  1672. /* skip whitespace after [ */
  1673. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1674. /* only loop if the array is non-empty */
  1675. if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
  1676. int trailing_delimiter = 0;
  1677. while (idx <= end_idx) {
  1678. trailing_delimiter = 0;
  1679. /* read any JSON term */
  1680. val = scan_once_unicode(s, pystr, idx, &next_idx);
  1681. if (val == NULL) {
  1682. goto bail;
  1683. }
  1684. if (PyList_Append(rval, val) == -1)
  1685. goto bail;
  1686. Py_CLEAR(val);
  1687. idx = next_idx;
  1688. /* skip whitespace between term and , */
  1689. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1690. /* bail if the array is closed or we didn't get the , delimiter */
  1691. if (idx > end_idx) break;
  1692. if (PyUnicode_READ(kind, str, idx) == ']') {
  1693. break;
  1694. }
  1695. else if (PyUnicode_READ(kind, str, idx) != ',') {
  1696. raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
  1697. goto bail;
  1698. }
  1699. idx++;
  1700. /* skip whitespace after , */
  1701. while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
  1702. trailing_delimiter = 1;
  1703. }
  1704. if (trailing_delimiter) {
  1705. raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
  1706. goto bail;
  1707. }
  1708. }
  1709. /* verify that idx < end_idx, str[idx] should be ']' */
  1710. if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
  1711. if (PyList_GET_SIZE(rval)) {
  1712. raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
  1713. } else {
  1714. raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
  1715. }
  1716. goto bail;
  1717. }
  1718. *next_idx_ptr = idx + 1;
  1719. return rval;
  1720. bail:
  1721. Py_XDECREF(val);
  1722. Py_DECREF(rval);
  1723. return NULL;
  1724. }
  1725. static PyObject *
  1726. _parse_constant(PyScannerObject *s, PyObject *pystr, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1727. {
  1728. /* Read a JSON constant from PyString pystr.
  1729. constant is the Python string that was found
  1730. ("NaN", "Infinity", "-Infinity").
  1731. idx is the index of the first character of the constant
  1732. *next_idx_ptr is a return-by-reference index to the first character after
  1733. the constant.
  1734. Returns the result of parse_constant
  1735. */
  1736. PyObject *rval;
  1737. if (s->parse_constant == Py_None) {
  1738. raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
  1739. return NULL;
  1740. }
  1741. /* rval = parse_constant(constant) */
  1742. rval = PyObject_CallOneArg(s->parse_constant, constant);
  1743. idx += PyString_GET_SIZE(constant);
  1744. *next_idx_ptr = idx;
  1745. return rval;
  1746. }
  1747. #if PY_MAJOR_VERSION < 3
  1748. static PyObject *
  1749. _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
  1750. {
  1751. /* Read a JSON number from PyString pystr.
  1752. idx is the index of the first character of the number
  1753. *next_idx_ptr is a return-by-reference index to the first character after
  1754. the number.
  1755. Returns a new PyObject representation of that number:
  1756. PyInt, PyLong, or PyFloat.
  1757. May return other types if parse_int or parse_float are set
  1758. */
  1759. char *str = PyString_AS_STRING(pystr);
  1760. Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
  1761. Py_ssize_t idx = start;
  1762. int is_float = 0;
  1763. PyObject *rval;
  1764. PyObject *numstr;
  1765. /* read a sign if it's there, make sure it's not the end of the string */
  1766. if (str[idx] == '-') {
  1767. if (idx >= end_idx) {
  1768. raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
  1769. return NULL;
  1770. }
  1771. idx++;
  1772. }
  1773. /* read as many integer digits as we find as long as it doesn't start with 0 */
  1774. if (str[idx] >= '1' && str[idx] <= '9') {
  1775. idx++;
  1776. while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
  1777. }
  1778. /* if it starts with 0 we only expect one integer digit */
  1779. else if (str[idx] == '0') {
  1780. idx++;
  1781. }
  1782. /* no integer digits, error */
  1783. else {
  1784. raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
  1785. return NULL;
  1786. }
  1787. /* if the next char is '.' followed by a digit then read all float digits */
  1788. if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
  1789. is_float = 1;
  1790. idx += 2;
  1791. while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
  1792. }
  1793. /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
  1794. if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
  1795. /* save the index of the 'e' or 'E' just in case we need to backtrack */
  1796. Py_ssize_t e_start = idx;
  1797. idx++;
  1798. /* read an exponent sign if present */
  1799. if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
  1800. /* read all digits */
  1801. while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
  1802. /* if we got a digit, then parse as float. if not, backtrack */
  1803. if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
  1804. is_float = 1;
  1805. }
  1806. else {
  1807. idx = e_start;
  1808. }
  1809. }
  1810. /* copy the section we determined to be a number */
  1811. numstr = PyString_FromStringAndSize(&str[start], idx - start);
  1812. if (numstr == NULL)
  1813. return NULL;
  1814. if (is_float) {
  1815. /* parse as a float using a fast path if available, otherwise call user defined method */
  1816. if (s->parse_float != (PyObject *)&PyFloat_Type) {
  1817. rval = PyObject_CallOneArg(s->parse_float, numstr);
  1818. }
  1819. else {
  1820. /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
  1821. double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
  1822. NULL, NULL);
  1823. if (d == -1.0 && PyErr_Occurred()) {
  1824. Py_DECREF(numstr);
  1825. return NULL;
  1826. }
  1827. rval = PyFloat_FromDouble(d);
  1828. }
  1829. }
  1830. else {
  1831. /* parse as an int using a fast path if available, otherwise call user defined method */
  1832. if (s->parse_int != (PyObject *)&PyInt_Type) {
  1833. rval = PyObject_CallOneArg(s->parse_int, numstr);
  1834. }
  1835. else {
  1836. rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
  1837. }
  1838. }
  1839. Py_DECREF(numstr);
  1840. *next_idx_ptr = idx;
  1841. return rval;
  1842. }
  1843. #endif /* PY_MAJOR_VERSION < 3 */
  1844. static PyObject *
  1845. _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
  1846. {
  1847. /* Read a JSON number from PyUnicode pystr.
  1848. idx is the index of the first character of the number
  1849. *next_idx_ptr is a return-by-reference index to the first character after
  1850. the number.
  1851. Returns a new PyObject representation of that number:
  1852. PyInt, PyLong, or PyFloat.
  1853. May return other types if parse_int or parse_float are set
  1854. */
  1855. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  1856. void *str = PyUnicode_DATA(pystr);
  1857. Py_ssize_t end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
  1858. Py_ssize_t idx = start;
  1859. int is_float = 0;
  1860. JSON_UNICHR c;
  1861. PyObject *rval;
  1862. PyObject *numstr;
  1863. /* read a sign if it's there, make sure it's not the end of the string */
  1864. if (PyUnicode_READ(kind, str, idx) == '-') {
  1865. if (idx >= end_idx) {
  1866. raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
  1867. return NULL;
  1868. }
  1869. idx++;
  1870. }
  1871. /* read as many integer digits as we find as long as it doesn't start with 0 */
  1872. c = PyUnicode_READ(kind, str, idx);
  1873. if (c == '0') {
  1874. /* if it starts with 0 we only expect one integer digit */
  1875. idx++;
  1876. }
  1877. else if (IS_DIGIT(c)) {
  1878. idx++;
  1879. while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) {
  1880. idx++;
  1881. }
  1882. }
  1883. else {
  1884. /* no integer digits, error */
  1885. raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
  1886. return NULL;
  1887. }
  1888. /* if the next char is '.' followed by a digit then read all float digits */
  1889. if (idx < end_idx &&
  1890. PyUnicode_READ(kind, str, idx) == '.' &&
  1891. IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) {
  1892. is_float = 1;
  1893. idx += 2;
  1894. while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
  1895. }
  1896. /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
  1897. if (idx < end_idx &&
  1898. (PyUnicode_READ(kind, str, idx) == 'e' ||
  1899. PyUnicode_READ(kind, str, idx) == 'E')) {
  1900. Py_ssize_t e_start = idx;
  1901. idx++;
  1902. /* read an exponent sign if present */
  1903. if (idx < end_idx &&
  1904. (PyUnicode_READ(kind, str, idx) == '-' ||
  1905. PyUnicode_READ(kind, str, idx) == '+')) idx++;
  1906. /* read all digits */
  1907. while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
  1908. /* if we got a digit, then parse as float. if not, backtrack */
  1909. if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) {
  1910. is_float = 1;
  1911. }
  1912. else {
  1913. idx = e_start;
  1914. }
  1915. }
  1916. /* copy the section we determined to be a number */
  1917. #if PY_MAJOR_VERSION >= 3
  1918. numstr = PyUnicode_Substring(pystr, start, idx);
  1919. #else
  1920. numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start);
  1921. #endif
  1922. if (numstr == NULL)
  1923. return NULL;
  1924. if (is_float) {
  1925. /* parse as a float using a fast path if available, otherwise call user defined method */
  1926. if (s->parse_float != (PyObject *)&PyFloat_Type) {
  1927. rval = PyObject_CallOneArg(s->parse_float, numstr);
  1928. }
  1929. else {
  1930. #if PY_MAJOR_VERSION >= 3
  1931. rval = PyFloat_FromString(numstr);
  1932. #else
  1933. rval = PyFloat_FromString(numstr, NULL);
  1934. #endif
  1935. }
  1936. }
  1937. else {
  1938. /* no fast path for unicode -> int, just call */
  1939. rval = PyObject_CallOneArg(s->parse_int, numstr);
  1940. }
  1941. Py_DECREF(numstr);
  1942. *next_idx_ptr = idx;
  1943. return rval;
  1944. }
  1945. #if PY_MAJOR_VERSION < 3
  1946. static PyObject *
  1947. scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  1948. {
  1949. /* Read one JSON term (of any kind) from PyString pystr.
  1950. idx is the index of the first character of the term
  1951. *next_idx_ptr is a return-by-reference index to the first character after
  1952. the number.
  1953. Returns a new PyObject representation of the term.
  1954. */
  1955. char *str = PyString_AS_STRING(pystr);
  1956. Py_ssize_t length = PyString_GET_SIZE(pystr);
  1957. PyObject *rval = NULL;
  1958. int fallthrough = 0;
  1959. if (idx < 0 || idx >= length) {
  1960. raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
  1961. return NULL;
  1962. }
  1963. switch (str[idx]) {
  1964. case '"':
  1965. /* string */
  1966. rval = scanstring_str(pystr, idx + 1,
  1967. PyString_AS_STRING(s->encoding),
  1968. s->strict,
  1969. next_idx_ptr);
  1970. break;
  1971. case '{':
  1972. /* object */
  1973. if (Py_EnterRecursiveCall(" while decoding a JSON object "
  1974. "from a string"))
  1975. return NULL;
  1976. rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
  1977. Py_LeaveRecursiveCall();
  1978. break;
  1979. case '[':
  1980. /* array */
  1981. if (Py_EnterRecursiveCall(" while decoding a JSON array "
  1982. "from a string"))
  1983. return NULL;
  1984. rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
  1985. Py_LeaveRecursiveCall();
  1986. break;
  1987. case 'n':
  1988. /* null */
  1989. if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
  1990. Py_INCREF(Py_None);
  1991. *next_idx_ptr = idx + 4;
  1992. rval = Py_None;
  1993. }
  1994. else
  1995. fallthrough = 1;
  1996. break;
  1997. case 't':
  1998. /* true */
  1999. if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
  2000. Py_INCREF(Py_True);
  2001. *next_idx_ptr = idx + 4;
  2002. rval = Py_True;
  2003. }
  2004. else
  2005. fallthrough = 1;
  2006. break;
  2007. case 'f':
  2008. /* false */
  2009. if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
  2010. Py_INCREF(Py_False);
  2011. *next_idx_ptr = idx + 5;
  2012. rval = Py_False;
  2013. }
  2014. else
  2015. fallthrough = 1;
  2016. break;
  2017. case 'N':
  2018. /* NaN */
  2019. if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
  2020. rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
  2021. }
  2022. else
  2023. fallthrough = 1;
  2024. break;
  2025. case 'I':
  2026. /* Infinity */
  2027. if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
  2028. rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
  2029. }
  2030. else
  2031. fallthrough = 1;
  2032. break;
  2033. case '-':
  2034. /* -Infinity */
  2035. if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
  2036. rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
  2037. }
  2038. else
  2039. fallthrough = 1;
  2040. break;
  2041. default:
  2042. fallthrough = 1;
  2043. }
  2044. /* Didn't find a string, object, array, or named constant. Look for a number. */
  2045. if (fallthrough)
  2046. rval = _match_number_str(s, pystr, idx, next_idx_ptr);
  2047. return rval;
  2048. }
  2049. #endif /* PY_MAJOR_VERSION < 3 */
  2050. static PyObject *
  2051. scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
  2052. {
  2053. /* Read one JSON term (of any kind) from PyUnicode pystr.
  2054. idx is the index of the first character of the term
  2055. *next_idx_ptr is a return-by-reference index to the first character after
  2056. the number.
  2057. Returns a new PyObject representation of the term.
  2058. */
  2059. PY2_UNUSED int kind = PyUnicode_KIND(pystr);
  2060. void *str = PyUnicode_DATA(pystr);
  2061. Py_ssize_t length = PyUnicode_GET_LENGTH(pystr);
  2062. PyObject *rval = NULL;
  2063. int fallthrough = 0;
  2064. if (idx < 0 || idx >= length) {
  2065. raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
  2066. return NULL;
  2067. }
  2068. switch (PyUnicode_READ(kind, str, idx)) {
  2069. case '"':
  2070. /* string */
  2071. rval = scanstring_unicode(pystr, idx + 1,
  2072. s->strict,
  2073. next_idx_ptr);
  2074. break;
  2075. case '{':
  2076. /* object */
  2077. if (Py_EnterRecursiveCall(" while decoding a JSON object "
  2078. "from a unicode string"))
  2079. return NULL;
  2080. rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
  2081. Py_LeaveRecursiveCall();
  2082. break;
  2083. case '[':
  2084. /* array */
  2085. if (Py_EnterRecursiveCall(" while decoding a JSON array "
  2086. "from a unicode string"))
  2087. return NULL;
  2088. rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
  2089. Py_LeaveRecursiveCall();
  2090. break;
  2091. case 'n':
  2092. /* null */
  2093. if ((idx + 3 < length) &&
  2094. PyUnicode_READ(kind, str, idx + 1) == 'u' &&
  2095. PyUnicode_READ(kind, str, idx + 2) == 'l' &&
  2096. PyUnicode_READ(kind, str, idx + 3) == 'l') {
  2097. Py_INCREF(Py_None);
  2098. *next_idx_ptr = idx + 4;
  2099. rval = Py_None;
  2100. }
  2101. else
  2102. fallthrough = 1;
  2103. break;
  2104. case 't':
  2105. /* true */
  2106. if ((idx + 3 < length) &&
  2107. PyUnicode_READ(kind, str, idx + 1) == 'r' &&
  2108. PyUnicode_READ(kind, str, idx + 2) == 'u' &&
  2109. PyUnicode_READ(kind, str, idx + 3) == 'e') {
  2110. Py_INCREF(Py_True);
  2111. *next_idx_ptr = idx + 4;
  2112. rval = Py_True;
  2113. }
  2114. else
  2115. fallthrough = 1;
  2116. break;
  2117. case 'f':
  2118. /* false */
  2119. if ((idx + 4 < length) &&
  2120. PyUnicode_READ(kind, str, idx + 1) == 'a' &&
  2121. PyUnicode_READ(kind, str, idx + 2) == 'l' &&
  2122. PyUnicode_READ(kind, str, idx + 3) == 's' &&
  2123. PyUnicode_READ(kind, str, idx + 4) == 'e') {
  2124. Py_INCREF(Py_False);
  2125. *next_idx_ptr = idx + 5;
  2126. rval = Py_False;
  2127. }
  2128. else
  2129. fallthrough = 1;
  2130. break;
  2131. case 'N':
  2132. /* NaN */
  2133. if ((idx + 2 < length) &&
  2134. PyUnicode_READ(kind, str, idx + 1) == 'a' &&
  2135. PyUnicode_READ(kind, str, idx + 2) == 'N') {
  2136. rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
  2137. }
  2138. else
  2139. fallthrough = 1;
  2140. break;
  2141. case 'I':
  2142. /* Infinity */
  2143. if ((idx + 7 < length) &&
  2144. PyUnicode_READ(kind, str, idx + 1) == 'n' &&
  2145. PyUnicode_READ(kind, str, idx + 2) == 'f' &&
  2146. PyUnicode_READ(kind, str, idx + 3) == 'i' &&
  2147. PyUnicode_READ(kind, str, idx + 4) == 'n' &&
  2148. PyUnicode_READ(kind, str, idx + 5) == 'i' &&
  2149. PyUnicode_READ(kind, str, idx + 6) == 't' &&
  2150. PyUnicode_READ(kind, str, idx + 7) == 'y') {
  2151. rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
  2152. }
  2153. else
  2154. fallthrough = 1;
  2155. break;
  2156. case '-':
  2157. /* -Infinity */
  2158. if ((idx + 8 < length) &&
  2159. PyUnicode_READ(kind, str, idx + 1) == 'I' &&
  2160. PyUnicode_READ(kind, str, idx + 2) == 'n' &&
  2161. PyUnicode_READ(kind, str, idx + 3) == 'f' &&
  2162. PyUnicode_READ(kind, str, idx + 4) == 'i' &&
  2163. PyUnicode_READ(kind, str, idx + 5) == 'n' &&
  2164. PyUnicode_READ(kind, str, idx + 6) == 'i' &&
  2165. PyUnicode_READ(kind, str, idx + 7) == 't' &&
  2166. PyUnicode_READ(kind, str, idx + 8) == 'y') {
  2167. rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
  2168. }
  2169. else
  2170. fallthrough = 1;
  2171. break;
  2172. default:
  2173. fallthrough = 1;
  2174. }
  2175. /* Didn't find a string, object, array, or named constant. Look for a number. */
  2176. if (fallthrough)
  2177. rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
  2178. return rval;
  2179. }
  2180. static PyObject *
  2181. scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
  2182. {
  2183. /* Python callable interface to scan_once_{str,unicode} */
  2184. PyObject *pystr;
  2185. PyObject *rval;
  2186. Py_ssize_t idx;
  2187. Py_ssize_t next_idx = -1;
  2188. static char *kwlist[] = {"string", "idx", NULL};
  2189. PyScannerObject *s;
  2190. assert(PyScanner_Check(self));
  2191. s = (PyScannerObject *)self;
  2192. if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
  2193. return NULL;
  2194. if (PyUnicode_Check(pystr)) {
  2195. if (PyUnicode_READY(pystr))
  2196. return NULL;
  2197. rval = scan_once_unicode(s, pystr, idx, &next_idx);
  2198. }
  2199. #if PY_MAJOR_VERSION < 3
  2200. else if (PyString_Check(pystr)) {
  2201. rval = scan_once_str(s, pystr, idx, &next_idx);
  2202. }
  2203. #endif /* PY_MAJOR_VERSION < 3 */
  2204. else {
  2205. PyErr_Format(PyExc_TypeError,
  2206. "first argument must be a string, not %.80s",
  2207. Py_TYPE(pystr)->tp_name);
  2208. return NULL;
  2209. }
  2210. PyDict_Clear(s->memo);
  2211. return _build_rval_index_tuple(rval, next_idx);
  2212. }
  2213. static PyObject *
  2214. JSON_ParseEncoding(PyObject *encoding)
  2215. {
  2216. if (encoding == Py_None)
  2217. return JSON_InternFromString(DEFAULT_ENCODING);
  2218. #if PY_MAJOR_VERSION >= 3
  2219. if (PyUnicode_Check(encoding)) {
  2220. if (PyUnicode_AsUTF8(encoding) == NULL) {
  2221. return NULL;
  2222. }
  2223. Py_INCREF(encoding);
  2224. return encoding;
  2225. }
  2226. #else /* PY_MAJOR_VERSION >= 3 */
  2227. if (PyString_Check(encoding)) {
  2228. Py_INCREF(encoding);
  2229. return encoding;
  2230. }
  2231. if (PyUnicode_Check(encoding))
  2232. return PyUnicode_AsEncodedString(encoding, NULL, NULL);
  2233. #endif /* PY_MAJOR_VERSION >= 3 */
  2234. PyErr_SetString(PyExc_TypeError, "encoding must be a string");
  2235. return NULL;
  2236. }
  2237. static PyObject *
  2238. scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  2239. {
  2240. /* Initialize Scanner object */
  2241. PyObject *ctx;
  2242. static char *kwlist[] = {"context", NULL};
  2243. PyScannerObject *s;
  2244. PyObject *encoding;
  2245. if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
  2246. return NULL;
  2247. s = (PyScannerObject *)type->tp_alloc(type, 0);
  2248. if (s == NULL)
  2249. return NULL;
  2250. if (s->memo == NULL) {
  2251. s->memo = PyDict_New();
  2252. if (s->memo == NULL)
  2253. goto bail;
  2254. }
  2255. encoding = PyObject_GetAttrString(ctx, "encoding");
  2256. if (encoding == NULL)
  2257. goto bail;
  2258. s->encoding = JSON_ParseEncoding(encoding);
  2259. Py_XDECREF(encoding);
  2260. if (s->encoding == NULL)
  2261. goto bail;
  2262. /* All of these will fail "gracefully" so we don't need to verify them */
  2263. s->strict_bool = PyObject_GetAttrString(ctx, "strict");
  2264. if (s->strict_bool == NULL)
  2265. goto bail;
  2266. s->strict = PyObject_IsTrue(s->strict_bool);
  2267. if (s->strict < 0)
  2268. goto bail;
  2269. s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
  2270. if (s->object_hook == NULL)
  2271. goto bail;
  2272. s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
  2273. if (s->pairs_hook == NULL)
  2274. goto bail;
  2275. s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
  2276. if (s->parse_float == NULL)
  2277. goto bail;
  2278. s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
  2279. if (s->parse_int == NULL)
  2280. goto bail;
  2281. s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
  2282. if (s->parse_constant == NULL)
  2283. goto bail;
  2284. return (PyObject *)s;
  2285. bail:
  2286. Py_DECREF(s);
  2287. return NULL;
  2288. }
  2289. PyDoc_STRVAR(scanner_doc, "JSON scanner object");
  2290. static
  2291. PyTypeObject PyScannerType = {
  2292. PyVarObject_HEAD_INIT(NULL, 0)
  2293. "simplejson._speedups.Scanner", /* tp_name */
  2294. sizeof(PyScannerObject), /* tp_basicsize */
  2295. 0, /* tp_itemsize */
  2296. scanner_dealloc, /* tp_dealloc */
  2297. 0, /* tp_print */
  2298. 0, /* tp_getattr */
  2299. 0, /* tp_setattr */
  2300. 0, /* tp_compare */
  2301. 0, /* tp_repr */
  2302. 0, /* tp_as_number */
  2303. 0, /* tp_as_sequence */
  2304. 0, /* tp_as_mapping */
  2305. 0, /* tp_hash */
  2306. scanner_call, /* tp_call */
  2307. 0, /* tp_str */
  2308. 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
  2309. 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
  2310. 0, /* tp_as_buffer */
  2311. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
  2312. scanner_doc, /* tp_doc */
  2313. scanner_traverse, /* tp_traverse */
  2314. scanner_clear, /* tp_clear */
  2315. 0, /* tp_richcompare */
  2316. 0, /* tp_weaklistoffset */
  2317. 0, /* tp_iter */
  2318. 0, /* tp_iternext */
  2319. 0, /* tp_methods */
  2320. scanner_members, /* tp_members */
  2321. 0, /* tp_getset */
  2322. 0, /* tp_base */
  2323. 0, /* tp_dict */
  2324. 0, /* tp_descr_get */
  2325. 0, /* tp_descr_set */
  2326. 0, /* tp_dictoffset */
  2327. 0, /* tp_init */
  2328. 0,/* PyType_GenericAlloc, */ /* tp_alloc */
  2329. scanner_new, /* tp_new */
  2330. 0,/* PyObject_GC_Del, */ /* tp_free */
  2331. };
  2332. static PyObject *
  2333. encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  2334. {
  2335. static char *kwlist[] = {
  2336. "markers",
  2337. "default",
  2338. "encoder",
  2339. "indent",
  2340. "key_separator",
  2341. "item_separator",
  2342. "sort_keys",
  2343. "skipkeys",
  2344. "allow_nan",
  2345. "key_memo",
  2346. "use_decimal",
  2347. "namedtuple_as_object",
  2348. "tuple_as_array",
  2349. "int_as_string_bitcount",
  2350. "item_sort_key",
  2351. "encoding",
  2352. "for_json",
  2353. "ignore_nan",
  2354. "Decimal",
  2355. "iterable_as_array",
  2356. NULL};
  2357. PyEncoderObject *s;
  2358. PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
  2359. PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
  2360. PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array, *iterable_as_array;
  2361. PyObject *int_as_string_bitcount, *item_sort_key, *encoding, *for_json;
  2362. PyObject *ignore_nan, *Decimal;
  2363. int is_true;
  2364. if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOOO:make_encoder", kwlist,
  2365. &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
  2366. &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
  2367. &namedtuple_as_object, &tuple_as_array,
  2368. &int_as_string_bitcount, &item_sort_key, &encoding, &for_json,
  2369. &ignore_nan, &Decimal, &iterable_as_array))
  2370. return NULL;
  2371. s = (PyEncoderObject *)type->tp_alloc(type, 0);
  2372. if (s == NULL)
  2373. return NULL;
  2374. Py_INCREF(markers);
  2375. s->markers = markers;
  2376. Py_INCREF(defaultfn);
  2377. s->defaultfn = defaultfn;
  2378. Py_INCREF(encoder);
  2379. s->encoder = encoder;
  2380. #if PY_MAJOR_VERSION >= 3
  2381. if (encoding == Py_None) {
  2382. s->encoding = NULL;
  2383. }
  2384. else
  2385. #endif /* PY_MAJOR_VERSION >= 3 */
  2386. {
  2387. s->encoding = JSON_ParseEncoding(encoding);
  2388. if (s->encoding == NULL)
  2389. goto bail;
  2390. }
  2391. Py_INCREF(indent);
  2392. s->indent = indent;
  2393. Py_INCREF(key_separator);
  2394. s->key_separator = key_separator;
  2395. Py_INCREF(item_separator);
  2396. s->item_separator = item_separator;
  2397. Py_INCREF(skipkeys);
  2398. s->skipkeys_bool = skipkeys;
  2399. s->skipkeys = PyObject_IsTrue(skipkeys);
  2400. if (s->skipkeys < 0)
  2401. goto bail;
  2402. Py_INCREF(key_memo);
  2403. s->key_memo = key_memo;
  2404. s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
  2405. is_true = PyObject_IsTrue(ignore_nan);
  2406. if (is_true < 0)
  2407. goto bail;
  2408. s->allow_or_ignore_nan = is_true ? JSON_IGNORE_NAN : 0;
  2409. is_true = PyObject_IsTrue(allow_nan);
  2410. if (is_true < 0)
  2411. goto bail;
  2412. s->allow_or_ignore_nan |= is_true ? JSON_ALLOW_NAN : 0;
  2413. s->use_decimal = PyObject_IsTrue(use_decimal);
  2414. if (s->use_decimal < 0)
  2415. goto bail;
  2416. s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
  2417. if (s->namedtuple_as_object < 0)
  2418. goto bail;
  2419. s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
  2420. if (s->tuple_as_array < 0)
  2421. goto bail;
  2422. s->iterable_as_array = PyObject_IsTrue(iterable_as_array);
  2423. if (s->iterable_as_array < 0)
  2424. goto bail;
  2425. if (PyInt_Check(int_as_string_bitcount) || PyLong_Check(int_as_string_bitcount)) {
  2426. static const unsigned long long_long_bitsize = SIZEOF_LONG_LONG * 8;
  2427. long int_as_string_bitcount_val = PyLong_AsLong(int_as_string_bitcount);
  2428. if (int_as_string_bitcount_val > 0 && int_as_string_bitcount_val < (long)long_long_bitsize) {
  2429. s->max_long_size = PyLong_FromUnsignedLongLong(1ULL << (int)int_as_string_bitcount_val);
  2430. s->min_long_size = PyLong_FromLongLong(-1LL << (int)int_as_string_bitcount_val);
  2431. if (s->min_long_size == NULL || s->max_long_size == NULL) {
  2432. goto bail;
  2433. }
  2434. }
  2435. else {
  2436. PyErr_Format(PyExc_TypeError,
  2437. "int_as_string_bitcount (%ld) must be greater than 0 and less than the number of bits of a `long long` type (%lu bits)",
  2438. int_as_string_bitcount_val, long_long_bitsize);
  2439. goto bail;
  2440. }
  2441. }
  2442. else if (int_as_string_bitcount == Py_None) {
  2443. Py_INCREF(Py_None);
  2444. s->max_long_size = Py_None;
  2445. Py_INCREF(Py_None);
  2446. s->min_long_size = Py_None;
  2447. }
  2448. else {
  2449. PyErr_SetString(PyExc_TypeError, "int_as_string_bitcount must be None or an integer");
  2450. goto bail;
  2451. }
  2452. if (item_sort_key != Py_None) {
  2453. if (!PyCallable_Check(item_sort_key)) {
  2454. PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
  2455. goto bail;
  2456. }
  2457. }
  2458. else {
  2459. is_true = PyObject_IsTrue(sort_keys);
  2460. if (is_true < 0)
  2461. goto bail;
  2462. if (is_true) {
  2463. static PyObject *itemgetter0 = NULL;
  2464. if (!itemgetter0) {
  2465. PyObject *operator = PyImport_ImportModule("operator");
  2466. if (!operator)
  2467. goto bail;
  2468. itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0);
  2469. Py_DECREF(operator);
  2470. }
  2471. item_sort_key = itemgetter0;
  2472. if (!item_sort_key)
  2473. goto bail;
  2474. }
  2475. }
  2476. if (item_sort_key == Py_None) {
  2477. Py_INCREF(Py_None);
  2478. s->item_sort_kw = Py_None;
  2479. }
  2480. else {
  2481. s->item_sort_kw = PyDict_New();
  2482. if (s->item_sort_kw == NULL)
  2483. goto bail;
  2484. if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key))
  2485. goto bail;
  2486. }
  2487. Py_INCREF(sort_keys);
  2488. s->sort_keys = sort_keys;
  2489. Py_INCREF(item_sort_key);
  2490. s->item_sort_key = item_sort_key;
  2491. Py_INCREF(Decimal);
  2492. s->Decimal = Decimal;
  2493. s->for_json = PyObject_IsTrue(for_json);
  2494. if (s->for_json < 0)
  2495. goto bail;
  2496. return (PyObject *)s;
  2497. bail:
  2498. Py_DECREF(s);
  2499. return NULL;
  2500. }
  2501. static PyObject *
  2502. encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
  2503. {
  2504. /* Python callable interface to encode_listencode_obj */
  2505. static char *kwlist[] = {"obj", "_current_indent_level", NULL};
  2506. PyObject *obj;
  2507. Py_ssize_t indent_level;
  2508. PyEncoderObject *s;
  2509. JSON_Accu rval;
  2510. assert(PyEncoder_Check(self));
  2511. s = (PyEncoderObject *)self;
  2512. if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
  2513. &obj, _convertPyInt_AsSsize_t, &indent_level))
  2514. return NULL;
  2515. if (JSON_Accu_Init(&rval))
  2516. return NULL;
  2517. if (encoder_listencode_obj(s, &rval, obj, indent_level)) {
  2518. JSON_Accu_Destroy(&rval);
  2519. return NULL;
  2520. }
  2521. return JSON_Accu_FinishAsList(&rval);
  2522. }
  2523. static PyObject *
  2524. _encoded_const(PyObject *obj)
  2525. {
  2526. /* Return the JSON string representation of None, True, False */
  2527. if (obj == Py_None) {
  2528. static PyObject *s_null = NULL;
  2529. if (s_null == NULL) {
  2530. s_null = JSON_InternFromString("null");
  2531. }
  2532. Py_INCREF(s_null);
  2533. return s_null;
  2534. }
  2535. else if (obj == Py_True) {
  2536. static PyObject *s_true = NULL;
  2537. if (s_true == NULL) {
  2538. s_true = JSON_InternFromString("true");
  2539. }
  2540. Py_INCREF(s_true);
  2541. return s_true;
  2542. }
  2543. else if (obj == Py_False) {
  2544. static PyObject *s_false = NULL;
  2545. if (s_false == NULL) {
  2546. s_false = JSON_InternFromString("false");
  2547. }
  2548. Py_INCREF(s_false);
  2549. return s_false;
  2550. }
  2551. else {
  2552. PyErr_SetString(PyExc_ValueError, "not a const");
  2553. return NULL;
  2554. }
  2555. }
  2556. static PyObject *
  2557. encoder_encode_float(PyEncoderObject *s, PyObject *obj)
  2558. {
  2559. /* Return the JSON representation of a PyFloat */
  2560. double i = PyFloat_AS_DOUBLE(obj);
  2561. if (!Py_IS_FINITE(i)) {
  2562. if (!s->allow_or_ignore_nan) {
  2563. PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
  2564. return NULL;
  2565. }
  2566. if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) {
  2567. return _encoded_const(Py_None);
  2568. }
  2569. /* JSON_ALLOW_NAN is set */
  2570. else if (i > 0) {
  2571. Py_INCREF(JSON_Infinity);
  2572. return JSON_Infinity;
  2573. }
  2574. else if (i < 0) {
  2575. Py_INCREF(JSON_NegInfinity);
  2576. return JSON_NegInfinity;
  2577. }
  2578. else {
  2579. Py_INCREF(JSON_NaN);
  2580. return JSON_NaN;
  2581. }
  2582. }
  2583. /* Use a better float format here? */
  2584. if (PyFloat_CheckExact(obj)) {
  2585. return PyObject_Repr(obj);
  2586. }
  2587. else {
  2588. /* See #118, do not trust custom str/repr */
  2589. PyObject *res;
  2590. PyObject *tmp = PyObject_CallOneArg((PyObject *)&PyFloat_Type, obj);
  2591. if (tmp == NULL) {
  2592. return NULL;
  2593. }
  2594. res = PyObject_Repr(tmp);
  2595. Py_DECREF(tmp);
  2596. return res;
  2597. }
  2598. }
  2599. static PyObject *
  2600. encoder_encode_string(PyEncoderObject *s, PyObject *obj)
  2601. {
  2602. /* Return the JSON representation of a string */
  2603. PyObject *encoded;
  2604. if (s->fast_encode) {
  2605. return py_encode_basestring_ascii(NULL, obj);
  2606. }
  2607. encoded = PyObject_CallOneArg(s->encoder, obj);
  2608. if (encoded != NULL &&
  2609. #if PY_MAJOR_VERSION < 3
  2610. !PyString_Check(encoded) &&
  2611. #endif /* PY_MAJOR_VERSION < 3 */
  2612. !PyUnicode_Check(encoded))
  2613. {
  2614. PyErr_Format(PyExc_TypeError,
  2615. "encoder() must return a string, not %.80s",
  2616. Py_TYPE(encoded)->tp_name);
  2617. Py_DECREF(encoded);
  2618. return NULL;
  2619. }
  2620. return encoded;
  2621. }
  2622. static int
  2623. _steal_accumulate(JSON_Accu *accu, PyObject *stolen)
  2624. {
  2625. /* Append stolen and then decrement its reference count */
  2626. int rval = JSON_Accu_Accumulate(accu, stolen);
  2627. Py_DECREF(stolen);
  2628. return rval;
  2629. }
  2630. static int
  2631. encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
  2632. {
  2633. /* Encode Python object obj to a JSON term, rval is a PyList */
  2634. int rv = -1;
  2635. do {
  2636. PyObject *newobj;
  2637. if (obj == Py_None || obj == Py_True || obj == Py_False) {
  2638. PyObject *cstr = _encoded_const(obj);
  2639. if (cstr != NULL)
  2640. rv = _steal_accumulate(rval, cstr);
  2641. }
  2642. else if ((PyBytes_Check(obj) && s->encoding != NULL) ||
  2643. PyUnicode_Check(obj))
  2644. {
  2645. PyObject *encoded = encoder_encode_string(s, obj);
  2646. if (encoded != NULL)
  2647. rv = _steal_accumulate(rval, encoded);
  2648. }
  2649. else if (PyInt_Check(obj) || PyLong_Check(obj)) {
  2650. PyObject *encoded;
  2651. if (PyInt_CheckExact(obj) || PyLong_CheckExact(obj)) {
  2652. encoded = PyObject_Str(obj);
  2653. }
  2654. else {
  2655. /* See #118, do not trust custom str/repr */
  2656. PyObject *tmp = PyObject_CallOneArg((PyObject *)&PyLong_Type, obj);
  2657. if (tmp == NULL) {
  2658. encoded = NULL;
  2659. }
  2660. else {
  2661. encoded = PyObject_Str(tmp);
  2662. Py_DECREF(tmp);
  2663. }
  2664. }
  2665. if (encoded != NULL) {
  2666. encoded = maybe_quote_bigint(s, encoded, obj);
  2667. if (encoded == NULL)
  2668. break;
  2669. rv = _steal_accumulate(rval, encoded);
  2670. }
  2671. }
  2672. else if (PyFloat_Check(obj)) {
  2673. PyObject *encoded = encoder_encode_float(s, obj);
  2674. if (encoded != NULL)
  2675. rv = _steal_accumulate(rval, encoded);
  2676. }
  2677. else if (s->for_json && _call_json_method(obj, FOR_JSON_METHOD_NAME, &newobj)) {
  2678. if (newobj == NULL) {
  2679. return -1;
  2680. }
  2681. if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
  2682. Py_DECREF(newobj);
  2683. return rv;
  2684. }
  2685. rv = encoder_listencode_obj(s, rval, newobj, indent_level);
  2686. Py_DECREF(newobj);
  2687. Py_LeaveRecursiveCall();
  2688. }
  2689. else if (s->namedtuple_as_object && _call_json_method(obj, ASDICT_METHOD_NAME, &newobj)) {
  2690. if (newobj == NULL) {
  2691. return -1;
  2692. }
  2693. if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
  2694. Py_DECREF(newobj);
  2695. return rv;
  2696. }
  2697. if (PyDict_Check(newobj)) {
  2698. rv = encoder_listencode_dict(s, rval, newobj, indent_level);
  2699. } else {
  2700. PyErr_Format(
  2701. PyExc_TypeError,
  2702. "_asdict() must return a dict, not %.80s",
  2703. Py_TYPE(newobj)->tp_name
  2704. );
  2705. rv = -1;
  2706. }
  2707. Py_DECREF(newobj);
  2708. Py_LeaveRecursiveCall();
  2709. }
  2710. else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
  2711. if (Py_EnterRecursiveCall(" while encoding a JSON object"))
  2712. return rv;
  2713. rv = encoder_listencode_list(s, rval, obj, indent_level);
  2714. Py_LeaveRecursiveCall();
  2715. }
  2716. else if (PyDict_Check(obj)) {
  2717. if (Py_EnterRecursiveCall(" while encoding a JSON object"))
  2718. return rv;
  2719. rv = encoder_listencode_dict(s, rval, obj, indent_level);
  2720. Py_LeaveRecursiveCall();
  2721. }
  2722. else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
  2723. PyObject *encoded = PyObject_Str(obj);
  2724. if (encoded != NULL)
  2725. rv = _steal_accumulate(rval, encoded);
  2726. }
  2727. else if (is_raw_json(obj))
  2728. {
  2729. PyObject *encoded = PyObject_GetAttrString(obj, "encoded_json");
  2730. if (encoded != NULL)
  2731. rv = _steal_accumulate(rval, encoded);
  2732. }
  2733. else {
  2734. PyObject *ident = NULL;
  2735. PyObject *newobj;
  2736. if (s->iterable_as_array) {
  2737. newobj = PyObject_GetIter(obj);
  2738. if (newobj == NULL)
  2739. PyErr_Clear();
  2740. else {
  2741. rv = encoder_listencode_list(s, rval, newobj, indent_level);
  2742. Py_DECREF(newobj);
  2743. break;
  2744. }
  2745. }
  2746. if (s->markers != Py_None) {
  2747. int has_key;
  2748. ident = PyLong_FromVoidPtr(obj);
  2749. if (ident == NULL)
  2750. break;
  2751. has_key = PyDict_Contains(s->markers, ident);
  2752. if (has_key) {
  2753. if (has_key != -1)
  2754. PyErr_SetString(PyExc_ValueError, "Circular reference detected");
  2755. Py_DECREF(ident);
  2756. break;
  2757. }
  2758. if (PyDict_SetItem(s->markers, ident, obj)) {
  2759. Py_DECREF(ident);
  2760. break;
  2761. }
  2762. }
  2763. if (Py_EnterRecursiveCall(" while encoding a JSON object"))
  2764. return rv;
  2765. newobj = PyObject_CallOneArg(s->defaultfn, obj);
  2766. if (newobj == NULL) {
  2767. Py_XDECREF(ident);
  2768. Py_LeaveRecursiveCall();
  2769. break;
  2770. }
  2771. rv = encoder_listencode_obj(s, rval, newobj, indent_level);
  2772. Py_LeaveRecursiveCall();
  2773. Py_DECREF(newobj);
  2774. if (rv) {
  2775. Py_XDECREF(ident);
  2776. rv = -1;
  2777. }
  2778. else if (ident != NULL) {
  2779. if (PyDict_DelItem(s->markers, ident)) {
  2780. Py_XDECREF(ident);
  2781. rv = -1;
  2782. }
  2783. Py_XDECREF(ident);
  2784. }
  2785. }
  2786. } while (0);
  2787. return rv;
  2788. }
  2789. static int
  2790. encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
  2791. {
  2792. /* Encode Python dict dct a JSON term */
  2793. static PyObject *open_dict = NULL;
  2794. static PyObject *close_dict = NULL;
  2795. static PyObject *empty_dict = NULL;
  2796. PyObject *kstr = NULL;
  2797. PyObject *ident = NULL;
  2798. PyObject *iter = NULL;
  2799. PyObject *item = NULL;
  2800. PyObject *items = NULL;
  2801. PyObject *encoded = NULL;
  2802. Py_ssize_t idx;
  2803. if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
  2804. open_dict = JSON_InternFromString("{");
  2805. close_dict = JSON_InternFromString("}");
  2806. empty_dict = JSON_InternFromString("{}");
  2807. if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
  2808. return -1;
  2809. }
  2810. if (PyDict_Size(dct) == 0)
  2811. return JSON_Accu_Accumulate(rval, empty_dict);
  2812. if (s->markers != Py_None) {
  2813. int has_key;
  2814. ident = PyLong_FromVoidPtr(dct);
  2815. if (ident == NULL)
  2816. goto bail;
  2817. has_key = PyDict_Contains(s->markers, ident);
  2818. if (has_key) {
  2819. if (has_key != -1)
  2820. PyErr_SetString(PyExc_ValueError, "Circular reference detected");
  2821. goto bail;
  2822. }
  2823. if (PyDict_SetItem(s->markers, ident, dct)) {
  2824. goto bail;
  2825. }
  2826. }
  2827. if (JSON_Accu_Accumulate(rval, open_dict))
  2828. goto bail;
  2829. if (s->indent != Py_None) {
  2830. /* TODO: DOES NOT RUN */
  2831. indent_level += 1;
  2832. /*
  2833. newline_indent = '\n' + (_indent * _current_indent_level)
  2834. separator = _item_separator + newline_indent
  2835. buf += newline_indent
  2836. */
  2837. }
  2838. iter = encoder_dict_iteritems(s, dct);
  2839. if (iter == NULL)
  2840. goto bail;
  2841. idx = 0;
  2842. while ((item = PyIter_Next(iter))) {
  2843. PyObject *encoded, *key, *value;
  2844. if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
  2845. PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
  2846. goto bail;
  2847. }
  2848. key = PyTuple_GET_ITEM(item, 0);
  2849. if (key == NULL)
  2850. goto bail;
  2851. value = PyTuple_GET_ITEM(item, 1);
  2852. if (value == NULL)
  2853. goto bail;
  2854. encoded = PyDict_GetItem(s->key_memo, key);
  2855. if (encoded != NULL) {
  2856. Py_INCREF(encoded);
  2857. } else {
  2858. kstr = encoder_stringify_key(s, key);
  2859. if (kstr == NULL)
  2860. goto bail;
  2861. else if (kstr == Py_None) {
  2862. /* skipkeys */
  2863. Py_DECREF(item);
  2864. Py_DECREF(kstr);
  2865. continue;
  2866. }
  2867. }
  2868. if (idx) {
  2869. if (JSON_Accu_Accumulate(rval, s->item_separator))
  2870. goto bail;
  2871. }
  2872. if (encoded == NULL) {
  2873. encoded = encoder_encode_string(s, kstr);
  2874. Py_CLEAR(kstr);
  2875. if (encoded == NULL)
  2876. goto bail;
  2877. if (PyDict_SetItem(s->key_memo, key, encoded))
  2878. goto bail;
  2879. }
  2880. if (JSON_Accu_Accumulate(rval, encoded)) {
  2881. goto bail;
  2882. }
  2883. Py_CLEAR(encoded);
  2884. if (JSON_Accu_Accumulate(rval, s->key_separator))
  2885. goto bail;
  2886. if (encoder_listencode_obj(s, rval, value, indent_level))
  2887. goto bail;
  2888. Py_CLEAR(item);
  2889. idx += 1;
  2890. }
  2891. Py_CLEAR(iter);
  2892. if (PyErr_Occurred())
  2893. goto bail;
  2894. if (ident != NULL) {
  2895. if (PyDict_DelItem(s->markers, ident))
  2896. goto bail;
  2897. Py_CLEAR(ident);
  2898. }
  2899. if (s->indent != Py_None) {
  2900. /* TODO: DOES NOT RUN */
  2901. indent_level -= 1;
  2902. /*
  2903. yield '\n' + (_indent * _current_indent_level)
  2904. */
  2905. }
  2906. if (JSON_Accu_Accumulate(rval, close_dict))
  2907. goto bail;
  2908. return 0;
  2909. bail:
  2910. Py_XDECREF(encoded);
  2911. Py_XDECREF(items);
  2912. Py_XDECREF(item);
  2913. Py_XDECREF(iter);
  2914. Py_XDECREF(kstr);
  2915. Py_XDECREF(ident);
  2916. return -1;
  2917. }
  2918. static int
  2919. encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
  2920. {
  2921. /* Encode Python list seq to a JSON term */
  2922. static PyObject *open_array = NULL;
  2923. static PyObject *close_array = NULL;
  2924. static PyObject *empty_array = NULL;
  2925. PyObject *ident = NULL;
  2926. PyObject *iter = NULL;
  2927. PyObject *obj = NULL;
  2928. int is_true;
  2929. int i = 0;
  2930. if (open_array == NULL || close_array == NULL || empty_array == NULL) {
  2931. open_array = JSON_InternFromString("[");
  2932. close_array = JSON_InternFromString("]");
  2933. empty_array = JSON_InternFromString("[]");
  2934. if (open_array == NULL || close_array == NULL || empty_array == NULL)
  2935. return -1;
  2936. }
  2937. ident = NULL;
  2938. is_true = PyObject_IsTrue(seq);
  2939. if (is_true == -1)
  2940. return -1;
  2941. else if (is_true == 0)
  2942. return JSON_Accu_Accumulate(rval, empty_array);
  2943. if (s->markers != Py_None) {
  2944. int has_key;
  2945. ident = PyLong_FromVoidPtr(seq);
  2946. if (ident == NULL)
  2947. goto bail;
  2948. has_key = PyDict_Contains(s->markers, ident);
  2949. if (has_key) {
  2950. if (has_key != -1)
  2951. PyErr_SetString(PyExc_ValueError, "Circular reference detected");
  2952. goto bail;
  2953. }
  2954. if (PyDict_SetItem(s->markers, ident, seq)) {
  2955. goto bail;
  2956. }
  2957. }
  2958. iter = PyObject_GetIter(seq);
  2959. if (iter == NULL)
  2960. goto bail;
  2961. if (JSON_Accu_Accumulate(rval, open_array))
  2962. goto bail;
  2963. if (s->indent != Py_None) {
  2964. /* TODO: DOES NOT RUN */
  2965. indent_level += 1;
  2966. /*
  2967. newline_indent = '\n' + (_indent * _current_indent_level)
  2968. separator = _item_separator + newline_indent
  2969. buf += newline_indent
  2970. */
  2971. }
  2972. while ((obj = PyIter_Next(iter))) {
  2973. if (i) {
  2974. if (JSON_Accu_Accumulate(rval, s->item_separator))
  2975. goto bail;
  2976. }
  2977. if (encoder_listencode_obj(s, rval, obj, indent_level))
  2978. goto bail;
  2979. i++;
  2980. Py_CLEAR(obj);
  2981. }
  2982. Py_CLEAR(iter);
  2983. if (PyErr_Occurred())
  2984. goto bail;
  2985. if (ident != NULL) {
  2986. if (PyDict_DelItem(s->markers, ident))
  2987. goto bail;
  2988. Py_CLEAR(ident);
  2989. }
  2990. if (s->indent != Py_None) {
  2991. /* TODO: DOES NOT RUN */
  2992. indent_level -= 1;
  2993. /*
  2994. yield '\n' + (_indent * _current_indent_level)
  2995. */
  2996. }
  2997. if (JSON_Accu_Accumulate(rval, close_array))
  2998. goto bail;
  2999. return 0;
  3000. bail:
  3001. Py_XDECREF(obj);
  3002. Py_XDECREF(iter);
  3003. Py_XDECREF(ident);
  3004. return -1;
  3005. }
  3006. static void
  3007. encoder_dealloc(PyObject *self)
  3008. {
  3009. /* bpo-31095: UnTrack is needed before calling any callbacks */
  3010. PyObject_GC_UnTrack(self);
  3011. encoder_clear(self);
  3012. Py_TYPE(self)->tp_free(self);
  3013. }
  3014. static int
  3015. encoder_traverse(PyObject *self, visitproc visit, void *arg)
  3016. {
  3017. PyEncoderObject *s;
  3018. assert(PyEncoder_Check(self));
  3019. s = (PyEncoderObject *)self;
  3020. Py_VISIT(s->markers);
  3021. Py_VISIT(s->defaultfn);
  3022. Py_VISIT(s->encoder);
  3023. Py_VISIT(s->encoding);
  3024. Py_VISIT(s->indent);
  3025. Py_VISIT(s->key_separator);
  3026. Py_VISIT(s->item_separator);
  3027. Py_VISIT(s->key_memo);
  3028. Py_VISIT(s->sort_keys);
  3029. Py_VISIT(s->item_sort_kw);
  3030. Py_VISIT(s->item_sort_key);
  3031. Py_VISIT(s->max_long_size);
  3032. Py_VISIT(s->min_long_size);
  3033. Py_VISIT(s->Decimal);
  3034. return 0;
  3035. }
  3036. static int
  3037. encoder_clear(PyObject *self)
  3038. {
  3039. /* Deallocate Encoder */
  3040. PyEncoderObject *s;
  3041. assert(PyEncoder_Check(self));
  3042. s = (PyEncoderObject *)self;
  3043. Py_CLEAR(s->markers);
  3044. Py_CLEAR(s->defaultfn);
  3045. Py_CLEAR(s->encoder);
  3046. Py_CLEAR(s->encoding);
  3047. Py_CLEAR(s->indent);
  3048. Py_CLEAR(s->key_separator);
  3049. Py_CLEAR(s->item_separator);
  3050. Py_CLEAR(s->key_memo);
  3051. Py_CLEAR(s->skipkeys_bool);
  3052. Py_CLEAR(s->sort_keys);
  3053. Py_CLEAR(s->item_sort_kw);
  3054. Py_CLEAR(s->item_sort_key);
  3055. Py_CLEAR(s->max_long_size);
  3056. Py_CLEAR(s->min_long_size);
  3057. Py_CLEAR(s->Decimal);
  3058. return 0;
  3059. }
  3060. PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
  3061. static
  3062. PyTypeObject PyEncoderType = {
  3063. PyVarObject_HEAD_INIT(NULL, 0)
  3064. "simplejson._speedups.Encoder", /* tp_name */
  3065. sizeof(PyEncoderObject), /* tp_basicsize */
  3066. 0, /* tp_itemsize */
  3067. encoder_dealloc, /* tp_dealloc */
  3068. 0, /* tp_print */
  3069. 0, /* tp_getattr */
  3070. 0, /* tp_setattr */
  3071. 0, /* tp_compare */
  3072. 0, /* tp_repr */
  3073. 0, /* tp_as_number */
  3074. 0, /* tp_as_sequence */
  3075. 0, /* tp_as_mapping */
  3076. 0, /* tp_hash */
  3077. encoder_call, /* tp_call */
  3078. 0, /* tp_str */
  3079. 0, /* tp_getattro */
  3080. 0, /* tp_setattro */
  3081. 0, /* tp_as_buffer */
  3082. Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
  3083. encoder_doc, /* tp_doc */
  3084. encoder_traverse, /* tp_traverse */
  3085. encoder_clear, /* tp_clear */
  3086. 0, /* tp_richcompare */
  3087. 0, /* tp_weaklistoffset */
  3088. 0, /* tp_iter */
  3089. 0, /* tp_iternext */
  3090. 0, /* tp_methods */
  3091. encoder_members, /* tp_members */
  3092. 0, /* tp_getset */
  3093. 0, /* tp_base */
  3094. 0, /* tp_dict */
  3095. 0, /* tp_descr_get */
  3096. 0, /* tp_descr_set */
  3097. 0, /* tp_dictoffset */
  3098. 0, /* tp_init */
  3099. 0, /* tp_alloc */
  3100. encoder_new, /* tp_new */
  3101. 0, /* tp_free */
  3102. };
  3103. static PyMethodDef speedups_methods[] = {
  3104. {"encode_basestring_ascii",
  3105. (PyCFunction)py_encode_basestring_ascii,
  3106. METH_O,
  3107. pydoc_encode_basestring_ascii},
  3108. {"scanstring",
  3109. (PyCFunction)py_scanstring,
  3110. METH_VARARGS,
  3111. pydoc_scanstring},
  3112. {NULL, NULL, 0, NULL}
  3113. };
  3114. PyDoc_STRVAR(module_doc,
  3115. "simplejson speedups\n");
  3116. #if PY_MAJOR_VERSION >= 3
  3117. static struct PyModuleDef moduledef = {
  3118. PyModuleDef_HEAD_INIT,
  3119. "_speedups", /* m_name */
  3120. module_doc, /* m_doc */
  3121. -1, /* m_size */
  3122. speedups_methods, /* m_methods */
  3123. NULL, /* m_reload */
  3124. NULL, /* m_traverse */
  3125. NULL, /* m_clear*/
  3126. NULL, /* m_free */
  3127. };
  3128. #endif
  3129. PyObject *
  3130. import_dependency(char *module_name, char *attr_name)
  3131. {
  3132. PyObject *rval;
  3133. PyObject *module = PyImport_ImportModule(module_name);
  3134. if (module == NULL)
  3135. return NULL;
  3136. rval = PyObject_GetAttrString(module, attr_name);
  3137. Py_DECREF(module);
  3138. return rval;
  3139. }
  3140. static int
  3141. init_constants(void)
  3142. {
  3143. JSON_NaN = JSON_InternFromString("NaN");
  3144. if (JSON_NaN == NULL)
  3145. return 0;
  3146. JSON_Infinity = JSON_InternFromString("Infinity");
  3147. if (JSON_Infinity == NULL)
  3148. return 0;
  3149. JSON_NegInfinity = JSON_InternFromString("-Infinity");
  3150. if (JSON_NegInfinity == NULL)
  3151. return 0;
  3152. #if PY_MAJOR_VERSION >= 3
  3153. JSON_EmptyUnicode = PyUnicode_New(0, 127);
  3154. #else /* PY_MAJOR_VERSION >= 3 */
  3155. JSON_EmptyStr = PyString_FromString("");
  3156. if (JSON_EmptyStr == NULL)
  3157. return 0;
  3158. JSON_EmptyUnicode = PyUnicode_FromUnicode(NULL, 0);
  3159. #endif /* PY_MAJOR_VERSION >= 3 */
  3160. if (JSON_EmptyUnicode == NULL)
  3161. return 0;
  3162. return 1;
  3163. }
  3164. static PyObject *
  3165. moduleinit(void)
  3166. {
  3167. PyObject *m;
  3168. if (PyType_Ready(&PyScannerType) < 0)
  3169. return NULL;
  3170. if (PyType_Ready(&PyEncoderType) < 0)
  3171. return NULL;
  3172. if (!init_constants())
  3173. return NULL;
  3174. #if PY_MAJOR_VERSION >= 3
  3175. m = PyModule_Create(&moduledef);
  3176. #else
  3177. m = Py_InitModule3("_speedups", speedups_methods, module_doc);
  3178. #endif
  3179. Py_INCREF((PyObject*)&PyScannerType);
  3180. PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
  3181. Py_INCREF((PyObject*)&PyEncoderType);
  3182. PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
  3183. RawJSONType = import_dependency("simplejson.raw_json", "RawJSON");
  3184. if (RawJSONType == NULL)
  3185. return NULL;
  3186. JSONDecodeError = import_dependency("simplejson.errors", "JSONDecodeError");
  3187. if (JSONDecodeError == NULL)
  3188. return NULL;
  3189. return m;
  3190. }
  #if PY_MAJOR_VERSION >= 3
  /* Python 3 entry point: returns the module built by moduleinit(), or
   * NULL if initialisation failed. */
  PyMODINIT_FUNC
  PyInit__speedups(void)
  {
      PyObject *module = moduleinit();
      return module;
  }
  #else
  /* Python 2 entry point: the init function returns nothing, so the
   * result of moduleinit() is discarded. */
  void
  init_speedups(void)
  {
      (void)moduleinit();
  }
  #endif