multibytecodec.c 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084
  1. /*
  2. * multibytecodec.c: Common Multibyte Codec Implementation
  3. *
  4. * Written by Hye-Shik Chang <perky@FreeBSD.org>
  5. */
  6. #define PY_SSIZE_T_CLEAN
  7. #include "Python.h"
  8. #include "structmember.h" // PyMemberDef
  9. #include "multibytecodec.h"
  10. #include "clinic/multibytecodec.c.h"
  11. #define MODULE_NAME "_multibytecodec"
/* Per-module state.
 *
 * The five type pointers are the ones the Argument Clinic class declarations
 * in this file resolve through clinic_get_state()->..._type
 * (encoder_type -> MultibyteIncrementalEncoder, decoder_type ->
 * MultibyteIncrementalDecoder, reader_type -> MultibyteStreamReader,
 * writer_type -> MultibyteStreamWriter, multibytecodec_type ->
 * MultibyteCodec). */
typedef struct {
    PyTypeObject *encoder_type;
    PyTypeObject *decoder_type;
    PyTypeObject *reader_type;
    PyTypeObject *writer_type;
    PyTypeObject *multibytecodec_type;
    /* NOTE(review): presumably a cached "write" string object used by the
     * stream writer; its use is not visible in this part of the file. */
    PyObject *str_write;
} module_state;
  20. static module_state *
  21. get_module_state(PyObject *module)
  22. {
  23. module_state *state = PyModule_GetState(module);
  24. assert(state != NULL);
  25. return state;
  26. }
  27. static struct PyModuleDef _multibytecodecmodule;
  28. static module_state *
  29. find_state_by_def(PyTypeObject *type)
  30. {
  31. PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
  32. assert(module != NULL);
  33. return get_module_state(module);
  34. }
/* The class declarations in the clinic block below spell their type objects
 * as clinic_get_state()->..._type.  The macro resolves the state from a
 * variable named `type` at the point of use and is undefined again right
 * after the block. */
#define clinic_get_state() find_state_by_def(type)
/*[clinic input]
module _multibytecodec
class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
#undef clinic_get_state
  46. typedef struct {
  47. PyObject *inobj;
  48. Py_ssize_t inpos, inlen;
  49. unsigned char *outbuf, *outbuf_end;
  50. PyObject *excobj, *outobj;
  51. } MultibyteEncodeBuffer;
  52. typedef struct {
  53. const unsigned char *inbuf, *inbuf_top, *inbuf_end;
  54. PyObject *excobj;
  55. _PyUnicodeWriter writer;
  56. } MultibyteDecodeBuffer;
  57. static char *incnewkwarglist[] = {"errors", NULL};
  58. static char *streamkwarglist[] = {"stream", "errors", NULL};
  59. static PyObject *multibytecodec_encode(const MultibyteCodec *,
  60. MultibyteCodec_State *, PyObject *, Py_ssize_t *,
  61. PyObject *, int);
  62. #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
  63. static PyObject *
  64. make_tuple(PyObject *object, Py_ssize_t len)
  65. {
  66. PyObject *v, *w;
  67. if (object == NULL)
  68. return NULL;
  69. v = PyTuple_New(2);
  70. if (v == NULL) {
  71. Py_DECREF(object);
  72. return NULL;
  73. }
  74. PyTuple_SET_ITEM(v, 0, object);
  75. w = PyLong_FromSsize_t(len);
  76. if (w == NULL) {
  77. Py_DECREF(v);
  78. return NULL;
  79. }
  80. PyTuple_SET_ITEM(v, 1, w);
  81. return v;
  82. }
  83. static PyObject *
  84. internal_error_callback(const char *errors)
  85. {
  86. if (errors == NULL || strcmp(errors, "strict") == 0)
  87. return ERROR_STRICT;
  88. else if (strcmp(errors, "ignore") == 0)
  89. return ERROR_IGNORE;
  90. else if (strcmp(errors, "replace") == 0)
  91. return ERROR_REPLACE;
  92. else
  93. return PyUnicode_FromString(errors);
  94. }
  95. static PyObject *
  96. call_error_callback(PyObject *errors, PyObject *exc)
  97. {
  98. PyObject *cb, *r;
  99. const char *str;
  100. assert(PyUnicode_Check(errors));
  101. str = PyUnicode_AsUTF8(errors);
  102. if (str == NULL)
  103. return NULL;
  104. cb = PyCodec_LookupError(str);
  105. if (cb == NULL)
  106. return NULL;
  107. r = PyObject_CallOneArg(cb, exc);
  108. Py_DECREF(cb);
  109. return r;
  110. }
  111. static PyObject *
  112. codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
  113. {
  114. const char *errors;
  115. if (self->errors == ERROR_STRICT)
  116. errors = "strict";
  117. else if (self->errors == ERROR_IGNORE)
  118. errors = "ignore";
  119. else if (self->errors == ERROR_REPLACE)
  120. errors = "replace";
  121. else {
  122. return Py_NewRef(self->errors);
  123. }
  124. return PyUnicode_FromString(errors);
  125. }
  126. static int
  127. codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
  128. void *closure)
  129. {
  130. PyObject *cb;
  131. const char *str;
  132. if (value == NULL) {
  133. PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
  134. return -1;
  135. }
  136. if (!PyUnicode_Check(value)) {
  137. PyErr_SetString(PyExc_TypeError, "errors must be a string");
  138. return -1;
  139. }
  140. str = PyUnicode_AsUTF8(value);
  141. if (str == NULL)
  142. return -1;
  143. cb = internal_error_callback(str);
  144. if (cb == NULL)
  145. return -1;
  146. ERROR_DECREF(self->errors);
  147. self->errors = cb;
  148. return 0;
  149. }
  150. /* This getset handlers list is used by all the stateful codec objects */
  151. static PyGetSetDef codecctx_getsets[] = {
  152. {"errors", (getter)codecctx_errors_get,
  153. (setter)codecctx_errors_set,
  154. PyDoc_STR("how to treat errors")},
  155. {NULL,}
  156. };
  157. static int
  158. expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
  159. {
  160. Py_ssize_t orgpos, orgsize, incsize;
  161. orgpos = (Py_ssize_t)((char *)buf->outbuf -
  162. PyBytes_AS_STRING(buf->outobj));
  163. orgsize = PyBytes_GET_SIZE(buf->outobj);
  164. incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
  165. if (orgsize > PY_SSIZE_T_MAX - incsize) {
  166. PyErr_NoMemory();
  167. return -1;
  168. }
  169. if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
  170. return -1;
  171. buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
  172. buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
  173. + PyBytes_GET_SIZE(buf->outobj);
  174. return 0;
  175. }
  176. #define REQUIRE_ENCODEBUFFER(buf, s) do { \
  177. if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \
  178. if (expand_encodebuffer(buf, s) == -1) \
  179. goto errorexit; \
  180. } while(0)
  181. /**
  182. * MultibyteCodec object
  183. */
  184. static int
  185. multibytecodec_encerror(const MultibyteCodec *codec,
  186. MultibyteCodec_State *state,
  187. MultibyteEncodeBuffer *buf,
  188. PyObject *errors, Py_ssize_t e)
  189. {
  190. PyObject *retobj = NULL, *retstr = NULL, *tobj;
  191. Py_ssize_t retstrsize, newpos;
  192. Py_ssize_t esize, start, end;
  193. const char *reason;
  194. if (e > 0) {
  195. reason = "illegal multibyte sequence";
  196. esize = e;
  197. }
  198. else {
  199. switch (e) {
  200. case MBERR_TOOSMALL:
  201. REQUIRE_ENCODEBUFFER(buf, -1);
  202. return 0; /* retry it */
  203. case MBERR_TOOFEW:
  204. reason = "incomplete multibyte sequence";
  205. esize = (Py_ssize_t)buf->inpos;
  206. break;
  207. case MBERR_INTERNAL:
  208. PyErr_SetString(PyExc_RuntimeError,
  209. "internal codec error");
  210. return -1;
  211. default:
  212. PyErr_SetString(PyExc_RuntimeError,
  213. "unknown runtime error");
  214. return -1;
  215. }
  216. }
  217. if (errors == ERROR_REPLACE) {
  218. PyObject *replchar;
  219. Py_ssize_t r;
  220. Py_ssize_t inpos;
  221. int kind;
  222. const void *data;
  223. replchar = PyUnicode_FromOrdinal('?');
  224. if (replchar == NULL)
  225. goto errorexit;
  226. kind = PyUnicode_KIND(replchar);
  227. data = PyUnicode_DATA(replchar);
  228. inpos = 0;
  229. for (;;) {
  230. Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
  231. r = codec->encode(state, codec,
  232. kind, data, &inpos, 1,
  233. &buf->outbuf, outleft, 0);
  234. if (r == MBERR_TOOSMALL) {
  235. REQUIRE_ENCODEBUFFER(buf, -1);
  236. continue;
  237. }
  238. else
  239. break;
  240. }
  241. Py_DECREF(replchar);
  242. if (r != 0) {
  243. REQUIRE_ENCODEBUFFER(buf, 1);
  244. *buf->outbuf++ = '?';
  245. }
  246. }
  247. if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
  248. buf->inpos += esize;
  249. return 0;
  250. }
  251. start = (Py_ssize_t)buf->inpos;
  252. end = start + esize;
  253. /* use cached exception object if available */
  254. if (buf->excobj == NULL) {
  255. buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
  256. "sOnns",
  257. codec->encoding, buf->inobj,
  258. start, end, reason);
  259. if (buf->excobj == NULL)
  260. goto errorexit;
  261. }
  262. else
  263. if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
  264. PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
  265. PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
  266. goto errorexit;
  267. if (errors == ERROR_STRICT) {
  268. PyCodec_StrictErrors(buf->excobj);
  269. goto errorexit;
  270. }
  271. retobj = call_error_callback(errors, buf->excobj);
  272. if (retobj == NULL)
  273. goto errorexit;
  274. if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
  275. (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
  276. !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
  277. PyErr_SetString(PyExc_TypeError,
  278. "encoding error handler must return "
  279. "(str, int) tuple");
  280. goto errorexit;
  281. }
  282. if (PyUnicode_Check(tobj)) {
  283. Py_ssize_t inpos;
  284. retstr = multibytecodec_encode(codec, state, tobj,
  285. &inpos, ERROR_STRICT,
  286. MBENC_FLUSH);
  287. if (retstr == NULL)
  288. goto errorexit;
  289. }
  290. else {
  291. retstr = Py_NewRef(tobj);
  292. }
  293. assert(PyBytes_Check(retstr));
  294. retstrsize = PyBytes_GET_SIZE(retstr);
  295. if (retstrsize > 0) {
  296. REQUIRE_ENCODEBUFFER(buf, retstrsize);
  297. memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
  298. buf->outbuf += retstrsize;
  299. }
  300. newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
  301. if (newpos < 0 && !PyErr_Occurred())
  302. newpos += (Py_ssize_t)buf->inlen;
  303. if (newpos < 0 || newpos > buf->inlen) {
  304. PyErr_Clear();
  305. PyErr_Format(PyExc_IndexError,
  306. "position %zd from error handler out of bounds",
  307. newpos);
  308. goto errorexit;
  309. }
  310. buf->inpos = newpos;
  311. Py_DECREF(retobj);
  312. Py_DECREF(retstr);
  313. return 0;
  314. errorexit:
  315. Py_XDECREF(retobj);
  316. Py_XDECREF(retstr);
  317. return -1;
  318. }
  319. static int
  320. multibytecodec_decerror(const MultibyteCodec *codec,
  321. MultibyteCodec_State *state,
  322. MultibyteDecodeBuffer *buf,
  323. PyObject *errors, Py_ssize_t e)
  324. {
  325. PyObject *retobj = NULL, *retuni = NULL;
  326. Py_ssize_t newpos;
  327. const char *reason;
  328. Py_ssize_t esize, start, end;
  329. if (e > 0) {
  330. reason = "illegal multibyte sequence";
  331. esize = e;
  332. }
  333. else {
  334. switch (e) {
  335. case MBERR_TOOSMALL:
  336. return 0; /* retry it */
  337. case MBERR_TOOFEW:
  338. reason = "incomplete multibyte sequence";
  339. esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
  340. break;
  341. case MBERR_INTERNAL:
  342. PyErr_SetString(PyExc_RuntimeError,
  343. "internal codec error");
  344. return -1;
  345. case MBERR_EXCEPTION:
  346. return -1;
  347. default:
  348. PyErr_SetString(PyExc_RuntimeError,
  349. "unknown runtime error");
  350. return -1;
  351. }
  352. }
  353. if (errors == ERROR_REPLACE) {
  354. if (_PyUnicodeWriter_WriteChar(&buf->writer,
  355. Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
  356. goto errorexit;
  357. }
  358. if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
  359. buf->inbuf += esize;
  360. return 0;
  361. }
  362. start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
  363. end = start + esize;
  364. /* use cached exception object if available */
  365. if (buf->excobj == NULL) {
  366. buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
  367. (const char *)buf->inbuf_top,
  368. (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
  369. start, end, reason);
  370. if (buf->excobj == NULL)
  371. goto errorexit;
  372. }
  373. else
  374. if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
  375. PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
  376. PyUnicodeDecodeError_SetReason(buf->excobj, reason))
  377. goto errorexit;
  378. if (errors == ERROR_STRICT) {
  379. PyCodec_StrictErrors(buf->excobj);
  380. goto errorexit;
  381. }
  382. retobj = call_error_callback(errors, buf->excobj);
  383. if (retobj == NULL)
  384. goto errorexit;
  385. if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
  386. !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
  387. !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
  388. PyErr_SetString(PyExc_TypeError,
  389. "decoding error handler must return "
  390. "(str, int) tuple");
  391. goto errorexit;
  392. }
  393. if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
  394. goto errorexit;
  395. newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
  396. if (newpos < 0 && !PyErr_Occurred())
  397. newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
  398. if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
  399. PyErr_Clear();
  400. PyErr_Format(PyExc_IndexError,
  401. "position %zd from error handler out of bounds",
  402. newpos);
  403. goto errorexit;
  404. }
  405. buf->inbuf = buf->inbuf_top + newpos;
  406. Py_DECREF(retobj);
  407. return 0;
  408. errorexit:
  409. Py_XDECREF(retobj);
  410. return -1;
  411. }
  412. static PyObject *
  413. multibytecodec_encode(const MultibyteCodec *codec,
  414. MultibyteCodec_State *state,
  415. PyObject *text, Py_ssize_t *inpos_t,
  416. PyObject *errors, int flags)
  417. {
  418. MultibyteEncodeBuffer buf;
  419. Py_ssize_t finalsize, r = 0;
  420. Py_ssize_t datalen;
  421. int kind;
  422. const void *data;
  423. if (PyUnicode_READY(text) < 0)
  424. return NULL;
  425. datalen = PyUnicode_GET_LENGTH(text);
  426. if (datalen == 0 && !(flags & MBENC_RESET))
  427. return PyBytes_FromStringAndSize(NULL, 0);
  428. buf.excobj = NULL;
  429. buf.outobj = NULL;
  430. buf.inobj = text; /* borrowed reference */
  431. buf.inpos = 0;
  432. buf.inlen = datalen;
  433. kind = PyUnicode_KIND(buf.inobj);
  434. data = PyUnicode_DATA(buf.inobj);
  435. if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
  436. PyErr_NoMemory();
  437. goto errorexit;
  438. }
  439. buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
  440. if (buf.outobj == NULL)
  441. goto errorexit;
  442. buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
  443. buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
  444. while (buf.inpos < buf.inlen) {
  445. /* we don't reuse inleft and outleft here.
  446. * error callbacks can relocate the cursor anywhere on buffer*/
  447. Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
  448. r = codec->encode(state, codec,
  449. kind, data,
  450. &buf.inpos, buf.inlen,
  451. &buf.outbuf, outleft, flags);
  452. if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
  453. break;
  454. else if (multibytecodec_encerror(codec, state, &buf, errors,r))
  455. goto errorexit;
  456. else if (r == MBERR_TOOFEW)
  457. break;
  458. }
  459. if (codec->encreset != NULL && (flags & MBENC_RESET))
  460. for (;;) {
  461. Py_ssize_t outleft;
  462. outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
  463. r = codec->encreset(state, codec, &buf.outbuf,
  464. outleft);
  465. if (r == 0)
  466. break;
  467. else if (multibytecodec_encerror(codec, state,
  468. &buf, errors, r))
  469. goto errorexit;
  470. }
  471. finalsize = (Py_ssize_t)((char *)buf.outbuf -
  472. PyBytes_AS_STRING(buf.outobj));
  473. if (finalsize != PyBytes_GET_SIZE(buf.outobj))
  474. if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
  475. goto errorexit;
  476. if (inpos_t)
  477. *inpos_t = buf.inpos;
  478. Py_XDECREF(buf.excobj);
  479. return buf.outobj;
  480. errorexit:
  481. Py_XDECREF(buf.excobj);
  482. Py_XDECREF(buf.outobj);
  483. return NULL;
  484. }
  485. /*[clinic input]
  486. _multibytecodec.MultibyteCodec.encode
  487. input: object
  488. errors: str(accept={str, NoneType}) = None
  489. Return an encoded string version of `input'.
  490. 'errors' may be given to set a different error handling scheme. Default is
  491. 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
  492. values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
  493. registered with codecs.register_error that can handle UnicodeEncodeErrors.
  494. [clinic start generated code]*/
  495. static PyObject *
  496. _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
  497. PyObject *input,
  498. const char *errors)
  499. /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
  500. {
  501. MultibyteCodec_State state;
  502. PyObject *errorcb, *r, *ucvt;
  503. Py_ssize_t datalen;
  504. if (PyUnicode_Check(input))
  505. ucvt = NULL;
  506. else {
  507. input = ucvt = PyObject_Str(input);
  508. if (input == NULL)
  509. return NULL;
  510. else if (!PyUnicode_Check(input)) {
  511. PyErr_SetString(PyExc_TypeError,
  512. "couldn't convert the object to unicode.");
  513. Py_DECREF(ucvt);
  514. return NULL;
  515. }
  516. }
  517. if (PyUnicode_READY(input) < 0) {
  518. Py_XDECREF(ucvt);
  519. return NULL;
  520. }
  521. datalen = PyUnicode_GET_LENGTH(input);
  522. errorcb = internal_error_callback(errors);
  523. if (errorcb == NULL) {
  524. Py_XDECREF(ucvt);
  525. return NULL;
  526. }
  527. if (self->codec->encinit != NULL &&
  528. self->codec->encinit(&state, self->codec) != 0)
  529. goto errorexit;
  530. r = multibytecodec_encode(self->codec, &state,
  531. input, NULL, errorcb,
  532. MBENC_FLUSH | MBENC_RESET);
  533. if (r == NULL)
  534. goto errorexit;
  535. ERROR_DECREF(errorcb);
  536. Py_XDECREF(ucvt);
  537. return make_tuple(r, datalen);
  538. errorexit:
  539. ERROR_DECREF(errorcb);
  540. Py_XDECREF(ucvt);
  541. return NULL;
  542. }
  543. /*[clinic input]
  544. _multibytecodec.MultibyteCodec.decode
  545. input: Py_buffer
  546. errors: str(accept={str, NoneType}) = None
  547. Decodes 'input'.
  548. 'errors' may be given to set a different error handling scheme. Default is
  549. 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
  550. values are 'ignore' and 'replace' as well as any other name registered with
  551. codecs.register_error that is able to handle UnicodeDecodeErrors."
  552. [clinic start generated code]*/
  553. static PyObject *
  554. _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
  555. Py_buffer *input,
  556. const char *errors)
  557. /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
  558. {
  559. MultibyteCodec_State state;
  560. MultibyteDecodeBuffer buf;
  561. PyObject *errorcb, *res;
  562. const char *data;
  563. Py_ssize_t datalen;
  564. data = input->buf;
  565. datalen = input->len;
  566. errorcb = internal_error_callback(errors);
  567. if (errorcb == NULL) {
  568. return NULL;
  569. }
  570. if (datalen == 0) {
  571. ERROR_DECREF(errorcb);
  572. return make_tuple(PyUnicode_New(0, 0), 0);
  573. }
  574. _PyUnicodeWriter_Init(&buf.writer);
  575. buf.writer.min_length = datalen;
  576. buf.excobj = NULL;
  577. buf.inbuf = buf.inbuf_top = (unsigned char *)data;
  578. buf.inbuf_end = buf.inbuf_top + datalen;
  579. if (self->codec->decinit != NULL &&
  580. self->codec->decinit(&state, self->codec) != 0)
  581. goto errorexit;
  582. while (buf.inbuf < buf.inbuf_end) {
  583. Py_ssize_t inleft, r;
  584. inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
  585. r = self->codec->decode(&state, self->codec,
  586. &buf.inbuf, inleft, &buf.writer);
  587. if (r == 0)
  588. break;
  589. else if (multibytecodec_decerror(self->codec, &state,
  590. &buf, errorcb, r))
  591. goto errorexit;
  592. }
  593. res = _PyUnicodeWriter_Finish(&buf.writer);
  594. if (res == NULL)
  595. goto errorexit;
  596. Py_XDECREF(buf.excobj);
  597. ERROR_DECREF(errorcb);
  598. return make_tuple(res, datalen);
  599. errorexit:
  600. ERROR_DECREF(errorcb);
  601. Py_XDECREF(buf.excobj);
  602. _PyUnicodeWriter_Dealloc(&buf.writer);
  603. return NULL;
  604. }
/* Method table of the MultibyteCodec type: the encode and decode entries
 * (METHODDEF macros, presumably provided by the included
 * clinic/multibytecodec.c.h) followed by the terminating sentinel. */
static struct PyMethodDef multibytecodec_methods[] = {
    _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
    {NULL, NULL},
};
/* tp_clear: drop the reference held in self->cjk_module.  Always returns 0. */
static int
multibytecodec_clear(MultibyteCodecObject *self)
{
    Py_CLEAR(self->cjk_module);
    return 0;
}
/* tp_traverse: report the objects this instance references to the GC,
 * namely its type and self->cjk_module. */
static int
multibytecodec_traverse(MultibyteCodecObject *self, visitproc visit, void *arg)
{
    /* The type is visited too: dealloc releases a reference to it. */
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->cjk_module);
    return 0;
}
  623. static void
  624. multibytecodec_dealloc(MultibyteCodecObject *self)
  625. {
  626. PyObject_GC_UnTrack(self);
  627. PyTypeObject *tp = Py_TYPE(self);
  628. (void)multibytecodec_clear(self);
  629. tp->tp_free(self);
  630. Py_DECREF(tp);
  631. }
/* Slot table of the MultibyteCodec type: deallocation, generic attribute
 * lookup, the method table, and the GC traverse/clear hooks. */
static PyType_Slot multibytecodec_slots[] = {
    {Py_tp_dealloc, multibytecodec_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_methods, multibytecodec_methods},
    {Py_tp_traverse, multibytecodec_traverse},
    {Py_tp_clear, multibytecodec_clear},
    {0, NULL},
};
/* Type spec of MultibyteCodec: a GC-enabled, immutable type that cannot be
 * instantiated from Python (Py_TPFLAGS_DISALLOW_INSTANTIATION). */
static PyType_Spec multibytecodec_spec = {
    .name = MODULE_NAME ".MultibyteCodec",
    .basicsize = sizeof(MultibyteCodecObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = multibytecodec_slots,
};
  647. /**
  648. * Utility functions for stateful codec mechanism
  649. */
/* Cast an object pointer to the stateful decoder / encoder context type.
 * No type check is performed. */
#define STATEFUL_DCTX(o) ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o) ((MultibyteStatefulEncoderContext *)(o))
  652. static PyObject *
  653. encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
  654. PyObject *unistr, int final)
  655. {
  656. PyObject *ucvt, *r = NULL;
  657. PyObject *inbuf = NULL;
  658. Py_ssize_t inpos, datalen;
  659. PyObject *origpending = NULL;
  660. if (PyUnicode_Check(unistr))
  661. ucvt = NULL;
  662. else {
  663. unistr = ucvt = PyObject_Str(unistr);
  664. if (unistr == NULL)
  665. return NULL;
  666. else if (!PyUnicode_Check(unistr)) {
  667. PyErr_SetString(PyExc_TypeError,
  668. "couldn't convert the object to str.");
  669. Py_DECREF(ucvt);
  670. return NULL;
  671. }
  672. }
  673. if (ctx->pending) {
  674. PyObject *inbuf_tmp;
  675. origpending = Py_NewRef(ctx->pending);
  676. inbuf_tmp = Py_NewRef(ctx->pending);
  677. PyUnicode_Append(&inbuf_tmp, unistr);
  678. if (inbuf_tmp == NULL)
  679. goto errorexit;
  680. Py_CLEAR(ctx->pending);
  681. inbuf = inbuf_tmp;
  682. }
  683. else {
  684. origpending = NULL;
  685. inbuf = Py_NewRef(unistr);
  686. }
  687. if (PyUnicode_READY(inbuf) < 0)
  688. goto errorexit;
  689. inpos = 0;
  690. datalen = PyUnicode_GET_LENGTH(inbuf);
  691. r = multibytecodec_encode(ctx->codec, &ctx->state,
  692. inbuf, &inpos,
  693. ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
  694. if (r == NULL) {
  695. /* recover the original pending buffer */
  696. Py_XSETREF(ctx->pending, origpending);
  697. origpending = NULL;
  698. goto errorexit;
  699. }
  700. Py_XDECREF(origpending);
  701. if (inpos < datalen) {
  702. if (datalen - inpos > MAXENCPENDING) {
  703. /* normal codecs can't reach here */
  704. PyErr_SetString(PyExc_UnicodeError,
  705. "pending buffer overflow");
  706. goto errorexit;
  707. }
  708. ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
  709. if (ctx->pending == NULL) {
  710. /* normal codecs can't reach here */
  711. goto errorexit;
  712. }
  713. }
  714. Py_DECREF(inbuf);
  715. Py_XDECREF(ucvt);
  716. return r;
  717. errorexit:
  718. Py_XDECREF(r);
  719. Py_XDECREF(ucvt);
  720. Py_XDECREF(origpending);
  721. Py_XDECREF(inbuf);
  722. return NULL;
  723. }
  724. static int
  725. decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
  726. MultibyteDecodeBuffer *buf)
  727. {
  728. Py_ssize_t npendings;
  729. npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
  730. if (npendings + ctx->pendingsize > MAXDECPENDING ||
  731. npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
  732. PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
  733. return -1;
  734. }
  735. memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
  736. ctx->pendingsize += npendings;
  737. return 0;
  738. }
  739. static int
  740. decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
  741. Py_ssize_t size)
  742. {
  743. buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
  744. buf->inbuf_end = buf->inbuf_top + size;
  745. buf->writer.min_length += size;
  746. return 0;
  747. }
  748. static int
  749. decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
  750. MultibyteDecodeBuffer *buf)
  751. {
  752. while (buf->inbuf < buf->inbuf_end) {
  753. Py_ssize_t inleft;
  754. Py_ssize_t r;
  755. inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
  756. r = ctx->codec->decode(&ctx->state, ctx->codec,
  757. &buf->inbuf, inleft, &buf->writer);
  758. if (r == 0 || r == MBERR_TOOFEW)
  759. break;
  760. else if (multibytecodec_decerror(ctx->codec, &ctx->state,
  761. buf, ctx->errors, r))
  762. return -1;
  763. }
  764. return 0;
  765. }
  766. /*[clinic input]
  767. _multibytecodec.MultibyteIncrementalEncoder.encode
  768. input: object
  769. final: bool = False
  770. [clinic start generated code]*/
/* Implementation of MultibyteIncrementalEncoder.encode(input, final=False):
   forwards to encoder_encode_stateful() with this object viewed as a
   stateful encoder context (STATEFUL_ECTX) and returns its result. */
static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
                                                        PyObject *input,
                                                        int final)
/*[clinic end generated code: output=123361b6c505e2c1 input=bd5f7d40d43e99b0]*/
{
    return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
}
  779. /*[clinic input]
  780. _multibytecodec.MultibyteIncrementalEncoder.getstate
  781. [clinic start generated code]*/
  782. static PyObject *
  783. _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
  784. /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
  785. {
  786. /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
  787. for UTF-8 encoded buffer (each character can use up to 4
  788. bytes), and required bytes for MultibyteCodec_State.c. A byte
  789. array is used to avoid different compilers generating different
  790. values for the same state, e.g. as a result of struct padding.
  791. */
  792. unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
  793. Py_ssize_t statesize;
  794. const char *pendingbuffer = NULL;
  795. Py_ssize_t pendingsize;
  796. if (self->pending != NULL) {
  797. pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
  798. if (pendingbuffer == NULL) {
  799. return NULL;
  800. }
  801. if (pendingsize > MAXENCPENDING*4) {
  802. PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
  803. return NULL;
  804. }
  805. statebytes[0] = (unsigned char)pendingsize;
  806. memcpy(statebytes + 1, pendingbuffer, pendingsize);
  807. statesize = 1 + pendingsize;
  808. } else {
  809. statebytes[0] = 0;
  810. statesize = 1;
  811. }
  812. memcpy(statebytes+statesize, self->state.c,
  813. sizeof(self->state.c));
  814. statesize += sizeof(self->state.c);
  815. return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
  816. 1 /* little-endian */ ,
  817. 0 /* unsigned */ );
  818. }
  819. /*[clinic input]
  820. _multibytecodec.MultibyteIncrementalEncoder.setstate
  821. state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
  822. /
  823. [clinic start generated code]*/
  824. static PyObject *
  825. _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
  826. PyLongObject *statelong)
  827. /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
  828. {
  829. PyObject *pending = NULL;
  830. unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
  831. if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
  832. 1 /* little-endian */ ,
  833. 0 /* unsigned */ ) < 0) {
  834. goto errorexit;
  835. }
  836. if (statebytes[0] > MAXENCPENDING*4) {
  837. PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
  838. return NULL;
  839. }
  840. pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
  841. statebytes[0], "strict");
  842. if (pending == NULL) {
  843. goto errorexit;
  844. }
  845. Py_XSETREF(self->pending, pending);
  846. memcpy(self->state.c, statebytes+1+statebytes[0],
  847. sizeof(self->state.c));
  848. Py_RETURN_NONE;
  849. errorexit:
  850. Py_XDECREF(pending);
  851. return NULL;
  852. }
  853. /*[clinic input]
  854. _multibytecodec.MultibyteIncrementalEncoder.reset
  855. [clinic start generated code]*/
  856. static PyObject *
  857. _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
  858. /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
  859. {
  860. /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
  861. unsigned char buffer[4], *outbuf;
  862. Py_ssize_t r;
  863. if (self->codec->encreset != NULL) {
  864. outbuf = buffer;
  865. r = self->codec->encreset(&self->state, self->codec,
  866. &outbuf, sizeof(buffer));
  867. if (r != 0)
  868. return NULL;
  869. }
  870. Py_CLEAR(self->pending);
  871. Py_RETURN_NONE;
  872. }
/* Method table of MultibyteIncrementalEncoder (encode, getstate, setstate,
   reset), built from the METHODDEF macros of the clinic-declared methods and
   terminated by a NULL sentinel entry. */
static struct PyMethodDef mbiencoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
    {NULL, NULL},
};
  880. static PyObject *
  881. mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  882. {
  883. MultibyteIncrementalEncoderObject *self;
  884. PyObject *codec = NULL;
  885. char *errors = NULL;
  886. if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
  887. incnewkwarglist, &errors))
  888. return NULL;
  889. self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
  890. if (self == NULL)
  891. return NULL;
  892. codec = PyObject_GetAttrString((PyObject *)type, "codec");
  893. if (codec == NULL)
  894. goto errorexit;
  895. module_state *state = find_state_by_def(type);
  896. if (!MultibyteCodec_Check(state, codec)) {
  897. PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
  898. goto errorexit;
  899. }
  900. self->codec = ((MultibyteCodecObject *)codec)->codec;
  901. self->pending = NULL;
  902. self->errors = internal_error_callback(errors);
  903. if (self->errors == NULL)
  904. goto errorexit;
  905. if (self->codec->encinit != NULL &&
  906. self->codec->encinit(&self->state, self->codec) != 0)
  907. goto errorexit;
  908. Py_DECREF(codec);
  909. return (PyObject *)self;
  910. errorexit:
  911. Py_XDECREF(self);
  912. Py_XDECREF(codec);
  913. return NULL;
  914. }
/* tp_init for MultibyteIncrementalEncoder: ignores its arguments and always
   succeeds.  The instance fields are set up in mbiencoder_new(). */
static int
mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
  920. static int
  921. mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
  922. visitproc visit, void *arg)
  923. {
  924. if (ERROR_ISCUSTOM(self->errors))
  925. Py_VISIT(self->errors);
  926. return 0;
  927. }
  928. static void
  929. mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
  930. {
  931. PyTypeObject *tp = Py_TYPE(self);
  932. PyObject_GC_UnTrack(self);
  933. ERROR_DECREF(self->errors);
  934. Py_CLEAR(self->pending);
  935. tp->tp_free(self);
  936. Py_DECREF(tp);
  937. }
/* Type slots for MultibyteIncrementalEncoder, referenced by encoder_spec. */
static PyType_Slot encoder_slots[] = {
    {Py_tp_dealloc, mbiencoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbiencoder_traverse},
    {Py_tp_methods, mbiencoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbiencoder_init},
    {Py_tp_new, mbiencoder_new},
    {0, NULL},
};
/* Type specification for MultibyteIncrementalEncoder: a GC-enabled,
   subclassable, immutable type using the slots in encoder_slots. */
static PyType_Spec encoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalEncoder",
    .basicsize = sizeof(MultibyteIncrementalEncoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = encoder_slots,
};
  955. /*[clinic input]
  956. _multibytecodec.MultibyteIncrementalDecoder.decode
  957. input: Py_buffer
  958. final: bool = False
  959. [clinic start generated code]*/
/* Implementation of MultibyteIncrementalDecoder.decode(input, final=False).

   Decodes `input`, prefixed by any bytes left pending by an earlier call.
   Bytes that remain undecoded afterwards are stored as the new pending
   buffer; when `final` is true they are first reported to the error handler
   as MBERR_TOOFEW.  Returns a new str, or NULL with an exception set. */
static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
                                                        Py_buffer *input,
                                                        int final)
/*[clinic end generated code: output=b9b9090e8a9ce2ba input=8795fbb20860027a]*/
{
    MultibyteDecodeBuffer buf;
    char *data, *wdata = NULL;
    Py_ssize_t wsize, size, origpending;
    PyObject *res;

    data = input->buf;
    size = input->len;

    _PyUnicodeWriter_Init(&buf.writer);
    buf.excobj = NULL;

    /* remember the pending length so it can be restored on failure below */
    origpending = self->pendingsize;

    if (self->pendingsize == 0) {
        /* nothing pending: decode straight from the caller's buffer */
        wsize = size;
        wdata = data;
    }
    else {
        /* build a temporary buffer holding pending bytes + new input */
        if (size > PY_SSIZE_T_MAX - self->pendingsize) {
            PyErr_NoMemory();
            goto errorexit;
        }
        wsize = size + self->pendingsize;
        wdata = PyMem_Malloc(wsize);
        if (wdata == NULL) {
            PyErr_NoMemory();
            goto errorexit;
        }
        memcpy(wdata, self->pending, self->pendingsize);
        memcpy(wdata + self->pendingsize, data, size);
        /* the pending bytes now live in wdata */
        self->pendingsize = 0;
    }

    if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
        goto errorexit;

    if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
        goto errorexit;

    /* on the final call, leftover input is an incomplete sequence */
    if (final && buf.inbuf < buf.inbuf_end) {
        if (multibytecodec_decerror(self->codec, &self->state,
                        &buf, self->errors, MBERR_TOOFEW)) {
            /* recover the original pending buffer */
            memcpy(self->pending, wdata, origpending);
            self->pendingsize = origpending;
            goto errorexit;
        }
    }

    if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
        if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
            goto errorexit;
    }

    res = _PyUnicodeWriter_Finish(&buf.writer);
    if (res == NULL)
        goto errorexit;

    /* wdata is only owned by this function when it was allocated above */
    if (wdata != data)
        PyMem_Free(wdata);
    Py_XDECREF(buf.excobj);
    return res;

errorexit:
    if (wdata != NULL && wdata != data)
        PyMem_Free(wdata);
    Py_XDECREF(buf.excobj);
    _PyUnicodeWriter_Dealloc(&buf.writer);
    return NULL;
}
  1025. /*[clinic input]
  1026. _multibytecodec.MultibyteIncrementalDecoder.getstate
  1027. [clinic start generated code]*/
  1028. static PyObject *
  1029. _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
  1030. /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
  1031. {
  1032. PyObject *buffer;
  1033. PyObject *statelong;
  1034. buffer = PyBytes_FromStringAndSize((const char *)self->pending,
  1035. self->pendingsize);
  1036. if (buffer == NULL) {
  1037. return NULL;
  1038. }
  1039. statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
  1040. sizeof(self->state.c),
  1041. 1 /* little-endian */ ,
  1042. 0 /* unsigned */ );
  1043. if (statelong == NULL) {
  1044. Py_DECREF(buffer);
  1045. return NULL;
  1046. }
  1047. return Py_BuildValue("NN", buffer, statelong);
  1048. }
  1049. /*[clinic input]
  1050. _multibytecodec.MultibyteIncrementalDecoder.setstate
  1051. state: object(subclass_of='&PyTuple_Type')
  1052. /
  1053. [clinic start generated code]*/
  1054. static PyObject *
  1055. _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
  1056. PyObject *state)
  1057. /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
  1058. {
  1059. PyObject *buffer;
  1060. PyLongObject *statelong;
  1061. Py_ssize_t buffersize;
  1062. const char *bufferstr;
  1063. unsigned char statebytes[8];
  1064. if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
  1065. &buffer, &PyLong_Type, &statelong))
  1066. {
  1067. return NULL;
  1068. }
  1069. if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
  1070. 1 /* little-endian */ ,
  1071. 0 /* unsigned */ ) < 0) {
  1072. return NULL;
  1073. }
  1074. buffersize = PyBytes_Size(buffer);
  1075. if (buffersize == -1) {
  1076. return NULL;
  1077. }
  1078. if (buffersize > MAXDECPENDING) {
  1079. PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
  1080. return NULL;
  1081. }
  1082. bufferstr = PyBytes_AsString(buffer);
  1083. if (bufferstr == NULL) {
  1084. return NULL;
  1085. }
  1086. self->pendingsize = buffersize;
  1087. memcpy(self->pending, bufferstr, self->pendingsize);
  1088. memcpy(self->state.c, statebytes, sizeof(statebytes));
  1089. Py_RETURN_NONE;
  1090. }
  1091. /*[clinic input]
  1092. _multibytecodec.MultibyteIncrementalDecoder.reset
  1093. [clinic start generated code]*/
  1094. static PyObject *
  1095. _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
  1096. /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
  1097. {
  1098. if (self->codec->decreset != NULL &&
  1099. self->codec->decreset(&self->state, self->codec) != 0)
  1100. return NULL;
  1101. self->pendingsize = 0;
  1102. Py_RETURN_NONE;
  1103. }
/* Method table of MultibyteIncrementalDecoder (decode, getstate, setstate,
   reset), built from the METHODDEF macros of the clinic-declared methods and
   terminated by a NULL sentinel entry. */
static struct PyMethodDef mbidecoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
    {NULL, NULL},
};
  1111. static PyObject *
  1112. mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  1113. {
  1114. MultibyteIncrementalDecoderObject *self;
  1115. PyObject *codec = NULL;
  1116. char *errors = NULL;
  1117. if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
  1118. incnewkwarglist, &errors))
  1119. return NULL;
  1120. self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
  1121. if (self == NULL)
  1122. return NULL;
  1123. codec = PyObject_GetAttrString((PyObject *)type, "codec");
  1124. if (codec == NULL)
  1125. goto errorexit;
  1126. module_state *state = find_state_by_def(type);
  1127. if (!MultibyteCodec_Check(state, codec)) {
  1128. PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
  1129. goto errorexit;
  1130. }
  1131. self->codec = ((MultibyteCodecObject *)codec)->codec;
  1132. self->pendingsize = 0;
  1133. self->errors = internal_error_callback(errors);
  1134. if (self->errors == NULL)
  1135. goto errorexit;
  1136. if (self->codec->decinit != NULL &&
  1137. self->codec->decinit(&self->state, self->codec) != 0)
  1138. goto errorexit;
  1139. Py_DECREF(codec);
  1140. return (PyObject *)self;
  1141. errorexit:
  1142. Py_XDECREF(self);
  1143. Py_XDECREF(codec);
  1144. return NULL;
  1145. }
/* tp_init for MultibyteIncrementalDecoder: ignores its arguments and always
   succeeds.  The instance fields are set up in mbidecoder_new(). */
static int
mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
  1151. static int
  1152. mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
  1153. visitproc visit, void *arg)
  1154. {
  1155. if (ERROR_ISCUSTOM(self->errors))
  1156. Py_VISIT(self->errors);
  1157. return 0;
  1158. }
  1159. static void
  1160. mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
  1161. {
  1162. PyTypeObject *tp = Py_TYPE(self);
  1163. PyObject_GC_UnTrack(self);
  1164. ERROR_DECREF(self->errors);
  1165. tp->tp_free(self);
  1166. Py_DECREF(tp);
  1167. }
/* Type slots for MultibyteIncrementalDecoder, referenced by decoder_spec. */
static PyType_Slot decoder_slots[] = {
    {Py_tp_dealloc, mbidecoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbidecoder_traverse},
    {Py_tp_methods, mbidecoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbidecoder_init},
    {Py_tp_new, mbidecoder_new},
    {0, NULL},
};
/* Type specification for MultibyteIncrementalDecoder: a GC-enabled,
   subclassable, immutable type using the slots in decoder_slots. */
static PyType_Spec decoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalDecoder",
    .basicsize = sizeof(MultibyteIncrementalDecoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = decoder_slots,
};
  1185. static PyObject *
  1186. mbstreamreader_iread(MultibyteStreamReaderObject *self,
  1187. const char *method, Py_ssize_t sizehint)
  1188. {
  1189. MultibyteDecodeBuffer buf;
  1190. PyObject *cres, *res;
  1191. Py_ssize_t rsize;
  1192. if (sizehint == 0)
  1193. return PyUnicode_New(0, 0);
  1194. _PyUnicodeWriter_Init(&buf.writer);
  1195. buf.excobj = NULL;
  1196. cres = NULL;
  1197. for (;;) {
  1198. int endoffile;
  1199. if (sizehint < 0)
  1200. cres = PyObject_CallMethod(self->stream,
  1201. method, NULL);
  1202. else
  1203. cres = PyObject_CallMethod(self->stream,
  1204. method, "i", sizehint);
  1205. if (cres == NULL)
  1206. goto errorexit;
  1207. if (!PyBytes_Check(cres)) {
  1208. PyErr_Format(PyExc_TypeError,
  1209. "stream function returned a "
  1210. "non-bytes object (%.100s)",
  1211. Py_TYPE(cres)->tp_name);
  1212. goto errorexit;
  1213. }
  1214. endoffile = (PyBytes_GET_SIZE(cres) == 0);
  1215. if (self->pendingsize > 0) {
  1216. PyObject *ctr;
  1217. char *ctrdata;
  1218. if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
  1219. PyErr_NoMemory();
  1220. goto errorexit;
  1221. }
  1222. rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
  1223. ctr = PyBytes_FromStringAndSize(NULL, rsize);
  1224. if (ctr == NULL)
  1225. goto errorexit;
  1226. ctrdata = PyBytes_AS_STRING(ctr);
  1227. memcpy(ctrdata, self->pending, self->pendingsize);
  1228. memcpy(ctrdata + self->pendingsize,
  1229. PyBytes_AS_STRING(cres),
  1230. PyBytes_GET_SIZE(cres));
  1231. Py_SETREF(cres, ctr);
  1232. self->pendingsize = 0;
  1233. }
  1234. rsize = PyBytes_GET_SIZE(cres);
  1235. if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
  1236. rsize) != 0)
  1237. goto errorexit;
  1238. if (rsize > 0 && decoder_feed_buffer(
  1239. (MultibyteStatefulDecoderContext *)self, &buf))
  1240. goto errorexit;
  1241. if (endoffile || sizehint < 0) {
  1242. if (buf.inbuf < buf.inbuf_end &&
  1243. multibytecodec_decerror(self->codec, &self->state,
  1244. &buf, self->errors, MBERR_TOOFEW))
  1245. goto errorexit;
  1246. }
  1247. if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
  1248. if (decoder_append_pending(STATEFUL_DCTX(self),
  1249. &buf) != 0)
  1250. goto errorexit;
  1251. }
  1252. Py_SETREF(cres, NULL);
  1253. if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
  1254. break;
  1255. sizehint = 1; /* read 1 more byte and retry */
  1256. }
  1257. res = _PyUnicodeWriter_Finish(&buf.writer);
  1258. if (res == NULL)
  1259. goto errorexit;
  1260. Py_XDECREF(cres);
  1261. Py_XDECREF(buf.excobj);
  1262. return res;
  1263. errorexit:
  1264. Py_XDECREF(cres);
  1265. Py_XDECREF(buf.excobj);
  1266. _PyUnicodeWriter_Dealloc(&buf.writer);
  1267. return NULL;
  1268. }
  1269. /*[clinic input]
  1270. _multibytecodec.MultibyteStreamReader.read
  1271. sizeobj: object = None
  1272. /
  1273. [clinic start generated code]*/
  1274. static PyObject *
  1275. _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
  1276. PyObject *sizeobj)
  1277. /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
  1278. {
  1279. Py_ssize_t size;
  1280. if (sizeobj == Py_None)
  1281. size = -1;
  1282. else if (PyLong_Check(sizeobj))
  1283. size = PyLong_AsSsize_t(sizeobj);
  1284. else {
  1285. PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
  1286. return NULL;
  1287. }
  1288. if (size == -1 && PyErr_Occurred())
  1289. return NULL;
  1290. return mbstreamreader_iread(self, "read", size);
  1291. }
  1292. /*[clinic input]
  1293. _multibytecodec.MultibyteStreamReader.readline
  1294. sizeobj: object = None
  1295. /
  1296. [clinic start generated code]*/
  1297. static PyObject *
  1298. _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
  1299. PyObject *sizeobj)
  1300. /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
  1301. {
  1302. Py_ssize_t size;
  1303. if (sizeobj == Py_None)
  1304. size = -1;
  1305. else if (PyLong_Check(sizeobj))
  1306. size = PyLong_AsSsize_t(sizeobj);
  1307. else {
  1308. PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
  1309. return NULL;
  1310. }
  1311. if (size == -1 && PyErr_Occurred())
  1312. return NULL;
  1313. return mbstreamreader_iread(self, "readline", size);
  1314. }
  1315. /*[clinic input]
  1316. _multibytecodec.MultibyteStreamReader.readlines
  1317. sizehintobj: object = None
  1318. /
  1319. [clinic start generated code]*/
  1320. static PyObject *
  1321. _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
  1322. PyObject *sizehintobj)
  1323. /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
  1324. {
  1325. PyObject *r, *sr;
  1326. Py_ssize_t sizehint;
  1327. if (sizehintobj == Py_None)
  1328. sizehint = -1;
  1329. else if (PyLong_Check(sizehintobj))
  1330. sizehint = PyLong_AsSsize_t(sizehintobj);
  1331. else {
  1332. PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
  1333. return NULL;
  1334. }
  1335. if (sizehint == -1 && PyErr_Occurred())
  1336. return NULL;
  1337. r = mbstreamreader_iread(self, "read", sizehint);
  1338. if (r == NULL)
  1339. return NULL;
  1340. sr = PyUnicode_Splitlines(r, 1);
  1341. Py_DECREF(r);
  1342. return sr;
  1343. }
  1344. /*[clinic input]
  1345. _multibytecodec.MultibyteStreamReader.reset
  1346. [clinic start generated code]*/
  1347. static PyObject *
  1348. _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
  1349. /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
  1350. {
  1351. if (self->codec->decreset != NULL &&
  1352. self->codec->decreset(&self->state, self->codec) != 0)
  1353. return NULL;
  1354. self->pendingsize = 0;
  1355. Py_RETURN_NONE;
  1356. }
/* Method table of MultibyteStreamReader (read, readline, readlines, reset),
   built from the METHODDEF macros of the clinic-declared methods and
   terminated by a NULL sentinel entry. */
static struct PyMethodDef mbstreamreader_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
    {NULL, NULL},
};
/* Member table of MultibyteStreamReader: exposes the wrapped stream object
   as the read-only attribute "stream". */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY, NULL},
    {NULL,}
};
  1370. static PyObject *
  1371. mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  1372. {
  1373. MultibyteStreamReaderObject *self;
  1374. PyObject *stream, *codec = NULL;
  1375. char *errors = NULL;
  1376. if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
  1377. streamkwarglist, &stream, &errors))
  1378. return NULL;
  1379. self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
  1380. if (self == NULL)
  1381. return NULL;
  1382. codec = PyObject_GetAttrString((PyObject *)type, "codec");
  1383. if (codec == NULL)
  1384. goto errorexit;
  1385. module_state *state = find_state_by_def(type);
  1386. if (!MultibyteCodec_Check(state, codec)) {
  1387. PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
  1388. goto errorexit;
  1389. }
  1390. self->codec = ((MultibyteCodecObject *)codec)->codec;
  1391. self->stream = Py_NewRef(stream);
  1392. self->pendingsize = 0;
  1393. self->errors = internal_error_callback(errors);
  1394. if (self->errors == NULL)
  1395. goto errorexit;
  1396. if (self->codec->decinit != NULL &&
  1397. self->codec->decinit(&self->state, self->codec) != 0)
  1398. goto errorexit;
  1399. Py_DECREF(codec);
  1400. return (PyObject *)self;
  1401. errorexit:
  1402. Py_XDECREF(self);
  1403. Py_XDECREF(codec);
  1404. return NULL;
  1405. }
/* tp_init for MultibyteStreamReader: ignores its arguments and always
   succeeds.  The instance fields are set up in mbstreamreader_new(). */
static int
mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
  1411. static int
  1412. mbstreamreader_traverse(MultibyteStreamReaderObject *self,
  1413. visitproc visit, void *arg)
  1414. {
  1415. if (ERROR_ISCUSTOM(self->errors))
  1416. Py_VISIT(self->errors);
  1417. Py_VISIT(self->stream);
  1418. return 0;
  1419. }
  1420. static void
  1421. mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
  1422. {
  1423. PyTypeObject *tp = Py_TYPE(self);
  1424. PyObject_GC_UnTrack(self);
  1425. ERROR_DECREF(self->errors);
  1426. Py_XDECREF(self->stream);
  1427. tp->tp_free(self);
  1428. Py_DECREF(tp);
  1429. }
/* Type slots for MultibyteStreamReader, referenced by reader_spec. */
static PyType_Slot reader_slots[] = {
    {Py_tp_dealloc, mbstreamreader_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbstreamreader_traverse},
    {Py_tp_methods, mbstreamreader_methods},
    {Py_tp_members, mbstreamreader_members},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbstreamreader_init},
    {Py_tp_new, mbstreamreader_new},
    {0, NULL},
};
/* Type specification for MultibyteStreamReader: a GC-enabled, subclassable,
   immutable type using the slots in reader_slots. */
static PyType_Spec reader_spec = {
    .name = MODULE_NAME ".MultibyteStreamReader",
    .basicsize = sizeof(MultibyteStreamReaderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = reader_slots,
};
  1448. static int
  1449. mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
  1450. PyObject *unistr, PyObject *str_write)
  1451. {
  1452. PyObject *str, *wr;
  1453. str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
  1454. if (str == NULL)
  1455. return -1;
  1456. wr = _PyObject_CallMethodOneArg(self->stream, str_write, str);
  1457. Py_DECREF(str);
  1458. if (wr == NULL)
  1459. return -1;
  1460. Py_DECREF(wr);
  1461. return 0;
  1462. }
  1463. /*[clinic input]
  1464. _multibytecodec.MultibyteStreamWriter.write
  1465. cls: defining_class
  1466. strobj: object
  1467. /
  1468. [clinic start generated code]*/
  1469. static PyObject *
  1470. _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
  1471. PyTypeObject *cls,
  1472. PyObject *strobj)
  1473. /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
  1474. {
  1475. module_state *state = PyType_GetModuleState(cls);
  1476. assert(state != NULL);
  1477. if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
  1478. return NULL;
  1479. }
  1480. Py_RETURN_NONE;
  1481. }
  1482. /*[clinic input]
  1483. _multibytecodec.MultibyteStreamWriter.writelines
  1484. cls: defining_class
  1485. lines: object
  1486. /
  1487. [clinic start generated code]*/
  1488. static PyObject *
  1489. _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
  1490. PyTypeObject *cls,
  1491. PyObject *lines)
  1492. /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
  1493. {
  1494. PyObject *strobj;
  1495. int i, r;
  1496. if (!PySequence_Check(lines)) {
  1497. PyErr_SetString(PyExc_TypeError,
  1498. "arg must be a sequence object");
  1499. return NULL;
  1500. }
  1501. module_state *state = PyType_GetModuleState(cls);
  1502. assert(state != NULL);
  1503. for (i = 0; i < PySequence_Length(lines); i++) {
  1504. /* length can be changed even within this loop */
  1505. strobj = PySequence_GetItem(lines, i);
  1506. if (strobj == NULL)
  1507. return NULL;
  1508. r = mbstreamwriter_iwrite(self, strobj, state->str_write);
  1509. Py_DECREF(strobj);
  1510. if (r == -1)
  1511. return NULL;
  1512. }
  1513. /* PySequence_Length() can fail */
  1514. if (PyErr_Occurred())
  1515. return NULL;
  1516. Py_RETURN_NONE;
  1517. }
  1518. /*[clinic input]
  1519. _multibytecodec.MultibyteStreamWriter.reset
  1520. cls: defining_class
  1521. /
  1522. [clinic start generated code]*/
  1523. static PyObject *
  1524. _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
  1525. PyTypeObject *cls)
  1526. /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
  1527. {
  1528. PyObject *pwrt;
  1529. if (!self->pending)
  1530. Py_RETURN_NONE;
  1531. pwrt = multibytecodec_encode(self->codec, &self->state,
  1532. self->pending, NULL, self->errors,
  1533. MBENC_FLUSH | MBENC_RESET);
  1534. /* some pending buffer can be truncated when UnicodeEncodeError is
  1535. * raised on 'strict' mode. but, 'reset' method is designed to
  1536. * reset the pending buffer or states so failed string sequence
  1537. * ought to be missed */
  1538. Py_CLEAR(self->pending);
  1539. if (pwrt == NULL)
  1540. return NULL;
  1541. assert(PyBytes_Check(pwrt));
  1542. module_state *state = PyType_GetModuleState(cls);
  1543. assert(state != NULL);
  1544. if (PyBytes_Size(pwrt) > 0) {
  1545. PyObject *wr;
  1546. wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
  1547. if (wr == NULL) {
  1548. Py_DECREF(pwrt);
  1549. return NULL;
  1550. }
  1551. }
  1552. Py_DECREF(pwrt);
  1553. Py_RETURN_NONE;
  1554. }
  1555. static PyObject *
  1556. mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  1557. {
  1558. MultibyteStreamWriterObject *self;
  1559. PyObject *stream, *codec = NULL;
  1560. char *errors = NULL;
  1561. if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
  1562. streamkwarglist, &stream, &errors))
  1563. return NULL;
  1564. self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
  1565. if (self == NULL)
  1566. return NULL;
  1567. codec = PyObject_GetAttrString((PyObject *)type, "codec");
  1568. if (codec == NULL)
  1569. goto errorexit;
  1570. module_state *state = find_state_by_def(type);
  1571. if (!MultibyteCodec_Check(state, codec)) {
  1572. PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
  1573. goto errorexit;
  1574. }
  1575. self->codec = ((MultibyteCodecObject *)codec)->codec;
  1576. self->stream = Py_NewRef(stream);
  1577. self->pending = NULL;
  1578. self->errors = internal_error_callback(errors);
  1579. if (self->errors == NULL)
  1580. goto errorexit;
  1581. if (self->codec->encinit != NULL &&
  1582. self->codec->encinit(&self->state, self->codec) != 0)
  1583. goto errorexit;
  1584. Py_DECREF(codec);
  1585. return (PyObject *)self;
  1586. errorexit:
  1587. Py_XDECREF(self);
  1588. Py_XDECREF(codec);
  1589. return NULL;
  1590. }
/* tp_init for MultibyteStreamWriter: ignores its arguments and always
   succeeds.  The instance fields are set up in mbstreamwriter_new(). */
static int
mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    return 0;
}
  1596. static int
  1597. mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
  1598. visitproc visit, void *arg)
  1599. {
  1600. if (ERROR_ISCUSTOM(self->errors))
  1601. Py_VISIT(self->errors);
  1602. Py_VISIT(self->stream);
  1603. return 0;
  1604. }
  1605. static void
  1606. mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
  1607. {
  1608. PyTypeObject *tp = Py_TYPE(self);
  1609. PyObject_GC_UnTrack(self);
  1610. ERROR_DECREF(self->errors);
  1611. Py_XDECREF(self->stream);
  1612. tp->tp_free(self);
  1613. Py_DECREF(tp);
  1614. }
/* Method table of MultibyteStreamWriter (write, writelines, reset), built
   from the METHODDEF macros of the clinic-declared methods and terminated
   by a NULL sentinel entry. */
static struct PyMethodDef mbstreamwriter_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
    {NULL, NULL},
};
/* Member table of MultibyteStreamWriter: exposes the wrapped stream object
   as the read-only attribute "stream". */
static PyMemberDef mbstreamwriter_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamWriterObject, stream),
                    READONLY, NULL},
    {NULL,}
};
  1627. static PyType_Slot writer_slots[] = {
  1628. {Py_tp_dealloc, mbstreamwriter_dealloc},
  1629. {Py_tp_getattro, PyObject_GenericGetAttr},
  1630. {Py_tp_traverse, mbstreamwriter_traverse},
  1631. {Py_tp_methods, mbstreamwriter_methods},
  1632. {Py_tp_members, mbstreamwriter_members},
  1633. {Py_tp_getset, codecctx_getsets},
  1634. {Py_tp_init, mbstreamwriter_init},
  1635. {Py_tp_new, mbstreamwriter_new},
  1636. {0, NULL},
  1637. };
  1638. static PyType_Spec writer_spec = {
  1639. .name = MODULE_NAME ".MultibyteStreamWriter",
  1640. .basicsize = sizeof(MultibyteStreamWriterObject),
  1641. .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
  1642. Py_TPFLAGS_IMMUTABLETYPE),
  1643. .slots = writer_slots,
  1644. };
  1645. /*[clinic input]
  1646. _multibytecodec.__create_codec
  1647. arg: object
  1648. /
  1649. [clinic start generated code]*/
  1650. static PyObject *
  1651. _multibytecodec___create_codec(PyObject *module, PyObject *arg)
  1652. /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
  1653. {
  1654. MultibyteCodecObject *self;
  1655. if (!PyCapsule_IsValid(arg, CODEC_CAPSULE)) {
  1656. PyErr_SetString(PyExc_ValueError, "argument type invalid");
  1657. return NULL;
  1658. }
  1659. codec_capsule *data = PyCapsule_GetPointer(arg, CODEC_CAPSULE);
  1660. const MultibyteCodec *codec = data->codec;
  1661. if (codec->codecinit != NULL && codec->codecinit(codec) != 0)
  1662. return NULL;
  1663. module_state *state = get_module_state(module);
  1664. self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
  1665. if (self == NULL)
  1666. return NULL;
  1667. self->codec = codec;
  1668. self->cjk_module = Py_NewRef(data->cjk_module);
  1669. PyObject_GC_Track(self);
  1670. return (PyObject *)self;
  1671. }
  1672. static int
  1673. _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
  1674. {
  1675. module_state *state = get_module_state(mod);
  1676. Py_VISIT(state->multibytecodec_type);
  1677. Py_VISIT(state->encoder_type);
  1678. Py_VISIT(state->decoder_type);
  1679. Py_VISIT(state->reader_type);
  1680. Py_VISIT(state->writer_type);
  1681. return 0;
  1682. }
  1683. static int
  1684. _multibytecodec_clear(PyObject *mod)
  1685. {
  1686. module_state *state = get_module_state(mod);
  1687. Py_CLEAR(state->multibytecodec_type);
  1688. Py_CLEAR(state->encoder_type);
  1689. Py_CLEAR(state->decoder_type);
  1690. Py_CLEAR(state->reader_type);
  1691. Py_CLEAR(state->writer_type);
  1692. Py_CLEAR(state->str_write);
  1693. return 0;
  1694. }
  1695. static void
  1696. _multibytecodec_free(void *mod)
  1697. {
  1698. _multibytecodec_clear((PyObject *)mod);
  1699. }
  1700. #define CREATE_TYPE(module, type, spec) \
  1701. do { \
  1702. type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
  1703. if (!type) { \
  1704. return -1; \
  1705. } \
  1706. } while (0)
  1707. #define ADD_TYPE(module, type) \
  1708. do { \
  1709. if (PyModule_AddType(module, type) < 0) { \
  1710. return -1; \
  1711. } \
  1712. } while (0)
  1713. static int
  1714. _multibytecodec_exec(PyObject *mod)
  1715. {
  1716. module_state *state = get_module_state(mod);
  1717. state->str_write = PyUnicode_InternFromString("write");
  1718. if (state->str_write == NULL) {
  1719. return -1;
  1720. }
  1721. CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
  1722. CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
  1723. CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
  1724. CREATE_TYPE(mod, state->reader_type, &reader_spec);
  1725. CREATE_TYPE(mod, state->writer_type, &writer_spec);
  1726. ADD_TYPE(mod, state->encoder_type);
  1727. ADD_TYPE(mod, state->decoder_type);
  1728. ADD_TYPE(mod, state->reader_type);
  1729. ADD_TYPE(mod, state->writer_type);
  1730. return 0;
  1731. }
  1732. #undef CREATE_TYPE
  1733. #undef ADD_TYPE
  1734. static struct PyMethodDef _multibytecodec_methods[] = {
  1735. _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
  1736. {NULL, NULL},
  1737. };
  1738. static PyModuleDef_Slot _multibytecodec_slots[] = {
  1739. {Py_mod_exec, _multibytecodec_exec},
  1740. {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
  1741. {0, NULL}
  1742. };
  1743. static struct PyModuleDef _multibytecodecmodule = {
  1744. .m_base = PyModuleDef_HEAD_INIT,
  1745. .m_name = "_multibytecodec",
  1746. .m_size = sizeof(module_state),
  1747. .m_methods = _multibytecodec_methods,
  1748. .m_slots = _multibytecodec_slots,
  1749. .m_traverse = _multibytecodec_traverse,
  1750. .m_clear = _multibytecodec_clear,
  1751. .m_free = _multibytecodec_free,
  1752. };
  1753. PyMODINIT_FUNC
  1754. PyInit__multibytecodec(void)
  1755. {
  1756. return PyModuleDef_Init(&_multibytecodecmodule);
  1757. }