/* ------------------------------------------------------------------------

   _codecs -- Provides access to the codec registry and the builtin
              codecs.

   This module should never be imported directly. The standard library
   module "codecs" wraps this builtin module for use within Python.

   The codec registry is accessible via:

     register(search_function) -> None

     unregister(search_function) -> None

     lookup(encoding) -> CodecInfo object

   The builtin Unicode codecs use the following interface:

     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)

     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
        (Unicode object, bytes consumed)

   The available <encoding>s include: utf_7, utf_8, utf_16, utf_32,
   unicode_escape, raw_unicode_escape, latin_1, ascii (7-bit), charmap,
   and (on win32) mbcs and oem.

Written by Marc-Andre Lemburg (mal@lemburg.com).

Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
  19. #define PY_SSIZE_T_CLEAN
  20. #include "Python.h"
  21. #ifdef MS_WINDOWS
  22. #include <windows.h>
  23. #endif
  24. /*[clinic input]
  25. module _codecs
  26. [clinic start generated code]*/
  27. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
  28. #include "pycore_runtime.h"
  29. #include "clinic/_codecsmodule.c.h"
  30. /* --- Registry ----------------------------------------------------------- */
  31. /*[clinic input]
  32. _codecs.register
  33. search_function: object
  34. /
  35. Register a codec search function.
  36. Search functions are expected to take one argument, the encoding name in
  37. all lower case letters, and either return None, or a tuple of functions
  38. (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
  39. [clinic start generated code]*/
  40. static PyObject *
  41. _codecs_register(PyObject *module, PyObject *search_function)
  42. /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
  43. {
  44. if (PyCodec_Register(search_function))
  45. return NULL;
  46. Py_RETURN_NONE;
  47. }
  48. /*[clinic input]
  49. _codecs.unregister
  50. search_function: object
  51. /
  52. Unregister a codec search function and clear the registry's cache.
  53. If the search function is not registered, do nothing.
  54. [clinic start generated code]*/
  55. static PyObject *
  56. _codecs_unregister(PyObject *module, PyObject *search_function)
  57. /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
  58. {
  59. if (PyCodec_Unregister(search_function) < 0) {
  60. return NULL;
  61. }
  62. Py_RETURN_NONE;
  63. }
  64. /*[clinic input]
  65. _codecs.lookup
  66. encoding: str
  67. /
  68. Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
  69. [clinic start generated code]*/
  70. static PyObject *
  71. _codecs_lookup_impl(PyObject *module, const char *encoding)
  72. /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
  73. {
  74. return _PyCodec_Lookup(encoding);
  75. }
  76. /*[clinic input]
  77. _codecs.encode
  78. obj: object
  79. encoding: str(c_default="NULL") = "utf-8"
  80. errors: str(c_default="NULL") = "strict"
  81. Encodes obj using the codec registered for encoding.
  82. The default encoding is 'utf-8'. errors may be given to set a
  83. different error handling scheme. Default is 'strict' meaning that encoding
  84. errors raise a ValueError. Other possible values are 'ignore', 'replace'
  85. and 'backslashreplace' as well as any other name registered with
  86. codecs.register_error that can handle ValueErrors.
  87. [clinic start generated code]*/
  88. static PyObject *
  89. _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
  90. const char *errors)
  91. /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
  92. {
  93. if (encoding == NULL)
  94. encoding = PyUnicode_GetDefaultEncoding();
  95. /* Encode via the codec registry */
  96. return PyCodec_Encode(obj, encoding, errors);
  97. }
  98. /*[clinic input]
  99. _codecs.decode
  100. obj: object
  101. encoding: str(c_default="NULL") = "utf-8"
  102. errors: str(c_default="NULL") = "strict"
  103. Decodes obj using the codec registered for encoding.
  104. Default encoding is 'utf-8'. errors may be given to set a
  105. different error handling scheme. Default is 'strict' meaning that encoding
  106. errors raise a ValueError. Other possible values are 'ignore', 'replace'
  107. and 'backslashreplace' as well as any other name registered with
  108. codecs.register_error that can handle ValueErrors.
  109. [clinic start generated code]*/
  110. static PyObject *
  111. _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
  112. const char *errors)
  113. /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
  114. {
  115. if (encoding == NULL)
  116. encoding = PyUnicode_GetDefaultEncoding();
  117. /* Decode via the codec registry */
  118. return PyCodec_Decode(obj, encoding, errors);
  119. }
  120. /* --- Helpers ------------------------------------------------------------ */
  121. static
  122. PyObject *codec_tuple(PyObject *decoded,
  123. Py_ssize_t len)
  124. {
  125. if (decoded == NULL)
  126. return NULL;
  127. return Py_BuildValue("Nn", decoded, len);
  128. }
  129. /* --- String codecs ------------------------------------------------------ */
  130. /*[clinic input]
  131. _codecs.escape_decode
  132. data: Py_buffer(accept={str, buffer})
  133. errors: str(accept={str, NoneType}) = None
  134. /
  135. [clinic start generated code]*/
  136. static PyObject *
  137. _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
  138. const char *errors)
  139. /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
  140. {
  141. PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
  142. errors, 0, NULL);
  143. return codec_tuple(decoded, data->len);
  144. }
  145. /*[clinic input]
  146. _codecs.escape_encode
  147. data: object(subclass_of='&PyBytes_Type')
  148. errors: str(accept={str, NoneType}) = None
  149. /
  150. [clinic start generated code]*/
  151. static PyObject *
  152. _codecs_escape_encode_impl(PyObject *module, PyObject *data,
  153. const char *errors)
  154. /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
  155. {
  156. Py_ssize_t size;
  157. Py_ssize_t newsize;
  158. PyObject *v;
  159. size = PyBytes_GET_SIZE(data);
  160. if (size > PY_SSIZE_T_MAX / 4) {
  161. PyErr_SetString(PyExc_OverflowError,
  162. "string is too large to encode");
  163. return NULL;
  164. }
  165. newsize = 4*size;
  166. v = PyBytes_FromStringAndSize(NULL, newsize);
  167. if (v == NULL) {
  168. return NULL;
  169. }
  170. else {
  171. Py_ssize_t i;
  172. char c;
  173. char *p = PyBytes_AS_STRING(v);
  174. for (i = 0; i < size; i++) {
  175. /* There's at least enough room for a hex escape */
  176. assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
  177. c = PyBytes_AS_STRING(data)[i];
  178. if (c == '\'' || c == '\\')
  179. *p++ = '\\', *p++ = c;
  180. else if (c == '\t')
  181. *p++ = '\\', *p++ = 't';
  182. else if (c == '\n')
  183. *p++ = '\\', *p++ = 'n';
  184. else if (c == '\r')
  185. *p++ = '\\', *p++ = 'r';
  186. else if (c < ' ' || c >= 0x7f) {
  187. *p++ = '\\';
  188. *p++ = 'x';
  189. *p++ = Py_hexdigits[(c & 0xf0) >> 4];
  190. *p++ = Py_hexdigits[c & 0xf];
  191. }
  192. else
  193. *p++ = c;
  194. }
  195. *p = '\0';
  196. if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
  197. return NULL;
  198. }
  199. }
  200. return codec_tuple(v, size);
  201. }
  202. /* --- Decoder ------------------------------------------------------------ */
  203. /*[clinic input]
  204. _codecs.utf_7_decode
  205. data: Py_buffer
  206. errors: str(accept={str, NoneType}) = None
  207. final: bool = False
  208. /
  209. [clinic start generated code]*/
  210. static PyObject *
  211. _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
  212. const char *errors, int final)
  213. /*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
  214. {
  215. Py_ssize_t consumed = data->len;
  216. PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
  217. errors,
  218. final ? NULL : &consumed);
  219. return codec_tuple(decoded, consumed);
  220. }
  221. /*[clinic input]
  222. _codecs.utf_8_decode
  223. data: Py_buffer
  224. errors: str(accept={str, NoneType}) = None
  225. final: bool = False
  226. /
  227. [clinic start generated code]*/
  228. static PyObject *
  229. _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
  230. const char *errors, int final)
  231. /*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
  232. {
  233. Py_ssize_t consumed = data->len;
  234. PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
  235. errors,
  236. final ? NULL : &consumed);
  237. return codec_tuple(decoded, consumed);
  238. }
  239. /*[clinic input]
  240. _codecs.utf_16_decode
  241. data: Py_buffer
  242. errors: str(accept={str, NoneType}) = None
  243. final: bool = False
  244. /
  245. [clinic start generated code]*/
  246. static PyObject *
  247. _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
  248. const char *errors, int final)
  249. /*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
  250. {
  251. int byteorder = 0;
  252. /* This is overwritten unless final is true. */
  253. Py_ssize_t consumed = data->len;
  254. PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
  255. errors, &byteorder,
  256. final ? NULL : &consumed);
  257. return codec_tuple(decoded, consumed);
  258. }
  259. /*[clinic input]
  260. _codecs.utf_16_le_decode
  261. data: Py_buffer
  262. errors: str(accept={str, NoneType}) = None
  263. final: bool = False
  264. /
  265. [clinic start generated code]*/
  266. static PyObject *
  267. _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
  268. const char *errors, int final)
  269. /*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
  270. {
  271. int byteorder = -1;
  272. /* This is overwritten unless final is true. */
  273. Py_ssize_t consumed = data->len;
  274. PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
  275. errors, &byteorder,
  276. final ? NULL : &consumed);
  277. return codec_tuple(decoded, consumed);
  278. }
  279. /*[clinic input]
  280. _codecs.utf_16_be_decode
  281. data: Py_buffer
  282. errors: str(accept={str, NoneType}) = None
  283. final: bool = False
  284. /
  285. [clinic start generated code]*/
  286. static PyObject *
  287. _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
  288. const char *errors, int final)
  289. /*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
  290. {
  291. int byteorder = 1;
  292. /* This is overwritten unless final is true. */
  293. Py_ssize_t consumed = data->len;
  294. PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
  295. errors, &byteorder,
  296. final ? NULL : &consumed);
  297. return codec_tuple(decoded, consumed);
  298. }
  299. /* This non-standard version also provides access to the byteorder
  300. parameter of the builtin UTF-16 codec.
  301. It returns a tuple (unicode, bytesread, byteorder) with byteorder
  302. being the value in effect at the end of data.
  303. */
  304. /*[clinic input]
  305. _codecs.utf_16_ex_decode
  306. data: Py_buffer
  307. errors: str(accept={str, NoneType}) = None
  308. byteorder: int = 0
  309. final: bool = False
  310. /
  311. [clinic start generated code]*/
  312. static PyObject *
  313. _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
  314. const char *errors, int byteorder, int final)
  315. /*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
  316. {
  317. /* This is overwritten unless final is true. */
  318. Py_ssize_t consumed = data->len;
  319. PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
  320. errors, &byteorder,
  321. final ? NULL : &consumed);
  322. if (decoded == NULL)
  323. return NULL;
  324. return Py_BuildValue("Nni", decoded, consumed, byteorder);
  325. }
  326. /*[clinic input]
  327. _codecs.utf_32_decode
  328. data: Py_buffer
  329. errors: str(accept={str, NoneType}) = None
  330. final: bool = False
  331. /
  332. [clinic start generated code]*/
  333. static PyObject *
  334. _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
  335. const char *errors, int final)
  336. /*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
  337. {
  338. int byteorder = 0;
  339. /* This is overwritten unless final is true. */
  340. Py_ssize_t consumed = data->len;
  341. PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
  342. errors, &byteorder,
  343. final ? NULL : &consumed);
  344. return codec_tuple(decoded, consumed);
  345. }
  346. /*[clinic input]
  347. _codecs.utf_32_le_decode
  348. data: Py_buffer
  349. errors: str(accept={str, NoneType}) = None
  350. final: bool = False
  351. /
  352. [clinic start generated code]*/
  353. static PyObject *
  354. _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
  355. const char *errors, int final)
  356. /*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
  357. {
  358. int byteorder = -1;
  359. /* This is overwritten unless final is true. */
  360. Py_ssize_t consumed = data->len;
  361. PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
  362. errors, &byteorder,
  363. final ? NULL : &consumed);
  364. return codec_tuple(decoded, consumed);
  365. }
  366. /*[clinic input]
  367. _codecs.utf_32_be_decode
  368. data: Py_buffer
  369. errors: str(accept={str, NoneType}) = None
  370. final: bool = False
  371. /
  372. [clinic start generated code]*/
  373. static PyObject *
  374. _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
  375. const char *errors, int final)
  376. /*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
  377. {
  378. int byteorder = 1;
  379. /* This is overwritten unless final is true. */
  380. Py_ssize_t consumed = data->len;
  381. PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
  382. errors, &byteorder,
  383. final ? NULL : &consumed);
  384. return codec_tuple(decoded, consumed);
  385. }
  386. /* This non-standard version also provides access to the byteorder
  387. parameter of the builtin UTF-32 codec.
  388. It returns a tuple (unicode, bytesread, byteorder) with byteorder
  389. being the value in effect at the end of data.
  390. */
  391. /*[clinic input]
  392. _codecs.utf_32_ex_decode
  393. data: Py_buffer
  394. errors: str(accept={str, NoneType}) = None
  395. byteorder: int = 0
  396. final: bool = False
  397. /
  398. [clinic start generated code]*/
  399. static PyObject *
  400. _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
  401. const char *errors, int byteorder, int final)
  402. /*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
  403. {
  404. Py_ssize_t consumed = data->len;
  405. PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
  406. errors, &byteorder,
  407. final ? NULL : &consumed);
  408. if (decoded == NULL)
  409. return NULL;
  410. return Py_BuildValue("Nni", decoded, consumed, byteorder);
  411. }
  412. /*[clinic input]
  413. _codecs.unicode_escape_decode
  414. data: Py_buffer(accept={str, buffer})
  415. errors: str(accept={str, NoneType}) = None
  416. final: bool = True
  417. /
  418. [clinic start generated code]*/
  419. static PyObject *
  420. _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
  421. const char *errors, int final)
  422. /*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
  423. {
  424. Py_ssize_t consumed = data->len;
  425. PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
  426. errors,
  427. final ? NULL : &consumed);
  428. return codec_tuple(decoded, consumed);
  429. }
  430. /*[clinic input]
  431. _codecs.raw_unicode_escape_decode
  432. data: Py_buffer(accept={str, buffer})
  433. errors: str(accept={str, NoneType}) = None
  434. final: bool = True
  435. /
  436. [clinic start generated code]*/
  437. static PyObject *
  438. _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
  439. const char *errors, int final)
  440. /*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
  441. {
  442. Py_ssize_t consumed = data->len;
  443. PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
  444. errors,
  445. final ? NULL : &consumed);
  446. return codec_tuple(decoded, consumed);
  447. }
  448. /*[clinic input]
  449. _codecs.latin_1_decode
  450. data: Py_buffer
  451. errors: str(accept={str, NoneType}) = None
  452. /
  453. [clinic start generated code]*/
  454. static PyObject *
  455. _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
  456. const char *errors)
  457. /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
  458. {
  459. PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
  460. return codec_tuple(decoded, data->len);
  461. }
  462. /*[clinic input]
  463. _codecs.ascii_decode
  464. data: Py_buffer
  465. errors: str(accept={str, NoneType}) = None
  466. /
  467. [clinic start generated code]*/
  468. static PyObject *
  469. _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
  470. const char *errors)
  471. /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
  472. {
  473. PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
  474. return codec_tuple(decoded, data->len);
  475. }
  476. /*[clinic input]
  477. _codecs.charmap_decode
  478. data: Py_buffer
  479. errors: str(accept={str, NoneType}) = None
  480. mapping: object = None
  481. /
  482. [clinic start generated code]*/
  483. static PyObject *
  484. _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
  485. const char *errors, PyObject *mapping)
  486. /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
  487. {
  488. PyObject *decoded;
  489. if (mapping == Py_None)
  490. mapping = NULL;
  491. decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
  492. return codec_tuple(decoded, data->len);
  493. }
  494. #ifdef MS_WINDOWS
  495. /*[clinic input]
  496. _codecs.mbcs_decode
  497. data: Py_buffer
  498. errors: str(accept={str, NoneType}) = None
  499. final: bool = False
  500. /
  501. [clinic start generated code]*/
  502. static PyObject *
  503. _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
  504. const char *errors, int final)
  505. /*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
  506. {
  507. Py_ssize_t consumed = data->len;
  508. PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
  509. errors, final ? NULL : &consumed);
  510. return codec_tuple(decoded, consumed);
  511. }
  512. /*[clinic input]
  513. _codecs.oem_decode
  514. data: Py_buffer
  515. errors: str(accept={str, NoneType}) = None
  516. final: bool = False
  517. /
  518. [clinic start generated code]*/
  519. static PyObject *
  520. _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
  521. const char *errors, int final)
  522. /*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
  523. {
  524. Py_ssize_t consumed = data->len;
  525. PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
  526. data->buf, data->len, errors, final ? NULL : &consumed);
  527. return codec_tuple(decoded, consumed);
  528. }
  529. /*[clinic input]
  530. _codecs.code_page_decode
  531. codepage: int
  532. data: Py_buffer
  533. errors: str(accept={str, NoneType}) = None
  534. final: bool = False
  535. /
  536. [clinic start generated code]*/
  537. static PyObject *
  538. _codecs_code_page_decode_impl(PyObject *module, int codepage,
  539. Py_buffer *data, const char *errors, int final)
  540. /*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
  541. {
  542. Py_ssize_t consumed = data->len;
  543. PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
  544. data->buf, data->len,
  545. errors,
  546. final ? NULL : &consumed);
  547. return codec_tuple(decoded, consumed);
  548. }
  549. #endif /* MS_WINDOWS */
  550. /* --- Encoder ------------------------------------------------------------ */
  551. /*[clinic input]
  552. _codecs.readbuffer_encode
  553. data: Py_buffer(accept={str, buffer})
  554. errors: str(accept={str, NoneType}) = None
  555. /
  556. [clinic start generated code]*/
  557. static PyObject *
  558. _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
  559. const char *errors)
  560. /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
  561. {
  562. PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
  563. return codec_tuple(result, data->len);
  564. }
  565. /*[clinic input]
  566. _codecs.utf_7_encode
  567. str: unicode
  568. errors: str(accept={str, NoneType}) = None
  569. /
  570. [clinic start generated code]*/
  571. static PyObject *
  572. _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
  573. const char *errors)
  574. /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
  575. {
  576. return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
  577. PyUnicode_GET_LENGTH(str));
  578. }
  579. /*[clinic input]
  580. _codecs.utf_8_encode
  581. str: unicode
  582. errors: str(accept={str, NoneType}) = None
  583. /
  584. [clinic start generated code]*/
  585. static PyObject *
  586. _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
  587. const char *errors)
  588. /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
  589. {
  590. return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
  591. PyUnicode_GET_LENGTH(str));
  592. }
  593. /* This version provides access to the byteorder parameter of the
  594. builtin UTF-16 codecs as optional third argument. It defaults to 0
  595. which means: use the native byte order and prepend the data with a
  596. BOM mark.
  597. */
  598. /*[clinic input]
  599. _codecs.utf_16_encode
  600. str: unicode
  601. errors: str(accept={str, NoneType}) = None
  602. byteorder: int = 0
  603. /
  604. [clinic start generated code]*/
  605. static PyObject *
  606. _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
  607. const char *errors, int byteorder)
  608. /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
  609. {
  610. return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
  611. PyUnicode_GET_LENGTH(str));
  612. }
  613. /*[clinic input]
  614. _codecs.utf_16_le_encode
  615. str: unicode
  616. errors: str(accept={str, NoneType}) = None
  617. /
  618. [clinic start generated code]*/
  619. static PyObject *
  620. _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
  621. const char *errors)
  622. /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
  623. {
  624. return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
  625. PyUnicode_GET_LENGTH(str));
  626. }
  627. /*[clinic input]
  628. _codecs.utf_16_be_encode
  629. str: unicode
  630. errors: str(accept={str, NoneType}) = None
  631. /
  632. [clinic start generated code]*/
  633. static PyObject *
  634. _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
  635. const char *errors)
  636. /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
  637. {
  638. return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
  639. PyUnicode_GET_LENGTH(str));
  640. }
  641. /* This version provides access to the byteorder parameter of the
  642. builtin UTF-32 codecs as optional third argument. It defaults to 0
  643. which means: use the native byte order and prepend the data with a
  644. BOM mark.
  645. */
  646. /*[clinic input]
  647. _codecs.utf_32_encode
  648. str: unicode
  649. errors: str(accept={str, NoneType}) = None
  650. byteorder: int = 0
  651. /
  652. [clinic start generated code]*/
  653. static PyObject *
  654. _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
  655. const char *errors, int byteorder)
  656. /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
  657. {
  658. return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
  659. PyUnicode_GET_LENGTH(str));
  660. }
  661. /*[clinic input]
  662. _codecs.utf_32_le_encode
  663. str: unicode
  664. errors: str(accept={str, NoneType}) = None
  665. /
  666. [clinic start generated code]*/
  667. static PyObject *
  668. _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
  669. const char *errors)
  670. /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
  671. {
  672. return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
  673. PyUnicode_GET_LENGTH(str));
  674. }
  675. /*[clinic input]
  676. _codecs.utf_32_be_encode
  677. str: unicode
  678. errors: str(accept={str, NoneType}) = None
  679. /
  680. [clinic start generated code]*/
  681. static PyObject *
  682. _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
  683. const char *errors)
  684. /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
  685. {
  686. return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
  687. PyUnicode_GET_LENGTH(str));
  688. }
  689. /*[clinic input]
  690. _codecs.unicode_escape_encode
  691. str: unicode
  692. errors: str(accept={str, NoneType}) = None
  693. /
  694. [clinic start generated code]*/
  695. static PyObject *
  696. _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
  697. const char *errors)
  698. /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
  699. {
  700. return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
  701. PyUnicode_GET_LENGTH(str));
  702. }
  703. /*[clinic input]
  704. _codecs.raw_unicode_escape_encode
  705. str: unicode
  706. errors: str(accept={str, NoneType}) = None
  707. /
  708. [clinic start generated code]*/
  709. static PyObject *
  710. _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
  711. const char *errors)
  712. /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
  713. {
  714. return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
  715. PyUnicode_GET_LENGTH(str));
  716. }
  717. /*[clinic input]
  718. _codecs.latin_1_encode
  719. str: unicode
  720. errors: str(accept={str, NoneType}) = None
  721. /
  722. [clinic start generated code]*/
  723. static PyObject *
  724. _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
  725. const char *errors)
  726. /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
  727. {
  728. return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
  729. PyUnicode_GET_LENGTH(str));
  730. }
  731. /*[clinic input]
  732. _codecs.ascii_encode
  733. str: unicode
  734. errors: str(accept={str, NoneType}) = None
  735. /
  736. [clinic start generated code]*/
  737. static PyObject *
  738. _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
  739. const char *errors)
  740. /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
  741. {
  742. return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
  743. PyUnicode_GET_LENGTH(str));
  744. }
  745. /*[clinic input]
  746. _codecs.charmap_encode
  747. str: unicode
  748. errors: str(accept={str, NoneType}) = None
  749. mapping: object = None
  750. /
  751. [clinic start generated code]*/
  752. static PyObject *
  753. _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
  754. const char *errors, PyObject *mapping)
  755. /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
  756. {
  757. if (mapping == Py_None)
  758. mapping = NULL;
  759. return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
  760. PyUnicode_GET_LENGTH(str));
  761. }
  762. /*[clinic input]
  763. _codecs.charmap_build
  764. map: unicode
  765. /
  766. [clinic start generated code]*/
  767. static PyObject *
  768. _codecs_charmap_build_impl(PyObject *module, PyObject *map)
  769. /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
  770. {
  771. return PyUnicode_BuildEncodingMap(map);
  772. }
  773. #ifdef MS_WINDOWS
  774. /*[clinic input]
  775. _codecs.mbcs_encode
  776. str: unicode
  777. errors: str(accept={str, NoneType}) = None
  778. /
  779. [clinic start generated code]*/
  780. static PyObject *
  781. _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
  782. /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
  783. {
  784. return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
  785. PyUnicode_GET_LENGTH(str));
  786. }
  787. /*[clinic input]
  788. _codecs.oem_encode
  789. str: unicode
  790. errors: str(accept={str, NoneType}) = None
  791. /
  792. [clinic start generated code]*/
  793. static PyObject *
  794. _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
  795. /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
  796. {
  797. return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
  798. PyUnicode_GET_LENGTH(str));
  799. }
  800. /*[clinic input]
  801. _codecs.code_page_encode
  802. code_page: int
  803. str: unicode
  804. errors: str(accept={str, NoneType}) = None
  805. /
  806. [clinic start generated code]*/
  807. static PyObject *
  808. _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
  809. const char *errors)
  810. /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
  811. {
  812. return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
  813. PyUnicode_GET_LENGTH(str));
  814. }
  815. #endif /* MS_WINDOWS */
  816. /* --- Error handler registry --------------------------------------------- */
  817. /*[clinic input]
  818. _codecs.register_error
  819. errors: str
  820. handler: object
  821. /
  822. Register the specified error handler under the name errors.
  823. handler must be a callable object, that will be called with an exception
  824. instance containing information about the location of the encoding/decoding
  825. error and must return a (replacement, new position) tuple.
  826. [clinic start generated code]*/
  827. static PyObject *
  828. _codecs_register_error_impl(PyObject *module, const char *errors,
  829. PyObject *handler)
  830. /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
  831. {
  832. if (PyCodec_RegisterError(errors, handler))
  833. return NULL;
  834. Py_RETURN_NONE;
  835. }
  836. /*[clinic input]
  837. _codecs.lookup_error
  838. name: str
  839. /
  840. lookup_error(errors) -> handler
  841. Return the error handler for the specified error handling name or raise a
  842. LookupError, if no handler exists under this name.
  843. [clinic start generated code]*/
  844. static PyObject *
  845. _codecs_lookup_error_impl(PyObject *module, const char *name)
  846. /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
  847. {
  848. return PyCodec_LookupError(name);
  849. }
  850. /* --- Module API --------------------------------------------------------- */
  851. static PyMethodDef _codecs_functions[] = {
  852. _CODECS_REGISTER_METHODDEF
  853. _CODECS_UNREGISTER_METHODDEF
  854. _CODECS_LOOKUP_METHODDEF
  855. _CODECS_ENCODE_METHODDEF
  856. _CODECS_DECODE_METHODDEF
  857. _CODECS_ESCAPE_ENCODE_METHODDEF
  858. _CODECS_ESCAPE_DECODE_METHODDEF
  859. _CODECS_UTF_8_ENCODE_METHODDEF
  860. _CODECS_UTF_8_DECODE_METHODDEF
  861. _CODECS_UTF_7_ENCODE_METHODDEF
  862. _CODECS_UTF_7_DECODE_METHODDEF
  863. _CODECS_UTF_16_ENCODE_METHODDEF
  864. _CODECS_UTF_16_LE_ENCODE_METHODDEF
  865. _CODECS_UTF_16_BE_ENCODE_METHODDEF
  866. _CODECS_UTF_16_DECODE_METHODDEF
  867. _CODECS_UTF_16_LE_DECODE_METHODDEF
  868. _CODECS_UTF_16_BE_DECODE_METHODDEF
  869. _CODECS_UTF_16_EX_DECODE_METHODDEF
  870. _CODECS_UTF_32_ENCODE_METHODDEF
  871. _CODECS_UTF_32_LE_ENCODE_METHODDEF
  872. _CODECS_UTF_32_BE_ENCODE_METHODDEF
  873. _CODECS_UTF_32_DECODE_METHODDEF
  874. _CODECS_UTF_32_LE_DECODE_METHODDEF
  875. _CODECS_UTF_32_BE_DECODE_METHODDEF
  876. _CODECS_UTF_32_EX_DECODE_METHODDEF
  877. _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
  878. _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
  879. _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
  880. _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
  881. _CODECS_LATIN_1_ENCODE_METHODDEF
  882. _CODECS_LATIN_1_DECODE_METHODDEF
  883. _CODECS_ASCII_ENCODE_METHODDEF
  884. _CODECS_ASCII_DECODE_METHODDEF
  885. _CODECS_CHARMAP_ENCODE_METHODDEF
  886. _CODECS_CHARMAP_DECODE_METHODDEF
  887. _CODECS_CHARMAP_BUILD_METHODDEF
  888. _CODECS_READBUFFER_ENCODE_METHODDEF
  889. _CODECS_MBCS_ENCODE_METHODDEF
  890. _CODECS_MBCS_DECODE_METHODDEF
  891. _CODECS_OEM_ENCODE_METHODDEF
  892. _CODECS_OEM_DECODE_METHODDEF
  893. _CODECS_CODE_PAGE_ENCODE_METHODDEF
  894. _CODECS_CODE_PAGE_DECODE_METHODDEF
  895. _CODECS_REGISTER_ERROR_METHODDEF
  896. _CODECS_LOOKUP_ERROR_METHODDEF
  897. {NULL, NULL} /* sentinel */
  898. };
  899. static PyModuleDef_Slot _codecs_slots[] = {
  900. {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
  901. {0, NULL}
  902. };
  903. static struct PyModuleDef codecsmodule = {
  904. PyModuleDef_HEAD_INIT,
  905. "_codecs",
  906. NULL,
  907. 0,
  908. _codecs_functions,
  909. _codecs_slots,
  910. NULL,
  911. NULL,
  912. NULL
  913. };
  914. PyMODINIT_FUNC
  915. PyInit__codecs(void)
  916. {
  917. return PyModuleDef_Init(&codecsmodule);
  918. }