binascii.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353
  1. /*
  2. ** Routines to represent binary data in ASCII and vice-versa
  3. **
  4. ** This module currently supports the following encodings:
  5. ** uuencode:
  6. ** each line encodes 45 bytes (except possibly the last)
  7. ** First char encodes (binary) length, rest data
  8. ** each char encodes 6 bits, as follows:
  9. ** binary: 01234567 abcdefgh ijklmnop
  10. ** ascii: 012345 67abcd efghij klmnop
  11. ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12. ** short binary data is zero-extended (so the bits are always in the
  13. ** right place), this does *not* reflect in the length.
  14. ** base64:
  15. ** Line breaks are insignificant, but lines are at most 76 chars
  16. ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17. ** is done via a table.
  18. ** Short binary data is filled (in ASCII) with '='.
  19. ** hqx:
  20. ** File starts with introductory text, real data starts and ends
  21. ** with colons.
  22. ** Data consists of three similar parts: info, datafork, resourcefork.
  23. ** Each part is protected (at the end) with a 16-bit crc
  24. ** The binary data is run-length encoded, and then ascii-fied:
  25. ** binary: 01234567 abcdefgh ijklmnop
  26. ** ascii: 012345 67abcd efghij klmnop
  27. ** ASCII encoding is table-driven, see the code.
  28. ** Short binary data results in the runt ascii-byte being output with
  29. ** the bits in the right place.
  30. **
  31. ** While I was reading dozens of programs that encode or decode the formats
  32. ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33. **
  34. ** Programs that encode binary data in ASCII are written in
  35. ** such a style that they are as unreadable as possible. Devices used
  36. ** include unnecessary global variables, burying important tables
  37. ** in unrelated sourcefiles, putting functions in include files,
  38. ** using seemingly-descriptive variable names for different purposes,
  39. ** calls to empty subroutines and a host of others.
  40. **
  41. ** I have attempted to break with this tradition, but I guess that that
  42. ** does make the performance sub-optimal. Oh well, too bad...
  43. **
  44. ** Jack Jansen, CWI, July 1995.
  45. **
  46. ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47. ** quoted-printable encoding specifies that non printable characters (anything
  48. ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49. ** of the character. It also specifies some other behavior to enable 8bit data
  50. ** in a mail message with little difficulty (maximum line sizes, protecting
  51. ** some cases of whitespace, etc).
  52. **
  53. ** Brandon Long, September 2001.
  54. */
  55. #ifndef Py_BUILD_CORE_BUILTIN
  56. # define Py_BUILD_CORE_MODULE 1
  57. #endif
  58. #define PY_SSIZE_T_CLEAN
  59. #include "Python.h"
  60. #include "pycore_long.h" // _PyLong_DigitValue
  61. #include "pycore_strhex.h" // _Py_strhex_bytes_with_sep()
  62. #ifdef USE_ZLIB_CRC32
  63. # include "zlib.h"
  64. #endif
  65. typedef struct binascii_state {
  66. PyObject *Error;
  67. PyObject *Incomplete;
  68. } binascii_state;
  69. static inline binascii_state *
  70. get_binascii_state(PyObject *module)
  71. {
  72. return (binascii_state *)PyModule_GetState(module);
  73. }
  74. static const unsigned char table_a2b_base64[] = {
  75. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  76. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  77. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
  78. 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
  79. -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
  80. 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
  81. -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
  82. 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
  83. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  84. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  85. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  86. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  87. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  88. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  89. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  90. -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  91. };
  92. #define BASE64_PAD '='
  93. /* Max binary chunk size; limited only by available memory */
  94. #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
  95. static const unsigned char table_b2a_base64[] =
  96. "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  97. static const unsigned short crctab_hqx[256] = {
  98. 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
  99. 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
  100. 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
  101. 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
  102. 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
  103. 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
  104. 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
  105. 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
  106. 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
  107. 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
  108. 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
  109. 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
  110. 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
  111. 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
  112. 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
  113. 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
  114. 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
  115. 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
  116. 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
  117. 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
  118. 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
  119. 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
  120. 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
  121. 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
  122. 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
  123. 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
  124. 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
  125. 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
  126. 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
  127. 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
  128. 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
  129. 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
  130. };
  131. /*[clinic input]
  132. module binascii
  133. [clinic start generated code]*/
  134. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
  135. /*[python input]
  136. class ascii_buffer_converter(CConverter):
  137. type = 'Py_buffer'
  138. converter = 'ascii_buffer_converter'
  139. impl_by_reference = True
  140. c_default = "{NULL, NULL}"
  141. def cleanup(self):
  142. name = self.name
  143. return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
  144. [python start generated code]*/
  145. /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
  146. static int
  147. ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
  148. {
  149. if (arg == NULL) {
  150. PyBuffer_Release(buf);
  151. return 1;
  152. }
  153. if (PyUnicode_Check(arg)) {
  154. if (PyUnicode_READY(arg) < 0)
  155. return 0;
  156. if (!PyUnicode_IS_ASCII(arg)) {
  157. PyErr_SetString(PyExc_ValueError,
  158. "string argument should contain only ASCII characters");
  159. return 0;
  160. }
  161. assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
  162. buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
  163. buf->len = PyUnicode_GET_LENGTH(arg);
  164. buf->obj = NULL;
  165. return 1;
  166. }
  167. if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
  168. PyErr_Format(PyExc_TypeError,
  169. "argument should be bytes, buffer or ASCII string, "
  170. "not '%.100s'", Py_TYPE(arg)->tp_name);
  171. return 0;
  172. }
  173. if (!PyBuffer_IsContiguous(buf, 'C')) {
  174. PyErr_Format(PyExc_TypeError,
  175. "argument should be a contiguous buffer, "
  176. "not '%.100s'", Py_TYPE(arg)->tp_name);
  177. PyBuffer_Release(buf);
  178. return 0;
  179. }
  180. return Py_CLEANUP_SUPPORTED;
  181. }
  182. #include "clinic/binascii.c.h"
  183. /*[clinic input]
  184. binascii.a2b_uu
  185. data: ascii_buffer
  186. /
  187. Decode a line of uuencoded data.
  188. [clinic start generated code]*/
  189. static PyObject *
  190. binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
  191. /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
  192. {
  193. const unsigned char *ascii_data;
  194. unsigned char *bin_data;
  195. int leftbits = 0;
  196. unsigned char this_ch;
  197. unsigned int leftchar = 0;
  198. PyObject *rv;
  199. Py_ssize_t ascii_len, bin_len;
  200. binascii_state *state;
  201. ascii_data = data->buf;
  202. ascii_len = data->len;
  203. assert(ascii_len >= 0);
  204. /* First byte: binary data length (in bytes) */
  205. bin_len = (*ascii_data++ - ' ') & 077;
  206. ascii_len--;
  207. /* Allocate the buffer */
  208. if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
  209. return NULL;
  210. bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
  211. for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
  212. /* XXX is it really best to add NULs if there's no more data */
  213. this_ch = (ascii_len > 0) ? *ascii_data : 0;
  214. if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
  215. /*
  216. ** Whitespace. Assume some spaces got eaten at
  217. ** end-of-line. (We check this later)
  218. */
  219. this_ch = 0;
  220. } else {
  221. /* Check the character for legality
  222. ** The 64 in stead of the expected 63 is because
  223. ** there are a few uuencodes out there that use
  224. ** '`' as zero instead of space.
  225. */
  226. if ( this_ch < ' ' || this_ch > (' ' + 64)) {
  227. state = get_binascii_state(module);
  228. if (state == NULL) {
  229. return NULL;
  230. }
  231. PyErr_SetString(state->Error, "Illegal char");
  232. Py_DECREF(rv);
  233. return NULL;
  234. }
  235. this_ch = (this_ch - ' ') & 077;
  236. }
  237. /*
  238. ** Shift it in on the low end, and see if there's
  239. ** a byte ready for output.
  240. */
  241. leftchar = (leftchar << 6) | (this_ch);
  242. leftbits += 6;
  243. if ( leftbits >= 8 ) {
  244. leftbits -= 8;
  245. *bin_data++ = (leftchar >> leftbits) & 0xff;
  246. leftchar &= ((1 << leftbits) - 1);
  247. bin_len--;
  248. }
  249. }
  250. /*
  251. ** Finally, check that if there's anything left on the line
  252. ** that it's whitespace only.
  253. */
  254. while( ascii_len-- > 0 ) {
  255. this_ch = *ascii_data++;
  256. /* Extra '`' may be written as padding in some cases */
  257. if ( this_ch != ' ' && this_ch != ' '+64 &&
  258. this_ch != '\n' && this_ch != '\r' ) {
  259. state = get_binascii_state(module);
  260. if (state == NULL) {
  261. return NULL;
  262. }
  263. PyErr_SetString(state->Error, "Trailing garbage");
  264. Py_DECREF(rv);
  265. return NULL;
  266. }
  267. }
  268. return rv;
  269. }
  270. /*[clinic input]
  271. binascii.b2a_uu
  272. data: Py_buffer
  273. /
  274. *
  275. backtick: bool = False
  276. Uuencode line of data.
  277. [clinic start generated code]*/
  278. static PyObject *
  279. binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
  280. /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/
  281. {
  282. unsigned char *ascii_data;
  283. const unsigned char *bin_data;
  284. int leftbits = 0;
  285. unsigned char this_ch;
  286. unsigned int leftchar = 0;
  287. binascii_state *state;
  288. Py_ssize_t bin_len, out_len;
  289. _PyBytesWriter writer;
  290. _PyBytesWriter_Init(&writer);
  291. bin_data = data->buf;
  292. bin_len = data->len;
  293. if ( bin_len > 45 ) {
  294. /* The 45 is a limit that appears in all uuencode's */
  295. state = get_binascii_state(module);
  296. if (state == NULL) {
  297. return NULL;
  298. }
  299. PyErr_SetString(state->Error, "At most 45 bytes at once");
  300. return NULL;
  301. }
  302. /* We're lazy and allocate to much (fixed up later) */
  303. out_len = 2 + (bin_len + 2) / 3 * 4;
  304. ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
  305. if (ascii_data == NULL)
  306. return NULL;
  307. /* Store the length */
  308. if (backtick && !bin_len)
  309. *ascii_data++ = '`';
  310. else
  311. *ascii_data++ = ' ' + (unsigned char)bin_len;
  312. for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
  313. /* Shift the data (or padding) into our buffer */
  314. if ( bin_len > 0 ) /* Data */
  315. leftchar = (leftchar << 8) | *bin_data;
  316. else /* Padding */
  317. leftchar <<= 8;
  318. leftbits += 8;
  319. /* See if there are 6-bit groups ready */
  320. while ( leftbits >= 6 ) {
  321. this_ch = (leftchar >> (leftbits-6)) & 0x3f;
  322. leftbits -= 6;
  323. if (backtick && !this_ch)
  324. *ascii_data++ = '`';
  325. else
  326. *ascii_data++ = this_ch + ' ';
  327. }
  328. }
  329. *ascii_data++ = '\n'; /* Append a courtesy newline */
  330. return _PyBytesWriter_Finish(&writer, ascii_data);
  331. }
  332. /*[clinic input]
  333. binascii.a2b_base64
  334. data: ascii_buffer
  335. /
  336. *
  337. strict_mode: bool = False
  338. Decode a line of base64 data.
  339. strict_mode
  340. When set to True, bytes that are not part of the base64 standard are not allowed.
  341. The same applies to excess data after padding (= / ==).
  342. [clinic start generated code]*/
  343. static PyObject *
  344. binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
  345. /*[clinic end generated code: output=5409557788d4f975 input=c0c15fd0f8f9a62d]*/
  346. {
  347. assert(data->len >= 0);
  348. const unsigned char *ascii_data = data->buf;
  349. size_t ascii_len = data->len;
  350. binascii_state *state = NULL;
  351. char padding_started = 0;
  352. /* Allocate the buffer */
  353. Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
  354. _PyBytesWriter writer;
  355. _PyBytesWriter_Init(&writer);
  356. unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
  357. if (bin_data == NULL)
  358. return NULL;
  359. unsigned char *bin_data_start = bin_data;
  360. if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
  361. state = get_binascii_state(module);
  362. if (state) {
  363. PyErr_SetString(state->Error, "Leading padding not allowed");
  364. }
  365. goto error_end;
  366. }
  367. int quad_pos = 0;
  368. unsigned char leftchar = 0;
  369. int pads = 0;
  370. for (size_t i = 0; i < ascii_len; i++) {
  371. unsigned char this_ch = ascii_data[i];
  372. /* Check for pad sequences and ignore
  373. ** the invalid ones.
  374. */
  375. if (this_ch == BASE64_PAD) {
  376. padding_started = 1;
  377. if (strict_mode && quad_pos == 0) {
  378. state = get_binascii_state(module);
  379. if (state) {
  380. PyErr_SetString(state->Error, "Excess padding not allowed");
  381. }
  382. goto error_end;
  383. }
  384. if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
  385. /* A pad sequence means we should not parse more input.
  386. ** We've already interpreted the data from the quad at this point.
  387. ** in strict mode, an error should raise if there's excess data after the padding.
  388. */
  389. if (strict_mode && i + 1 < ascii_len) {
  390. state = get_binascii_state(module);
  391. if (state) {
  392. PyErr_SetString(state->Error, "Excess data after padding");
  393. }
  394. goto error_end;
  395. }
  396. goto done;
  397. }
  398. continue;
  399. }
  400. this_ch = table_a2b_base64[this_ch];
  401. if (this_ch >= 64) {
  402. if (strict_mode) {
  403. state = get_binascii_state(module);
  404. if (state) {
  405. PyErr_SetString(state->Error, "Only base64 data is allowed");
  406. }
  407. goto error_end;
  408. }
  409. continue;
  410. }
  411. // Characters that are not '=', in the middle of the padding, are not allowed
  412. if (strict_mode && padding_started) {
  413. state = get_binascii_state(module);
  414. if (state) {
  415. PyErr_SetString(state->Error, "Discontinuous padding not allowed");
  416. }
  417. goto error_end;
  418. }
  419. pads = 0;
  420. switch (quad_pos) {
  421. case 0:
  422. quad_pos = 1;
  423. leftchar = this_ch;
  424. break;
  425. case 1:
  426. quad_pos = 2;
  427. *bin_data++ = (leftchar << 2) | (this_ch >> 4);
  428. leftchar = this_ch & 0x0f;
  429. break;
  430. case 2:
  431. quad_pos = 3;
  432. *bin_data++ = (leftchar << 4) | (this_ch >> 2);
  433. leftchar = this_ch & 0x03;
  434. break;
  435. case 3:
  436. quad_pos = 0;
  437. *bin_data++ = (leftchar << 6) | (this_ch);
  438. leftchar = 0;
  439. break;
  440. }
  441. }
  442. if (quad_pos != 0) {
  443. state = get_binascii_state(module);
  444. if (state == NULL) {
  445. /* error already set, from get_binascii_state */
  446. } else if (quad_pos == 1) {
  447. /*
  448. ** There is exactly one extra valid, non-padding, base64 character.
  449. ** This is an invalid length, as there is no possible input that
  450. ** could encoded into such a base64 string.
  451. */
  452. PyErr_Format(state->Error,
  453. "Invalid base64-encoded string: "
  454. "number of data characters (%zd) cannot be 1 more "
  455. "than a multiple of 4",
  456. (bin_data - bin_data_start) / 3 * 4 + 1);
  457. } else {
  458. PyErr_SetString(state->Error, "Incorrect padding");
  459. }
  460. error_end:
  461. _PyBytesWriter_Dealloc(&writer);
  462. return NULL;
  463. }
  464. done:
  465. return _PyBytesWriter_Finish(&writer, bin_data);
  466. }
  467. /*[clinic input]
  468. binascii.b2a_base64
  469. data: Py_buffer
  470. /
  471. *
  472. newline: bool = True
  473. Base64-code line of data.
  474. [clinic start generated code]*/
  475. static PyObject *
  476. binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
  477. /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/
  478. {
  479. unsigned char *ascii_data;
  480. const unsigned char *bin_data;
  481. int leftbits = 0;
  482. unsigned char this_ch;
  483. unsigned int leftchar = 0;
  484. Py_ssize_t bin_len, out_len;
  485. _PyBytesWriter writer;
  486. binascii_state *state;
  487. bin_data = data->buf;
  488. bin_len = data->len;
  489. _PyBytesWriter_Init(&writer);
  490. assert(bin_len >= 0);
  491. if ( bin_len > BASE64_MAXBIN ) {
  492. state = get_binascii_state(module);
  493. if (state == NULL) {
  494. return NULL;
  495. }
  496. PyErr_SetString(state->Error, "Too much data for base64 line");
  497. return NULL;
  498. }
  499. /* We're lazy and allocate too much (fixed up later).
  500. "+2" leaves room for up to two pad characters.
  501. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
  502. out_len = bin_len*2 + 2;
  503. if (newline)
  504. out_len++;
  505. ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
  506. if (ascii_data == NULL)
  507. return NULL;
  508. for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
  509. /* Shift the data into our buffer */
  510. leftchar = (leftchar << 8) | *bin_data;
  511. leftbits += 8;
  512. /* See if there are 6-bit groups ready */
  513. while ( leftbits >= 6 ) {
  514. this_ch = (leftchar >> (leftbits-6)) & 0x3f;
  515. leftbits -= 6;
  516. *ascii_data++ = table_b2a_base64[this_ch];
  517. }
  518. }
  519. if ( leftbits == 2 ) {
  520. *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
  521. *ascii_data++ = BASE64_PAD;
  522. *ascii_data++ = BASE64_PAD;
  523. } else if ( leftbits == 4 ) {
  524. *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
  525. *ascii_data++ = BASE64_PAD;
  526. }
  527. if (newline)
  528. *ascii_data++ = '\n'; /* Append a courtesy newline */
  529. return _PyBytesWriter_Finish(&writer, ascii_data);
  530. }
  531. /*[clinic input]
  532. binascii.crc_hqx
  533. data: Py_buffer
  534. crc: unsigned_int(bitwise=True)
  535. /
  536. Compute CRC-CCITT incrementally.
  537. [clinic start generated code]*/
  538. static PyObject *
  539. binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
  540. /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
  541. {
  542. const unsigned char *bin_data;
  543. Py_ssize_t len;
  544. crc &= 0xffff;
  545. bin_data = data->buf;
  546. len = data->len;
  547. while(len-- > 0) {
  548. crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
  549. }
  550. return PyLong_FromUnsignedLong(crc);
  551. }
  552. #ifndef USE_ZLIB_CRC32
  553. /* Crc - 32 BIT ANSI X3.66 CRC checksum files
  554. Also known as: ISO 3307
  555. **********************************************************************|
  556. * *|
  557. * Demonstration program to compute the 32-bit CRC used as the frame *|
  558. * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
  559. * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
  560. * protocol). The 32-bit FCS was added via the Federal Register, *|
  561. * 1 June 1982, p.23798. I presume but don't know for certain that *|
  562. * this polynomial is or will be included in CCITT V.41, which *|
  563. * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
  564. * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
  565. * errors by a factor of 10^-5 over 16-bit FCS. *|
  566. * *|
  567. **********************************************************************|
  568. Copyright (C) 1986 Gary S. Brown. You may use this program, or
  569. code or tables extracted from it, as desired without restriction.
  570. First, the polynomial itself and its table of feedback terms. The
  571. polynomial is
  572. X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
  573. Note that we take it "backwards" and put the highest-order term in
  574. the lowest-order bit. The X^32 term is "implied"; the LSB is the
  575. X^31 term, etc. The X^0 term (usually shown as "+1") results in
  576. the MSB being 1.
  577. Note that the usual hardware shift register implementation, which
  578. is what we're using (we're merely optimizing it by doing eight-bit
  579. chunks at a time) shifts bits into the lowest-order term. In our
  580. implementation, that means shifting towards the right. Why do we
  581. do it this way? Because the calculated CRC must be transmitted in
  582. order from highest-order term to lowest-order term. UARTs transmit
  583. characters in order from LSB to MSB. By storing the CRC this way,
  584. we hand it to the UART in the order low-byte to high-byte; the UART
  585. sends each low-bit to high-bit; and the result is transmission bit
  586. by bit from highest- to lowest-order term without requiring any bit
  587. shuffling on our part. Reception works similarly.
  588. The feedback terms table consists of 256, 32-bit entries. Notes:
  589. 1. The table can be generated at runtime if desired; code to do so
  590. is shown later. It might not be obvious, but the feedback
  591. terms simply represent the results of eight shift/xor opera-
  592. tions for all combinations of data and CRC register values.
  593. 2. The CRC accumulation logic is the same for all CRC polynomials,
  594. be they sixteen or thirty-two bits wide. You simply choose the
  595. appropriate table. Alternatively, because the table can be
  596. generated at runtime, you can start by generating the table for
  597. the polynomial in question and use exactly the same "updcrc",
  598. if your application needn't simultaneously handle two CRC
  599. polynomials. (Note, however, that XMODEM is strange.)
  600. 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
  601. of course, 32-bit entries work OK if the high 16 bits are zero.
  602. 4. The values must be right-shifted by eight bits by the "updcrc"
  603. logic; the shift must be unsigned (bring in zeroes). On some
  604. hardware you could probably optimize the shift in assembler by
  605. using byte-swap instructions.
  606. ********************************************************************/
  607. static const unsigned int crc_32_tab[256] = {
  608. 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
  609. 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
  610. 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
  611. 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
  612. 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
  613. 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
  614. 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
  615. 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
  616. 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
  617. 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
  618. 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
  619. 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
  620. 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
  621. 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
  622. 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
  623. 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
  624. 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
  625. 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
  626. 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
  627. 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
  628. 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
  629. 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
  630. 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
  631. 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
  632. 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
  633. 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
  634. 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
  635. 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
  636. 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
  637. 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
  638. 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
  639. 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
  640. 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
  641. 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
  642. 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
  643. 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
  644. 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
  645. 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
  646. 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
  647. 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
  648. 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
  649. 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
  650. 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
  651. 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
  652. 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
  653. 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
  654. 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
  655. 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
  656. 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
  657. 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
  658. 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
  659. 0x2d02ef8dU
  660. };
  661. static unsigned int
  662. internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
  663. { /* By Jim Ahlstrom; All rights transferred to CNRI */
  664. unsigned int result;
  665. crc = ~ crc;
  666. while (len-- > 0) {
  667. crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
  668. /* Note: (crc >> 8) MUST zero fill on left */
  669. }
  670. result = (crc ^ 0xFFFFFFFF);
  671. return result & 0xffffffff;
  672. }
  673. #endif /* USE_ZLIB_CRC32 */
  674. /*[clinic input]
  675. binascii.crc32 -> unsigned_int
  676. data: Py_buffer
  677. crc: unsigned_int(bitwise=True) = 0
  678. /
  679. Compute CRC-32 incrementally.
  680. [clinic start generated code]*/
  681. static unsigned int
  682. binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
  683. /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
  684. #ifdef USE_ZLIB_CRC32
  685. /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
  686. * modules for historical reasons. */
  687. {
  688. /* Releasing the GIL for very small buffers is inefficient
  689. and may lower performance */
  690. if (data->len > 1024*5) {
  691. unsigned char *buf = data->buf;
  692. Py_ssize_t len = data->len;
  693. Py_BEGIN_ALLOW_THREADS
  694. /* Avoid truncation of length for very large buffers. crc32() takes
  695. length as an unsigned int, which may be narrower than Py_ssize_t.
  696. We further limit size due to bugs in Apple's macOS zlib.
  697. See https://github.com/python/cpython/issues/105967
  698. */
  699. #define ZLIB_CRC_CHUNK_SIZE 0x40000000
  700. #if ZLIB_CRC_CHUNK_SIZE > INT_MAX
  701. # error "unsupported less than 32-bit platform?"
  702. #endif
  703. while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) {
  704. crc = crc32(crc, buf, ZLIB_CRC_CHUNK_SIZE);
  705. buf += (size_t) ZLIB_CRC_CHUNK_SIZE;
  706. len -= (size_t) ZLIB_CRC_CHUNK_SIZE;
  707. }
  708. #undef ZLIB_CRC_CHUNK_SIZE
  709. crc = crc32(crc, buf, (unsigned int)len);
  710. Py_END_ALLOW_THREADS
  711. } else {
  712. crc = crc32(crc, data->buf, (unsigned int)data->len);
  713. }
  714. return crc & 0xffffffff;
  715. }
  716. #else /* USE_ZLIB_CRC32 */
  717. {
  718. const unsigned char *bin_data = data->buf;
  719. Py_ssize_t len = data->len;
  720. /* Releasing the GIL for very small buffers is inefficient
  721. and may lower performance */
  722. if (len > 1024*5) {
  723. unsigned int result;
  724. Py_BEGIN_ALLOW_THREADS
  725. result = internal_crc32(bin_data, len, crc);
  726. Py_END_ALLOW_THREADS
  727. return result;
  728. } else {
  729. return internal_crc32(bin_data, len, crc);
  730. }
  731. }
  732. #endif /* USE_ZLIB_CRC32 */
  733. /*[clinic input]
  734. binascii.b2a_hex
  735. data: Py_buffer
  736. sep: object = NULL
  737. An optional single character or byte to separate hex bytes.
  738. bytes_per_sep: int = 1
  739. How many bytes between separators. Positive values count from the
  740. right, negative values count from the left.
  741. Hexadecimal representation of binary data.
  742. The return value is a bytes object. This function is also
  743. available as "hexlify()".
  744. Example:
  745. >>> binascii.b2a_hex(b'\xb9\x01\xef')
  746. b'b901ef'
  747. >>> binascii.hexlify(b'\xb9\x01\xef', ':')
  748. b'b9:01:ef'
  749. >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
  750. b'b9_01ef'
  751. [clinic start generated code]*/
  752. static PyObject *
  753. binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
  754. int bytes_per_sep)
  755. /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
  756. {
  757. return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
  758. sep, bytes_per_sep);
  759. }
  760. /*[clinic input]
  761. binascii.hexlify = binascii.b2a_hex
  762. Hexadecimal representation of binary data.
  763. The return value is a bytes object. This function is also
  764. available as "b2a_hex()".
  765. [clinic start generated code]*/
  766. static PyObject *
  767. binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
  768. int bytes_per_sep)
  769. /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
  770. {
  771. return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
  772. sep, bytes_per_sep);
  773. }
  774. /*[clinic input]
  775. binascii.a2b_hex
  776. hexstr: ascii_buffer
  777. /
  778. Binary data of hexadecimal representation.
  779. hexstr must contain an even number of hex digits (upper or lower case).
  780. This function is also available as "unhexlify()".
  781. [clinic start generated code]*/
  782. static PyObject *
  783. binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
  784. /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
  785. {
  786. const char* argbuf;
  787. Py_ssize_t arglen;
  788. PyObject *retval;
  789. char* retbuf;
  790. Py_ssize_t i, j;
  791. binascii_state *state;
  792. argbuf = hexstr->buf;
  793. arglen = hexstr->len;
  794. assert(arglen >= 0);
  795. /* XXX What should we do about strings with an odd length? Should
  796. * we add an implicit leading zero, or a trailing zero? For now,
  797. * raise an exception.
  798. */
  799. if (arglen % 2) {
  800. state = get_binascii_state(module);
  801. if (state == NULL) {
  802. return NULL;
  803. }
  804. PyErr_SetString(state->Error, "Odd-length string");
  805. return NULL;
  806. }
  807. retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
  808. if (!retval)
  809. return NULL;
  810. retbuf = PyBytes_AS_STRING(retval);
  811. for (i=j=0; i < arglen; i += 2) {
  812. unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
  813. unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
  814. if (top >= 16 || bot >= 16) {
  815. state = get_binascii_state(module);
  816. if (state == NULL) {
  817. return NULL;
  818. }
  819. PyErr_SetString(state->Error,
  820. "Non-hexadecimal digit found");
  821. goto finally;
  822. }
  823. retbuf[j++] = (top << 4) + bot;
  824. }
  825. return retval;
  826. finally:
  827. Py_DECREF(retval);
  828. return NULL;
  829. }
  830. /*[clinic input]
  831. binascii.unhexlify = binascii.a2b_hex
  832. Binary data of hexadecimal representation.
  833. hexstr must contain an even number of hex digits (upper or lower case).
  834. [clinic start generated code]*/
  835. static PyObject *
  836. binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
  837. /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
  838. {
  839. return binascii_a2b_hex_impl(module, hexstr);
  840. }
/* Line-length threshold used by b2a_qp(): the encoder compares the current
 * output line length against this value to decide when to insert a soft
 * line break ("=" followed by a newline).
 * NOTE(review): 76 presumably mirrors the quoted-printable line limit of
 * RFC 2045 -- confirm. */
#define MAXLINESIZE 76
  842. /*[clinic input]
  843. binascii.a2b_qp
  844. data: ascii_buffer
  845. header: bool = False
  846. Decode a string of qp-encoded data.
  847. [clinic start generated code]*/
  848. static PyObject *
  849. binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
  850. /*[clinic end generated code: output=e99f7846cfb9bc53 input=bdfb31598d4e47b9]*/
  851. {
  852. Py_ssize_t in, out;
  853. char ch;
  854. const unsigned char *ascii_data;
  855. unsigned char *odata;
  856. Py_ssize_t datalen = 0;
  857. PyObject *rv;
  858. ascii_data = data->buf;
  859. datalen = data->len;
  860. /* We allocate the output same size as input, this is overkill.
  861. */
  862. odata = (unsigned char *) PyMem_Calloc(1, datalen);
  863. if (odata == NULL) {
  864. PyErr_NoMemory();
  865. return NULL;
  866. }
  867. in = out = 0;
  868. while (in < datalen) {
  869. if (ascii_data[in] == '=') {
  870. in++;
  871. if (in >= datalen) break;
  872. /* Soft line breaks */
  873. if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
  874. if (ascii_data[in] != '\n') {
  875. while (in < datalen && ascii_data[in] != '\n') in++;
  876. }
  877. if (in < datalen) in++;
  878. }
  879. else if (ascii_data[in] == '=') {
  880. /* broken case from broken python qp */
  881. odata[out++] = '=';
  882. in++;
  883. }
  884. else if ((in + 1 < datalen) &&
  885. ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
  886. (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
  887. (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
  888. ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
  889. (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
  890. (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
  891. /* hexval */
  892. ch = _PyLong_DigitValue[ascii_data[in]] << 4;
  893. in++;
  894. ch |= _PyLong_DigitValue[ascii_data[in]];
  895. in++;
  896. odata[out++] = ch;
  897. }
  898. else {
  899. odata[out++] = '=';
  900. }
  901. }
  902. else if (header && ascii_data[in] == '_') {
  903. odata[out++] = ' ';
  904. in++;
  905. }
  906. else {
  907. odata[out] = ascii_data[in];
  908. in++;
  909. out++;
  910. }
  911. }
  912. rv = PyBytes_FromStringAndSize((char *)odata, out);
  913. PyMem_Free(odata);
  914. return rv;
  915. }
  916. static int
  917. to_hex (unsigned char ch, unsigned char *s)
  918. {
  919. unsigned int uvalue = ch;
  920. s[1] = "0123456789ABCDEF"[uvalue % 16];
  921. uvalue = (uvalue / 16);
  922. s[0] = "0123456789ABCDEF"[uvalue % 16];
  923. return 0;
  924. }
  925. /* XXX: This is ridiculously complicated to be backward compatible
  926. * (mostly) with the quopri module. It doesn't re-create the quopri
  927. * module bug where text ending in CRLF has the CR encoded */
  928. /*[clinic input]
  929. binascii.b2a_qp
  930. data: Py_buffer
  931. quotetabs: bool = False
  932. istext: bool = True
  933. header: bool = False
  934. Encode a string using quoted-printable encoding.
  935. On encoding, when istext is set, newlines are not encoded, and white
  936. space at end of lines is. When istext is not set, \r and \n (CR/LF)
  937. are both encoded. When quotetabs is set, space and tabs are encoded.
  938. [clinic start generated code]*/
  939. static PyObject *
  940. binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
  941. int istext, int header)
  942. /*[clinic end generated code: output=e9884472ebb1a94c input=e9102879afb0defd]*/
  943. {
  944. Py_ssize_t in, out;
  945. const unsigned char *databuf;
  946. unsigned char *odata;
  947. Py_ssize_t datalen = 0, odatalen = 0;
  948. PyObject *rv;
  949. unsigned int linelen = 0;
  950. unsigned char ch;
  951. int crlf = 0;
  952. const unsigned char *p;
  953. databuf = data->buf;
  954. datalen = data->len;
  955. /* See if this string is using CRLF line ends */
  956. /* XXX: this function has the side effect of converting all of
  957. * the end of lines to be the same depending on this detection
  958. * here */
  959. p = (const unsigned char *) memchr(databuf, '\n', datalen);
  960. if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
  961. crlf = 1;
  962. /* First, scan to see how many characters need to be encoded */
  963. in = 0;
  964. while (in < datalen) {
  965. Py_ssize_t delta = 0;
  966. if ((databuf[in] > 126) ||
  967. (databuf[in] == '=') ||
  968. (header && databuf[in] == '_') ||
  969. ((databuf[in] == '.') && (linelen == 0) &&
  970. (in + 1 == datalen || databuf[in+1] == '\n' ||
  971. databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
  972. (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
  973. ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
  974. ((databuf[in] < 33) &&
  975. (databuf[in] != '\r') && (databuf[in] != '\n') &&
  976. (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
  977. {
  978. if ((linelen + 3) >= MAXLINESIZE) {
  979. linelen = 0;
  980. if (crlf)
  981. delta += 3;
  982. else
  983. delta += 2;
  984. }
  985. linelen += 3;
  986. delta += 3;
  987. in++;
  988. }
  989. else {
  990. if (istext &&
  991. ((databuf[in] == '\n') ||
  992. ((in+1 < datalen) && (databuf[in] == '\r') &&
  993. (databuf[in+1] == '\n'))))
  994. {
  995. linelen = 0;
  996. /* Protect against whitespace on end of line */
  997. if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
  998. delta += 2;
  999. if (crlf)
  1000. delta += 2;
  1001. else
  1002. delta += 1;
  1003. if (databuf[in] == '\r')
  1004. in += 2;
  1005. else
  1006. in++;
  1007. }
  1008. else {
  1009. if ((in + 1 != datalen) &&
  1010. (databuf[in+1] != '\n') &&
  1011. (linelen + 1) >= MAXLINESIZE) {
  1012. linelen = 0;
  1013. if (crlf)
  1014. delta += 3;
  1015. else
  1016. delta += 2;
  1017. }
  1018. linelen++;
  1019. delta++;
  1020. in++;
  1021. }
  1022. }
  1023. if (PY_SSIZE_T_MAX - delta < odatalen) {
  1024. PyErr_NoMemory();
  1025. return NULL;
  1026. }
  1027. odatalen += delta;
  1028. }
  1029. /* We allocate the output same size as input, this is overkill.
  1030. */
  1031. odata = (unsigned char *) PyMem_Calloc(1, odatalen);
  1032. if (odata == NULL) {
  1033. PyErr_NoMemory();
  1034. return NULL;
  1035. }
  1036. in = out = linelen = 0;
  1037. while (in < datalen) {
  1038. if ((databuf[in] > 126) ||
  1039. (databuf[in] == '=') ||
  1040. (header && databuf[in] == '_') ||
  1041. ((databuf[in] == '.') && (linelen == 0) &&
  1042. (in + 1 == datalen || databuf[in+1] == '\n' ||
  1043. databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
  1044. (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
  1045. ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
  1046. ((databuf[in] < 33) &&
  1047. (databuf[in] != '\r') && (databuf[in] != '\n') &&
  1048. (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
  1049. {
  1050. if ((linelen + 3 )>= MAXLINESIZE) {
  1051. odata[out++] = '=';
  1052. if (crlf) odata[out++] = '\r';
  1053. odata[out++] = '\n';
  1054. linelen = 0;
  1055. }
  1056. odata[out++] = '=';
  1057. to_hex(databuf[in], &odata[out]);
  1058. out += 2;
  1059. in++;
  1060. linelen += 3;
  1061. }
  1062. else {
  1063. if (istext &&
  1064. ((databuf[in] == '\n') ||
  1065. ((in+1 < datalen) && (databuf[in] == '\r') &&
  1066. (databuf[in+1] == '\n'))))
  1067. {
  1068. linelen = 0;
  1069. /* Protect against whitespace on end of line */
  1070. if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
  1071. ch = odata[out-1];
  1072. odata[out-1] = '=';
  1073. to_hex(ch, &odata[out]);
  1074. out += 2;
  1075. }
  1076. if (crlf) odata[out++] = '\r';
  1077. odata[out++] = '\n';
  1078. if (databuf[in] == '\r')
  1079. in += 2;
  1080. else
  1081. in++;
  1082. }
  1083. else {
  1084. if ((in + 1 != datalen) &&
  1085. (databuf[in+1] != '\n') &&
  1086. (linelen + 1) >= MAXLINESIZE) {
  1087. odata[out++] = '=';
  1088. if (crlf) odata[out++] = '\r';
  1089. odata[out++] = '\n';
  1090. linelen = 0;
  1091. }
  1092. linelen++;
  1093. if (header && databuf[in] == ' ') {
  1094. odata[out++] = '_';
  1095. in++;
  1096. }
  1097. else {
  1098. odata[out++] = databuf[in++];
  1099. }
  1100. }
  1101. }
  1102. }
  1103. rv = PyBytes_FromStringAndSize((char *)odata, out);
  1104. PyMem_Free(odata);
  1105. return rv;
  1106. }
/* List of functions defined in the module.
 *
 * One *_METHODDEF entry per function the module provides; the array is
 * referenced from the binasciimodule definition further down.
 * NOTE(review): the *_METHODDEF macros are not defined in this part of the
 * file -- presumably they come from the Argument Clinic generated header
 * and each expands to one PyMethodDef initializer followed by a comma;
 * confirm.
 */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
/* Initialization function for the module (*must* be called PyInit_binascii) */
/* doc_binascii holds the module docstring; it is referenced from the
 * binasciimodule definition below. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
  1125. static int
  1126. binascii_exec(PyObject *module) {
  1127. int result;
  1128. binascii_state *state = PyModule_GetState(module);
  1129. if (state == NULL) {
  1130. return -1;
  1131. }
  1132. state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
  1133. if (state->Error == NULL) {
  1134. return -1;
  1135. }
  1136. Py_INCREF(state->Error);
  1137. result = PyModule_AddObject(module, "Error", state->Error);
  1138. if (result == -1) {
  1139. Py_DECREF(state->Error);
  1140. return -1;
  1141. }
  1142. state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
  1143. if (state->Incomplete == NULL) {
  1144. return -1;
  1145. }
  1146. Py_INCREF(state->Incomplete);
  1147. result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
  1148. if (result == -1) {
  1149. Py_DECREF(state->Incomplete);
  1150. return -1;
  1151. }
  1152. return 0;
  1153. }
/* Slot table referenced from the binasciimodule definition:
 *   - Py_mod_exec runs binascii_exec() to populate the module;
 *   - Py_mod_multiple_interpreters is set to
 *     Py_MOD_PER_INTERPRETER_GIL_SUPPORTED.
 * The {0, NULL} entry terminates the table. */
static PyModuleDef_Slot binascii_slots[] = {
    {Py_mod_exec, binascii_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {0, NULL}
};
  1159. static int
  1160. binascii_traverse(PyObject *module, visitproc visit, void *arg)
  1161. {
  1162. binascii_state *state = get_binascii_state(module);
  1163. Py_VISIT(state->Error);
  1164. Py_VISIT(state->Incomplete);
  1165. return 0;
  1166. }
  1167. static int
  1168. binascii_clear(PyObject *module)
  1169. {
  1170. binascii_state *state = get_binascii_state(module);
  1171. Py_CLEAR(state->Error);
  1172. Py_CLEAR(state->Incomplete);
  1173. return 0;
  1174. }
  1175. static void
  1176. binascii_free(void *module)
  1177. {
  1178. binascii_clear((PyObject *)module);
  1179. }
  1180. static struct PyModuleDef binasciimodule = {
  1181. PyModuleDef_HEAD_INIT,
  1182. "binascii",
  1183. doc_binascii,
  1184. sizeof(binascii_state),
  1185. binascii_module_methods,
  1186. binascii_slots,
  1187. binascii_traverse,
  1188. binascii_clear,
  1189. binascii_free
  1190. };
  1191. PyMODINIT_FUNC
  1192. PyInit_binascii(void)
  1193. {
  1194. return PyModuleDef_Init(&binasciimodule);
  1195. }