binascii.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346
  1. /*
  2. ** Routines to represent binary data in ASCII and vice-versa
  3. **
  4. ** This module currently supports the following encodings:
  5. ** uuencode:
  6. ** each line encodes 45 bytes (except possibly the last)
  7. ** First char encodes (binary) length, rest data
  8. ** each char encodes 6 bits, as follows:
  9. ** binary: 01234567 abcdefgh ijklmnop
  10. ** ascii: 012345 67abcd efghij klmnop
  11. ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12. ** short binary data is zero-extended (so the bits are always in the
  13. ** right place), this does *not* reflect in the length.
  14. ** base64:
  15. ** Line breaks are insignificant, but lines are at most 76 chars
  16. ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17. ** is done via a table.
  18. ** Short binary data is filled (in ASCII) with '='.
  19. ** hqx:
  20. ** File starts with introductory text, real data starts and ends
  21. ** with colons.
  22. ** Data consists of three similar parts: info, datafork, resourcefork.
  23. ** Each part is protected (at the end) with a 16-bit crc
  24. ** The binary data is run-length encoded, and then ascii-fied:
  25. ** binary: 01234567 abcdefgh ijklmnop
  26. ** ascii: 012345 67abcd efghij klmnop
  27. ** ASCII encoding is table-driven, see the code.
  28. ** Short binary data results in the runt ascii-byte being output with
  29. ** the bits in the right place.
  30. **
  31. ** While I was reading dozens of programs that encode or decode the formats
  32. ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33. **
  34. ** Programs that encode binary data in ASCII are written in
  35. ** such a style that they are as unreadable as possible. Devices used
  36. ** include unnecessary global variables, burying important tables
  37. ** in unrelated sourcefiles, putting functions in include files,
  38. ** using seemingly-descriptive variable names for different purposes,
  39. ** calls to empty subroutines and a host of others.
  40. **
  41. ** I have attempted to break with this tradition, but I guess that that
  42. ** does make the performance sub-optimal. Oh well, too bad...
  43. **
  44. ** Jack Jansen, CWI, July 1995.
  45. **
  46. ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47. ** quoted-printable encoding specifies that non printable characters (anything
  48. ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49. ** of the character. It also specifies some other behavior to enable 8bit data
  50. ** in a mail message with little difficulty (maximum line sizes, protecting
  51. ** some cases of whitespace, etc).
  52. **
  53. ** Brandon Long, September 2001.
  54. */
  55. #ifndef Py_BUILD_CORE_BUILTIN
  56. # define Py_BUILD_CORE_MODULE 1
  57. #endif
  58. #define PY_SSIZE_T_CLEAN
  59. #include "Python.h"
  60. #include "pycore_long.h" // _PyLong_DigitValue
  61. #include "pycore_strhex.h" // _Py_strhex_bytes_with_sep()
  62. #ifdef USE_ZLIB_CRC32
  63. # include "zlib.h"
  64. #endif
  65. typedef struct binascii_state {
  66. PyObject *Error;
  67. PyObject *Incomplete;
  68. } binascii_state;
  69. static inline binascii_state *
  70. get_binascii_state(PyObject *module)
  71. {
  72. return (binascii_state *)PyModule_GetState(module);
  73. }
  /* Base64 decode table, indexed by input byte (0..255).
     Entries 0..63 are the 6-bit value of an alphabet character.  Bytes that
     are not in the alphabet are written as -1, which is stored as 255 in an
     unsigned char; the decoder rejects/skips anything >= 64.
     The pad character '=' maps to 0, but the decoder tests for it before
     consulting this table. */
  static const unsigned char table_a2b_base64[] = {
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
      52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
      -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
      15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
      -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
      41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
      -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  };
  /* Character appended to base64 output to fill the final quad. */
  #define BASE64_PAD '='

  /* Max binary chunk size; limited only by available memory.
     The encoder allocates bin_len*2 + 3 output bytes, so this bound keeps
     that size computation within PY_SSIZE_T_MAX. */
  #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

  /* Base64 encode table: index is the 6-bit value, entry is its ASCII
     character.  The string literal adds a trailing NUL that is never used. */
  static const unsigned char table_b2a_base64[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  /* Lookup table for the 16-bit CRC computed by binascii.crc_hqx().
     Entry i is the result of feeding the byte i, MSB first, through the
     polynomial 0x1021 starting from a zero register. */
  static const unsigned short crctab_hqx[256] = {
      0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
      0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
      0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
      0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
      0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
      0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
      0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
      0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
      0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
      0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
      0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
      0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
      0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
      0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
      0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
      0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
      0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
      0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
      0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
      0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
      0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
      0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
      0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
      0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
      0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
      0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
      0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
      0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
      0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
      0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
      0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
      0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
  };
  131. /*[clinic input]
  132. module binascii
  133. [clinic start generated code]*/
  134. /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
  135. /*[python input]
  136. class ascii_buffer_converter(CConverter):
  137. type = 'Py_buffer'
  138. converter = 'ascii_buffer_converter'
  139. impl_by_reference = True
  140. c_default = "{NULL, NULL}"
  141. def cleanup(self):
  142. name = self.name
  143. return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
  144. [python start generated code]*/
  145. /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
  146. static int
  147. ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
  148. {
  149. if (arg == NULL) {
  150. PyBuffer_Release(buf);
  151. return 1;
  152. }
  153. if (PyUnicode_Check(arg)) {
  154. if (PyUnicode_READY(arg) < 0)
  155. return 0;
  156. if (!PyUnicode_IS_ASCII(arg)) {
  157. PyErr_SetString(PyExc_ValueError,
  158. "string argument should contain only ASCII characters");
  159. return 0;
  160. }
  161. assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
  162. buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
  163. buf->len = PyUnicode_GET_LENGTH(arg);
  164. buf->obj = NULL;
  165. return 1;
  166. }
  167. if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
  168. PyErr_Format(PyExc_TypeError,
  169. "argument should be bytes, buffer or ASCII string, "
  170. "not '%.100s'", Py_TYPE(arg)->tp_name);
  171. return 0;
  172. }
  173. if (!PyBuffer_IsContiguous(buf, 'C')) {
  174. PyErr_Format(PyExc_TypeError,
  175. "argument should be a contiguous buffer, "
  176. "not '%.100s'", Py_TYPE(arg)->tp_name);
  177. PyBuffer_Release(buf);
  178. return 0;
  179. }
  180. return Py_CLEANUP_SUPPORTED;
  181. }
  182. #include "clinic/binascii.c.h"
  183. /*[clinic input]
  184. binascii.a2b_uu
  185. data: ascii_buffer
  186. /
  187. Decode a line of uuencoded data.
  188. [clinic start generated code]*/
  189. static PyObject *
  190. binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
  191. /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
  192. {
  193. const unsigned char *ascii_data;
  194. unsigned char *bin_data;
  195. int leftbits = 0;
  196. unsigned char this_ch;
  197. unsigned int leftchar = 0;
  198. PyObject *rv;
  199. Py_ssize_t ascii_len, bin_len;
  200. binascii_state *state;
  201. ascii_data = data->buf;
  202. ascii_len = data->len;
  203. assert(ascii_len >= 0);
  204. /* First byte: binary data length (in bytes) */
  205. bin_len = (*ascii_data++ - ' ') & 077;
  206. ascii_len--;
  207. /* Allocate the buffer */
  208. if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
  209. return NULL;
  210. bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
  211. for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
  212. /* XXX is it really best to add NULs if there's no more data */
  213. this_ch = (ascii_len > 0) ? *ascii_data : 0;
  214. if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
  215. /*
  216. ** Whitespace. Assume some spaces got eaten at
  217. ** end-of-line. (We check this later)
  218. */
  219. this_ch = 0;
  220. } else {
  221. /* Check the character for legality
  222. ** The 64 in stead of the expected 63 is because
  223. ** there are a few uuencodes out there that use
  224. ** '`' as zero instead of space.
  225. */
  226. if ( this_ch < ' ' || this_ch > (' ' + 64)) {
  227. state = get_binascii_state(module);
  228. if (state == NULL) {
  229. return NULL;
  230. }
  231. PyErr_SetString(state->Error, "Illegal char");
  232. Py_DECREF(rv);
  233. return NULL;
  234. }
  235. this_ch = (this_ch - ' ') & 077;
  236. }
  237. /*
  238. ** Shift it in on the low end, and see if there's
  239. ** a byte ready for output.
  240. */
  241. leftchar = (leftchar << 6) | (this_ch);
  242. leftbits += 6;
  243. if ( leftbits >= 8 ) {
  244. leftbits -= 8;
  245. *bin_data++ = (leftchar >> leftbits) & 0xff;
  246. leftchar &= ((1 << leftbits) - 1);
  247. bin_len--;
  248. }
  249. }
  250. /*
  251. ** Finally, check that if there's anything left on the line
  252. ** that it's whitespace only.
  253. */
  254. while( ascii_len-- > 0 ) {
  255. this_ch = *ascii_data++;
  256. /* Extra '`' may be written as padding in some cases */
  257. if ( this_ch != ' ' && this_ch != ' '+64 &&
  258. this_ch != '\n' && this_ch != '\r' ) {
  259. state = get_binascii_state(module);
  260. if (state == NULL) {
  261. return NULL;
  262. }
  263. PyErr_SetString(state->Error, "Trailing garbage");
  264. Py_DECREF(rv);
  265. return NULL;
  266. }
  267. }
  268. return rv;
  269. }
  270. /*[clinic input]
  271. binascii.b2a_uu
  272. data: Py_buffer
  273. /
  274. *
  275. backtick: bool = False
  276. Uuencode line of data.
  277. [clinic start generated code]*/
  278. static PyObject *
  279. binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
  280. /*[clinic end generated code: output=b1b99de62d9bbeb8 input=beb27822241095cd]*/
  281. {
  282. unsigned char *ascii_data;
  283. const unsigned char *bin_data;
  284. int leftbits = 0;
  285. unsigned char this_ch;
  286. unsigned int leftchar = 0;
  287. binascii_state *state;
  288. Py_ssize_t bin_len, out_len;
  289. _PyBytesWriter writer;
  290. _PyBytesWriter_Init(&writer);
  291. bin_data = data->buf;
  292. bin_len = data->len;
  293. if ( bin_len > 45 ) {
  294. /* The 45 is a limit that appears in all uuencode's */
  295. state = get_binascii_state(module);
  296. if (state == NULL) {
  297. return NULL;
  298. }
  299. PyErr_SetString(state->Error, "At most 45 bytes at once");
  300. return NULL;
  301. }
  302. /* We're lazy and allocate to much (fixed up later) */
  303. out_len = 2 + (bin_len + 2) / 3 * 4;
  304. ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
  305. if (ascii_data == NULL)
  306. return NULL;
  307. /* Store the length */
  308. if (backtick && !bin_len)
  309. *ascii_data++ = '`';
  310. else
  311. *ascii_data++ = ' ' + (unsigned char)bin_len;
  312. for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
  313. /* Shift the data (or padding) into our buffer */
  314. if ( bin_len > 0 ) /* Data */
  315. leftchar = (leftchar << 8) | *bin_data;
  316. else /* Padding */
  317. leftchar <<= 8;
  318. leftbits += 8;
  319. /* See if there are 6-bit groups ready */
  320. while ( leftbits >= 6 ) {
  321. this_ch = (leftchar >> (leftbits-6)) & 0x3f;
  322. leftbits -= 6;
  323. if (backtick && !this_ch)
  324. *ascii_data++ = '`';
  325. else
  326. *ascii_data++ = this_ch + ' ';
  327. }
  328. }
  329. *ascii_data++ = '\n'; /* Append a courtesy newline */
  330. return _PyBytesWriter_Finish(&writer, ascii_data);
  331. }
  332. /*[clinic input]
  333. binascii.a2b_base64
  334. data: ascii_buffer
  335. /
  336. *
  337. strict_mode: bool = False
  338. Decode a line of base64 data.
  339. strict_mode
  340. When set to True, bytes that are not part of the base64 standard are not allowed.
  341. The same applies to excess data after padding (= / ==).
  342. [clinic start generated code]*/
  343. static PyObject *
  344. binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
  345. /*[clinic end generated code: output=5409557788d4f975 input=c0c15fd0f8f9a62d]*/
  346. {
  347. assert(data->len >= 0);
  348. const unsigned char *ascii_data = data->buf;
  349. size_t ascii_len = data->len;
  350. binascii_state *state = NULL;
  351. char padding_started = 0;
  352. /* Allocate the buffer */
  353. Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
  354. _PyBytesWriter writer;
  355. _PyBytesWriter_Init(&writer);
  356. unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
  357. if (bin_data == NULL)
  358. return NULL;
  359. unsigned char *bin_data_start = bin_data;
  360. if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
  361. state = get_binascii_state(module);
  362. if (state) {
  363. PyErr_SetString(state->Error, "Leading padding not allowed");
  364. }
  365. goto error_end;
  366. }
  367. int quad_pos = 0;
  368. unsigned char leftchar = 0;
  369. int pads = 0;
  370. for (size_t i = 0; i < ascii_len; i++) {
  371. unsigned char this_ch = ascii_data[i];
  372. /* Check for pad sequences and ignore
  373. ** the invalid ones.
  374. */
  375. if (this_ch == BASE64_PAD) {
  376. padding_started = 1;
  377. if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
  378. /* A pad sequence means we should not parse more input.
  379. ** We've already interpreted the data from the quad at this point.
  380. ** in strict mode, an error should raise if there's excess data after the padding.
  381. */
  382. if (strict_mode && i + 1 < ascii_len) {
  383. state = get_binascii_state(module);
  384. if (state) {
  385. PyErr_SetString(state->Error, "Excess data after padding");
  386. }
  387. goto error_end;
  388. }
  389. goto done;
  390. }
  391. continue;
  392. }
  393. this_ch = table_a2b_base64[this_ch];
  394. if (this_ch >= 64) {
  395. if (strict_mode) {
  396. state = get_binascii_state(module);
  397. if (state) {
  398. PyErr_SetString(state->Error, "Only base64 data is allowed");
  399. }
  400. goto error_end;
  401. }
  402. continue;
  403. }
  404. // Characters that are not '=', in the middle of the padding, are not allowed
  405. if (strict_mode && padding_started) {
  406. state = get_binascii_state(module);
  407. if (state) {
  408. PyErr_SetString(state->Error, "Discontinuous padding not allowed");
  409. }
  410. goto error_end;
  411. }
  412. pads = 0;
  413. switch (quad_pos) {
  414. case 0:
  415. quad_pos = 1;
  416. leftchar = this_ch;
  417. break;
  418. case 1:
  419. quad_pos = 2;
  420. *bin_data++ = (leftchar << 2) | (this_ch >> 4);
  421. leftchar = this_ch & 0x0f;
  422. break;
  423. case 2:
  424. quad_pos = 3;
  425. *bin_data++ = (leftchar << 4) | (this_ch >> 2);
  426. leftchar = this_ch & 0x03;
  427. break;
  428. case 3:
  429. quad_pos = 0;
  430. *bin_data++ = (leftchar << 6) | (this_ch);
  431. leftchar = 0;
  432. break;
  433. }
  434. }
  435. if (quad_pos != 0) {
  436. state = get_binascii_state(module);
  437. if (state == NULL) {
  438. /* error already set, from get_binascii_state */
  439. } else if (quad_pos == 1) {
  440. /*
  441. ** There is exactly one extra valid, non-padding, base64 character.
  442. ** This is an invalid length, as there is no possible input that
  443. ** could encoded into such a base64 string.
  444. */
  445. PyErr_Format(state->Error,
  446. "Invalid base64-encoded string: "
  447. "number of data characters (%zd) cannot be 1 more "
  448. "than a multiple of 4",
  449. (bin_data - bin_data_start) / 3 * 4 + 1);
  450. } else {
  451. PyErr_SetString(state->Error, "Incorrect padding");
  452. }
  453. error_end:
  454. _PyBytesWriter_Dealloc(&writer);
  455. return NULL;
  456. }
  457. done:
  458. return _PyBytesWriter_Finish(&writer, bin_data);
  459. }
  460. /*[clinic input]
  461. binascii.b2a_base64
  462. data: Py_buffer
  463. /
  464. *
  465. newline: bool = True
  466. Base64-code line of data.
  467. [clinic start generated code]*/
  468. static PyObject *
  469. binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
  470. /*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/
  471. {
  472. unsigned char *ascii_data;
  473. const unsigned char *bin_data;
  474. int leftbits = 0;
  475. unsigned char this_ch;
  476. unsigned int leftchar = 0;
  477. Py_ssize_t bin_len, out_len;
  478. _PyBytesWriter writer;
  479. binascii_state *state;
  480. bin_data = data->buf;
  481. bin_len = data->len;
  482. _PyBytesWriter_Init(&writer);
  483. assert(bin_len >= 0);
  484. if ( bin_len > BASE64_MAXBIN ) {
  485. state = get_binascii_state(module);
  486. if (state == NULL) {
  487. return NULL;
  488. }
  489. PyErr_SetString(state->Error, "Too much data for base64 line");
  490. return NULL;
  491. }
  492. /* We're lazy and allocate too much (fixed up later).
  493. "+2" leaves room for up to two pad characters.
  494. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
  495. out_len = bin_len*2 + 2;
  496. if (newline)
  497. out_len++;
  498. ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
  499. if (ascii_data == NULL)
  500. return NULL;
  501. for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
  502. /* Shift the data into our buffer */
  503. leftchar = (leftchar << 8) | *bin_data;
  504. leftbits += 8;
  505. /* See if there are 6-bit groups ready */
  506. while ( leftbits >= 6 ) {
  507. this_ch = (leftchar >> (leftbits-6)) & 0x3f;
  508. leftbits -= 6;
  509. *ascii_data++ = table_b2a_base64[this_ch];
  510. }
  511. }
  512. if ( leftbits == 2 ) {
  513. *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
  514. *ascii_data++ = BASE64_PAD;
  515. *ascii_data++ = BASE64_PAD;
  516. } else if ( leftbits == 4 ) {
  517. *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
  518. *ascii_data++ = BASE64_PAD;
  519. }
  520. if (newline)
  521. *ascii_data++ = '\n'; /* Append a courtesy newline */
  522. return _PyBytesWriter_Finish(&writer, ascii_data);
  523. }
  524. /*[clinic input]
  525. binascii.crc_hqx
  526. data: Py_buffer
  527. crc: unsigned_int(bitwise=True)
  528. /
  529. Compute CRC-CCITT incrementally.
  530. [clinic start generated code]*/
  531. static PyObject *
  532. binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
  533. /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
  534. {
  535. const unsigned char *bin_data;
  536. Py_ssize_t len;
  537. crc &= 0xffff;
  538. bin_data = data->buf;
  539. len = data->len;
  540. while(len-- > 0) {
  541. crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
  542. }
  543. return PyLong_FromUnsignedLong(crc);
  544. }
  545. #ifndef USE_ZLIB_CRC32
  546. /* Crc - 32 BIT ANSI X3.66 CRC checksum files
  547. Also known as: ISO 3307
  548. **********************************************************************|
  549. * *|
  550. * Demonstration program to compute the 32-bit CRC used as the frame *|
  551. * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
  552. * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
  553. * protocol). The 32-bit FCS was added via the Federal Register, *|
  554. * 1 June 1982, p.23798. I presume but don't know for certain that *|
  555. * this polynomial is or will be included in CCITT V.41, which *|
  556. * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
  557. * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
  558. * errors by a factor of 10^-5 over 16-bit FCS. *|
  559. * *|
  560. **********************************************************************|
  561. Copyright (C) 1986 Gary S. Brown. You may use this program, or
  562. code or tables extracted from it, as desired without restriction.
  563. First, the polynomial itself and its table of feedback terms. The
  564. polynomial is
  565. X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
  566. Note that we take it "backwards" and put the highest-order term in
  567. the lowest-order bit. The X^32 term is "implied"; the LSB is the
  568. X^31 term, etc. The X^0 term (usually shown as "+1") results in
  569. the MSB being 1.
  570. Note that the usual hardware shift register implementation, which
  571. is what we're using (we're merely optimizing it by doing eight-bit
  572. chunks at a time) shifts bits into the lowest-order term. In our
  573. implementation, that means shifting towards the right. Why do we
  574. do it this way? Because the calculated CRC must be transmitted in
  575. order from highest-order term to lowest-order term. UARTs transmit
  576. characters in order from LSB to MSB. By storing the CRC this way,
  577. we hand it to the UART in the order low-byte to high-byte; the UART
  578. sends each low-bit to high-bit; and the result is transmission bit
  579. by bit from highest- to lowest-order term without requiring any bit
  580. shuffling on our part. Reception works similarly.
  581. The feedback terms table consists of 256, 32-bit entries. Notes:
  582. 1. The table can be generated at runtime if desired; code to do so
  583. is shown later. It might not be obvious, but the feedback
  584. terms simply represent the results of eight shift/xor opera-
  585. tions for all combinations of data and CRC register values.
  586. 2. The CRC accumulation logic is the same for all CRC polynomials,
  587. be they sixteen or thirty-two bits wide. You simply choose the
  588. appropriate table. Alternatively, because the table can be
  589. generated at runtime, you can start by generating the table for
  590. the polynomial in question and use exactly the same "updcrc",
  591. if your application needn't simultaneously handle two CRC
  592. polynomials. (Note, however, that XMODEM is strange.)
  593. 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
  594. of course, 32-bit entries work OK if the high 16 bits are zero.
  595. 4. The values must be right-shifted by eight bits by the "updcrc"
  596. logic; the shift must be unsigned (bring in zeroes). On some
  597. hardware you could probably optimize the shift in assembler by
  598. using byte-swap instructions.
  599. ********************************************************************/
  /* CRC-32 feedback table used by internal_crc32(): entry i is the byte i
     run through eight shift/xor steps of the reflected polynomial
     0xedb88320 (the bit-reversed form of the polynomial described in the
     comment above). */
  static const unsigned int crc_32_tab[256] = {
      0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
      0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
      0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
      0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
      0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
      0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
      0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
      0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
      0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
      0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
      0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
      0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
      0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
      0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
      0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
      0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
      0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
      0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
      0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
      0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
      0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
      0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
      0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
      0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
      0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
      0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
      0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
      0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
      0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
      0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
      0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
      0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
      0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
      0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
      0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
      0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
      0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
      0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
      0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
      0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
      0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
      0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
      0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
      0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
      0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
      0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
      0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
      0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
      0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
      0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
      0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
      0x2d02ef8dU
  };
  654. static unsigned int
  655. internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
  656. { /* By Jim Ahlstrom; All rights transferred to CNRI */
  657. unsigned int result;
  658. crc = ~ crc;
  659. while (len-- > 0) {
  660. crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
  661. /* Note: (crc >> 8) MUST zero fill on left */
  662. }
  663. result = (crc ^ 0xFFFFFFFF);
  664. return result & 0xffffffff;
  665. }
  666. #endif /* USE_ZLIB_CRC32 */
  667. /*[clinic input]
  668. binascii.crc32 -> unsigned_int
  669. data: Py_buffer
  670. crc: unsigned_int(bitwise=True) = 0
  671. /
  672. Compute CRC-32 incrementally.
  673. [clinic start generated code]*/
  674. static unsigned int
  675. binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
  676. /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
  677. #ifdef USE_ZLIB_CRC32
  678. /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
  679. * modules for historical reasons. */
  680. {
  681. /* Releasing the GIL for very small buffers is inefficient
  682. and may lower performance */
  683. if (data->len > 1024*5) {
  684. unsigned char *buf = data->buf;
  685. Py_ssize_t len = data->len;
  686. Py_BEGIN_ALLOW_THREADS
  687. /* Avoid truncation of length for very large buffers. crc32() takes
  688. length as an unsigned int, which may be narrower than Py_ssize_t.
  689. We further limit size due to bugs in Apple's macOS zlib.
  690. See https://github.com/python/cpython/issues/105967
  691. */
  692. #define ZLIB_CRC_CHUNK_SIZE 0x40000000
  693. #if ZLIB_CRC_CHUNK_SIZE > INT_MAX
  694. # error "unsupported less than 32-bit platform?"
  695. #endif
  696. while ((size_t)len > ZLIB_CRC_CHUNK_SIZE) {
  697. crc = crc32(crc, buf, ZLIB_CRC_CHUNK_SIZE);
  698. buf += (size_t) ZLIB_CRC_CHUNK_SIZE;
  699. len -= (size_t) ZLIB_CRC_CHUNK_SIZE;
  700. }
  701. #undef ZLIB_CRC_CHUNK_SIZE
  702. crc = crc32(crc, buf, (unsigned int)len);
  703. Py_END_ALLOW_THREADS
  704. } else {
  705. crc = crc32(crc, data->buf, (unsigned int)data->len);
  706. }
  707. return crc & 0xffffffff;
  708. }
  709. #else /* USE_ZLIB_CRC32 */
  710. {
  711. const unsigned char *bin_data = data->buf;
  712. Py_ssize_t len = data->len;
  713. /* Releasing the GIL for very small buffers is inefficient
  714. and may lower performance */
  715. if (len > 1024*5) {
  716. unsigned int result;
  717. Py_BEGIN_ALLOW_THREADS
  718. result = internal_crc32(bin_data, len, crc);
  719. Py_END_ALLOW_THREADS
  720. return result;
  721. } else {
  722. return internal_crc32(bin_data, len, crc);
  723. }
  724. }
  725. #endif /* USE_ZLIB_CRC32 */
  726. /*[clinic input]
  727. binascii.b2a_hex
  728. data: Py_buffer
  729. sep: object = NULL
  730. An optional single character or byte to separate hex bytes.
  731. bytes_per_sep: int = 1
  732. How many bytes between separators. Positive values count from the
  733. right, negative values count from the left.
  734. Hexadecimal representation of binary data.
  735. The return value is a bytes object. This function is also
  736. available as "hexlify()".
  737. Example:
  738. >>> binascii.b2a_hex(b'\xb9\x01\xef')
  739. b'b901ef'
  740. >>> binascii.hexlify(b'\xb9\x01\xef', ':')
  741. b'b9:01:ef'
  742. >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
  743. b'b9_01ef'
  744. [clinic start generated code]*/
  745. static PyObject *
  746. binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
  747. int bytes_per_sep)
  748. /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
  749. {
  750. return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
  751. sep, bytes_per_sep);
  752. }
  753. /*[clinic input]
  754. binascii.hexlify = binascii.b2a_hex
  755. Hexadecimal representation of binary data.
  756. The return value is a bytes object. This function is also
  757. available as "b2a_hex()".
  758. [clinic start generated code]*/
  759. static PyObject *
  760. binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
  761. int bytes_per_sep)
  762. /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
  763. {
  764. return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
  765. sep, bytes_per_sep);
  766. }
  767. /*[clinic input]
  768. binascii.a2b_hex
  769. hexstr: ascii_buffer
  770. /
  771. Binary data of hexadecimal representation.
  772. hexstr must contain an even number of hex digits (upper or lower case).
  773. This function is also available as "unhexlify()".
  774. [clinic start generated code]*/
  775. static PyObject *
  776. binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
  777. /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
  778. {
  779. const char* argbuf;
  780. Py_ssize_t arglen;
  781. PyObject *retval;
  782. char* retbuf;
  783. Py_ssize_t i, j;
  784. binascii_state *state;
  785. argbuf = hexstr->buf;
  786. arglen = hexstr->len;
  787. assert(arglen >= 0);
  788. /* XXX What should we do about strings with an odd length? Should
  789. * we add an implicit leading zero, or a trailing zero? For now,
  790. * raise an exception.
  791. */
  792. if (arglen % 2) {
  793. state = get_binascii_state(module);
  794. if (state == NULL) {
  795. return NULL;
  796. }
  797. PyErr_SetString(state->Error, "Odd-length string");
  798. return NULL;
  799. }
  800. retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
  801. if (!retval)
  802. return NULL;
  803. retbuf = PyBytes_AS_STRING(retval);
  804. for (i=j=0; i < arglen; i += 2) {
  805. unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
  806. unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
  807. if (top >= 16 || bot >= 16) {
  808. state = get_binascii_state(module);
  809. if (state == NULL) {
  810. return NULL;
  811. }
  812. PyErr_SetString(state->Error,
  813. "Non-hexadecimal digit found");
  814. goto finally;
  815. }
  816. retbuf[j++] = (top << 4) + bot;
  817. }
  818. return retval;
  819. finally:
  820. Py_DECREF(retval);
  821. return NULL;
  822. }
  823. /*[clinic input]
  824. binascii.unhexlify = binascii.a2b_hex
  825. Binary data of hexadecimal representation.
  826. hexstr must contain an even number of hex digits (upper or lower case).
  827. [clinic start generated code]*/
  828. static PyObject *
  829. binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
  830. /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
  831. {
  832. return binascii_a2b_hex_impl(module, hexstr);
  833. }
/* Line-length threshold for quoted-printable encoding: b2a_qp starts a new
 * output line with a soft line break ("=" followed by the line ending) when
 * appending the next item would bring the current line to this length. */
#define MAXLINESIZE 76
  835. /*[clinic input]
  836. binascii.a2b_qp
  837. data: ascii_buffer
  838. header: bool = False
  839. Decode a string of qp-encoded data.
  840. [clinic start generated code]*/
  841. static PyObject *
  842. binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
  843. /*[clinic end generated code: output=e99f7846cfb9bc53 input=bdfb31598d4e47b9]*/
  844. {
  845. Py_ssize_t in, out;
  846. char ch;
  847. const unsigned char *ascii_data;
  848. unsigned char *odata;
  849. Py_ssize_t datalen = 0;
  850. PyObject *rv;
  851. ascii_data = data->buf;
  852. datalen = data->len;
  853. /* We allocate the output same size as input, this is overkill.
  854. */
  855. odata = (unsigned char *) PyMem_Calloc(1, datalen);
  856. if (odata == NULL) {
  857. PyErr_NoMemory();
  858. return NULL;
  859. }
  860. in = out = 0;
  861. while (in < datalen) {
  862. if (ascii_data[in] == '=') {
  863. in++;
  864. if (in >= datalen) break;
  865. /* Soft line breaks */
  866. if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
  867. if (ascii_data[in] != '\n') {
  868. while (in < datalen && ascii_data[in] != '\n') in++;
  869. }
  870. if (in < datalen) in++;
  871. }
  872. else if (ascii_data[in] == '=') {
  873. /* broken case from broken python qp */
  874. odata[out++] = '=';
  875. in++;
  876. }
  877. else if ((in + 1 < datalen) &&
  878. ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
  879. (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
  880. (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
  881. ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
  882. (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
  883. (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
  884. /* hexval */
  885. ch = _PyLong_DigitValue[ascii_data[in]] << 4;
  886. in++;
  887. ch |= _PyLong_DigitValue[ascii_data[in]];
  888. in++;
  889. odata[out++] = ch;
  890. }
  891. else {
  892. odata[out++] = '=';
  893. }
  894. }
  895. else if (header && ascii_data[in] == '_') {
  896. odata[out++] = ' ';
  897. in++;
  898. }
  899. else {
  900. odata[out] = ascii_data[in];
  901. in++;
  902. out++;
  903. }
  904. }
  905. rv = PyBytes_FromStringAndSize((char *)odata, out);
  906. PyMem_Free(odata);
  907. return rv;
  908. }
  909. static int
  910. to_hex (unsigned char ch, unsigned char *s)
  911. {
  912. unsigned int uvalue = ch;
  913. s[1] = "0123456789ABCDEF"[uvalue % 16];
  914. uvalue = (uvalue / 16);
  915. s[0] = "0123456789ABCDEF"[uvalue % 16];
  916. return 0;
  917. }
  918. /* XXX: This is ridiculously complicated to be backward compatible
  919. * (mostly) with the quopri module. It doesn't re-create the quopri
  920. * module bug where text ending in CRLF has the CR encoded */
  921. /*[clinic input]
  922. binascii.b2a_qp
  923. data: Py_buffer
  924. quotetabs: bool = False
  925. istext: bool = True
  926. header: bool = False
  927. Encode a string using quoted-printable encoding.
  928. On encoding, when istext is set, newlines are not encoded, and white
  929. space at end of lines is. When istext is not set, \r and \n (CR/LF)
  930. are both encoded. When quotetabs is set, space and tabs are encoded.
  931. [clinic start generated code]*/
static PyObject *
binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
                     int istext, int header)
/*[clinic end generated code: output=e9884472ebb1a94c input=e9102879afb0defd]*/
{
    /*
     * Encode the bytes of `data` as quoted-printable and return them as a
     * new bytes object.
     *
     *   quotetabs - when true, spaces and tabs are escaped as "=XX" as well.
     *   istext    - when true, LF and CRLF are treated as line endings and
     *               copied through (with whitespace just before them
     *               escaped); when false, CR and LF are escaped like any
     *               other special byte.
     *   header    - when true, '_' is escaped and an unescaped space is
     *               written as '_'.
     *
     * The work is done in two passes that must stay in step: the first
     * counts the output bytes, the second writes them into a buffer of that
     * size.  Returns NULL with MemoryError set if the size computation
     * would overflow or the buffer cannot be allocated.
     */
    Py_ssize_t in, out;
    const unsigned char *databuf;
    unsigned char *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;   /* characters on the current output line */
    unsigned char ch;
    int crlf = 0;               /* nonzero: emit "\r\n" instead of "\n" */
    const unsigned char *p;

    databuf = data->buf;
    datalen = data->len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' in the input is inspected. */
    p = (const unsigned char *) memchr(databuf, '\n', datalen);
    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        /* Output bytes contributed by this step of the scan. */
        Py_ssize_t delta = 0;
        /* A byte is escaped as "=XX" when it is: above 126; '='; '_' in
         * header mode; a '.' at the start of a line that is followed by the
         * end of data, LF, CR or NUL; CR or LF when not in text mode; a tab
         * or space that is the very last input byte; or any byte below 33
         * other than CR/LF (tab and space only when quotetabs is set). */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Room for a soft line break first: '=' plus "\r\n" or "\n". */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            /* The escape itself is three characters: '=' and two digits. */
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            /* Text-mode line ending: a bare LF, or CR immediately
             * followed by LF. */
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Two extra bytes: the encode pass turns that one
                 * whitespace character into a three-character escape. */
                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                /* Consume both bytes of a CRLF pair. */
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Ordinary byte copied as-is; break the line first if it
                 * would reach the limit, unless this is the last byte or
                 * the next byte is LF. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t. */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the number of bytes counted by the scan above. */
    odata = (unsigned char *) PyMem_Calloc(1, odatalen);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Second pass: same decisions as the scan, now writing the output. */
    in = out = linelen = 0;
    while (in < datalen) {
        /* Same escape test as in the counting pass. */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Soft line break before the escape if the line is full. */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            odata[out++] = '=';
            to_hex(databuf[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* This looks at the last byte already written, whereas the
                 * counting pass looked at the previous input byte.
                 * NOTE(review): the two can differ (e.g. a space written as
                 * '_' in header mode); that appears only to make the count
                 * an over-estimate -- confirm. */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* Write the line ending in the detected convention. */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode a literal space is written as '_'. */
                if (header && databuf[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = databuf[in++];
                }
            }
        }
    }
    /* Only the `out` bytes actually written are copied into the result. */
    rv = PyBytes_FromStringAndSize((char *)odata, out);
    PyMem_Free(odata);
    return rv;
}
/* List of functions defined in the module */

/* Method table referenced by the module definition.  Each
 * BINASCII_*_METHODDEF macro is defined outside this table and supplies its
 * own trailing comma, which is why the entries are not comma-separated
 * here (NOTE(review): presumably they come from the Argument Clinic
 * generated header -- confirm). */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
/* Initialization function for the module (*must* be called PyInit_binascii) */

/* Module-level docstring; used as the documentation entry of the module
 * definition. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
  1118. static int
  1119. binascii_exec(PyObject *module) {
  1120. int result;
  1121. binascii_state *state = PyModule_GetState(module);
  1122. if (state == NULL) {
  1123. return -1;
  1124. }
  1125. state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
  1126. if (state->Error == NULL) {
  1127. return -1;
  1128. }
  1129. Py_INCREF(state->Error);
  1130. result = PyModule_AddObject(module, "Error", state->Error);
  1131. if (result == -1) {
  1132. Py_DECREF(state->Error);
  1133. return -1;
  1134. }
  1135. state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
  1136. if (state->Incomplete == NULL) {
  1137. return -1;
  1138. }
  1139. Py_INCREF(state->Incomplete);
  1140. result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
  1141. if (result == -1) {
  1142. Py_DECREF(state->Incomplete);
  1143. return -1;
  1144. }
  1145. return 0;
  1146. }
  1147. static PyModuleDef_Slot binascii_slots[] = {
  1148. {Py_mod_exec, binascii_exec},
  1149. {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
  1150. {0, NULL}
  1151. };
  1152. static int
  1153. binascii_traverse(PyObject *module, visitproc visit, void *arg)
  1154. {
  1155. binascii_state *state = get_binascii_state(module);
  1156. Py_VISIT(state->Error);
  1157. Py_VISIT(state->Incomplete);
  1158. return 0;
  1159. }
  1160. static int
  1161. binascii_clear(PyObject *module)
  1162. {
  1163. binascii_state *state = get_binascii_state(module);
  1164. Py_CLEAR(state->Error);
  1165. Py_CLEAR(state->Incomplete);
  1166. return 0;
  1167. }
  1168. static void
  1169. binascii_free(void *module)
  1170. {
  1171. binascii_clear((PyObject *)module);
  1172. }
  1173. static struct PyModuleDef binasciimodule = {
  1174. PyModuleDef_HEAD_INIT,
  1175. "binascii",
  1176. doc_binascii,
  1177. sizeof(binascii_state),
  1178. binascii_module_methods,
  1179. binascii_slots,
  1180. binascii_traverse,
  1181. binascii_clear,
  1182. binascii_free
  1183. };
  1184. PyMODINIT_FUNC
  1185. PyInit_binascii(void)
  1186. {
  1187. return PyModuleDef_Init(&binasciimodule);
  1188. }