1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 **      each line encodes 45 bytes (except possibly the last)
7 **      First char encodes (binary) length, rest data
8 **      each char encodes 6 bits, as follows:
9 **      binary: 01234567 abcdefgh ijklmnop
10 **      ascii:  012345 67abcd efghij klmnop
11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 **      short binary data is zero-extended (so the bits are always in the
13 **      right place), this does *not* reflect in the length.
14 ** base64:
**      Line breaks are insignificant, but lines are at most 76 chars.
**      Each char encodes 6 bits, in a similar order to uuencode/hqx. Encoding
**      is done via a table.
18 **      Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 **      File starts with introductory text, real data starts and ends
21 **      with colons.
22 **      Data consists of three similar parts: info, datafork, resourcefork.
23 **      Each part is protected (at the end) with a 16-bit crc
24 **      The binary data is run-length encoded, and then ascii-fied:
25 **      binary: 01234567 abcdefgh ijklmnop
26 **      ascii:  012345 67abcd efghij klmnop
27 **      ASCII encoding is table-driven, see the code.
28 **      Short binary data results in the runt ascii-byte being output with
29 **      the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 **      Programs that encode binary data in ASCII are written in
35 **      such a style that they are as unreadable as possible. Devices used
36 **      include unnecessary global variables, burying important tables
37 **      in unrelated sourcefiles, putting functions in include files,
38 **      using seemingly-descriptive variable names for different purposes,
39 **      calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character.  It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55 
56 #define PY_SSIZE_T_CLEAN
57 
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63 
/* Per-module state, fetched with PyModule_GetState(module) by the routines
   in this file: the exception objects they raise. */
typedef struct binascii_state {
    PyObject *Error;        /* raised for malformed input, e.g. "Illegal char" */
    PyObject *Incomplete;   /* raised by the hqx decoders for truncated input */
} binascii_state;
68 
69 /*
70 ** hqx lookup table, ascii->binary.
71 */
72 
/* Marker byte of the hqx run-length encoding (see rlecode_hqx). */
#define RUNCHAR 0x90

/* Sentinel entries of table_a2b_hqx; real 6-bit values are 0x00..0x3F. */
#define DONE 0x7F   /* ':' -- the terminating colon */
#define SKIP 0x7E   /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D   /* any other character outside the hqx alphabet */

/* Indexed by the input byte (0..255); yields the 6-bit value of an hqx
   character or one of the sentinels above. */
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
    FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
129 
/* hqx lookup table, binary->ascii: the 64 characters of the hqx alphabet,
   indexed by 6-bit value (the inverse of table_a2b_hqx). */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
132 
/* base64 lookup table, ascii->binary.  It has 128 entries only, so callers
   must reject or mask bytes above 0x7f before indexing.  -1 marks a character
   outside the base64 alphabet; the pad character '=' maps to 0. */
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
143 
/* Character used to pad the last base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   b2a_base64 allocates 2*len + 3 output bytes at most, so this bound keeps
   that size computation within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 lookup table, binary->ascii: indexed by 6-bit value. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
151 
152 
153 
/* 256-entry table of 16-bit values, one per byte value.
   NOTE(review): going by the name and the "16-bit crc" mentioned in the file
   header, this is presumably the byte-wise lookup table for the hqx CRC
   (entry 1 is 0x1021); the routine that consumes it is not in this part of
   the file -- confirm there. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
188 
189 /*[clinic input]
190 module binascii
191 [clinic start generated code]*/
192 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
193 
194 /*[python input]
195 
196 class ascii_buffer_converter(CConverter):
197     type = 'Py_buffer'
198     converter = 'ascii_buffer_converter'
199     impl_by_reference = True
200     c_default = "{NULL, NULL}"
201 
202     def cleanup(self):
203         name = self.name
204         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
205 
206 [python start generated code]*/
207 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
208 
209 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)210 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
211 {
212     if (arg == NULL) {
213         PyBuffer_Release(buf);
214         return 1;
215     }
216     if (PyUnicode_Check(arg)) {
217         if (PyUnicode_READY(arg) < 0)
218             return 0;
219         if (!PyUnicode_IS_ASCII(arg)) {
220             PyErr_SetString(PyExc_ValueError,
221                             "string argument should contain only ASCII characters");
222             return 0;
223         }
224         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
225         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
226         buf->len = PyUnicode_GET_LENGTH(arg);
227         buf->obj = NULL;
228         return 1;
229     }
230     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
231         PyErr_Format(PyExc_TypeError,
232                      "argument should be bytes, buffer or ASCII string, "
233                      "not '%.100s'", Py_TYPE(arg)->tp_name);
234         return 0;
235     }
236     if (!PyBuffer_IsContiguous(buf, 'C')) {
237         PyErr_Format(PyExc_TypeError,
238                      "argument should be a contiguous buffer, "
239                      "not '%.100s'", Py_TYPE(arg)->tp_name);
240         PyBuffer_Release(buf);
241         return 0;
242     }
243     return Py_CLEANUP_SUPPORTED;
244 }
245 
246 #include "clinic/binascii.c.h"
247 
248 /*[clinic input]
249 binascii.a2b_uu
250 
251     data: ascii_buffer
252     /
253 
254 Decode a line of uuencoded data.
255 [clinic start generated code]*/
256 
257 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)258 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
259 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
260 {
261     const unsigned char *ascii_data;
262     unsigned char *bin_data;
263     int leftbits = 0;
264     unsigned char this_ch;
265     unsigned int leftchar = 0;
266     PyObject *rv;
267     Py_ssize_t ascii_len, bin_len;
268     binascii_state *state;
269 
270     ascii_data = data->buf;
271     ascii_len = data->len;
272 
273     assert(ascii_len >= 0);
274 
275     /* First byte: binary data length (in bytes) */
276     bin_len = (*ascii_data++ - ' ') & 077;
277     ascii_len--;
278 
279     /* Allocate the buffer */
280     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
281         return NULL;
282     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
283 
284     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
285         /* XXX is it really best to add NULs if there's no more data */
286         this_ch = (ascii_len > 0) ? *ascii_data : 0;
287         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
288             /*
289             ** Whitespace. Assume some spaces got eaten at
290             ** end-of-line. (We check this later)
291             */
292             this_ch = 0;
293         } else {
294             /* Check the character for legality
295             ** The 64 in stead of the expected 63 is because
296             ** there are a few uuencodes out there that use
297             ** '`' as zero instead of space.
298             */
299             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
300                 state = PyModule_GetState(module);
301                 if (state == NULL) {
302                     return NULL;
303                 }
304                 PyErr_SetString(state->Error, "Illegal char");
305                 Py_DECREF(rv);
306                 return NULL;
307             }
308             this_ch = (this_ch - ' ') & 077;
309         }
310         /*
311         ** Shift it in on the low end, and see if there's
312         ** a byte ready for output.
313         */
314         leftchar = (leftchar << 6) | (this_ch);
315         leftbits += 6;
316         if ( leftbits >= 8 ) {
317             leftbits -= 8;
318             *bin_data++ = (leftchar >> leftbits) & 0xff;
319             leftchar &= ((1 << leftbits) - 1);
320             bin_len--;
321         }
322     }
323     /*
324     ** Finally, check that if there's anything left on the line
325     ** that it's whitespace only.
326     */
327     while( ascii_len-- > 0 ) {
328         this_ch = *ascii_data++;
329         /* Extra '`' may be written as padding in some cases */
330         if ( this_ch != ' ' && this_ch != ' '+64 &&
331              this_ch != '\n' && this_ch != '\r' ) {
332             state = PyModule_GetState(module);
333             if (state == NULL) {
334                 return NULL;
335             }
336             PyErr_SetString(state->Error, "Trailing garbage");
337             Py_DECREF(rv);
338             return NULL;
339         }
340     }
341     return rv;
342 }
343 
344 /*[clinic input]
345 binascii.b2a_uu
346 
347     data: Py_buffer
348     /
349     *
350     backtick: bool(accept={int}) = False
351 
352 Uuencode line of data.
353 [clinic start generated code]*/
354 
355 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)356 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
357 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
358 {
359     unsigned char *ascii_data;
360     const unsigned char *bin_data;
361     int leftbits = 0;
362     unsigned char this_ch;
363     unsigned int leftchar = 0;
364     binascii_state *state;
365     Py_ssize_t bin_len, out_len;
366     _PyBytesWriter writer;
367 
368     _PyBytesWriter_Init(&writer);
369     bin_data = data->buf;
370     bin_len = data->len;
371     if ( bin_len > 45 ) {
372         /* The 45 is a limit that appears in all uuencode's */
373         state = PyModule_GetState(module);
374         if (state == NULL) {
375             return NULL;
376         }
377         PyErr_SetString(state->Error, "At most 45 bytes at once");
378         return NULL;
379     }
380 
381     /* We're lazy and allocate to much (fixed up later) */
382     out_len = 2 + (bin_len + 2) / 3 * 4;
383     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
384     if (ascii_data == NULL)
385         return NULL;
386 
387     /* Store the length */
388     if (backtick && !bin_len)
389         *ascii_data++ = '`';
390     else
391         *ascii_data++ = ' ' + (unsigned char)bin_len;
392 
393     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
394         /* Shift the data (or padding) into our buffer */
395         if ( bin_len > 0 )              /* Data */
396             leftchar = (leftchar << 8) | *bin_data;
397         else                            /* Padding */
398             leftchar <<= 8;
399         leftbits += 8;
400 
401         /* See if there are 6-bit groups ready */
402         while ( leftbits >= 6 ) {
403             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
404             leftbits -= 6;
405             if (backtick && !this_ch)
406                 *ascii_data++ = '`';
407             else
408                 *ascii_data++ = this_ch + ' ';
409         }
410     }
411     *ascii_data++ = '\n';       /* Append a courtesy newline */
412 
413     return _PyBytesWriter_Finish(&writer, ascii_data);
414 }
415 
416 
417 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)418 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
419 {
420     /* Finds & returns the (num+1)th
421     ** valid character for base64, or -1 if none.
422     */
423 
424     int ret = -1;
425     unsigned char c, b64val;
426 
427     while ((slen > 0) && (ret == -1)) {
428         c = *s;
429         b64val = table_a2b_base64[c & 0x7f];
430         if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
431             if (num == 0)
432                 ret = *s;
433             num--;
434         }
435 
436         s++;
437         slen--;
438     }
439     return ret;
440 }
441 
442 /*[clinic input]
443 binascii.a2b_base64
444 
445     data: ascii_buffer
446     /
447 
448 Decode a line of base64 data.
449 [clinic start generated code]*/
450 
451 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)452 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
453 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
454 {
455     const unsigned char *ascii_data;
456     unsigned char *bin_data;
457     unsigned char *bin_data_start;
458     int leftbits = 0;
459     unsigned char this_ch;
460     unsigned int leftchar = 0;
461     Py_ssize_t ascii_len, bin_len;
462     int quad_pos = 0;
463     _PyBytesWriter writer;
464     binascii_state *state;
465 
466     ascii_data = data->buf;
467     ascii_len = data->len;
468 
469     assert(ascii_len >= 0);
470 
471     if (ascii_len > PY_SSIZE_T_MAX - 3)
472         return PyErr_NoMemory();
473 
474     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
475 
476     _PyBytesWriter_Init(&writer);
477 
478     /* Allocate the buffer */
479     bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
480     if (bin_data == NULL)
481         return NULL;
482     bin_data_start = bin_data;
483 
484     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
485         this_ch = *ascii_data;
486 
487         if (this_ch > 0x7f ||
488             this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
489             continue;
490 
491         /* Check for pad sequences and ignore
492         ** the invalid ones.
493         */
494         if (this_ch == BASE64_PAD) {
495             if ( (quad_pos < 2) ||
496                  ((quad_pos == 2) &&
497                   (binascii_find_valid(ascii_data, ascii_len, 1)
498                    != BASE64_PAD)) )
499             {
500                 continue;
501             }
502             else {
503                 /* A pad sequence means no more input.
504                 ** We've already interpreted the data
505                 ** from the quad at this point.
506                 */
507                 leftbits = 0;
508                 break;
509             }
510         }
511 
512         this_ch = table_a2b_base64[*ascii_data];
513         if ( this_ch == (unsigned char) -1 )
514             continue;
515 
516         /*
517         ** Shift it in on the low end, and see if there's
518         ** a byte ready for output.
519         */
520         quad_pos = (quad_pos + 1) & 0x03;
521         leftchar = (leftchar << 6) | (this_ch);
522         leftbits += 6;
523 
524         if ( leftbits >= 8 ) {
525             leftbits -= 8;
526             *bin_data++ = (leftchar >> leftbits) & 0xff;
527             leftchar &= ((1 << leftbits) - 1);
528         }
529     }
530 
531     if (leftbits != 0) {
532         state = PyModule_GetState(module);
533         if (state == NULL) {
534             return NULL;
535         }
536         if (leftbits == 6) {
537             /*
538             ** There is exactly one extra valid, non-padding, base64 character.
539             ** This is an invalid length, as there is no possible input that
540             ** could encoded into such a base64 string.
541             */
542             PyErr_Format(state->Error,
543                          "Invalid base64-encoded string: "
544                          "number of data characters (%zd) cannot be 1 more "
545                          "than a multiple of 4",
546                          (bin_data - bin_data_start) / 3 * 4 + 1);
547         } else {
548             PyErr_SetString(state->Error, "Incorrect padding");
549         }
550         _PyBytesWriter_Dealloc(&writer);
551         return NULL;
552     }
553 
554     return _PyBytesWriter_Finish(&writer, bin_data);
555 }
556 
557 
558 /*[clinic input]
559 binascii.b2a_base64
560 
561     data: Py_buffer
562     /
563     *
564     newline: bool(accept={int}) = True
565 
566 Base64-code line of data.
567 [clinic start generated code]*/
568 
569 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)570 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
571 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
572 {
573     unsigned char *ascii_data;
574     const unsigned char *bin_data;
575     int leftbits = 0;
576     unsigned char this_ch;
577     unsigned int leftchar = 0;
578     Py_ssize_t bin_len, out_len;
579     _PyBytesWriter writer;
580     binascii_state *state;
581 
582     bin_data = data->buf;
583     bin_len = data->len;
584     _PyBytesWriter_Init(&writer);
585 
586     assert(bin_len >= 0);
587 
588     if ( bin_len > BASE64_MAXBIN ) {
589         state = PyModule_GetState(module);
590         if (state == NULL) {
591             return NULL;
592         }
593         PyErr_SetString(state->Error, "Too much data for base64 line");
594         return NULL;
595     }
596 
597     /* We're lazy and allocate too much (fixed up later).
598        "+2" leaves room for up to two pad characters.
599        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
600     out_len = bin_len*2 + 2;
601     if (newline)
602         out_len++;
603     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
604     if (ascii_data == NULL)
605         return NULL;
606 
607     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
608         /* Shift the data into our buffer */
609         leftchar = (leftchar << 8) | *bin_data;
610         leftbits += 8;
611 
612         /* See if there are 6-bit groups ready */
613         while ( leftbits >= 6 ) {
614             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
615             leftbits -= 6;
616             *ascii_data++ = table_b2a_base64[this_ch];
617         }
618     }
619     if ( leftbits == 2 ) {
620         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
621         *ascii_data++ = BASE64_PAD;
622         *ascii_data++ = BASE64_PAD;
623     } else if ( leftbits == 4 ) {
624         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
625         *ascii_data++ = BASE64_PAD;
626     }
627     if (newline)
628         *ascii_data++ = '\n';       /* Append a courtesy newline */
629 
630     return _PyBytesWriter_Finish(&writer, ascii_data);
631 }
632 
633 /*[clinic input]
634 binascii.a2b_hqx
635 
636     data: ascii_buffer
637     /
638 
639 Decode .hqx coding.
640 [clinic start generated code]*/
641 
642 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)643 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
644 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
645 {
646     const unsigned char *ascii_data;
647     unsigned char *bin_data;
648     int leftbits = 0;
649     unsigned char this_ch;
650     unsigned int leftchar = 0;
651     PyObject *res;
652     Py_ssize_t len;
653     int done = 0;
654     _PyBytesWriter writer;
655     binascii_state *state;
656 
657     ascii_data = data->buf;
658     len = data->len;
659     _PyBytesWriter_Init(&writer);
660 
661     assert(len >= 0);
662 
663     if (len > PY_SSIZE_T_MAX - 2)
664         return PyErr_NoMemory();
665 
666     /* Allocate a string that is too big (fixed later)
667        Add two to the initial length to prevent interning which
668        would preclude subsequent resizing.  */
669     bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
670     if (bin_data == NULL)
671         return NULL;
672 
673     for( ; len > 0 ; len--, ascii_data++ ) {
674         /* Get the byte and look it up */
675         this_ch = table_a2b_hqx[*ascii_data];
676         if ( this_ch == SKIP )
677             continue;
678         if ( this_ch == FAIL ) {
679             state = PyModule_GetState(module);
680             if (state == NULL) {
681                 return NULL;
682             }
683             PyErr_SetString(state->Error, "Illegal char");
684             _PyBytesWriter_Dealloc(&writer);
685             return NULL;
686         }
687         if ( this_ch == DONE ) {
688             /* The terminating colon */
689             done = 1;
690             break;
691         }
692 
693         /* Shift it into the buffer and see if any bytes are ready */
694         leftchar = (leftchar << 6) | (this_ch);
695         leftbits += 6;
696         if ( leftbits >= 8 ) {
697             leftbits -= 8;
698             *bin_data++ = (leftchar >> leftbits) & 0xff;
699             leftchar &= ((1 << leftbits) - 1);
700         }
701     }
702 
703     if ( leftbits && !done ) {
704         state = PyModule_GetState(module);
705         if (state == NULL) {
706             return NULL;
707         }
708         PyErr_SetString(state->Incomplete,
709                         "String has incomplete number of bytes");
710         _PyBytesWriter_Dealloc(&writer);
711         return NULL;
712     }
713 
714     res = _PyBytesWriter_Finish(&writer, bin_data);
715     if (res == NULL)
716         return NULL;
717     return Py_BuildValue("Ni", res, done);
718 }
719 
720 
721 /*[clinic input]
722 binascii.rlecode_hqx
723 
724     data: Py_buffer
725     /
726 
727 Binhex RLE-code binary data.
728 [clinic start generated code]*/
729 
730 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)731 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
732 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
733 {
734     const unsigned char *in_data;
735     unsigned char *out_data;
736     unsigned char ch;
737     Py_ssize_t in, inend, len;
738     _PyBytesWriter writer;
739 
740     _PyBytesWriter_Init(&writer);
741     in_data = data->buf;
742     len = data->len;
743 
744     assert(len >= 0);
745 
746     if (len > PY_SSIZE_T_MAX / 2 - 2)
747         return PyErr_NoMemory();
748 
749     /* Worst case: output is twice as big as input (fixed later) */
750     out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
751     if (out_data == NULL)
752         return NULL;
753 
754     for( in=0; in<len; in++) {
755         ch = in_data[in];
756         if ( ch == RUNCHAR ) {
757             /* RUNCHAR. Escape it. */
758             *out_data++ = RUNCHAR;
759             *out_data++ = 0;
760         } else {
761             /* Check how many following are the same */
762             for(inend=in+1;
763                 inend<len && in_data[inend] == ch &&
764                     inend < in+255;
765                 inend++) ;
766             if ( inend - in > 3 ) {
767                 /* More than 3 in a row. Output RLE. */
768                 *out_data++ = ch;
769                 *out_data++ = RUNCHAR;
770                 *out_data++ = (unsigned char) (inend-in);
771                 in = inend-1;
772             } else {
773                 /* Less than 3. Output the byte itself */
774                 *out_data++ = ch;
775             }
776         }
777     }
778 
779     return _PyBytesWriter_Finish(&writer, out_data);
780 }
781 
782 
783 /*[clinic input]
784 binascii.b2a_hqx
785 
786     data: Py_buffer
787     /
788 
789 Encode .hqx data.
790 [clinic start generated code]*/
791 
792 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)793 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
794 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
795 {
796     unsigned char *ascii_data;
797     const unsigned char *bin_data;
798     int leftbits = 0;
799     unsigned char this_ch;
800     unsigned int leftchar = 0;
801     Py_ssize_t len;
802     _PyBytesWriter writer;
803 
804     bin_data = data->buf;
805     len = data->len;
806     _PyBytesWriter_Init(&writer);
807 
808     assert(len >= 0);
809 
810     if (len > PY_SSIZE_T_MAX / 2 - 2)
811         return PyErr_NoMemory();
812 
813     /* Allocate a buffer that is at least large enough */
814     ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
815     if (ascii_data == NULL)
816         return NULL;
817 
818     for( ; len > 0 ; len--, bin_data++ ) {
819         /* Shift into our buffer, and output any 6bits ready */
820         leftchar = (leftchar << 8) | *bin_data;
821         leftbits += 8;
822         while ( leftbits >= 6 ) {
823             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
824             leftbits -= 6;
825             *ascii_data++ = table_b2a_hqx[this_ch];
826         }
827     }
828     /* Output a possible runt byte */
829     if ( leftbits ) {
830         leftchar <<= (6-leftbits);
831         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
832     }
833 
834     return _PyBytesWriter_Finish(&writer, ascii_data);
835 }
836 
837 
838 /*[clinic input]
839 binascii.rledecode_hqx
840 
841     data: Py_buffer
842     /
843 
844 Decode hexbin RLE-coded string.
845 [clinic start generated code]*/
846 
847 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)848 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
849 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
850 {
851     const unsigned char *in_data;
852     unsigned char *out_data;
853     unsigned char in_byte, in_repeat;
854     Py_ssize_t in_len;
855     _PyBytesWriter writer;
856 
857     in_data = data->buf;
858     in_len = data->len;
859     _PyBytesWriter_Init(&writer);
860     binascii_state *state;
861 
862     assert(in_len >= 0);
863 
864     /* Empty string is a special case */
865     if ( in_len == 0 )
866         return PyBytes_FromStringAndSize("", 0);
867     else if (in_len > PY_SSIZE_T_MAX / 2)
868         return PyErr_NoMemory();
869 
870     /* Allocate a buffer of reasonable size. Resized when needed */
871     out_data = _PyBytesWriter_Alloc(&writer, in_len);
872     if (out_data == NULL)
873         return NULL;
874 
875     /* Use overallocation */
876     writer.overallocate = 1;
877 
878     /*
879     ** We need two macros here to get/put bytes and handle
880     ** end-of-buffer for input and output strings.
881     */
882 #define INBYTE(b)                                                       \
883     do {                                                                \
884          if ( --in_len < 0 ) {                                          \
885            state = PyModule_GetState(module);           \
886            if (state == NULL) {                                         \
887                return NULL;                                             \
888            }                                                            \
889            PyErr_SetString(state->Incomplete, "");                      \
890            goto error;                                                  \
891          }                                                              \
892          b = *in_data++;                                                \
893     } while(0)
894 
895     /*
896     ** Handle first byte separately (since we have to get angry
897     ** in case of an orphaned RLE code).
898     */
899     INBYTE(in_byte);
900 
901     if (in_byte == RUNCHAR) {
902         INBYTE(in_repeat);
903         /* only 1 byte will be written, but 2 bytes were preallocated:
904            subtract 1 byte to prevent overallocation */
905         writer.min_size--;
906 
907         if (in_repeat != 0) {
908             /* Note Error, not Incomplete (which is at the end
909             ** of the string only). This is a programmer error.
910             */
911             state = PyModule_GetState(module);
912             if (state == NULL) {
913                 return NULL;
914             }
915             PyErr_SetString(state->Error, "Orphaned RLE code at start");
916             goto error;
917         }
918         *out_data++ = RUNCHAR;
919     } else {
920         *out_data++ = in_byte;
921     }
922 
923     while( in_len > 0 ) {
924         INBYTE(in_byte);
925 
926         if (in_byte == RUNCHAR) {
927             INBYTE(in_repeat);
928             /* only 1 byte will be written, but 2 bytes were preallocated:
929                subtract 1 byte to prevent overallocation */
930             writer.min_size--;
931 
932             if ( in_repeat == 0 ) {
933                 /* Just an escaped RUNCHAR value */
934                 *out_data++ = RUNCHAR;
935             } else {
936                 /* Pick up value and output a sequence of it */
937                 in_byte = out_data[-1];
938 
939                 /* enlarge the buffer if needed */
940                 if (in_repeat > 1) {
941                     /* -1 because we already preallocated 1 byte */
942                     out_data = _PyBytesWriter_Prepare(&writer, out_data,
943                                                       in_repeat - 1);
944                     if (out_data == NULL)
945                         goto error;
946                 }
947 
948                 while ( --in_repeat > 0 )
949                     *out_data++ = in_byte;
950             }
951         } else {
952             /* Normal byte */
953             *out_data++ = in_byte;
954         }
955     }
956     return _PyBytesWriter_Finish(&writer, out_data);
957 
958 error:
959     _PyBytesWriter_Dealloc(&writer);
960     return NULL;
961 }
962 
963 
964 /*[clinic input]
965 binascii.crc_hqx -> unsigned_int
966 
967     data: Py_buffer
968     crc: unsigned_int(bitwise=True)
969     /
970 
971 Compute CRC-CCITT incrementally.
972 [clinic start generated code]*/
973 
974 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)975 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
976 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
977 {
978     const unsigned char *bin_data;
979     Py_ssize_t len;
980 
981     crc &= 0xffff;
982     bin_data = data->buf;
983     len = data->len;
984 
985     while(len-- > 0) {
986         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
987     }
988 
989     return crc;
990 }
991 
992 #ifndef USE_ZLIB_CRC32
993 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
994     Also known as: ISO 3307
995 **********************************************************************|
996 *                                                                    *|
997 * Demonstration program to compute the 32-bit CRC used as the frame  *|
998 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
999 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
1000 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
1001 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
1002 * this polynomial is or will be included in CCITT V.41, which        *|
1003 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
1004 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
1005 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
1006 *                                                                    *|
1007 **********************************************************************|
1008 
1009  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
1010  code or tables extracted from it, as desired without restriction.
1011 
1012  First, the polynomial itself and its table of feedback terms.  The
1013  polynomial is
1014  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1015  Note that we take it "backwards" and put the highest-order term in
1016  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
1017  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
1018  the MSB being 1.
1019 
1020  Note that the usual hardware shift register implementation, which
1021  is what we're using (we're merely optimizing it by doing eight-bit
1022  chunks at a time) shifts bits into the lowest-order term.  In our
1023  implementation, that means shifting towards the right.  Why do we
1024  do it this way?  Because the calculated CRC must be transmitted in
1025  order from highest-order term to lowest-order term.  UARTs transmit
1026  characters in order from LSB to MSB.  By storing the CRC this way,
1027  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
1029  by bit from highest- to lowest-order term without requiring any bit
1030  shuffling on our part.  Reception works similarly.
1031 
1032  The feedback terms table consists of 256, 32-bit entries.  Notes:
1033 
1034   1. The table can be generated at runtime if desired; code to do so
1035      is shown later.  It might not be obvious, but the feedback
1036      terms simply represent the results of eight shift/xor opera-
1037      tions for all combinations of data and CRC register values.
1038 
1039   2. The CRC accumulation logic is the same for all CRC polynomials,
1040      be they sixteen or thirty-two bits wide.  You simply choose the
1041      appropriate table.  Alternatively, because the table can be
1042      generated at runtime, you can start by generating the table for
1043      the polynomial in question and use exactly the same "updcrc",
1044      if your application needn't simultaneously handle two CRC
1045      polynomials.  (Note, however, that XMODEM is strange.)
1046 
1047   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1048      of course, 32-bit entries work OK if the high 16 bits are zero.
1049 
1050   4. The values must be right-shifted by eight bits by the "updcrc"
1051      logic; the shift must be unsigned (bring in zeroes).  On some
1052      hardware you could probably optimize the shift in assembler by
1053      using byte-swap instructions.
1054 ********************************************************************/
1055 
/* 256-entry feedback table for the CRC-32 polynomial described in the
   comment above.  binascii_crc32_impl() indexes it with the low byte of
   (crc ^ input byte) in its table-driven fallback; the table is only
   compiled when USE_ZLIB_CRC32 is not defined. */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
1110 #endif  /* USE_ZLIB_CRC32 */
1111 
1112 /*[clinic input]
1113 binascii.crc32 -> unsigned_int
1114 
1115     data: Py_buffer
1116     crc: unsigned_int(bitwise=True) = 0
1117     /
1118 
1119 Compute CRC-32 incrementally.
1120 [clinic start generated code]*/
1121 
1122 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1123 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1124 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1125 
1126 #ifdef USE_ZLIB_CRC32
1127 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1128 {
1129     const Byte *buf;
1130     Py_ssize_t len;
1131     int signed_val;
1132 
1133     buf = (Byte*)data->buf;
1134     len = data->len;
1135     signed_val = crc32(crc, buf, len);
1136     return (unsigned int)signed_val & 0xffffffffU;
1137 }
1138 #else  /* USE_ZLIB_CRC32 */
1139 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1140     const unsigned char *bin_data;
1141     Py_ssize_t len;
1142     unsigned int result;
1143 
1144     bin_data = data->buf;
1145     len = data->len;
1146 
1147     crc = ~ crc;
1148     while (len-- > 0) {
1149         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1150         /* Note:  (crc >> 8) MUST zero fill on left */
1151     }
1152 
1153     result = (crc ^ 0xFFFFFFFF);
1154     return result & 0xffffffff;
1155 }
1156 #endif  /* USE_ZLIB_CRC32 */
1157 
1158 /*[clinic input]
1159 binascii.b2a_hex
1160 
1161     data: Py_buffer
1162     sep: object = NULL
1163         An optional single character or byte to separate hex bytes.
1164     bytes_per_sep: int = 1
1165         How many bytes between separators.  Positive values count from the
1166         right, negative values count from the left.
1167 
1168 Hexadecimal representation of binary data.
1169 
1170 The return value is a bytes object.  This function is also
1171 available as "hexlify()".
1172 
1173 Example:
1174 >>> binascii.b2a_hex(b'\xb9\x01\xef')
1175 b'b901ef'
1176 >>> binascii.hexlify(b'\xb9\x01\xef', ':')
1177 b'b9:01:ef'
1178 >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1179 b'b9_01ef'
1180 [clinic start generated code]*/
1181 
1182 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1183 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1184                       int bytes_per_sep)
1185 /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
1186 {
1187     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1188                                      sep, bytes_per_sep);
1189 }
1190 
1191 /*[clinic input]
1192 binascii.hexlify = binascii.b2a_hex
1193 
1194 Hexadecimal representation of binary data.
1195 
1196 The return value is a bytes object.  This function is also
1197 available as "b2a_hex()".
1198 [clinic start generated code]*/
1199 
1200 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1201 binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1202                       int bytes_per_sep)
1203 /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
1204 {
1205     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1206                                      sep, bytes_per_sep);
1207 }
1208 
1209 /*[clinic input]
1210 binascii.a2b_hex
1211 
1212     hexstr: ascii_buffer
1213     /
1214 
1215 Binary data of hexadecimal representation.
1216 
1217 hexstr must contain an even number of hex digits (upper or lower case).
1218 This function is also available as "unhexlify()".
1219 [clinic start generated code]*/
1220 
1221 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1222 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1223 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1224 {
1225     const char* argbuf;
1226     Py_ssize_t arglen;
1227     PyObject *retval;
1228     char* retbuf;
1229     Py_ssize_t i, j;
1230     binascii_state *state;
1231 
1232     argbuf = hexstr->buf;
1233     arglen = hexstr->len;
1234 
1235     assert(arglen >= 0);
1236 
1237     /* XXX What should we do about strings with an odd length?  Should
1238      * we add an implicit leading zero, or a trailing zero?  For now,
1239      * raise an exception.
1240      */
1241     if (arglen % 2) {
1242         state = PyModule_GetState(module);
1243         if (state == NULL) {
1244             return NULL;
1245         }
1246         PyErr_SetString(state->Error, "Odd-length string");
1247         return NULL;
1248     }
1249 
1250     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1251     if (!retval)
1252         return NULL;
1253     retbuf = PyBytes_AS_STRING(retval);
1254 
1255     for (i=j=0; i < arglen; i += 2) {
1256         unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1257         unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1258         if (top >= 16 || bot >= 16) {
1259             state = PyModule_GetState(module);
1260             if (state == NULL) {
1261                 return NULL;
1262             }
1263             PyErr_SetString(state->Error,
1264                             "Non-hexadecimal digit found");
1265             goto finally;
1266         }
1267         retbuf[j++] = (top << 4) + bot;
1268     }
1269     return retval;
1270 
1271   finally:
1272     Py_DECREF(retval);
1273     return NULL;
1274 }
1275 
1276 /*[clinic input]
1277 binascii.unhexlify = binascii.a2b_hex
1278 
1279 Binary data of hexadecimal representation.
1280 
1281 hexstr must contain an even number of hex digits (upper or lower case).
1282 [clinic start generated code]*/
1283 
1284 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1285 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1286 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1287 {
1288     return binascii_a2b_hex_impl(module, hexstr);
1289 }
1290 
/* Line-length limit for quoted-printable output: b2a_qp() emits a soft
   line break before a character that would take the current line to
   this many characters or more. */
#define MAXLINESIZE 76
1292 
1293 
1294 /*[clinic input]
1295 binascii.a2b_qp
1296 
1297     data: ascii_buffer
1298     header: bool(accept={int}) = False
1299 
1300 Decode a string of qp-encoded data.
1301 [clinic start generated code]*/
1302 
1303 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1304 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1305 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1306 {
1307     Py_ssize_t in, out;
1308     char ch;
1309     const unsigned char *ascii_data;
1310     unsigned char *odata;
1311     Py_ssize_t datalen = 0;
1312     PyObject *rv;
1313 
1314     ascii_data = data->buf;
1315     datalen = data->len;
1316 
1317     /* We allocate the output same size as input, this is overkill.
1318      * The previous implementation used calloc() so we'll zero out the
1319      * memory here too, since PyMem_Malloc() does not guarantee that.
1320      */
1321     odata = (unsigned char *) PyMem_Malloc(datalen);
1322     if (odata == NULL) {
1323         PyErr_NoMemory();
1324         return NULL;
1325     }
1326     memset(odata, 0, datalen);
1327 
1328     in = out = 0;
1329     while (in < datalen) {
1330         if (ascii_data[in] == '=') {
1331             in++;
1332             if (in >= datalen) break;
1333             /* Soft line breaks */
1334             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1335                 if (ascii_data[in] != '\n') {
1336                     while (in < datalen && ascii_data[in] != '\n') in++;
1337                 }
1338                 if (in < datalen) in++;
1339             }
1340             else if (ascii_data[in] == '=') {
1341                 /* broken case from broken python qp */
1342                 odata[out++] = '=';
1343                 in++;
1344             }
1345             else if ((in + 1 < datalen) &&
1346                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1347                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1348                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1349                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1350                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1351                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1352                 /* hexval */
1353                 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1354                 in++;
1355                 ch |= _PyLong_DigitValue[ascii_data[in]];
1356                 in++;
1357                 odata[out++] = ch;
1358             }
1359             else {
1360               odata[out++] = '=';
1361             }
1362         }
1363         else if (header && ascii_data[in] == '_') {
1364             odata[out++] = ' ';
1365             in++;
1366         }
1367         else {
1368             odata[out] = ascii_data[in];
1369             in++;
1370             out++;
1371         }
1372     }
1373     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1374         PyMem_Free(odata);
1375         return NULL;
1376     }
1377     PyMem_Free(odata);
1378     return rv;
1379 }
1380 
/* Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    s[0] = hex_digits[(ch >> 4) & 0x0F];
    s[1] = hex_digits[ch & 0x0F];
    return 0;
}
1391 
1392 /* XXX: This is ridiculously complicated to be backward compatible
1393  * (mostly) with the quopri module.  It doesn't re-create the quopri
1394  * module bug where text ending in CRLF has the CR encoded */
1395 
1396 /*[clinic input]
1397 binascii.b2a_qp
1398 
1399     data: Py_buffer
1400     quotetabs: bool(accept={int}) = False
1401     istext: bool(accept={int}) = True
1402     header: bool(accept={int}) = False
1403 
1404 Encode a string using quoted-printable encoding.
1405 
1406 On encoding, when istext is set, newlines are not encoded, and white
1407 space at end of lines is.  When istext is not set, \r and \n (CR/LF)
1408 are both encoded.  When quotetabs is set, space and tabs are encoded.
1409 [clinic start generated code]*/
1410 
static PyObject *
binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
                     int istext, int header)
/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
{
    /* Encode data as quoted-printable.
     *
     * The work is done in two passes that must stay in lock-step:
     *   1. scan the input and add up the exact number of output bytes
     *      needed (odatalen), failing with MemoryError on overflow;
     *   2. walk the input again with the same rules and write the
     *      encoded bytes into a buffer of that size.
     *
     * A byte is written as "=XX" when it is > 126, is '=', is '_' in
     * header mode, is a '.' alone on a line, is CR or LF while istext is
     * false, is a tab or space at the very end of the data, or is < 33
     * other than CR/LF and (unless quotetabs is set) tab and space.
     * In header mode a literal space is written as '_'.
     *
     * Returns a new bytes object, or NULL with an exception set.
     */
    Py_ssize_t in, out;
    const unsigned char *databuf;
    unsigned char *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;   /* characters on the current output line */
    unsigned char ch;
    int crlf = 0;               /* non-zero: emit "\r\n" line ends */
    const unsigned char *p;

    databuf = data->buf;
    datalen = data->len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' in the data is inspected. */
    p = (const unsigned char *) memchr(databuf, '\n', datalen);
    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        /* delta = output bytes produced by this step */
        Py_ssize_t delta = 0;
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Byte will be written as "=XX"; a soft line break
               ("=\n" or "=\r\n") goes first if the line would get
               too long. */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                /* Hard line break in text mode */
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* Reserve two extra bytes: the whitespace byte may be
                   rewritten as "=XX" by the second pass. */
                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Literal byte; a soft line break goes first if the
                   line is full, unless this is the last byte or the
                   next byte is '\n'. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the output buffer using the size computed by the scan
     * above.  The previous implementation used calloc() so we'll zero
     * out the memory here too, since PyMem_Malloc() does not guarantee
     * that.
     */
    odata = (unsigned char *) PyMem_Malloc(odatalen);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(odata, 0, odatalen);

    /* Second pass: same decisions as the scan, now writing output */
    in = out = linelen = 0;
    while (in < datalen) {
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Soft line break before the escape if needed */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            /* Emit the byte as '=' followed by two hex digits */
            odata[out++] = '=';
            to_hex(databuf[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* This looks at the byte already written to the output,
                   which it replaces in place with its "=XX" form. */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* Line ending in the convention detected at the top */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Soft line break before the literal byte if needed */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode a space is written as '_' */
                if (header && databuf[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = databuf[in++];
                }
            }
        }
    }
    /* Copy the bytes actually written (out) into the result; the scratch
       buffer is freed on both the success and the failure path. */
    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
        PyMem_Free(odata);
        return NULL;
    }
    PyMem_Free(odata);
    return rv;
}
1591 
1592 /* List of functions defined in the module */
1593 
/* Method table referenced by the module definition.  Each *_METHODDEF
   macro contributes the entry for the function it is named after; the
   macros themselves are defined outside this part of the file. */
static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HQX_METHODDEF
    BINASCII_B2A_HQX_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_RLECODE_HQX_METHODDEF
    BINASCII_RLEDECODE_HQX_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
1613 
1614 
1615 /* Initialization function for the module (*must* be called PyInit_binascii) */
1616 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1617 
1618 static int
binascii_exec(PyObject * m)1619 binascii_exec(PyObject *m) {
1620     int result;
1621     binascii_state *state = PyModule_GetState(m);
1622     if (state == NULL) {
1623         return -1;
1624     }
1625 
1626     state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1627     if (state->Error == NULL) {
1628         return -1;
1629     }
1630     result = PyModule_AddObject(m, "Error", state->Error);
1631     if (result == -1) {
1632         return -1;
1633     }
1634 
1635     state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1636     if (state->Incomplete == NULL) {
1637         return -1;
1638     }
1639     result = PyModule_AddObject(m, "Incomplete", state->Incomplete);
1640     if (result == -1) {
1641         return -1;
1642     }
1643 
1644     return 0;
1645 }
1646 
1647 static PyModuleDef_Slot binascii_slots[] = {
1648     {Py_mod_exec, binascii_exec},
1649     {0, NULL}
1650 };
1651 
1652 static struct PyModuleDef binasciimodule = {
1653     PyModuleDef_HEAD_INIT,
1654     "binascii",
1655     doc_binascii,
1656     sizeof(binascii_state),
1657     binascii_module_methods,
1658     binascii_slots,
1659     NULL,
1660     NULL,
1661     NULL
1662 };
1663 
1664 PyMODINIT_FUNC
PyInit_binascii(void)1665 PyInit_binascii(void)
1666 {
1667     return PyModuleDef_Init(&binasciimodule);
1668 }
1669