1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55
56 #define PY_SSIZE_T_CLEAN
57
58 #include "Python.h"
59 #include "pystrhex.h"
60 #ifdef USE_ZLIB_CRC32
61 #include "zlib.h"
62 #endif
63
/*
** Module state, obtained in this file through PyModule_GetState(module).
** Holds the exception objects handed to PyErr_SetString()/PyErr_Format().
*/
typedef struct binascii_state {
    /* Exception used for encoding/decoding errors ("Illegal char", ...). */
    PyObject *Error;
    /* Exception used when the input stops in the middle of an item. */
    PyObject *Incomplete;
} binascii_state;
68
69 /*
70 ** hqx lookup table, ascii->binary.
71 */
72
/* Marker byte of the hqx run-length coding (see rlecode_hqx/rledecode_hqx). */
#define RUNCHAR 0x90

/* Sentinel entries of table_a2b_hqx; real entries are 0x00..0x3F. */
#define DONE 0x7F   /* ':' -- terminates the data */
#define SKIP 0x7E   /* '\n' and '\r' -- ignored by the decoder */
#define FAIL 0x7D   /* byte that is not part of the hqx alphabet */
78
/*
** Indexed by the input byte.  Each entry is either the 6-bit value encoded
** by that character or one of the DONE/SKIP/FAIL sentinels.
*/
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
129
/* hqx encode table, binary->ascii: indexed by a 6-bit value (0..63). */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
132
/*
** base64 decode table, ascii->binary, indexed by a 7-bit character code.
** -1 marks characters outside the base64 alphabet.  The decoders compare
** entries against (unsigned char)-1 after storing them in an unsigned char.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
143
/* Padding character appended to a short final group. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
/* b2a_base64 allocates bin_len*2 + 3 bytes at most, so this bound keeps
   that size computation within PY_SSIZE_T_MAX. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* base64 encode table, binary->ascii: indexed by a 6-bit value (0..63). */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
151
152
153
/*
** Lookup table for crc_hqx.  It is indexed with
** (high byte of the running 16-bit crc) ^ (next data byte).
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
188
189 /*[clinic input]
190 module binascii
191 [clinic start generated code]*/
192 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
193
194 /*[python input]
195
196 class ascii_buffer_converter(CConverter):
197 type = 'Py_buffer'
198 converter = 'ascii_buffer_converter'
199 impl_by_reference = True
200 c_default = "{NULL, NULL}"
201
202 def cleanup(self):
203 name = self.name
204 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
205
206 [python start generated code]*/
207 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
208
209 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)210 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
211 {
212 if (arg == NULL) {
213 PyBuffer_Release(buf);
214 return 1;
215 }
216 if (PyUnicode_Check(arg)) {
217 if (PyUnicode_READY(arg) < 0)
218 return 0;
219 if (!PyUnicode_IS_ASCII(arg)) {
220 PyErr_SetString(PyExc_ValueError,
221 "string argument should contain only ASCII characters");
222 return 0;
223 }
224 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
225 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
226 buf->len = PyUnicode_GET_LENGTH(arg);
227 buf->obj = NULL;
228 return 1;
229 }
230 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
231 PyErr_Format(PyExc_TypeError,
232 "argument should be bytes, buffer or ASCII string, "
233 "not '%.100s'", Py_TYPE(arg)->tp_name);
234 return 0;
235 }
236 if (!PyBuffer_IsContiguous(buf, 'C')) {
237 PyErr_Format(PyExc_TypeError,
238 "argument should be a contiguous buffer, "
239 "not '%.100s'", Py_TYPE(arg)->tp_name);
240 PyBuffer_Release(buf);
241 return 0;
242 }
243 return Py_CLEANUP_SUPPORTED;
244 }
245
246 #include "clinic/binascii.c.h"
247
248 /*[clinic input]
249 binascii.a2b_uu
250
251 data: ascii_buffer
252 /
253
254 Decode a line of uuencoded data.
255 [clinic start generated code]*/
256
257 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)258 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
259 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
260 {
261 const unsigned char *ascii_data;
262 unsigned char *bin_data;
263 int leftbits = 0;
264 unsigned char this_ch;
265 unsigned int leftchar = 0;
266 PyObject *rv;
267 Py_ssize_t ascii_len, bin_len;
268 binascii_state *state;
269
270 ascii_data = data->buf;
271 ascii_len = data->len;
272
273 assert(ascii_len >= 0);
274
275 /* First byte: binary data length (in bytes) */
276 bin_len = (*ascii_data++ - ' ') & 077;
277 ascii_len--;
278
279 /* Allocate the buffer */
280 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
281 return NULL;
282 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
283
284 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
285 /* XXX is it really best to add NULs if there's no more data */
286 this_ch = (ascii_len > 0) ? *ascii_data : 0;
287 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
288 /*
289 ** Whitespace. Assume some spaces got eaten at
290 ** end-of-line. (We check this later)
291 */
292 this_ch = 0;
293 } else {
294 /* Check the character for legality
295 ** The 64 in stead of the expected 63 is because
296 ** there are a few uuencodes out there that use
297 ** '`' as zero instead of space.
298 */
299 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
300 state = PyModule_GetState(module);
301 if (state == NULL) {
302 return NULL;
303 }
304 PyErr_SetString(state->Error, "Illegal char");
305 Py_DECREF(rv);
306 return NULL;
307 }
308 this_ch = (this_ch - ' ') & 077;
309 }
310 /*
311 ** Shift it in on the low end, and see if there's
312 ** a byte ready for output.
313 */
314 leftchar = (leftchar << 6) | (this_ch);
315 leftbits += 6;
316 if ( leftbits >= 8 ) {
317 leftbits -= 8;
318 *bin_data++ = (leftchar >> leftbits) & 0xff;
319 leftchar &= ((1 << leftbits) - 1);
320 bin_len--;
321 }
322 }
323 /*
324 ** Finally, check that if there's anything left on the line
325 ** that it's whitespace only.
326 */
327 while( ascii_len-- > 0 ) {
328 this_ch = *ascii_data++;
329 /* Extra '`' may be written as padding in some cases */
330 if ( this_ch != ' ' && this_ch != ' '+64 &&
331 this_ch != '\n' && this_ch != '\r' ) {
332 state = PyModule_GetState(module);
333 if (state == NULL) {
334 return NULL;
335 }
336 PyErr_SetString(state->Error, "Trailing garbage");
337 Py_DECREF(rv);
338 return NULL;
339 }
340 }
341 return rv;
342 }
343
344 /*[clinic input]
345 binascii.b2a_uu
346
347 data: Py_buffer
348 /
349 *
350 backtick: bool(accept={int}) = False
351
352 Uuencode line of data.
353 [clinic start generated code]*/
354
355 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)356 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
357 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
358 {
359 unsigned char *ascii_data;
360 const unsigned char *bin_data;
361 int leftbits = 0;
362 unsigned char this_ch;
363 unsigned int leftchar = 0;
364 binascii_state *state;
365 Py_ssize_t bin_len, out_len;
366 _PyBytesWriter writer;
367
368 _PyBytesWriter_Init(&writer);
369 bin_data = data->buf;
370 bin_len = data->len;
371 if ( bin_len > 45 ) {
372 /* The 45 is a limit that appears in all uuencode's */
373 state = PyModule_GetState(module);
374 if (state == NULL) {
375 return NULL;
376 }
377 PyErr_SetString(state->Error, "At most 45 bytes at once");
378 return NULL;
379 }
380
381 /* We're lazy and allocate to much (fixed up later) */
382 out_len = 2 + (bin_len + 2) / 3 * 4;
383 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
384 if (ascii_data == NULL)
385 return NULL;
386
387 /* Store the length */
388 if (backtick && !bin_len)
389 *ascii_data++ = '`';
390 else
391 *ascii_data++ = ' ' + (unsigned char)bin_len;
392
393 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
394 /* Shift the data (or padding) into our buffer */
395 if ( bin_len > 0 ) /* Data */
396 leftchar = (leftchar << 8) | *bin_data;
397 else /* Padding */
398 leftchar <<= 8;
399 leftbits += 8;
400
401 /* See if there are 6-bit groups ready */
402 while ( leftbits >= 6 ) {
403 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
404 leftbits -= 6;
405 if (backtick && !this_ch)
406 *ascii_data++ = '`';
407 else
408 *ascii_data++ = this_ch + ' ';
409 }
410 }
411 *ascii_data++ = '\n'; /* Append a courtesy newline */
412
413 return _PyBytesWriter_Finish(&writer, ascii_data);
414 }
415
416
417 static int
binascii_find_valid(const unsigned char * s,Py_ssize_t slen,int num)418 binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
419 {
420 /* Finds & returns the (num+1)th
421 ** valid character for base64, or -1 if none.
422 */
423
424 int ret = -1;
425 unsigned char c, b64val;
426
427 while ((slen > 0) && (ret == -1)) {
428 c = *s;
429 b64val = table_a2b_base64[c & 0x7f];
430 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
431 if (num == 0)
432 ret = *s;
433 num--;
434 }
435
436 s++;
437 slen--;
438 }
439 return ret;
440 }
441
442 /*[clinic input]
443 binascii.a2b_base64
444
445 data: ascii_buffer
446 /
447
448 Decode a line of base64 data.
449 [clinic start generated code]*/
450
451 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data)452 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
453 /*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
454 {
455 const unsigned char *ascii_data;
456 unsigned char *bin_data;
457 unsigned char *bin_data_start;
458 int leftbits = 0;
459 unsigned char this_ch;
460 unsigned int leftchar = 0;
461 Py_ssize_t ascii_len, bin_len;
462 int quad_pos = 0;
463 _PyBytesWriter writer;
464 binascii_state *state;
465
466 ascii_data = data->buf;
467 ascii_len = data->len;
468
469 assert(ascii_len >= 0);
470
471 if (ascii_len > PY_SSIZE_T_MAX - 3)
472 return PyErr_NoMemory();
473
474 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
475
476 _PyBytesWriter_Init(&writer);
477
478 /* Allocate the buffer */
479 bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
480 if (bin_data == NULL)
481 return NULL;
482 bin_data_start = bin_data;
483
484 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
485 this_ch = *ascii_data;
486
487 if (this_ch > 0x7f ||
488 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
489 continue;
490
491 /* Check for pad sequences and ignore
492 ** the invalid ones.
493 */
494 if (this_ch == BASE64_PAD) {
495 if ( (quad_pos < 2) ||
496 ((quad_pos == 2) &&
497 (binascii_find_valid(ascii_data, ascii_len, 1)
498 != BASE64_PAD)) )
499 {
500 continue;
501 }
502 else {
503 /* A pad sequence means no more input.
504 ** We've already interpreted the data
505 ** from the quad at this point.
506 */
507 leftbits = 0;
508 break;
509 }
510 }
511
512 this_ch = table_a2b_base64[*ascii_data];
513 if ( this_ch == (unsigned char) -1 )
514 continue;
515
516 /*
517 ** Shift it in on the low end, and see if there's
518 ** a byte ready for output.
519 */
520 quad_pos = (quad_pos + 1) & 0x03;
521 leftchar = (leftchar << 6) | (this_ch);
522 leftbits += 6;
523
524 if ( leftbits >= 8 ) {
525 leftbits -= 8;
526 *bin_data++ = (leftchar >> leftbits) & 0xff;
527 leftchar &= ((1 << leftbits) - 1);
528 }
529 }
530
531 if (leftbits != 0) {
532 state = PyModule_GetState(module);
533 if (state == NULL) {
534 return NULL;
535 }
536 if (leftbits == 6) {
537 /*
538 ** There is exactly one extra valid, non-padding, base64 character.
539 ** This is an invalid length, as there is no possible input that
540 ** could encoded into such a base64 string.
541 */
542 PyErr_Format(state->Error,
543 "Invalid base64-encoded string: "
544 "number of data characters (%zd) cannot be 1 more "
545 "than a multiple of 4",
546 (bin_data - bin_data_start) / 3 * 4 + 1);
547 } else {
548 PyErr_SetString(state->Error, "Incorrect padding");
549 }
550 _PyBytesWriter_Dealloc(&writer);
551 return NULL;
552 }
553
554 return _PyBytesWriter_Finish(&writer, bin_data);
555 }
556
557
558 /*[clinic input]
559 binascii.b2a_base64
560
561 data: Py_buffer
562 /
563 *
564 newline: bool(accept={int}) = True
565
566 Base64-code line of data.
567 [clinic start generated code]*/
568
569 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)570 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
571 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
572 {
573 unsigned char *ascii_data;
574 const unsigned char *bin_data;
575 int leftbits = 0;
576 unsigned char this_ch;
577 unsigned int leftchar = 0;
578 Py_ssize_t bin_len, out_len;
579 _PyBytesWriter writer;
580 binascii_state *state;
581
582 bin_data = data->buf;
583 bin_len = data->len;
584 _PyBytesWriter_Init(&writer);
585
586 assert(bin_len >= 0);
587
588 if ( bin_len > BASE64_MAXBIN ) {
589 state = PyModule_GetState(module);
590 if (state == NULL) {
591 return NULL;
592 }
593 PyErr_SetString(state->Error, "Too much data for base64 line");
594 return NULL;
595 }
596
597 /* We're lazy and allocate too much (fixed up later).
598 "+2" leaves room for up to two pad characters.
599 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
600 out_len = bin_len*2 + 2;
601 if (newline)
602 out_len++;
603 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
604 if (ascii_data == NULL)
605 return NULL;
606
607 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
608 /* Shift the data into our buffer */
609 leftchar = (leftchar << 8) | *bin_data;
610 leftbits += 8;
611
612 /* See if there are 6-bit groups ready */
613 while ( leftbits >= 6 ) {
614 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
615 leftbits -= 6;
616 *ascii_data++ = table_b2a_base64[this_ch];
617 }
618 }
619 if ( leftbits == 2 ) {
620 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
621 *ascii_data++ = BASE64_PAD;
622 *ascii_data++ = BASE64_PAD;
623 } else if ( leftbits == 4 ) {
624 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
625 *ascii_data++ = BASE64_PAD;
626 }
627 if (newline)
628 *ascii_data++ = '\n'; /* Append a courtesy newline */
629
630 return _PyBytesWriter_Finish(&writer, ascii_data);
631 }
632
633 /*[clinic input]
634 binascii.a2b_hqx
635
636 data: ascii_buffer
637 /
638
639 Decode .hqx coding.
640 [clinic start generated code]*/
641
642 static PyObject *
binascii_a2b_hqx_impl(PyObject * module,Py_buffer * data)643 binascii_a2b_hqx_impl(PyObject *module, Py_buffer *data)
644 /*[clinic end generated code: output=4d6d8c54d54ea1c1 input=0d914c680e0eed55]*/
645 {
646 const unsigned char *ascii_data;
647 unsigned char *bin_data;
648 int leftbits = 0;
649 unsigned char this_ch;
650 unsigned int leftchar = 0;
651 PyObject *res;
652 Py_ssize_t len;
653 int done = 0;
654 _PyBytesWriter writer;
655 binascii_state *state;
656
657 ascii_data = data->buf;
658 len = data->len;
659 _PyBytesWriter_Init(&writer);
660
661 assert(len >= 0);
662
663 if (len > PY_SSIZE_T_MAX - 2)
664 return PyErr_NoMemory();
665
666 /* Allocate a string that is too big (fixed later)
667 Add two to the initial length to prevent interning which
668 would preclude subsequent resizing. */
669 bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
670 if (bin_data == NULL)
671 return NULL;
672
673 for( ; len > 0 ; len--, ascii_data++ ) {
674 /* Get the byte and look it up */
675 this_ch = table_a2b_hqx[*ascii_data];
676 if ( this_ch == SKIP )
677 continue;
678 if ( this_ch == FAIL ) {
679 state = PyModule_GetState(module);
680 if (state == NULL) {
681 return NULL;
682 }
683 PyErr_SetString(state->Error, "Illegal char");
684 _PyBytesWriter_Dealloc(&writer);
685 return NULL;
686 }
687 if ( this_ch == DONE ) {
688 /* The terminating colon */
689 done = 1;
690 break;
691 }
692
693 /* Shift it into the buffer and see if any bytes are ready */
694 leftchar = (leftchar << 6) | (this_ch);
695 leftbits += 6;
696 if ( leftbits >= 8 ) {
697 leftbits -= 8;
698 *bin_data++ = (leftchar >> leftbits) & 0xff;
699 leftchar &= ((1 << leftbits) - 1);
700 }
701 }
702
703 if ( leftbits && !done ) {
704 state = PyModule_GetState(module);
705 if (state == NULL) {
706 return NULL;
707 }
708 PyErr_SetString(state->Incomplete,
709 "String has incomplete number of bytes");
710 _PyBytesWriter_Dealloc(&writer);
711 return NULL;
712 }
713
714 res = _PyBytesWriter_Finish(&writer, bin_data);
715 if (res == NULL)
716 return NULL;
717 return Py_BuildValue("Ni", res, done);
718 }
719
720
721 /*[clinic input]
722 binascii.rlecode_hqx
723
724 data: Py_buffer
725 /
726
727 Binhex RLE-code binary data.
728 [clinic start generated code]*/
729
730 static PyObject *
binascii_rlecode_hqx_impl(PyObject * module,Py_buffer * data)731 binascii_rlecode_hqx_impl(PyObject *module, Py_buffer *data)
732 /*[clinic end generated code: output=393d79338f5f5629 input=e1f1712447a82b09]*/
733 {
734 const unsigned char *in_data;
735 unsigned char *out_data;
736 unsigned char ch;
737 Py_ssize_t in, inend, len;
738 _PyBytesWriter writer;
739
740 _PyBytesWriter_Init(&writer);
741 in_data = data->buf;
742 len = data->len;
743
744 assert(len >= 0);
745
746 if (len > PY_SSIZE_T_MAX / 2 - 2)
747 return PyErr_NoMemory();
748
749 /* Worst case: output is twice as big as input (fixed later) */
750 out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
751 if (out_data == NULL)
752 return NULL;
753
754 for( in=0; in<len; in++) {
755 ch = in_data[in];
756 if ( ch == RUNCHAR ) {
757 /* RUNCHAR. Escape it. */
758 *out_data++ = RUNCHAR;
759 *out_data++ = 0;
760 } else {
761 /* Check how many following are the same */
762 for(inend=in+1;
763 inend<len && in_data[inend] == ch &&
764 inend < in+255;
765 inend++) ;
766 if ( inend - in > 3 ) {
767 /* More than 3 in a row. Output RLE. */
768 *out_data++ = ch;
769 *out_data++ = RUNCHAR;
770 *out_data++ = (unsigned char) (inend-in);
771 in = inend-1;
772 } else {
773 /* Less than 3. Output the byte itself */
774 *out_data++ = ch;
775 }
776 }
777 }
778
779 return _PyBytesWriter_Finish(&writer, out_data);
780 }
781
782
783 /*[clinic input]
784 binascii.b2a_hqx
785
786 data: Py_buffer
787 /
788
789 Encode .hqx data.
790 [clinic start generated code]*/
791
792 static PyObject *
binascii_b2a_hqx_impl(PyObject * module,Py_buffer * data)793 binascii_b2a_hqx_impl(PyObject *module, Py_buffer *data)
794 /*[clinic end generated code: output=d0aa5a704bc9f7de input=9596ebe019fe12ba]*/
795 {
796 unsigned char *ascii_data;
797 const unsigned char *bin_data;
798 int leftbits = 0;
799 unsigned char this_ch;
800 unsigned int leftchar = 0;
801 Py_ssize_t len;
802 _PyBytesWriter writer;
803
804 bin_data = data->buf;
805 len = data->len;
806 _PyBytesWriter_Init(&writer);
807
808 assert(len >= 0);
809
810 if (len > PY_SSIZE_T_MAX / 2 - 2)
811 return PyErr_NoMemory();
812
813 /* Allocate a buffer that is at least large enough */
814 ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
815 if (ascii_data == NULL)
816 return NULL;
817
818 for( ; len > 0 ; len--, bin_data++ ) {
819 /* Shift into our buffer, and output any 6bits ready */
820 leftchar = (leftchar << 8) | *bin_data;
821 leftbits += 8;
822 while ( leftbits >= 6 ) {
823 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
824 leftbits -= 6;
825 *ascii_data++ = table_b2a_hqx[this_ch];
826 }
827 }
828 /* Output a possible runt byte */
829 if ( leftbits ) {
830 leftchar <<= (6-leftbits);
831 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
832 }
833
834 return _PyBytesWriter_Finish(&writer, ascii_data);
835 }
836
837
838 /*[clinic input]
839 binascii.rledecode_hqx
840
841 data: Py_buffer
842 /
843
844 Decode hexbin RLE-coded string.
845 [clinic start generated code]*/
846
847 static PyObject *
binascii_rledecode_hqx_impl(PyObject * module,Py_buffer * data)848 binascii_rledecode_hqx_impl(PyObject *module, Py_buffer *data)
849 /*[clinic end generated code: output=9826619565de1c6c input=54cdd49fc014402c]*/
850 {
851 const unsigned char *in_data;
852 unsigned char *out_data;
853 unsigned char in_byte, in_repeat;
854 Py_ssize_t in_len;
855 _PyBytesWriter writer;
856
857 in_data = data->buf;
858 in_len = data->len;
859 _PyBytesWriter_Init(&writer);
860 binascii_state *state;
861
862 assert(in_len >= 0);
863
864 /* Empty string is a special case */
865 if ( in_len == 0 )
866 return PyBytes_FromStringAndSize("", 0);
867 else if (in_len > PY_SSIZE_T_MAX / 2)
868 return PyErr_NoMemory();
869
870 /* Allocate a buffer of reasonable size. Resized when needed */
871 out_data = _PyBytesWriter_Alloc(&writer, in_len);
872 if (out_data == NULL)
873 return NULL;
874
875 /* Use overallocation */
876 writer.overallocate = 1;
877
878 /*
879 ** We need two macros here to get/put bytes and handle
880 ** end-of-buffer for input and output strings.
881 */
882 #define INBYTE(b) \
883 do { \
884 if ( --in_len < 0 ) { \
885 state = PyModule_GetState(module); \
886 if (state == NULL) { \
887 return NULL; \
888 } \
889 PyErr_SetString(state->Incomplete, ""); \
890 goto error; \
891 } \
892 b = *in_data++; \
893 } while(0)
894
895 /*
896 ** Handle first byte separately (since we have to get angry
897 ** in case of an orphaned RLE code).
898 */
899 INBYTE(in_byte);
900
901 if (in_byte == RUNCHAR) {
902 INBYTE(in_repeat);
903 /* only 1 byte will be written, but 2 bytes were preallocated:
904 subtract 1 byte to prevent overallocation */
905 writer.min_size--;
906
907 if (in_repeat != 0) {
908 /* Note Error, not Incomplete (which is at the end
909 ** of the string only). This is a programmer error.
910 */
911 state = PyModule_GetState(module);
912 if (state == NULL) {
913 return NULL;
914 }
915 PyErr_SetString(state->Error, "Orphaned RLE code at start");
916 goto error;
917 }
918 *out_data++ = RUNCHAR;
919 } else {
920 *out_data++ = in_byte;
921 }
922
923 while( in_len > 0 ) {
924 INBYTE(in_byte);
925
926 if (in_byte == RUNCHAR) {
927 INBYTE(in_repeat);
928 /* only 1 byte will be written, but 2 bytes were preallocated:
929 subtract 1 byte to prevent overallocation */
930 writer.min_size--;
931
932 if ( in_repeat == 0 ) {
933 /* Just an escaped RUNCHAR value */
934 *out_data++ = RUNCHAR;
935 } else {
936 /* Pick up value and output a sequence of it */
937 in_byte = out_data[-1];
938
939 /* enlarge the buffer if needed */
940 if (in_repeat > 1) {
941 /* -1 because we already preallocated 1 byte */
942 out_data = _PyBytesWriter_Prepare(&writer, out_data,
943 in_repeat - 1);
944 if (out_data == NULL)
945 goto error;
946 }
947
948 while ( --in_repeat > 0 )
949 *out_data++ = in_byte;
950 }
951 } else {
952 /* Normal byte */
953 *out_data++ = in_byte;
954 }
955 }
956 return _PyBytesWriter_Finish(&writer, out_data);
957
958 error:
959 _PyBytesWriter_Dealloc(&writer);
960 return NULL;
961 }
962
963
964 /*[clinic input]
965 binascii.crc_hqx -> unsigned_int
966
967 data: Py_buffer
968 crc: unsigned_int(bitwise=True)
969 /
970
971 Compute CRC-CCITT incrementally.
972 [clinic start generated code]*/
973
974 static unsigned int
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)975 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
976 /*[clinic end generated code: output=8ec2a78590d19170 input=f18240ff8c705b79]*/
977 {
978 const unsigned char *bin_data;
979 Py_ssize_t len;
980
981 crc &= 0xffff;
982 bin_data = data->buf;
983 len = data->len;
984
985 while(len-- > 0) {
986 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
987 }
988
989 return crc;
990 }
991
992 #ifndef USE_ZLIB_CRC32
993 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
994 Also known as: ISO 3307
995 **********************************************************************|
996 * *|
997 * Demonstration program to compute the 32-bit CRC used as the frame *|
998 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
999 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
1000 * protocol). The 32-bit FCS was added via the Federal Register, *|
1001 * 1 June 1982, p.23798. I presume but don't know for certain that *|
1002 * this polynomial is or will be included in CCITT V.41, which *|
1003 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
1004 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
1005 * errors by a factor of 10^-5 over 16-bit FCS. *|
1006 * *|
1007 **********************************************************************|
1008
1009 Copyright (C) 1986 Gary S. Brown. You may use this program, or
1010 code or tables extracted from it, as desired without restriction.
1011
1012 First, the polynomial itself and its table of feedback terms. The
1013 polynomial is
1014 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
1015 Note that we take it "backwards" and put the highest-order term in
1016 the lowest-order bit. The X^32 term is "implied"; the LSB is the
1017 X^31 term, etc. The X^0 term (usually shown as "+1") results in
1018 the MSB being 1.
1019
1020 Note that the usual hardware shift register implementation, which
1021 is what we're using (we're merely optimizing it by doing eight-bit
1022 chunks at a time) shifts bits into the lowest-order term. In our
1023 implementation, that means shifting towards the right. Why do we
1024 do it this way? Because the calculated CRC must be transmitted in
1025 order from highest-order term to lowest-order term. UARTs transmit
1026 characters in order from LSB to MSB. By storing the CRC this way,
1027 we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
1029 by bit from highest- to lowest-order term without requiring any bit
1030 shuffling on our part. Reception works similarly.
1031
1032 The feedback terms table consists of 256, 32-bit entries. Notes:
1033
1034 1. The table can be generated at runtime if desired; code to do so
1035 is shown later. It might not be obvious, but the feedback
1036 terms simply represent the results of eight shift/xor opera-
1037 tions for all combinations of data and CRC register values.
1038
1039 2. The CRC accumulation logic is the same for all CRC polynomials,
1040 be they sixteen or thirty-two bits wide. You simply choose the
1041 appropriate table. Alternatively, because the table can be
1042 generated at runtime, you can start by generating the table for
1043 the polynomial in question and use exactly the same "updcrc",
1044 if your application needn't simultaneously handle two CRC
1045 polynomials. (Note, however, that XMODEM is strange.)
1046
1047 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
1048 of course, 32-bit entries work OK if the high 16 bits are zero.
1049
1050 4. The values must be right-shifted by eight bits by the "updcrc"
1051 logic; the shift must be unsigned (bring in zeroes). On some
1052 hardware you could probably optimize the shift in assembler by
1053 using byte-swap instructions.
1054 ********************************************************************/
1055
/* CRC-32 feedback table: 256 entries of 32 bits, indexed by
 * (crc ^ data_byte) & 0xff.  Entry i is the result of pushing the byte
 * value i through eight shift/xor steps of the right-shifting ("backwards")
 * form of the polynomial; entry 0x80 is therefore the polynomial constant
 * 0xedb88320 itself.  The trailing comment on each row gives the index of
 * the row's first entry.
 */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU,  /* 0x00 */
    0x076dc419U, 0x706af48fU, 0xe963a535U, 0x9e6495a3U,  /* 0x04 */
    0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,  /* 0x08 */
    0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U,  /* 0x0c */
    0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,  /* 0x10 */
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,  /* 0x14 */
    0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU,  /* 0x18 */
    0x14015c4fU, 0x63066cd9U, 0xfa0f3d63U, 0x8d080df5U,  /* 0x1c */
    0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,  /* 0x20 */
    0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,  /* 0x24 */
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U,  /* 0x28 */
    0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,  /* 0x2c */
    0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U,  /* 0x30 */
    0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U, 0xb8bda50fU,  /* 0x34 */
    0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,  /* 0x38 */
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU,  /* 0x3c */
    0x76dc4190U, 0x01db7106U, 0x98d220bcU, 0xefd5102aU,  /* 0x40 */
    0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,  /* 0x44 */
    0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U,  /* 0x48 */
    0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,  /* 0x4c */
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,  /* 0x50 */
    0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U,  /* 0x54 */
    0x65b0d9c6U, 0x12b7e950U, 0x8bbeb8eaU, 0xfcb9887cU,  /* 0x58 */
    0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,  /* 0x5c */
    0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,  /* 0x60 */
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU,  /* 0x64 */
    0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,  /* 0x68 */
    0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U,  /* 0x6c */
    0x5005713cU, 0x270241aaU, 0xbe0b1010U, 0xc90c2086U,  /* 0x70 */
    0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,  /* 0x74 */
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U,  /* 0x78 */
    0x59b33d17U, 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU,  /* 0x7c */
    0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,  /* 0x80 */
    0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U,  /* 0x84 */
    0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,  /* 0x88 */
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,  /* 0x8c */
    0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU,  /* 0x90 */
    0xf762575dU, 0x806567cbU, 0x196c3671U, 0x6e6b06e7U,  /* 0x94 */
    0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,  /* 0x98 */
    0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,  /* 0x9c */
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U,  /* 0xa0 */
    0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,  /* 0xa4 */
    0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U,  /* 0xa8 */
    0xdf60efc3U, 0xa867df55U, 0x316e8eefU, 0x4669be79U,  /* 0xac */
    0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,  /* 0xb0 */
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU,  /* 0xb4 */
    0xc5ba3bbeU, 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U,  /* 0xb8 */
    0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,  /* 0xbc */
    0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU,  /* 0xc0 */
    0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,  /* 0xc4 */
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,  /* 0xc8 */
    0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U,  /* 0xcc */
    0x86d3d2d4U, 0xf1d4e242U, 0x68ddb3f8U, 0x1fda836eU,  /* 0xd0 */
    0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,  /* 0xd4 */
    0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,  /* 0xd8 */
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U,  /* 0xdc */
    0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,  /* 0xe0 */
    0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU,  /* 0xe4 */
    0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U, 0x37d83bf0U,  /* 0xe8 */
    0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,  /* 0xec */
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U,  /* 0xf0 */
    0xbad03605U, 0xcdd70693U, 0x54de5729U, 0x23d967bfU,  /* 0xf4 */
    0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,  /* 0xf8 */
    0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU   /* 0xfc */
};
1110 #endif /* USE_ZLIB_CRC32 */
1111
1112 /*[clinic input]
1113 binascii.crc32 -> unsigned_int
1114
1115 data: Py_buffer
1116 crc: unsigned_int(bitwise=True) = 0
1117 /
1118
1119 Compute CRC-32 incrementally.
1120 [clinic start generated code]*/
1121
1122 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)1123 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
1124 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
1125
1126 #ifdef USE_ZLIB_CRC32
1127 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
1128 {
1129 const Byte *buf;
1130 Py_ssize_t len;
1131 int signed_val;
1132
1133 buf = (Byte*)data->buf;
1134 len = data->len;
1135 signed_val = crc32(crc, buf, len);
1136 return (unsigned int)signed_val & 0xffffffffU;
1137 }
1138 #else /* USE_ZLIB_CRC32 */
1139 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1140 const unsigned char *bin_data;
1141 Py_ssize_t len;
1142 unsigned int result;
1143
1144 bin_data = data->buf;
1145 len = data->len;
1146
1147 crc = ~ crc;
1148 while (len-- > 0) {
1149 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1150 /* Note: (crc >> 8) MUST zero fill on left */
1151 }
1152
1153 result = (crc ^ 0xFFFFFFFF);
1154 return result & 0xffffffff;
1155 }
1156 #endif /* USE_ZLIB_CRC32 */
1157
1158 /*[clinic input]
1159 binascii.b2a_hex
1160
1161 data: Py_buffer
1162 sep: object = NULL
1163 An optional single character or byte to separate hex bytes.
1164 bytes_per_sep: int = 1
1165 How many bytes between separators. Positive values count from the
1166 right, negative values count from the left.
1167
1168 Hexadecimal representation of binary data.
1169
1170 The return value is a bytes object. This function is also
1171 available as "hexlify()".
1172
1173 Example:
1174 >>> binascii.b2a_hex(b'\xb9\x01\xef')
1175 b'b901ef'
1176 >>> binascii.hexlify(b'\xb9\x01\xef', ':')
1177 b'b9:01:ef'
1178 >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1179 b'b9_01ef'
1180 [clinic start generated code]*/
1181
1182 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1183 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1184 int bytes_per_sep)
1185 /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
1186 {
1187 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1188 sep, bytes_per_sep);
1189 }
1190
1191 /*[clinic input]
1192 binascii.hexlify = binascii.b2a_hex
1193
1194 Hexadecimal representation of binary data.
1195
1196 The return value is a bytes object. This function is also
1197 available as "b2a_hex()".
1198 [clinic start generated code]*/
1199
1200 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)1201 binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1202 int bytes_per_sep)
1203 /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
1204 {
1205 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1206 sep, bytes_per_sep);
1207 }
1208
1209 /*[clinic input]
1210 binascii.a2b_hex
1211
1212 hexstr: ascii_buffer
1213 /
1214
1215 Binary data of hexadecimal representation.
1216
1217 hexstr must contain an even number of hex digits (upper or lower case).
1218 This function is also available as "unhexlify()".
1219 [clinic start generated code]*/
1220
1221 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)1222 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
1223 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
1224 {
1225 const char* argbuf;
1226 Py_ssize_t arglen;
1227 PyObject *retval;
1228 char* retbuf;
1229 Py_ssize_t i, j;
1230 binascii_state *state;
1231
1232 argbuf = hexstr->buf;
1233 arglen = hexstr->len;
1234
1235 assert(arglen >= 0);
1236
1237 /* XXX What should we do about strings with an odd length? Should
1238 * we add an implicit leading zero, or a trailing zero? For now,
1239 * raise an exception.
1240 */
1241 if (arglen % 2) {
1242 state = PyModule_GetState(module);
1243 if (state == NULL) {
1244 return NULL;
1245 }
1246 PyErr_SetString(state->Error, "Odd-length string");
1247 return NULL;
1248 }
1249
1250 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1251 if (!retval)
1252 return NULL;
1253 retbuf = PyBytes_AS_STRING(retval);
1254
1255 for (i=j=0; i < arglen; i += 2) {
1256 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
1257 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
1258 if (top >= 16 || bot >= 16) {
1259 state = PyModule_GetState(module);
1260 if (state == NULL) {
1261 return NULL;
1262 }
1263 PyErr_SetString(state->Error,
1264 "Non-hexadecimal digit found");
1265 goto finally;
1266 }
1267 retbuf[j++] = (top << 4) + bot;
1268 }
1269 return retval;
1270
1271 finally:
1272 Py_DECREF(retval);
1273 return NULL;
1274 }
1275
1276 /*[clinic input]
1277 binascii.unhexlify = binascii.a2b_hex
1278
1279 Binary data of hexadecimal representation.
1280
1281 hexstr must contain an even number of hex digits (upper or lower case).
1282 [clinic start generated code]*/
1283
1284 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)1285 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
1286 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
1287 {
1288 return binascii_a2b_hex_impl(module, hexstr);
1289 }
1290
1291 #define MAXLINESIZE 76
1292
1293
1294 /*[clinic input]
1295 binascii.a2b_qp
1296
1297 data: ascii_buffer
1298 header: bool(accept={int}) = False
1299
1300 Decode a string of qp-encoded data.
1301 [clinic start generated code]*/
1302
1303 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)1304 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
1305 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
1306 {
1307 Py_ssize_t in, out;
1308 char ch;
1309 const unsigned char *ascii_data;
1310 unsigned char *odata;
1311 Py_ssize_t datalen = 0;
1312 PyObject *rv;
1313
1314 ascii_data = data->buf;
1315 datalen = data->len;
1316
1317 /* We allocate the output same size as input, this is overkill.
1318 * The previous implementation used calloc() so we'll zero out the
1319 * memory here too, since PyMem_Malloc() does not guarantee that.
1320 */
1321 odata = (unsigned char *) PyMem_Malloc(datalen);
1322 if (odata == NULL) {
1323 PyErr_NoMemory();
1324 return NULL;
1325 }
1326 memset(odata, 0, datalen);
1327
1328 in = out = 0;
1329 while (in < datalen) {
1330 if (ascii_data[in] == '=') {
1331 in++;
1332 if (in >= datalen) break;
1333 /* Soft line breaks */
1334 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
1335 if (ascii_data[in] != '\n') {
1336 while (in < datalen && ascii_data[in] != '\n') in++;
1337 }
1338 if (in < datalen) in++;
1339 }
1340 else if (ascii_data[in] == '=') {
1341 /* broken case from broken python qp */
1342 odata[out++] = '=';
1343 in++;
1344 }
1345 else if ((in + 1 < datalen) &&
1346 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1347 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1348 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1349 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1350 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1351 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1352 /* hexval */
1353 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1354 in++;
1355 ch |= _PyLong_DigitValue[ascii_data[in]];
1356 in++;
1357 odata[out++] = ch;
1358 }
1359 else {
1360 odata[out++] = '=';
1361 }
1362 }
1363 else if (header && ascii_data[in] == '_') {
1364 odata[out++] = ' ';
1365 in++;
1366 }
1367 else {
1368 odata[out] = ascii_data[in];
1369 in++;
1370 out++;
1371 }
1372 }
1373 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1374 PyMem_Free(odata);
1375 return NULL;
1376 }
1377 PyMem_Free(odata);
1378 return rv;
1379 }
1380
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1391
1392 /* XXX: This is ridiculously complicated to be backward compatible
1393 * (mostly) with the quopri module. It doesn't re-create the quopri
1394 * module bug where text ending in CRLF has the CR encoded */
1395
1396 /*[clinic input]
1397 binascii.b2a_qp
1398
1399 data: Py_buffer
1400 quotetabs: bool(accept={int}) = False
1401 istext: bool(accept={int}) = True
1402 header: bool(accept={int}) = False
1403
1404 Encode a string using quoted-printable encoding.
1405
1406 On encoding, when istext is set, newlines are not encoded, and white
1407 space at end of lines is. When istext is not set, \r and \n (CR/LF)
1408 are both encoded. When quotetabs is set, space and tabs are encoded.
1409 [clinic start generated code]*/
1410
1411 static PyObject *
binascii_b2a_qp_impl(PyObject * module,Py_buffer * data,int quotetabs,int istext,int header)1412 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1413 int istext, int header)
1414 /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1415 {
1416 Py_ssize_t in, out;
1417 const unsigned char *databuf;
1418 unsigned char *odata;
1419 Py_ssize_t datalen = 0, odatalen = 0;
1420 PyObject *rv;
1421 unsigned int linelen = 0;
1422 unsigned char ch;
1423 int crlf = 0;
1424 const unsigned char *p;
1425
1426 databuf = data->buf;
1427 datalen = data->len;
1428
1429 /* See if this string is using CRLF line ends */
1430 /* XXX: this function has the side effect of converting all of
1431 * the end of lines to be the same depending on this detection
1432 * here */
1433 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1434 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1435 crlf = 1;
1436
1437 /* First, scan to see how many characters need to be encoded */
1438 in = 0;
1439 while (in < datalen) {
1440 Py_ssize_t delta = 0;
1441 if ((databuf[in] > 126) ||
1442 (databuf[in] == '=') ||
1443 (header && databuf[in] == '_') ||
1444 ((databuf[in] == '.') && (linelen == 0) &&
1445 (in + 1 == datalen || databuf[in+1] == '\n' ||
1446 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1447 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1448 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1449 ((databuf[in] < 33) &&
1450 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1451 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1452 {
1453 if ((linelen + 3) >= MAXLINESIZE) {
1454 linelen = 0;
1455 if (crlf)
1456 delta += 3;
1457 else
1458 delta += 2;
1459 }
1460 linelen += 3;
1461 delta += 3;
1462 in++;
1463 }
1464 else {
1465 if (istext &&
1466 ((databuf[in] == '\n') ||
1467 ((in+1 < datalen) && (databuf[in] == '\r') &&
1468 (databuf[in+1] == '\n'))))
1469 {
1470 linelen = 0;
1471 /* Protect against whitespace on end of line */
1472 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1473 delta += 2;
1474 if (crlf)
1475 delta += 2;
1476 else
1477 delta += 1;
1478 if (databuf[in] == '\r')
1479 in += 2;
1480 else
1481 in++;
1482 }
1483 else {
1484 if ((in + 1 != datalen) &&
1485 (databuf[in+1] != '\n') &&
1486 (linelen + 1) >= MAXLINESIZE) {
1487 linelen = 0;
1488 if (crlf)
1489 delta += 3;
1490 else
1491 delta += 2;
1492 }
1493 linelen++;
1494 delta++;
1495 in++;
1496 }
1497 }
1498 if (PY_SSIZE_T_MAX - delta < odatalen) {
1499 PyErr_NoMemory();
1500 return NULL;
1501 }
1502 odatalen += delta;
1503 }
1504
1505 /* We allocate the output same size as input, this is overkill.
1506 * The previous implementation used calloc() so we'll zero out the
1507 * memory here too, since PyMem_Malloc() does not guarantee that.
1508 */
1509 odata = (unsigned char *) PyMem_Malloc(odatalen);
1510 if (odata == NULL) {
1511 PyErr_NoMemory();
1512 return NULL;
1513 }
1514 memset(odata, 0, odatalen);
1515
1516 in = out = linelen = 0;
1517 while (in < datalen) {
1518 if ((databuf[in] > 126) ||
1519 (databuf[in] == '=') ||
1520 (header && databuf[in] == '_') ||
1521 ((databuf[in] == '.') && (linelen == 0) &&
1522 (in + 1 == datalen || databuf[in+1] == '\n' ||
1523 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1524 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1525 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1526 ((databuf[in] < 33) &&
1527 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1528 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1529 {
1530 if ((linelen + 3 )>= MAXLINESIZE) {
1531 odata[out++] = '=';
1532 if (crlf) odata[out++] = '\r';
1533 odata[out++] = '\n';
1534 linelen = 0;
1535 }
1536 odata[out++] = '=';
1537 to_hex(databuf[in], &odata[out]);
1538 out += 2;
1539 in++;
1540 linelen += 3;
1541 }
1542 else {
1543 if (istext &&
1544 ((databuf[in] == '\n') ||
1545 ((in+1 < datalen) && (databuf[in] == '\r') &&
1546 (databuf[in+1] == '\n'))))
1547 {
1548 linelen = 0;
1549 /* Protect against whitespace on end of line */
1550 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1551 ch = odata[out-1];
1552 odata[out-1] = '=';
1553 to_hex(ch, &odata[out]);
1554 out += 2;
1555 }
1556
1557 if (crlf) odata[out++] = '\r';
1558 odata[out++] = '\n';
1559 if (databuf[in] == '\r')
1560 in += 2;
1561 else
1562 in++;
1563 }
1564 else {
1565 if ((in + 1 != datalen) &&
1566 (databuf[in+1] != '\n') &&
1567 (linelen + 1) >= MAXLINESIZE) {
1568 odata[out++] = '=';
1569 if (crlf) odata[out++] = '\r';
1570 odata[out++] = '\n';
1571 linelen = 0;
1572 }
1573 linelen++;
1574 if (header && databuf[in] == ' ') {
1575 odata[out++] = '_';
1576 in++;
1577 }
1578 else {
1579 odata[out++] = databuf[in++];
1580 }
1581 }
1582 }
1583 }
1584 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1585 PyMem_Free(odata);
1586 return NULL;
1587 }
1588 PyMem_Free(odata);
1589 return rv;
1590 }
1591
1592 /* List of functions defined in the module */
1593
1594 static struct PyMethodDef binascii_module_methods[] = {
1595 BINASCII_A2B_UU_METHODDEF
1596 BINASCII_B2A_UU_METHODDEF
1597 BINASCII_A2B_BASE64_METHODDEF
1598 BINASCII_B2A_BASE64_METHODDEF
1599 BINASCII_A2B_HQX_METHODDEF
1600 BINASCII_B2A_HQX_METHODDEF
1601 BINASCII_A2B_HEX_METHODDEF
1602 BINASCII_B2A_HEX_METHODDEF
1603 BINASCII_HEXLIFY_METHODDEF
1604 BINASCII_UNHEXLIFY_METHODDEF
1605 BINASCII_RLECODE_HQX_METHODDEF
1606 BINASCII_RLEDECODE_HQX_METHODDEF
1607 BINASCII_CRC_HQX_METHODDEF
1608 BINASCII_CRC32_METHODDEF
1609 BINASCII_A2B_QP_METHODDEF
1610 BINASCII_B2A_QP_METHODDEF
1611 {NULL, NULL} /* sentinel */
1612 };
1613
1614
1615 /* Initialization function for the module (*must* be called PyInit_binascii) */
1616 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1617
1618 static int
binascii_exec(PyObject * m)1619 binascii_exec(PyObject *m) {
1620 int result;
1621 binascii_state *state = PyModule_GetState(m);
1622 if (state == NULL) {
1623 return -1;
1624 }
1625
1626 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1627 if (state->Error == NULL) {
1628 return -1;
1629 }
1630 result = PyModule_AddObject(m, "Error", state->Error);
1631 if (result == -1) {
1632 return -1;
1633 }
1634
1635 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1636 if (state->Incomplete == NULL) {
1637 return -1;
1638 }
1639 result = PyModule_AddObject(m, "Incomplete", state->Incomplete);
1640 if (result == -1) {
1641 return -1;
1642 }
1643
1644 return 0;
1645 }
1646
1647 static PyModuleDef_Slot binascii_slots[] = {
1648 {Py_mod_exec, binascii_exec},
1649 {0, NULL}
1650 };
1651
1652 static struct PyModuleDef binasciimodule = {
1653 PyModuleDef_HEAD_INIT,
1654 "binascii",
1655 doc_binascii,
1656 sizeof(binascii_state),
1657 binascii_module_methods,
1658 binascii_slots,
1659 NULL,
1660 NULL,
1661 NULL
1662 };
1663
1664 PyMODINIT_FUNC
PyInit_binascii(void)1665 PyInit_binascii(void)
1666 {
1667 return PyModuleDef_Init(&binasciimodule);
1668 }
1669