1 /* bytes object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include "pycore_object.h"
7 #include "pycore_pymem.h"
8 #include "pycore_pystate.h"
9
10 #include "bytes_methods.h"
11 #include "pystrhex.h"
12 #include <stddef.h>
13
14 /*[clinic input]
15 class bytes "PyBytesObject *" "&PyBytes_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
18
19 #include "clinic/bytesobject.c.h"
20
/* Allocation statistics, compiled in only for COUNT_ALLOCS builds: the
   constructors in this file increment these whenever they hand out the
   cached empty object or a cached one-byte object. */
#ifdef COUNT_ALLOCS
Py_ssize_t _Py_null_strings, _Py_one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by byte value
   (*str & UCHAR_MAX).  A slot is NULL until an object for that byte value
   has been created and stored by one of the constructors below. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty object is created. */
static PyBytesObject *nullstring;
27
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyBytesObject_SIZE + n bytes.
   The "+ 1" is the room for the null byte the constructors store right
   after the n data bytes.

   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
   3 bytes per string allocation on a typical system.
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35
/* Forward declaration; the definition is not in this part of the file. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
                                                   char *str);
39
/*
   For PyBytes_FromString(), the parameter `str' points to a null-terminated
   string; the new object holds a copy of the strlen(str) bytes that precede
   the terminator.

   For PyBytes_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyBytes object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyBytes_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyBytes_FromString()).
*/
62 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)63 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
64 {
65 PyBytesObject *op;
66 assert(size >= 0);
67
68 if (size == 0 && (op = nullstring) != NULL) {
69 #ifdef COUNT_ALLOCS
70 _Py_null_strings++;
71 #endif
72 Py_INCREF(op);
73 return (PyObject *)op;
74 }
75
76 if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
77 PyErr_SetString(PyExc_OverflowError,
78 "byte string is too large");
79 return NULL;
80 }
81
82 /* Inline PyObject_NewVar */
83 if (use_calloc)
84 op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85 else
86 op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
87 if (op == NULL)
88 return PyErr_NoMemory();
89 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
90 op->ob_shash = -1;
91 if (!use_calloc)
92 op->ob_sval[size] = '\0';
93 /* empty byte string singleton */
94 if (size == 0) {
95 nullstring = op;
96 Py_INCREF(op);
97 }
98 return (PyObject *) op;
99 }
100
101 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)102 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103 {
104 PyBytesObject *op;
105 if (size < 0) {
106 PyErr_SetString(PyExc_SystemError,
107 "Negative size passed to PyBytes_FromStringAndSize");
108 return NULL;
109 }
110 if (size == 1 && str != NULL &&
111 (op = characters[*str & UCHAR_MAX]) != NULL)
112 {
113 #ifdef COUNT_ALLOCS
114 _Py_one_strings++;
115 #endif
116 Py_INCREF(op);
117 return (PyObject *)op;
118 }
119
120 op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121 if (op == NULL)
122 return NULL;
123 if (str == NULL)
124 return (PyObject *) op;
125
126 memcpy(op->ob_sval, str, size);
127 /* share short strings */
128 if (size == 1) {
129 characters[*str & UCHAR_MAX] = op;
130 Py_INCREF(op);
131 }
132 return (PyObject *) op;
133 }
134
135 PyObject *
PyBytes_FromString(const char * str)136 PyBytes_FromString(const char *str)
137 {
138 size_t size;
139 PyBytesObject *op;
140
141 assert(str != NULL);
142 size = strlen(str);
143 if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144 PyErr_SetString(PyExc_OverflowError,
145 "byte string is too long");
146 return NULL;
147 }
148 if (size == 0 && (op = nullstring) != NULL) {
149 #ifdef COUNT_ALLOCS
150 _Py_null_strings++;
151 #endif
152 Py_INCREF(op);
153 return (PyObject *)op;
154 }
155 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
156 #ifdef COUNT_ALLOCS
157 _Py_one_strings++;
158 #endif
159 Py_INCREF(op);
160 return (PyObject *)op;
161 }
162
163 /* Inline PyObject_NewVar */
164 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165 if (op == NULL)
166 return PyErr_NoMemory();
167 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
168 op->ob_shash = -1;
169 memcpy(op->ob_sval, str, size+1);
170 /* share short strings */
171 if (size == 0) {
172 nullstring = op;
173 Py_INCREF(op);
174 } else if (size == 1) {
175 characters[*str & UCHAR_MAX] = op;
176 Py_INCREF(op);
177 }
178 return (PyObject *) op;
179 }
180
181 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)182 PyBytes_FromFormatV(const char *format, va_list vargs)
183 {
184 char *s;
185 const char *f;
186 const char *p;
187 Py_ssize_t prec;
188 int longflag;
189 int size_tflag;
190 /* Longest 64-bit formatted numbers:
191 - "18446744073709551615\0" (21 bytes)
192 - "-9223372036854775808\0" (21 bytes)
193 Decimal takes the most space (it isn't enough for octal.)
194
195 Longest 64-bit pointer representation:
196 "0xffffffffffffffff\0" (19 bytes). */
197 char buffer[21];
198 _PyBytesWriter writer;
199
200 _PyBytesWriter_Init(&writer);
201
202 s = _PyBytesWriter_Alloc(&writer, strlen(format));
203 if (s == NULL)
204 return NULL;
205 writer.overallocate = 1;
206
207 #define WRITE_BYTES(str) \
208 do { \
209 s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210 if (s == NULL) \
211 goto error; \
212 } while (0)
213
214 for (f = format; *f; f++) {
215 if (*f != '%') {
216 *s++ = *f;
217 continue;
218 }
219
220 p = f++;
221
222 /* ignore the width (ex: 10 in "%10s") */
223 while (Py_ISDIGIT(*f))
224 f++;
225
226 /* parse the precision (ex: 10 in "%.10s") */
227 prec = 0;
228 if (*f == '.') {
229 f++;
230 for (; Py_ISDIGIT(*f); f++) {
231 prec = (prec * 10) + (*f - '0');
232 }
233 }
234
235 while (*f && *f != '%' && !Py_ISALPHA(*f))
236 f++;
237
238 /* handle the long flag ('l'), but only for %ld and %lu.
239 others can be added when necessary. */
240 longflag = 0;
241 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242 longflag = 1;
243 ++f;
244 }
245
246 /* handle the size_t flag ('z'). */
247 size_tflag = 0;
248 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249 size_tflag = 1;
250 ++f;
251 }
252
253 /* subtract bytes preallocated for the format string
254 (ex: 2 for "%s") */
255 writer.min_size -= (f - p + 1);
256
257 switch (*f) {
258 case 'c':
259 {
260 int c = va_arg(vargs, int);
261 if (c < 0 || c > 255) {
262 PyErr_SetString(PyExc_OverflowError,
263 "PyBytes_FromFormatV(): %c format "
264 "expects an integer in range [0; 255]");
265 goto error;
266 }
267 writer.min_size++;
268 *s++ = (unsigned char)c;
269 break;
270 }
271
272 case 'd':
273 if (longflag)
274 sprintf(buffer, "%ld", va_arg(vargs, long));
275 else if (size_tflag)
276 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277 va_arg(vargs, Py_ssize_t));
278 else
279 sprintf(buffer, "%d", va_arg(vargs, int));
280 assert(strlen(buffer) < sizeof(buffer));
281 WRITE_BYTES(buffer);
282 break;
283
284 case 'u':
285 if (longflag)
286 sprintf(buffer, "%lu",
287 va_arg(vargs, unsigned long));
288 else if (size_tflag)
289 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290 va_arg(vargs, size_t));
291 else
292 sprintf(buffer, "%u",
293 va_arg(vargs, unsigned int));
294 assert(strlen(buffer) < sizeof(buffer));
295 WRITE_BYTES(buffer);
296 break;
297
298 case 'i':
299 sprintf(buffer, "%i", va_arg(vargs, int));
300 assert(strlen(buffer) < sizeof(buffer));
301 WRITE_BYTES(buffer);
302 break;
303
304 case 'x':
305 sprintf(buffer, "%x", va_arg(vargs, int));
306 assert(strlen(buffer) < sizeof(buffer));
307 WRITE_BYTES(buffer);
308 break;
309
310 case 's':
311 {
312 Py_ssize_t i;
313
314 p = va_arg(vargs, const char*);
315 if (prec <= 0) {
316 i = strlen(p);
317 }
318 else {
319 i = 0;
320 while (i < prec && p[i]) {
321 i++;
322 }
323 }
324 s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325 if (s == NULL)
326 goto error;
327 break;
328 }
329
330 case 'p':
331 sprintf(buffer, "%p", va_arg(vargs, void*));
332 assert(strlen(buffer) < sizeof(buffer));
333 /* %p is ill-defined: ensure leading 0x. */
334 if (buffer[1] == 'X')
335 buffer[1] = 'x';
336 else if (buffer[1] != 'x') {
337 memmove(buffer+2, buffer, strlen(buffer)+1);
338 buffer[0] = '0';
339 buffer[1] = 'x';
340 }
341 WRITE_BYTES(buffer);
342 break;
343
344 case '%':
345 writer.min_size++;
346 *s++ = '%';
347 break;
348
349 default:
350 if (*f == 0) {
351 /* fix min_size if we reached the end of the format string */
352 writer.min_size++;
353 }
354
355 /* invalid format string: copy unformatted string and exit */
356 WRITE_BYTES(p);
357 return _PyBytesWriter_Finish(&writer, s);
358 }
359 }
360
361 #undef WRITE_BYTES
362
363 return _PyBytesWriter_Finish(&writer, s);
364
365 error:
366 _PyBytesWriter_Dealloc(&writer);
367 return NULL;
368 }
369
370 PyObject *
PyBytes_FromFormat(const char * format,...)371 PyBytes_FromFormat(const char *format, ...)
372 {
373 PyObject* ret;
374 va_list vargs;
375
376 #ifdef HAVE_STDARG_PROTOTYPES
377 va_start(vargs, format);
378 #else
379 va_start(vargs);
380 #endif
381 ret = PyBytes_FromFormatV(format, vargs);
382 va_end(vargs);
383 return ret;
384 }
385
386 /* Helpers for formatstring */
387
388 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)389 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390 {
391 Py_ssize_t argidx = *p_argidx;
392 if (argidx < arglen) {
393 (*p_argidx)++;
394 if (arglen < 0)
395 return args;
396 else
397 return PyTuple_GetItem(args, argidx);
398 }
399 PyErr_SetString(PyExc_TypeError,
400 "not enough arguments for format string");
401 return NULL;
402 }
403
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Each flag character seen in a % conversion sets the corresponding bit;
 * the bits are OR-ed together into a single int `flags` value.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
416
417 /* Returns a new reference to a PyBytes object, or NULL on failure. */
418
419 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)420 formatfloat(PyObject *v, int flags, int prec, int type,
421 PyObject **p_result, _PyBytesWriter *writer, char *str)
422 {
423 char *p;
424 PyObject *result;
425 double x;
426 size_t len;
427
428 x = PyFloat_AsDouble(v);
429 if (x == -1.0 && PyErr_Occurred()) {
430 PyErr_Format(PyExc_TypeError, "float argument required, "
431 "not %.200s", Py_TYPE(v)->tp_name);
432 return NULL;
433 }
434
435 if (prec < 0)
436 prec = 6;
437
438 p = PyOS_double_to_string(x, type, prec,
439 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440
441 if (p == NULL)
442 return NULL;
443
444 len = strlen(p);
445 if (writer != NULL) {
446 str = _PyBytesWriter_Prepare(writer, str, len);
447 if (str == NULL)
448 return NULL;
449 memcpy(str, p, len);
450 PyMem_Free(p);
451 str += len;
452 return str;
453 }
454
455 result = PyBytes_FromStringAndSize(p, len);
456 PyMem_Free(p);
457 *p_result = result;
458 return result != NULL ? str : NULL;
459 }
460
461 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)462 formatlong(PyObject *v, int flags, int prec, int type)
463 {
464 PyObject *result, *iobj;
465 if (type == 'i')
466 type = 'd';
467 if (PyLong_Check(v))
468 return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469 if (PyNumber_Check(v)) {
470 /* make sure number is a type of integer for o, x, and X */
471 if (type == 'o' || type == 'x' || type == 'X')
472 iobj = PyNumber_Index(v);
473 else
474 iobj = PyNumber_Long(v);
475 if (iobj == NULL) {
476 if (!PyErr_ExceptionMatches(PyExc_TypeError))
477 return NULL;
478 }
479 else if (!PyLong_Check(iobj))
480 Py_CLEAR(iobj);
481 if (iobj != NULL) {
482 result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483 Py_DECREF(iobj);
484 return result;
485 }
486 }
487 PyErr_Format(PyExc_TypeError,
488 "%%%c format: %s is required, not %.200s", type,
489 (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490 : "a number",
491 Py_TYPE(v)->tp_name);
492 return NULL;
493 }
494
495 static int
byte_converter(PyObject * arg,char * p)496 byte_converter(PyObject *arg, char *p)
497 {
498 if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
499 *p = PyBytes_AS_STRING(arg)[0];
500 return 1;
501 }
502 else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
503 *p = PyByteArray_AS_STRING(arg)[0];
504 return 1;
505 }
506 else {
507 PyObject *iobj;
508 long ival;
509 int overflow;
510 /* make sure number is a type of integer */
511 if (PyLong_Check(arg)) {
512 ival = PyLong_AsLongAndOverflow(arg, &overflow);
513 }
514 else {
515 iobj = PyNumber_Index(arg);
516 if (iobj == NULL) {
517 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518 return 0;
519 goto onError;
520 }
521 ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522 Py_DECREF(iobj);
523 }
524 if (!overflow && ival == -1 && PyErr_Occurred())
525 goto onError;
526 if (overflow || !(0 <= ival && ival <= 255)) {
527 PyErr_SetString(PyExc_OverflowError,
528 "%c arg not in range(256)");
529 return 0;
530 }
531 *p = (char)ival;
532 return 1;
533 }
534 onError:
535 PyErr_SetString(PyExc_TypeError,
536 "%c requires an integer in range(256) or a single byte");
537 return 0;
538 }
539
/* Forward declaration: format_obj() calls this for objects that support the
   buffer protocol; the definition is not in this part of the file. */
static PyObject *_PyBytes_FromBuffer(PyObject *x);
541
542 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)543 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
544 {
545 PyObject *func, *result;
546 _Py_IDENTIFIER(__bytes__);
547 /* is it a bytes object? */
548 if (PyBytes_Check(v)) {
549 *pbuf = PyBytes_AS_STRING(v);
550 *plen = PyBytes_GET_SIZE(v);
551 Py_INCREF(v);
552 return v;
553 }
554 if (PyByteArray_Check(v)) {
555 *pbuf = PyByteArray_AS_STRING(v);
556 *plen = PyByteArray_GET_SIZE(v);
557 Py_INCREF(v);
558 return v;
559 }
560 /* does it support __bytes__? */
561 func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562 if (func != NULL) {
563 result = _PyObject_CallNoArg(func);
564 Py_DECREF(func);
565 if (result == NULL)
566 return NULL;
567 if (!PyBytes_Check(result)) {
568 PyErr_Format(PyExc_TypeError,
569 "__bytes__ returned non-bytes (type %.200s)",
570 Py_TYPE(result)->tp_name);
571 Py_DECREF(result);
572 return NULL;
573 }
574 *pbuf = PyBytes_AS_STRING(result);
575 *plen = PyBytes_GET_SIZE(result);
576 return result;
577 }
578 /* does it support buffer protocol? */
579 if (PyObject_CheckBuffer(v)) {
580 /* maybe we can avoid making a copy of the buffer object here? */
581 result = _PyBytes_FromBuffer(v);
582 if (result == NULL)
583 return NULL;
584 *pbuf = PyBytes_AS_STRING(result);
585 *plen = PyBytes_GET_SIZE(result);
586 return result;
587 }
588 PyErr_Format(PyExc_TypeError,
589 "%%b requires a bytes-like object, "
590 "or an object that implements __bytes__, not '%.100s'",
591 Py_TYPE(v)->tp_name);
592 return NULL;
593 }
594
595 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
596
597 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)598 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599 PyObject *args, int use_bytearray)
600 {
601 const char *fmt;
602 char *res;
603 Py_ssize_t arglen, argidx;
604 Py_ssize_t fmtcnt;
605 int args_owned = 0;
606 PyObject *dict = NULL;
607 _PyBytesWriter writer;
608
609 if (args == NULL) {
610 PyErr_BadInternalCall();
611 return NULL;
612 }
613 fmt = format;
614 fmtcnt = format_len;
615
616 _PyBytesWriter_Init(&writer);
617 writer.use_bytearray = use_bytearray;
618
619 res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620 if (res == NULL)
621 return NULL;
622 if (!use_bytearray)
623 writer.overallocate = 1;
624
625 if (PyTuple_Check(args)) {
626 arglen = PyTuple_GET_SIZE(args);
627 argidx = 0;
628 }
629 else {
630 arglen = -1;
631 argidx = -2;
632 }
633 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634 !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635 !PyByteArray_Check(args)) {
636 dict = args;
637 }
638
639 while (--fmtcnt >= 0) {
640 if (*fmt != '%') {
641 Py_ssize_t len;
642 char *pos;
643
644 pos = (char *)memchr(fmt + 1, '%', fmtcnt);
645 if (pos != NULL)
646 len = pos - fmt;
647 else
648 len = fmtcnt + 1;
649 assert(len != 0);
650
651 memcpy(res, fmt, len);
652 res += len;
653 fmt += len;
654 fmtcnt -= (len - 1);
655 }
656 else {
657 /* Got a format specifier */
658 int flags = 0;
659 Py_ssize_t width = -1;
660 int prec = -1;
661 int c = '\0';
662 int fill;
663 PyObject *v = NULL;
664 PyObject *temp = NULL;
665 const char *pbuf = NULL;
666 int sign;
667 Py_ssize_t len = 0;
668 char onechar; /* For byte_converter() */
669 Py_ssize_t alloc;
670
671 fmt++;
672 if (*fmt == '%') {
673 *res++ = '%';
674 fmt++;
675 fmtcnt--;
676 continue;
677 }
678 if (*fmt == '(') {
679 const char *keystart;
680 Py_ssize_t keylen;
681 PyObject *key;
682 int pcount = 1;
683
684 if (dict == NULL) {
685 PyErr_SetString(PyExc_TypeError,
686 "format requires a mapping");
687 goto error;
688 }
689 ++fmt;
690 --fmtcnt;
691 keystart = fmt;
692 /* Skip over balanced parentheses */
693 while (pcount > 0 && --fmtcnt >= 0) {
694 if (*fmt == ')')
695 --pcount;
696 else if (*fmt == '(')
697 ++pcount;
698 fmt++;
699 }
700 keylen = fmt - keystart - 1;
701 if (fmtcnt < 0 || pcount > 0) {
702 PyErr_SetString(PyExc_ValueError,
703 "incomplete format key");
704 goto error;
705 }
706 key = PyBytes_FromStringAndSize(keystart,
707 keylen);
708 if (key == NULL)
709 goto error;
710 if (args_owned) {
711 Py_DECREF(args);
712 args_owned = 0;
713 }
714 args = PyObject_GetItem(dict, key);
715 Py_DECREF(key);
716 if (args == NULL) {
717 goto error;
718 }
719 args_owned = 1;
720 arglen = -1;
721 argidx = -2;
722 }
723
724 /* Parse flags. Example: "%+i" => flags=F_SIGN. */
725 while (--fmtcnt >= 0) {
726 switch (c = *fmt++) {
727 case '-': flags |= F_LJUST; continue;
728 case '+': flags |= F_SIGN; continue;
729 case ' ': flags |= F_BLANK; continue;
730 case '#': flags |= F_ALT; continue;
731 case '0': flags |= F_ZERO; continue;
732 }
733 break;
734 }
735
736 /* Parse width. Example: "%10s" => width=10 */
737 if (c == '*') {
738 v = getnextarg(args, arglen, &argidx);
739 if (v == NULL)
740 goto error;
741 if (!PyLong_Check(v)) {
742 PyErr_SetString(PyExc_TypeError,
743 "* wants int");
744 goto error;
745 }
746 width = PyLong_AsSsize_t(v);
747 if (width == -1 && PyErr_Occurred())
748 goto error;
749 if (width < 0) {
750 flags |= F_LJUST;
751 width = -width;
752 }
753 if (--fmtcnt >= 0)
754 c = *fmt++;
755 }
756 else if (c >= 0 && isdigit(c)) {
757 width = c - '0';
758 while (--fmtcnt >= 0) {
759 c = Py_CHARMASK(*fmt++);
760 if (!isdigit(c))
761 break;
762 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763 PyErr_SetString(
764 PyExc_ValueError,
765 "width too big");
766 goto error;
767 }
768 width = width*10 + (c - '0');
769 }
770 }
771
772 /* Parse precision. Example: "%.3f" => prec=3 */
773 if (c == '.') {
774 prec = 0;
775 if (--fmtcnt >= 0)
776 c = *fmt++;
777 if (c == '*') {
778 v = getnextarg(args, arglen, &argidx);
779 if (v == NULL)
780 goto error;
781 if (!PyLong_Check(v)) {
782 PyErr_SetString(
783 PyExc_TypeError,
784 "* wants int");
785 goto error;
786 }
787 prec = _PyLong_AsInt(v);
788 if (prec == -1 && PyErr_Occurred())
789 goto error;
790 if (prec < 0)
791 prec = 0;
792 if (--fmtcnt >= 0)
793 c = *fmt++;
794 }
795 else if (c >= 0 && isdigit(c)) {
796 prec = c - '0';
797 while (--fmtcnt >= 0) {
798 c = Py_CHARMASK(*fmt++);
799 if (!isdigit(c))
800 break;
801 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802 PyErr_SetString(
803 PyExc_ValueError,
804 "prec too big");
805 goto error;
806 }
807 prec = prec*10 + (c - '0');
808 }
809 }
810 } /* prec */
811 if (fmtcnt >= 0) {
812 if (c == 'h' || c == 'l' || c == 'L') {
813 if (--fmtcnt >= 0)
814 c = *fmt++;
815 }
816 }
817 if (fmtcnt < 0) {
818 PyErr_SetString(PyExc_ValueError,
819 "incomplete format");
820 goto error;
821 }
822 v = getnextarg(args, arglen, &argidx);
823 if (v == NULL)
824 goto error;
825
826 if (fmtcnt == 0) {
827 /* last write: disable writer overallocation */
828 writer.overallocate = 0;
829 }
830
831 sign = 0;
832 fill = ' ';
833 switch (c) {
834 case 'r':
835 // %r is only for 2/3 code; 3 only code should use %a
836 case 'a':
837 temp = PyObject_ASCII(v);
838 if (temp == NULL)
839 goto error;
840 assert(PyUnicode_IS_ASCII(temp));
841 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842 len = PyUnicode_GET_LENGTH(temp);
843 if (prec >= 0 && len > prec)
844 len = prec;
845 break;
846
847 case 's':
848 // %s is only for 2/3 code; 3 only code should use %b
849 case 'b':
850 temp = format_obj(v, &pbuf, &len);
851 if (temp == NULL)
852 goto error;
853 if (prec >= 0 && len > prec)
854 len = prec;
855 break;
856
857 case 'i':
858 case 'd':
859 case 'u':
860 case 'o':
861 case 'x':
862 case 'X':
863 if (PyLong_CheckExact(v)
864 && width == -1 && prec == -1
865 && !(flags & (F_SIGN | F_BLANK))
866 && c != 'X')
867 {
868 /* Fast path */
869 int alternate = flags & F_ALT;
870 int base;
871
872 switch(c)
873 {
874 default:
875 Py_UNREACHABLE();
876 case 'd':
877 case 'i':
878 case 'u':
879 base = 10;
880 break;
881 case 'o':
882 base = 8;
883 break;
884 case 'x':
885 case 'X':
886 base = 16;
887 break;
888 }
889
890 /* Fast path */
891 writer.min_size -= 2; /* size preallocated for "%d" */
892 res = _PyLong_FormatBytesWriter(&writer, res,
893 v, base, alternate);
894 if (res == NULL)
895 goto error;
896 continue;
897 }
898
899 temp = formatlong(v, flags, prec, c);
900 if (!temp)
901 goto error;
902 assert(PyUnicode_IS_ASCII(temp));
903 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904 len = PyUnicode_GET_LENGTH(temp);
905 sign = 1;
906 if (flags & F_ZERO)
907 fill = '0';
908 break;
909
910 case 'e':
911 case 'E':
912 case 'f':
913 case 'F':
914 case 'g':
915 case 'G':
916 if (width == -1 && prec == -1
917 && !(flags & (F_SIGN | F_BLANK)))
918 {
919 /* Fast path */
920 writer.min_size -= 2; /* size preallocated for "%f" */
921 res = formatfloat(v, flags, prec, c, NULL, &writer, res);
922 if (res == NULL)
923 goto error;
924 continue;
925 }
926
927 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
928 goto error;
929 pbuf = PyBytes_AS_STRING(temp);
930 len = PyBytes_GET_SIZE(temp);
931 sign = 1;
932 if (flags & F_ZERO)
933 fill = '0';
934 break;
935
936 case 'c':
937 pbuf = &onechar;
938 len = byte_converter(v, &onechar);
939 if (!len)
940 goto error;
941 if (width == -1) {
942 /* Fast path */
943 *res++ = onechar;
944 continue;
945 }
946 break;
947
948 default:
949 PyErr_Format(PyExc_ValueError,
950 "unsupported format character '%c' (0x%x) "
951 "at index %zd",
952 c, c,
953 (Py_ssize_t)(fmt - 1 - format));
954 goto error;
955 }
956
957 if (sign) {
958 if (*pbuf == '-' || *pbuf == '+') {
959 sign = *pbuf++;
960 len--;
961 }
962 else if (flags & F_SIGN)
963 sign = '+';
964 else if (flags & F_BLANK)
965 sign = ' ';
966 else
967 sign = 0;
968 }
969 if (width < len)
970 width = len;
971
972 alloc = width;
973 if (sign != 0 && len == width)
974 alloc++;
975 /* 2: size preallocated for %s */
976 if (alloc > 2) {
977 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
978 if (res == NULL)
979 goto error;
980 }
981 #ifndef NDEBUG
982 char *before = res;
983 #endif
984
985 /* Write the sign if needed */
986 if (sign) {
987 if (fill != ' ')
988 *res++ = sign;
989 if (width > len)
990 width--;
991 }
992
993 /* Write the numeric prefix for "x", "X" and "o" formats
994 if the alternate form is used.
995 For example, write "0x" for the "%#x" format. */
996 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
997 assert(pbuf[0] == '0');
998 assert(pbuf[1] == c);
999 if (fill != ' ') {
1000 *res++ = *pbuf++;
1001 *res++ = *pbuf++;
1002 }
1003 width -= 2;
1004 if (width < 0)
1005 width = 0;
1006 len -= 2;
1007 }
1008
1009 /* Pad left with the fill character if needed */
1010 if (width > len && !(flags & F_LJUST)) {
1011 memset(res, fill, width - len);
1012 res += (width - len);
1013 width = len;
1014 }
1015
1016 /* If padding with spaces: write sign if needed and/or numeric
1017 prefix if the alternate form is used */
1018 if (fill == ' ') {
1019 if (sign)
1020 *res++ = sign;
1021 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1022 assert(pbuf[0] == '0');
1023 assert(pbuf[1] == c);
1024 *res++ = *pbuf++;
1025 *res++ = *pbuf++;
1026 }
1027 }
1028
1029 /* Copy bytes */
1030 memcpy(res, pbuf, len);
1031 res += len;
1032
1033 /* Pad right with the fill character if needed */
1034 if (width > len) {
1035 memset(res, ' ', width - len);
1036 res += (width - len);
1037 }
1038
1039 if (dict && (argidx < arglen)) {
1040 PyErr_SetString(PyExc_TypeError,
1041 "not all arguments converted during bytes formatting");
1042 Py_XDECREF(temp);
1043 goto error;
1044 }
1045 Py_XDECREF(temp);
1046
1047 #ifndef NDEBUG
1048 /* check that we computed the exact size for this write */
1049 assert((res - before) == alloc);
1050 #endif
1051 } /* '%' */
1052
1053 /* If overallocation was disabled, ensure that it was the last
1054 write. Otherwise, we missed an optimization */
1055 assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1056 } /* until end */
1057
1058 if (argidx < arglen && !dict) {
1059 PyErr_SetString(PyExc_TypeError,
1060 "not all arguments converted during bytes formatting");
1061 goto error;
1062 }
1063
1064 if (args_owned) {
1065 Py_DECREF(args);
1066 }
1067 return _PyBytesWriter_Finish(&writer, res);
1068
1069 error:
1070 _PyBytesWriter_Dealloc(&writer);
1071 if (args_owned) {
1072 Py_DECREF(args);
1073 }
1074 return NULL;
1075 }
1076
1077 /* Unescape a backslash-escaped string. If unicode is non-zero,
1078 the string is a u-literal. If recode_encoding is non-zero,
1079 the string is UTF-8 encoded and should be re-encoded in the
1080 specified encoding. */
1081
1082 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1083 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1084 const char *errors, const char *recode_encoding,
1085 _PyBytesWriter *writer, char *p)
1086 {
1087 PyObject *u, *w;
1088 const char* t;
1089
1090 t = *s;
1091 /* Decode non-ASCII bytes as UTF-8. */
1092 while (t < end && (*t & 0x80))
1093 t++;
1094 u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1095 if (u == NULL)
1096 return NULL;
1097
1098 /* Recode them in target encoding. */
1099 w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1100 Py_DECREF(u);
1101 if (w == NULL)
1102 return NULL;
1103 assert(PyBytes_Check(w));
1104
1105 /* Append bytes to output buffer. */
1106 writer->min_size--; /* subtract 1 preallocated byte */
1107 p = _PyBytesWriter_WriteBytes(writer, p,
1108 PyBytes_AS_STRING(w),
1109 PyBytes_GET_SIZE(w));
1110 Py_DECREF(w);
1111 if (p == NULL)
1112 return NULL;
1113
1114 *s = t;
1115 return p;
1116 }
1117
/* Decode the backslash escapes in the `len` bytes at `s`.

   s, len                input text
   errors                how to treat a malformed \x escape: NULL or
                         "strict" raises ValueError, "replace" emits '?',
                         "ignore" emits nothing; any other value raises
                         ValueError.  Also forwarded to the recoding helper.
   unicode               not read by this function
   recode_encoding       when non-NULL, runs of bytes with the high bit set
                         are decoded as UTF-8 and re-encoded in this
                         encoding
   first_invalid_escape  output (must not be NULL): set to NULL, or to the
                         character following the backslash of the first
                         unrecognized escape sequence in `s`

   Returns the object built by _PyBytesWriter_Finish(), or NULL on failure
   (ValueError for a trailing backslash or a rejected \x escape, or
   whatever the recoding helper or the writer reported). */
PyObject *_PyBytes_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                Py_ssize_t unicode,
                                const char *recode_encoding,
                                const char **first_invalid_escape)
{
    int c;
    char *p;
    const char *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    /* Preallocate one output byte per input byte. */
    p = _PyBytesWriter_Alloc(&writer, len);
    if (p == NULL)
        return NULL;
    writer.overallocate = 1;

    *first_invalid_escape = NULL;

    end = s + len;
    while (s < end) {
        if (*s != '\\') {
          /* Also entered by goto from the `default` case below, after an
             unrecognized escape has been backed up to its first character. */
          non_esc:
            if (!(recode_encoding && (*s & 0x80))) {
                *p++ = *s++;
            }
            else {
                /* non-ASCII character and need to recode */
                p = _PyBytes_DecodeEscapeRecode(&s, end,
                                                errors, recode_encoding,
                                                &writer, p);
                if (p == NULL)
                    goto failed;
            }
            continue;
        }

        /* skip the backslash */
        s++;
        if (s == end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }

        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;   /* backslash-newline produces no output */
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* octal escape: one to three octal digits */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            /* stored as a single char, so values above 0377 are truncated */
            *p++ = c;
            break;
        case 'x':
            /* hex escape: exactly two hex digits are required */
            if (s+1 < end) {
                int digit1, digit2;
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
                if (digit1 < 16 && digit2 < 16) {
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
                    s += 2;
                    break;
                }
            }
            /* invalid hexadecimal digits */

            if (!errors || strcmp(errors, "strict") == 0) {
                /* (end - len) is the start of the input, so the reported
                   position is the offset of the backslash */
                PyErr_Format(PyExc_ValueError,
                             "invalid \\x escape at position %zd",
                             s - 2 - (end - len));
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; unknown "
                             "error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && Py_ISXDIGIT(s[0]))
                s++; /* and a hexdigit */
            break;

        default:
            /* Unrecognized escape: remember the first one for the caller,
               then emit the backslash and the following character as-is. */
            if (*first_invalid_escape == NULL) {
                *first_invalid_escape = s-1; /* Back up one char, since we've
                                                already incremented s. */
            }
            *p++ = '\\';
            s--;
            goto non_esc; /* an arbitrary number of unescaped
                             UTF-8 bytes may follow. */
        }
    }

    return _PyBytesWriter_Finish(&writer, p);

  failed:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
1240
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1241 PyObject *PyBytes_DecodeEscape(const char *s,
1242 Py_ssize_t len,
1243 const char *errors,
1244 Py_ssize_t unicode,
1245 const char *recode_encoding)
1246 {
1247 const char* first_invalid_escape;
1248 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249 recode_encoding,
1250 &first_invalid_escape);
1251 if (result == NULL)
1252 return NULL;
1253 if (first_invalid_escape != NULL) {
1254 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1255 "invalid escape sequence '\\%c'",
1256 (unsigned char)*first_invalid_escape) < 0) {
1257 Py_DECREF(result);
1258 return NULL;
1259 }
1260 }
1261 return result;
1262
1263 }
1264 /* -------------------------------------------------------------------- */
1265 /* object api */
1266
1267 Py_ssize_t
PyBytes_Size(PyObject * op)1268 PyBytes_Size(PyObject *op)
1269 {
1270 if (!PyBytes_Check(op)) {
1271 PyErr_Format(PyExc_TypeError,
1272 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273 return -1;
1274 }
1275 return Py_SIZE(op);
1276 }
1277
1278 char *
PyBytes_AsString(PyObject * op)1279 PyBytes_AsString(PyObject *op)
1280 {
1281 if (!PyBytes_Check(op)) {
1282 PyErr_Format(PyExc_TypeError,
1283 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284 return NULL;
1285 }
1286 return ((PyBytesObject *)op)->ob_sval;
1287 }
1288
1289 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1290 PyBytes_AsStringAndSize(PyObject *obj,
1291 char **s,
1292 Py_ssize_t *len)
1293 {
1294 if (s == NULL) {
1295 PyErr_BadInternalCall();
1296 return -1;
1297 }
1298
1299 if (!PyBytes_Check(obj)) {
1300 PyErr_Format(PyExc_TypeError,
1301 "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1302 return -1;
1303 }
1304
1305 *s = PyBytes_AS_STRING(obj);
1306 if (len != NULL)
1307 *len = PyBytes_GET_SIZE(obj);
1308 else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1309 PyErr_SetString(PyExc_ValueError,
1310 "embedded null byte");
1311 return -1;
1312 }
1313 return 0;
1314 }
1315
1316 /* -------------------------------------------------------------------- */
1317 /* Methods */
1318
1319 #include "stringlib/stringdefs.h"
1320
1321 #include "stringlib/fastsearch.h"
1322 #include "stringlib/count.h"
1323 #include "stringlib/find.h"
1324 #include "stringlib/join.h"
1325 #include "stringlib/partition.h"
1326 #include "stringlib/split.h"
1327 #include "stringlib/ctype.h"
1328
1329 #include "stringlib/transmogrify.h"
1330
1331 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1332 PyBytes_Repr(PyObject *obj, int smartquotes)
1333 {
1334 PyBytesObject* op = (PyBytesObject*) obj;
1335 Py_ssize_t i, length = Py_SIZE(op);
1336 Py_ssize_t newsize, squotes, dquotes;
1337 PyObject *v;
1338 unsigned char quote, *s, *p;
1339
1340 /* Compute size of output string */
1341 squotes = dquotes = 0;
1342 newsize = 3; /* b'' */
1343 s = (unsigned char*)op->ob_sval;
1344 for (i = 0; i < length; i++) {
1345 Py_ssize_t incr = 1;
1346 switch(s[i]) {
1347 case '\'': squotes++; break;
1348 case '"': dquotes++; break;
1349 case '\\': case '\t': case '\n': case '\r':
1350 incr = 2; break; /* \C */
1351 default:
1352 if (s[i] < ' ' || s[i] >= 0x7f)
1353 incr = 4; /* \xHH */
1354 }
1355 if (newsize > PY_SSIZE_T_MAX - incr)
1356 goto overflow;
1357 newsize += incr;
1358 }
1359 quote = '\'';
1360 if (smartquotes && squotes && !dquotes)
1361 quote = '"';
1362 if (squotes && quote == '\'') {
1363 if (newsize > PY_SSIZE_T_MAX - squotes)
1364 goto overflow;
1365 newsize += squotes;
1366 }
1367
1368 v = PyUnicode_New(newsize, 127);
1369 if (v == NULL) {
1370 return NULL;
1371 }
1372 p = PyUnicode_1BYTE_DATA(v);
1373
1374 *p++ = 'b', *p++ = quote;
1375 for (i = 0; i < length; i++) {
1376 unsigned char c = op->ob_sval[i];
1377 if (c == quote || c == '\\')
1378 *p++ = '\\', *p++ = c;
1379 else if (c == '\t')
1380 *p++ = '\\', *p++ = 't';
1381 else if (c == '\n')
1382 *p++ = '\\', *p++ = 'n';
1383 else if (c == '\r')
1384 *p++ = '\\', *p++ = 'r';
1385 else if (c < ' ' || c >= 0x7f) {
1386 *p++ = '\\';
1387 *p++ = 'x';
1388 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1389 *p++ = Py_hexdigits[c & 0xf];
1390 }
1391 else
1392 *p++ = c;
1393 }
1394 *p++ = quote;
1395 assert(_PyUnicode_CheckConsistency(v, 1));
1396 return v;
1397
1398 overflow:
1399 PyErr_SetString(PyExc_OverflowError,
1400 "bytes object is too large to make repr");
1401 return NULL;
1402 }
1403
1404 static PyObject *
bytes_repr(PyObject * op)1405 bytes_repr(PyObject *op)
1406 {
1407 return PyBytes_Repr(op, 1);
1408 }
1409
1410 static PyObject *
bytes_str(PyObject * op)1411 bytes_str(PyObject *op)
1412 {
1413 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
1414 if (config->bytes_warning) {
1415 if (PyErr_WarnEx(PyExc_BytesWarning,
1416 "str() on a bytes instance", 1)) {
1417 return NULL;
1418 }
1419 }
1420 return bytes_repr(op);
1421 }
1422
1423 static Py_ssize_t
bytes_length(PyBytesObject * a)1424 bytes_length(PyBytesObject *a)
1425 {
1426 return Py_SIZE(a);
1427 }
1428
1429 /* This is also used by PyBytes_Concat() */
1430 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1431 bytes_concat(PyObject *a, PyObject *b)
1432 {
1433 Py_buffer va, vb;
1434 PyObject *result = NULL;
1435
1436 va.len = -1;
1437 vb.len = -1;
1438 if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1439 PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1440 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1441 Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1442 goto done;
1443 }
1444
1445 /* Optimize end cases */
1446 if (va.len == 0 && PyBytes_CheckExact(b)) {
1447 result = b;
1448 Py_INCREF(result);
1449 goto done;
1450 }
1451 if (vb.len == 0 && PyBytes_CheckExact(a)) {
1452 result = a;
1453 Py_INCREF(result);
1454 goto done;
1455 }
1456
1457 if (va.len > PY_SSIZE_T_MAX - vb.len) {
1458 PyErr_NoMemory();
1459 goto done;
1460 }
1461
1462 result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1463 if (result != NULL) {
1464 memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1465 memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1466 }
1467
1468 done:
1469 if (va.len != -1)
1470 PyBuffer_Release(&va);
1471 if (vb.len != -1)
1472 PyBuffer_Release(&vb);
1473 return result;
1474 }
1475
1476 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1477 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1478 {
1479 Py_ssize_t i;
1480 Py_ssize_t j;
1481 Py_ssize_t size;
1482 PyBytesObject *op;
1483 size_t nbytes;
1484 if (n < 0)
1485 n = 0;
1486 /* watch out for overflows: the size can overflow int,
1487 * and the # of bytes needed can overflow size_t
1488 */
1489 if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1490 PyErr_SetString(PyExc_OverflowError,
1491 "repeated bytes are too long");
1492 return NULL;
1493 }
1494 size = Py_SIZE(a) * n;
1495 if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496 Py_INCREF(a);
1497 return (PyObject *)a;
1498 }
1499 nbytes = (size_t)size;
1500 if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501 PyErr_SetString(PyExc_OverflowError,
1502 "repeated bytes are too long");
1503 return NULL;
1504 }
1505 op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506 if (op == NULL)
1507 return PyErr_NoMemory();
1508 (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1509 op->ob_shash = -1;
1510 op->ob_sval[size] = '\0';
1511 if (Py_SIZE(a) == 1 && n > 0) {
1512 memset(op->ob_sval, a->ob_sval[0] , n);
1513 return (PyObject *) op;
1514 }
1515 i = 0;
1516 if (i < size) {
1517 memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1518 i = Py_SIZE(a);
1519 }
1520 while (i < size) {
1521 j = (i <= size-i) ? i : size-i;
1522 memcpy(op->ob_sval+i, op->ob_sval, j);
1523 i += j;
1524 }
1525 return (PyObject *) op;
1526 }
1527
1528 static int
bytes_contains(PyObject * self,PyObject * arg)1529 bytes_contains(PyObject *self, PyObject *arg)
1530 {
1531 return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532 }
1533
1534 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1535 bytes_item(PyBytesObject *a, Py_ssize_t i)
1536 {
1537 if (i < 0 || i >= Py_SIZE(a)) {
1538 PyErr_SetString(PyExc_IndexError, "index out of range");
1539 return NULL;
1540 }
1541 return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1542 }
1543
1544 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1545 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546 {
1547 int cmp;
1548 Py_ssize_t len;
1549
1550 len = Py_SIZE(a);
1551 if (Py_SIZE(b) != len)
1552 return 0;
1553
1554 if (a->ob_sval[0] != b->ob_sval[0])
1555 return 0;
1556
1557 cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558 return (cmp == 0);
1559 }
1560
1561 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1562 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1563 {
1564 int c;
1565 Py_ssize_t len_a, len_b;
1566 Py_ssize_t min_len;
1567 int rc;
1568
1569 /* Make sure both arguments are strings. */
1570 if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1571 PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
1572 if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1573 rc = PyObject_IsInstance((PyObject*)a,
1574 (PyObject*)&PyUnicode_Type);
1575 if (!rc)
1576 rc = PyObject_IsInstance((PyObject*)b,
1577 (PyObject*)&PyUnicode_Type);
1578 if (rc < 0)
1579 return NULL;
1580 if (rc) {
1581 if (PyErr_WarnEx(PyExc_BytesWarning,
1582 "Comparison between bytes and string", 1))
1583 return NULL;
1584 }
1585 else {
1586 rc = PyObject_IsInstance((PyObject*)a,
1587 (PyObject*)&PyLong_Type);
1588 if (!rc)
1589 rc = PyObject_IsInstance((PyObject*)b,
1590 (PyObject*)&PyLong_Type);
1591 if (rc < 0)
1592 return NULL;
1593 if (rc) {
1594 if (PyErr_WarnEx(PyExc_BytesWarning,
1595 "Comparison between bytes and int", 1))
1596 return NULL;
1597 }
1598 }
1599 }
1600 Py_RETURN_NOTIMPLEMENTED;
1601 }
1602 else if (a == b) {
1603 switch (op) {
1604 case Py_EQ:
1605 case Py_LE:
1606 case Py_GE:
1607 /* a string is equal to itself */
1608 Py_RETURN_TRUE;
1609 case Py_NE:
1610 case Py_LT:
1611 case Py_GT:
1612 Py_RETURN_FALSE;
1613 default:
1614 PyErr_BadArgument();
1615 return NULL;
1616 }
1617 }
1618 else if (op == Py_EQ || op == Py_NE) {
1619 int eq = bytes_compare_eq(a, b);
1620 eq ^= (op == Py_NE);
1621 return PyBool_FromLong(eq);
1622 }
1623 else {
1624 len_a = Py_SIZE(a);
1625 len_b = Py_SIZE(b);
1626 min_len = Py_MIN(len_a, len_b);
1627 if (min_len > 0) {
1628 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1629 if (c == 0)
1630 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1631 }
1632 else
1633 c = 0;
1634 if (c != 0)
1635 Py_RETURN_RICHCOMPARE(c, 0, op);
1636 Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1637 }
1638 }
1639
1640 static Py_hash_t
bytes_hash(PyBytesObject * a)1641 bytes_hash(PyBytesObject *a)
1642 {
1643 if (a->ob_shash == -1) {
1644 /* Can't fail */
1645 a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1646 }
1647 return a->ob_shash;
1648 }
1649
1650 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1651 bytes_subscript(PyBytesObject* self, PyObject* item)
1652 {
1653 if (PyIndex_Check(item)) {
1654 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1655 if (i == -1 && PyErr_Occurred())
1656 return NULL;
1657 if (i < 0)
1658 i += PyBytes_GET_SIZE(self);
1659 if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1660 PyErr_SetString(PyExc_IndexError,
1661 "index out of range");
1662 return NULL;
1663 }
1664 return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1665 }
1666 else if (PySlice_Check(item)) {
1667 Py_ssize_t start, stop, step, slicelength, i;
1668 size_t cur;
1669 char* source_buf;
1670 char* result_buf;
1671 PyObject* result;
1672
1673 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1674 return NULL;
1675 }
1676 slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1677 &stop, step);
1678
1679 if (slicelength <= 0) {
1680 return PyBytes_FromStringAndSize("", 0);
1681 }
1682 else if (start == 0 && step == 1 &&
1683 slicelength == PyBytes_GET_SIZE(self) &&
1684 PyBytes_CheckExact(self)) {
1685 Py_INCREF(self);
1686 return (PyObject *)self;
1687 }
1688 else if (step == 1) {
1689 return PyBytes_FromStringAndSize(
1690 PyBytes_AS_STRING(self) + start,
1691 slicelength);
1692 }
1693 else {
1694 source_buf = PyBytes_AS_STRING(self);
1695 result = PyBytes_FromStringAndSize(NULL, slicelength);
1696 if (result == NULL)
1697 return NULL;
1698
1699 result_buf = PyBytes_AS_STRING(result);
1700 for (cur = start, i = 0; i < slicelength;
1701 cur += step, i++) {
1702 result_buf[i] = source_buf[cur];
1703 }
1704
1705 return result;
1706 }
1707 }
1708 else {
1709 PyErr_Format(PyExc_TypeError,
1710 "byte indices must be integers or slices, not %.200s",
1711 Py_TYPE(item)->tp_name);
1712 return NULL;
1713 }
1714 }
1715
1716 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1717 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718 {
1719 return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720 1, flags);
1721 }
1722
1723 static PySequenceMethods bytes_as_sequence = {
1724 (lenfunc)bytes_length, /*sq_length*/
1725 (binaryfunc)bytes_concat, /*sq_concat*/
1726 (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727 (ssizeargfunc)bytes_item, /*sq_item*/
1728 0, /*sq_slice*/
1729 0, /*sq_ass_item*/
1730 0, /*sq_ass_slice*/
1731 (objobjproc)bytes_contains /*sq_contains*/
1732 };
1733
1734 static PyMappingMethods bytes_as_mapping = {
1735 (lenfunc)bytes_length,
1736 (binaryfunc)bytes_subscript,
1737 0,
1738 };
1739
1740 static PyBufferProcs bytes_as_buffer = {
1741 (getbufferproc)bytes_buffer_getbuffer,
1742 NULL,
1743 };
1744
1745
/* Values for the `striptype` argument of do_strip(), do_xstrip() and
   do_argstrip(), selecting which end(s) of the bytes object get trimmed:
   the leading end is trimmed unless RIGHTSTRIP is given, and the trailing
   end is trimmed unless LEFTSTRIP is given. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1749
1750 /*[clinic input]
1751 bytes.split
1752
1753 sep: object = None
1754 The delimiter according which to split the bytes.
1755 None (the default value) means split on ASCII whitespace characters
1756 (space, tab, return, newline, formfeed, vertical tab).
1757 maxsplit: Py_ssize_t = -1
1758 Maximum number of splits to do.
1759 -1 (the default value) means no limit.
1760
1761 Return a list of the sections in the bytes, using sep as the delimiter.
1762 [clinic start generated code]*/
1763
1764 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1765 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767 {
1768 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769 const char *s = PyBytes_AS_STRING(self), *sub;
1770 Py_buffer vsub;
1771 PyObject *list;
1772
1773 if (maxsplit < 0)
1774 maxsplit = PY_SSIZE_T_MAX;
1775 if (sep == Py_None)
1776 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778 return NULL;
1779 sub = vsub.buf;
1780 n = vsub.len;
1781
1782 list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783 PyBuffer_Release(&vsub);
1784 return list;
1785 }
1786
1787 /*[clinic input]
1788 bytes.partition
1789
1790 sep: Py_buffer
1791 /
1792
1793 Partition the bytes into three parts using the given separator.
1794
1795 This will search for the separator sep in the bytes. If the separator is found,
1796 returns a 3-tuple containing the part before the separator, the separator
1797 itself, and the part after it.
1798
1799 If the separator is not found, returns a 3-tuple containing the original bytes
1800 object and two empty bytes objects.
1801 [clinic start generated code]*/
1802
1803 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1804 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806 {
1807 return stringlib_partition(
1808 (PyObject*) self,
1809 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810 sep->obj, (const char *)sep->buf, sep->len
1811 );
1812 }
1813
1814 /*[clinic input]
1815 bytes.rpartition
1816
1817 sep: Py_buffer
1818 /
1819
1820 Partition the bytes into three parts using the given separator.
1821
1822 This will search for the separator sep in the bytes, starting at the end. If
1823 the separator is found, returns a 3-tuple containing the part before the
1824 separator, the separator itself, and the part after it.
1825
1826 If the separator is not found, returns a 3-tuple containing two empty bytes
1827 objects and the original bytes object.
1828 [clinic start generated code]*/
1829
1830 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1831 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1833 {
1834 return stringlib_rpartition(
1835 (PyObject*) self,
1836 PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837 sep->obj, (const char *)sep->buf, sep->len
1838 );
1839 }
1840
1841 /*[clinic input]
1842 bytes.rsplit = bytes.split
1843
1844 Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846 Splitting is done starting at the end of the bytes and working to the front.
1847 [clinic start generated code]*/
1848
1849 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1850 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852 {
1853 Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854 const char *s = PyBytes_AS_STRING(self), *sub;
1855 Py_buffer vsub;
1856 PyObject *list;
1857
1858 if (maxsplit < 0)
1859 maxsplit = PY_SSIZE_T_MAX;
1860 if (sep == Py_None)
1861 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862 if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863 return NULL;
1864 sub = vsub.buf;
1865 n = vsub.len;
1866
1867 list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868 PyBuffer_Release(&vsub);
1869 return list;
1870 }
1871
1872
1873 /*[clinic input]
1874 bytes.join
1875
1876 iterable_of_bytes: object
1877 /
1878
1879 Concatenate any number of bytes objects.
1880
1881 The bytes whose method is called is inserted in between each pair.
1882
1883 The result is returned as a new bytes object.
1884
1885 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886 [clinic start generated code]*/
1887
1888 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1889 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891 {
1892 return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893 }
1894
1895 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1896 _PyBytes_Join(PyObject *sep, PyObject *x)
1897 {
1898 assert(sep != NULL && PyBytes_Check(sep));
1899 assert(x != NULL);
1900 return bytes_join((PyBytesObject*)sep, x);
1901 }
1902
1903 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1904 bytes_find(PyBytesObject *self, PyObject *args)
1905 {
1906 return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907 }
1908
1909 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1910 bytes_index(PyBytesObject *self, PyObject *args)
1911 {
1912 return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913 }
1914
1915
1916 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1917 bytes_rfind(PyBytesObject *self, PyObject *args)
1918 {
1919 return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920 }
1921
1922
1923 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1924 bytes_rindex(PyBytesObject *self, PyObject *args)
1925 {
1926 return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927 }
1928
1929
1930 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1931 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1932 {
1933 Py_buffer vsep;
1934 char *s = PyBytes_AS_STRING(self);
1935 Py_ssize_t len = PyBytes_GET_SIZE(self);
1936 char *sep;
1937 Py_ssize_t seplen;
1938 Py_ssize_t i, j;
1939
1940 if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1941 return NULL;
1942 sep = vsep.buf;
1943 seplen = vsep.len;
1944
1945 i = 0;
1946 if (striptype != RIGHTSTRIP) {
1947 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948 i++;
1949 }
1950 }
1951
1952 j = len;
1953 if (striptype != LEFTSTRIP) {
1954 do {
1955 j--;
1956 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957 j++;
1958 }
1959
1960 PyBuffer_Release(&vsep);
1961
1962 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963 Py_INCREF(self);
1964 return (PyObject*)self;
1965 }
1966 else
1967 return PyBytes_FromStringAndSize(s+i, j-i);
1968 }
1969
1970
1971 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1972 do_strip(PyBytesObject *self, int striptype)
1973 {
1974 char *s = PyBytes_AS_STRING(self);
1975 Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1976
1977 i = 0;
1978 if (striptype != RIGHTSTRIP) {
1979 while (i < len && Py_ISSPACE(s[i])) {
1980 i++;
1981 }
1982 }
1983
1984 j = len;
1985 if (striptype != LEFTSTRIP) {
1986 do {
1987 j--;
1988 } while (j >= i && Py_ISSPACE(s[j]));
1989 j++;
1990 }
1991
1992 if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993 Py_INCREF(self);
1994 return (PyObject*)self;
1995 }
1996 else
1997 return PyBytes_FromStringAndSize(s+i, j-i);
1998 }
1999
2000
2001 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2002 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2003 {
2004 if (bytes != Py_None) {
2005 return do_xstrip(self, striptype, bytes);
2006 }
2007 return do_strip(self, striptype);
2008 }
2009
2010 /*[clinic input]
2011 bytes.strip
2012
2013 bytes: object = None
2014 /
2015
2016 Strip leading and trailing bytes contained in the argument.
2017
2018 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019 [clinic start generated code]*/
2020
2021 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2022 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2023 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2024 {
2025 return do_argstrip(self, BOTHSTRIP, bytes);
2026 }
2027
2028 /*[clinic input]
2029 bytes.lstrip
2030
2031 bytes: object = None
2032 /
2033
2034 Strip leading bytes contained in the argument.
2035
2036 If the argument is omitted or None, strip leading ASCII whitespace.
2037 [clinic start generated code]*/
2038
2039 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2040 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2041 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2042 {
2043 return do_argstrip(self, LEFTSTRIP, bytes);
2044 }
2045
2046 /*[clinic input]
2047 bytes.rstrip
2048
2049 bytes: object = None
2050 /
2051
2052 Strip trailing bytes contained in the argument.
2053
2054 If the argument is omitted or None, strip trailing ASCII whitespace.
2055 [clinic start generated code]*/
2056
2057 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2058 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2059 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2060 {
2061 return do_argstrip(self, RIGHTSTRIP, bytes);
2062 }
2063
2064
2065 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2066 bytes_count(PyBytesObject *self, PyObject *args)
2067 {
2068 return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069 }
2070
2071
2072 /*[clinic input]
2073 bytes.translate
2074
2075 table: object
2076 Translation table, which must be a bytes object of length 256.
2077 /
2078 delete as deletechars: object(c_default="NULL") = b''
2079
2080 Return a copy with each character mapped by the given translation table.
2081
2082 All characters occurring in the optional argument delete are removed.
2083 The remaining characters are mapped through the given translation table.
2084 [clinic start generated code]*/
2085
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Overview:
       - `table` may be a bytes object, None (every byte maps to itself) or
         any object exporting a simple buffer; it must describe exactly 256
         entries, otherwise ValueError is set.
       - `deletechars` may be NULL (nothing deleted), a bytes object, or any
         object exporting a simple buffer.
       - When nothing changed and `self` is an exact bytes object, `self`
         is returned with an extra reference instead of the copy.
       Returns a new reference, or NULL with an exception set. */
    char *input, *output;
    /* Both views start out with NULL buf/obj so they can be passed to
       PyBuffer_Release() on every exit path, even if never acquired.
       NOTE(review): relies on PyBuffer_Release() ignoring such a view --
       confirm against the buffer API documentation. */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    /* Per-byte mapping used by the general path; -1 marks a deleted byte. */
    int trans_table[256];

    /* Resolve the translation table to a (pointer, length) pair. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        /* No table: identity mapping, built further down. */
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
          "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Resolve the set of bytes to delete, if any. */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The output can never be longer than the input, so allocate for the
       input length and shrink at the end if bytes were deleted. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        if (!changed && PyBytes_CheckExact(input_obj)) {
            /* Every byte mapped to itself: hand back the original. */
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* General path: copy the mapping into trans_table so deletions can be
       recorded in it. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* The table has been copied; its view is no longer needed. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        /* Reached when the byte was deleted or mapped to another value. */
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting string */
    /* NOTE(review): the return value of _PyBytes_Resize() is not checked
       here; presumably it leaves `result` NULL on failure -- confirm. */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2200
2201
2202 /*[clinic input]
2203
2204 @staticmethod
2205 bytes.maketrans
2206
2207 frm: Py_buffer
2208 to: Py_buffer
2209 /
2210
2211 Return a translation table useable for the bytes or bytearray translate method.
2212
2213 The returned table will be one where each byte in frm is mapped to the byte at
2214 the same position in to.
2215
2216 The bytes objects frm and to must be of the same length.
2217 [clinic start generated code]*/
2218
2219 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2220 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2221 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2222 {
2223 return _Py_bytes_maketrans(frm, to);
2224 }
2225
2226
2227 /*[clinic input]
2228 bytes.replace
2229
2230 old: Py_buffer
2231 new: Py_buffer
2232 count: Py_ssize_t = -1
2233 Maximum number of occurrences to replace.
2234 -1 (the default value) means replace all occurrences.
2235 /
2236
2237 Return a copy with all occurrences of substring old replaced by new.
2238
2239 If the optional argument count is given, only the first count occurrences are
2240 replaced.
2241 [clinic start generated code]*/
2242
2243 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2244 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2245 Py_ssize_t count)
2246 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2247 {
2248 return stringlib_replace((PyObject *)self,
2249 (const char *)old->buf, old->len,
2250 (const char *)new->buf, new->len, count);
2251 }
2252
2253 /** End DALKE **/
2254
2255
2256 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2257 bytes_startswith(PyBytesObject *self, PyObject *args)
2258 {
2259 return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260 }
2261
2262 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2263 bytes_endswith(PyBytesObject *self, PyObject *args)
2264 {
2265 return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266 }
2267
2268
2269 /*[clinic input]
2270 bytes.decode
2271
2272 encoding: str(c_default="NULL") = 'utf-8'
2273 The encoding with which to decode the bytes.
2274 errors: str(c_default="NULL") = 'strict'
2275 The error handling scheme to use for the handling of decoding errors.
2276 The default is 'strict' meaning that decoding errors raise a
2277 UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278 as well as any other name registered with codecs.register_error that
2279 can handle UnicodeDecodeErrors.
2280
2281 Decode the bytes using the codec registered for encoding.
2282 [clinic start generated code]*/
2283
2284 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2285 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2286 const char *errors)
2287 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2288 {
2289 return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2290 }
2291
2292
2293 /*[clinic input]
2294 bytes.splitlines
2295
2296 keepends: bool(accept={int}) = False
2297
2298 Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300 Line breaks are not included in the resulting list unless keepends is given and
2301 true.
2302 [clinic start generated code]*/
2303
2304 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2305 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2306 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2307 {
2308 return stringlib_splitlines(
2309 (PyObject*) self, PyBytes_AS_STRING(self),
2310 PyBytes_GET_SIZE(self), keepends
2311 );
2312 }
2313
2314 /*[clinic input]
2315 @classmethod
2316 bytes.fromhex
2317
2318 string: unicode
2319 /
2320
2321 Create a bytes object from a string of hexadecimal numbers.
2322
2323 Spaces between two numbers are accepted.
2324 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325 [clinic start generated code]*/
2326
2327 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2328 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330 {
2331 PyObject *result = _PyBytes_FromHex(string, 0);
2332 if (type != &PyBytes_Type && result != NULL) {
2333 Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334 result, NULL));
2335 }
2336 return result;
2337 }
2338
2339 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2340 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2341 {
2342 char *buf;
2343 Py_ssize_t hexlen, invalid_char;
2344 unsigned int top, bot;
2345 Py_UCS1 *str, *end;
2346 _PyBytesWriter writer;
2347
2348 _PyBytesWriter_Init(&writer);
2349 writer.use_bytearray = use_bytearray;
2350
2351 assert(PyUnicode_Check(string));
2352 if (PyUnicode_READY(string))
2353 return NULL;
2354 hexlen = PyUnicode_GET_LENGTH(string);
2355
2356 if (!PyUnicode_IS_ASCII(string)) {
2357 void *data = PyUnicode_DATA(string);
2358 unsigned int kind = PyUnicode_KIND(string);
2359 Py_ssize_t i;
2360
2361 /* search for the first non-ASCII character */
2362 for (i = 0; i < hexlen; i++) {
2363 if (PyUnicode_READ(kind, data, i) >= 128)
2364 break;
2365 }
2366 invalid_char = i;
2367 goto error;
2368 }
2369
2370 assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371 str = PyUnicode_1BYTE_DATA(string);
2372
2373 /* This overestimates if there are spaces */
2374 buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375 if (buf == NULL)
2376 return NULL;
2377
2378 end = str + hexlen;
2379 while (str < end) {
2380 /* skip over spaces in the input */
2381 if (Py_ISSPACE(*str)) {
2382 do {
2383 str++;
2384 } while (Py_ISSPACE(*str));
2385 if (str >= end)
2386 break;
2387 }
2388
2389 top = _PyLong_DigitValue[*str];
2390 if (top >= 16) {
2391 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2392 goto error;
2393 }
2394 str++;
2395
2396 bot = _PyLong_DigitValue[*str];
2397 if (bot >= 16) {
2398 invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399 goto error;
2400 }
2401 str++;
2402
2403 *buf++ = (unsigned char)((top << 4) + bot);
2404 }
2405
2406 return _PyBytesWriter_Finish(&writer, buf);
2407
2408 error:
2409 PyErr_Format(PyExc_ValueError,
2410 "non-hexadecimal number found in "
2411 "fromhex() arg at position %zd", invalid_char);
2412 _PyBytesWriter_Dealloc(&writer);
2413 return NULL;
2414 }
2415
2416 /*[clinic input]
2417 bytes.hex
2418
2419 sep: object = NULL
2420 An optional single character or byte to separate hex bytes.
2421 bytes_per_sep: int = 1
2422 How many bytes between separators. Positive values count from the
2423 right, negative values count from the left.
2424
2425 Create a str of hexadecimal numbers from a bytes object.
2426
2427 Example:
2428 >>> value = b'\xb9\x01\xef'
2429 >>> value.hex()
2430 'b901ef'
2431 >>> value.hex(':')
2432 'b9:01:ef'
2433 >>> value.hex(':', 2)
2434 'b9:01ef'
2435 >>> value.hex(':', -2)
2436 'b901:ef'
2437 [clinic start generated code]*/
2438
2439 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2440 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2441 /*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
2442 {
2443 char* argbuf = PyBytes_AS_STRING(self);
2444 Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2445 return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2446 }
2447
2448 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2449 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2450 {
2451 return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2452 }
2453
2454
/* Method table of the bytes type (installed as tp_methods of
   PyBytes_Type).  Entries are kept in alphabetical order by method name.
   The bare *_METHODDEF names are macros defined outside this chunk;
   since they appear without a trailing comma, each presumably expands to
   a complete `{...},` entry. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
    {"capitalize", stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    STRINGLIB_CENTER_METHODDEF
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    STRINGLIB_EXPANDTABS_METHODDEF
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    BYTES_HEX_METHODDEF
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isascii", stringlib_isascii, METH_NOARGS,
     _Py_isascii__doc__},
    {"isdigit", stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    STRINGLIB_LJUST_METHODDEF
    {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    STRINGLIB_RJUST_METHODDEF
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    STRINGLIB_ZFILL_METHODDEF
    {NULL,     NULL}                         /* sentinel */
};
2514
2515 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2516 bytes_mod(PyObject *self, PyObject *arg)
2517 {
2518 if (!PyBytes_Check(self)) {
2519 Py_RETURN_NOTIMPLEMENTED;
2520 }
2521 return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2522 arg, 0);
2523 }
2524
2525 static PyNumberMethods bytes_as_number = {
2526 0, /*nb_add*/
2527 0, /*nb_subtract*/
2528 0, /*nb_multiply*/
2529 bytes_mod, /*nb_remainder*/
2530 };
2531
2532 static PyObject *
2533 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2534
2535 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2536 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2537 {
2538 PyObject *x = NULL;
2539 const char *encoding = NULL;
2540 const char *errors = NULL;
2541 PyObject *new = NULL;
2542 PyObject *func;
2543 Py_ssize_t size;
2544 static char *kwlist[] = {"source", "encoding", "errors", 0};
2545 _Py_IDENTIFIER(__bytes__);
2546
2547 if (type != &PyBytes_Type)
2548 return bytes_subtype_new(type, args, kwds);
2549 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2550 &encoding, &errors))
2551 return NULL;
2552 if (x == NULL) {
2553 if (encoding != NULL || errors != NULL) {
2554 PyErr_SetString(PyExc_TypeError,
2555 encoding != NULL ?
2556 "encoding without a string argument" :
2557 "errors without a string argument");
2558 return NULL;
2559 }
2560 return PyBytes_FromStringAndSize(NULL, 0);
2561 }
2562
2563 if (encoding != NULL) {
2564 /* Encode via the codec registry */
2565 if (!PyUnicode_Check(x)) {
2566 PyErr_SetString(PyExc_TypeError,
2567 "encoding without a string argument");
2568 return NULL;
2569 }
2570 new = PyUnicode_AsEncodedString(x, encoding, errors);
2571 if (new == NULL)
2572 return NULL;
2573 assert(PyBytes_Check(new));
2574 return new;
2575 }
2576
2577 if (errors != NULL) {
2578 PyErr_SetString(PyExc_TypeError,
2579 PyUnicode_Check(x) ?
2580 "string argument without an encoding" :
2581 "errors without a string argument");
2582 return NULL;
2583 }
2584
2585 /* We'd like to call PyObject_Bytes here, but we need to check for an
2586 integer argument before deferring to PyBytes_FromObject, something
2587 PyObject_Bytes doesn't do. */
2588 func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2589 if (func != NULL) {
2590 new = _PyObject_CallNoArg(func);
2591 Py_DECREF(func);
2592 if (new == NULL)
2593 return NULL;
2594 if (!PyBytes_Check(new)) {
2595 PyErr_Format(PyExc_TypeError,
2596 "__bytes__ returned non-bytes (type %.200s)",
2597 Py_TYPE(new)->tp_name);
2598 Py_DECREF(new);
2599 return NULL;
2600 }
2601 return new;
2602 }
2603 else if (PyErr_Occurred())
2604 return NULL;
2605
2606 if (PyUnicode_Check(x)) {
2607 PyErr_SetString(PyExc_TypeError,
2608 "string argument without an encoding");
2609 return NULL;
2610 }
2611 /* Is it an integer? */
2612 if (PyIndex_Check(x)) {
2613 size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2614 if (size == -1 && PyErr_Occurred()) {
2615 if (!PyErr_ExceptionMatches(PyExc_TypeError))
2616 return NULL;
2617 PyErr_Clear(); /* fall through */
2618 }
2619 else {
2620 if (size < 0) {
2621 PyErr_SetString(PyExc_ValueError, "negative count");
2622 return NULL;
2623 }
2624 new = _PyBytes_FromSize(size, 1);
2625 if (new == NULL)
2626 return NULL;
2627 return new;
2628 }
2629 }
2630
2631 return PyBytes_FromObject(x);
2632 }
2633
2634 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2635 _PyBytes_FromBuffer(PyObject *x)
2636 {
2637 PyObject *new;
2638 Py_buffer view;
2639
2640 if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2641 return NULL;
2642
2643 new = PyBytes_FromStringAndSize(NULL, view.len);
2644 if (!new)
2645 goto fail;
2646 if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2647 &view, view.len, 'C') < 0)
2648 goto fail;
2649 PyBuffer_Release(&view);
2650 return new;
2651
2652 fail:
2653 Py_XDECREF(new);
2654 PyBuffer_Release(&view);
2655 return NULL;
2656 }
2657
2658 static PyObject*
_PyBytes_FromList(PyObject * x)2659 _PyBytes_FromList(PyObject *x)
2660 {
2661 Py_ssize_t i, size = PyList_GET_SIZE(x);
2662 Py_ssize_t value;
2663 char *str;
2664 PyObject *item;
2665 _PyBytesWriter writer;
2666
2667 _PyBytesWriter_Init(&writer);
2668 str = _PyBytesWriter_Alloc(&writer, size);
2669 if (str == NULL)
2670 return NULL;
2671 writer.overallocate = 1;
2672 size = writer.allocated;
2673
2674 for (i = 0; i < PyList_GET_SIZE(x); i++) {
2675 item = PyList_GET_ITEM(x, i);
2676 Py_INCREF(item);
2677 value = PyNumber_AsSsize_t(item, NULL);
2678 Py_DECREF(item);
2679 if (value == -1 && PyErr_Occurred())
2680 goto error;
2681
2682 if (value < 0 || value >= 256) {
2683 PyErr_SetString(PyExc_ValueError,
2684 "bytes must be in range(0, 256)");
2685 goto error;
2686 }
2687
2688 if (i >= size) {
2689 str = _PyBytesWriter_Resize(&writer, str, size+1);
2690 if (str == NULL)
2691 return NULL;
2692 size = writer.allocated;
2693 }
2694 *str++ = (char) value;
2695 }
2696 return _PyBytesWriter_Finish(&writer, str);
2697
2698 error:
2699 _PyBytesWriter_Dealloc(&writer);
2700 return NULL;
2701 }
2702
2703 static PyObject*
_PyBytes_FromTuple(PyObject * x)2704 _PyBytes_FromTuple(PyObject *x)
2705 {
2706 PyObject *bytes;
2707 Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2708 Py_ssize_t value;
2709 char *str;
2710 PyObject *item;
2711
2712 bytes = PyBytes_FromStringAndSize(NULL, size);
2713 if (bytes == NULL)
2714 return NULL;
2715 str = ((PyBytesObject *)bytes)->ob_sval;
2716
2717 for (i = 0; i < size; i++) {
2718 item = PyTuple_GET_ITEM(x, i);
2719 value = PyNumber_AsSsize_t(item, NULL);
2720 if (value == -1 && PyErr_Occurred())
2721 goto error;
2722
2723 if (value < 0 || value >= 256) {
2724 PyErr_SetString(PyExc_ValueError,
2725 "bytes must be in range(0, 256)");
2726 goto error;
2727 }
2728 *str++ = (char) value;
2729 }
2730 return bytes;
2731
2732 error:
2733 Py_DECREF(bytes);
2734 return NULL;
2735 }
2736
2737 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2738 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2739 {
2740 char *str;
2741 Py_ssize_t i, size;
2742 _PyBytesWriter writer;
2743
2744 /* For iterator version, create a string object and resize as needed */
2745 size = PyObject_LengthHint(x, 64);
2746 if (size == -1 && PyErr_Occurred())
2747 return NULL;
2748
2749 _PyBytesWriter_Init(&writer);
2750 str = _PyBytesWriter_Alloc(&writer, size);
2751 if (str == NULL)
2752 return NULL;
2753 writer.overallocate = 1;
2754 size = writer.allocated;
2755
2756 /* Run the iterator to exhaustion */
2757 for (i = 0; ; i++) {
2758 PyObject *item;
2759 Py_ssize_t value;
2760
2761 /* Get the next item */
2762 item = PyIter_Next(it);
2763 if (item == NULL) {
2764 if (PyErr_Occurred())
2765 goto error;
2766 break;
2767 }
2768
2769 /* Interpret it as an int (__index__) */
2770 value = PyNumber_AsSsize_t(item, NULL);
2771 Py_DECREF(item);
2772 if (value == -1 && PyErr_Occurred())
2773 goto error;
2774
2775 /* Range check */
2776 if (value < 0 || value >= 256) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "bytes must be in range(0, 256)");
2779 goto error;
2780 }
2781
2782 /* Append the byte */
2783 if (i >= size) {
2784 str = _PyBytesWriter_Resize(&writer, str, size+1);
2785 if (str == NULL)
2786 return NULL;
2787 size = writer.allocated;
2788 }
2789 *str++ = (char) value;
2790 }
2791
2792 return _PyBytesWriter_Finish(&writer, str);
2793
2794 error:
2795 _PyBytesWriter_Dealloc(&writer);
2796 return NULL;
2797 }
2798
2799 PyObject *
PyBytes_FromObject(PyObject * x)2800 PyBytes_FromObject(PyObject *x)
2801 {
2802 PyObject *it, *result;
2803
2804 if (x == NULL) {
2805 PyErr_BadInternalCall();
2806 return NULL;
2807 }
2808
2809 if (PyBytes_CheckExact(x)) {
2810 Py_INCREF(x);
2811 return x;
2812 }
2813
2814 /* Use the modern buffer interface */
2815 if (PyObject_CheckBuffer(x))
2816 return _PyBytes_FromBuffer(x);
2817
2818 if (PyList_CheckExact(x))
2819 return _PyBytes_FromList(x);
2820
2821 if (PyTuple_CheckExact(x))
2822 return _PyBytes_FromTuple(x);
2823
2824 if (!PyUnicode_Check(x)) {
2825 it = PyObject_GetIter(x);
2826 if (it != NULL) {
2827 result = _PyBytes_FromIterator(it, x);
2828 Py_DECREF(it);
2829 return result;
2830 }
2831 if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2832 return NULL;
2833 }
2834 }
2835
2836 PyErr_Format(PyExc_TypeError,
2837 "cannot convert '%.200s' object to bytes",
2838 x->ob_type->tp_name);
2839 return NULL;
2840 }
2841
2842 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2843 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2844 {
2845 PyObject *tmp, *pnew;
2846 Py_ssize_t n;
2847
2848 assert(PyType_IsSubtype(type, &PyBytes_Type));
2849 tmp = bytes_new(&PyBytes_Type, args, kwds);
2850 if (tmp == NULL)
2851 return NULL;
2852 assert(PyBytes_Check(tmp));
2853 n = PyBytes_GET_SIZE(tmp);
2854 pnew = type->tp_alloc(type, n);
2855 if (pnew != NULL) {
2856 memcpy(PyBytes_AS_STRING(pnew),
2857 PyBytes_AS_STRING(tmp), n+1);
2858 ((PyBytesObject *)pnew)->ob_shash =
2859 ((PyBytesObject *)tmp)->ob_shash;
2860 }
2861 Py_DECREF(tmp);
2862 return pnew;
2863 }
2864
/* Docstring of the bytes type; installed as tp_doc of PyBytes_Type. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
- an iterable yielding integers in range(256)\n\
- a text string encoded using the specified encoding\n\
- any object implementing the buffer API.\n\
- an integer");
2877
2878 static PyObject *bytes_iter(PyObject *seq);
2879
2880 PyTypeObject PyBytes_Type = {
2881 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2882 "bytes",
2883 PyBytesObject_SIZE,
2884 sizeof(char),
2885 0, /* tp_dealloc */
2886 0, /* tp_vectorcall_offset */
2887 0, /* tp_getattr */
2888 0, /* tp_setattr */
2889 0, /* tp_as_async */
2890 (reprfunc)bytes_repr, /* tp_repr */
2891 &bytes_as_number, /* tp_as_number */
2892 &bytes_as_sequence, /* tp_as_sequence */
2893 &bytes_as_mapping, /* tp_as_mapping */
2894 (hashfunc)bytes_hash, /* tp_hash */
2895 0, /* tp_call */
2896 bytes_str, /* tp_str */
2897 PyObject_GenericGetAttr, /* tp_getattro */
2898 0, /* tp_setattro */
2899 &bytes_as_buffer, /* tp_as_buffer */
2900 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2901 Py_TPFLAGS_BYTES_SUBCLASS, /* tp_flags */
2902 bytes_doc, /* tp_doc */
2903 0, /* tp_traverse */
2904 0, /* tp_clear */
2905 (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2906 0, /* tp_weaklistoffset */
2907 bytes_iter, /* tp_iter */
2908 0, /* tp_iternext */
2909 bytes_methods, /* tp_methods */
2910 0, /* tp_members */
2911 0, /* tp_getset */
2912 &PyBaseObject_Type, /* tp_base */
2913 0, /* tp_dict */
2914 0, /* tp_descr_get */
2915 0, /* tp_descr_set */
2916 0, /* tp_dictoffset */
2917 0, /* tp_init */
2918 0, /* tp_alloc */
2919 bytes_new, /* tp_new */
2920 PyObject_Del, /* tp_free */
2921 };
2922
2923 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2924 PyBytes_Concat(PyObject **pv, PyObject *w)
2925 {
2926 assert(pv != NULL);
2927 if (*pv == NULL)
2928 return;
2929 if (w == NULL) {
2930 Py_CLEAR(*pv);
2931 return;
2932 }
2933
2934 if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2935 /* Only one reference, so we can resize in place */
2936 Py_ssize_t oldsize;
2937 Py_buffer wb;
2938
2939 if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2940 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2941 Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2942 Py_CLEAR(*pv);
2943 return;
2944 }
2945
2946 oldsize = PyBytes_GET_SIZE(*pv);
2947 if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2948 PyErr_NoMemory();
2949 goto error;
2950 }
2951 if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2952 goto error;
2953
2954 memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2955 PyBuffer_Release(&wb);
2956 return;
2957
2958 error:
2959 PyBuffer_Release(&wb);
2960 Py_CLEAR(*pv);
2961 return;
2962 }
2963
2964 else {
2965 /* Multiple references, need to create new object */
2966 PyObject *v;
2967 v = bytes_concat(*pv, w);
2968 Py_SETREF(*pv, v);
2969 }
2970 }
2971
2972 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2973 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2974 {
2975 PyBytes_Concat(pv, w);
2976 Py_XDECREF(w);
2977 }
2978
2979
2980 /* The following function breaks the notion that bytes are immutable:
2981 it changes the size of a bytes object. We get away with this only if there
2982 is only one module referencing the object. You can also think of it
2983 as creating a new bytes object and destroying the old one, only
2984 more efficiently. In any case, don't use this if the bytes object may
2985 already be known to some other part of the code...
2986 Note that if there's not enough memory to resize the bytes object, the
2987 original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2988 memory" exception is set, and -1 is returned. Else (on success) 0 is
2989 returned, and the value in *pv may or may not be the same as on input.
2990 As always, an extra byte is allocated for a trailing \0 byte (newsize
2991 does *not* include that), and a trailing \0 byte is stored.
2992 */
2993
2994 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2995 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2996 {
2997 PyObject *v;
2998 PyBytesObject *sv;
2999 v = *pv;
3000 if (!PyBytes_Check(v) || newsize < 0) {
3001 goto error;
3002 }
3003 if (Py_SIZE(v) == newsize) {
3004 /* return early if newsize equals to v->ob_size */
3005 return 0;
3006 }
3007 if (Py_SIZE(v) == 0) {
3008 if (newsize == 0) {
3009 return 0;
3010 }
3011 *pv = _PyBytes_FromSize(newsize, 0);
3012 Py_DECREF(v);
3013 return (*pv == NULL) ? -1 : 0;
3014 }
3015 if (Py_REFCNT(v) != 1) {
3016 goto error;
3017 }
3018 if (newsize == 0) {
3019 *pv = _PyBytes_FromSize(0, 0);
3020 Py_DECREF(v);
3021 return (*pv == NULL) ? -1 : 0;
3022 }
3023 /* XXX UNREF/NEWREF interface should be more symmetrical */
3024 _Py_DEC_REFTOTAL;
3025 _Py_ForgetReference(v);
3026 *pv = (PyObject *)
3027 PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3028 if (*pv == NULL) {
3029 PyObject_Del(v);
3030 PyErr_NoMemory();
3031 return -1;
3032 }
3033 _Py_NewReference(*pv);
3034 sv = (PyBytesObject *) *pv;
3035 Py_SIZE(sv) = newsize;
3036 sv->ob_sval[newsize] = '\0';
3037 sv->ob_shash = -1; /* invalidate cached hash value */
3038 return 0;
3039 error:
3040 *pv = 0;
3041 Py_DECREF(v);
3042 PyErr_BadInternalCall();
3043 return -1;
3044 }
3045
3046 void
PyBytes_Fini(void)3047 PyBytes_Fini(void)
3048 {
3049 int i;
3050 for (i = 0; i < UCHAR_MAX + 1; i++)
3051 Py_CLEAR(characters[i]);
3052 Py_CLEAR(nullstring);
3053 }
3054
3055 /*********************** Bytes Iterator ****************************/
3056
/* State of an iterator over a bytes object (instances of
   PyBytesIter_Type). */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;   /* Index of the next byte to yield */
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3062
3063 static void
striter_dealloc(striterobject * it)3064 striter_dealloc(striterobject *it)
3065 {
3066 _PyObject_GC_UNTRACK(it);
3067 Py_XDECREF(it->it_seq);
3068 PyObject_GC_Del(it);
3069 }
3070
3071 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3072 striter_traverse(striterobject *it, visitproc visit, void *arg)
3073 {
3074 Py_VISIT(it->it_seq);
3075 return 0;
3076 }
3077
3078 static PyObject *
striter_next(striterobject * it)3079 striter_next(striterobject *it)
3080 {
3081 PyBytesObject *seq;
3082 PyObject *item;
3083
3084 assert(it != NULL);
3085 seq = it->it_seq;
3086 if (seq == NULL)
3087 return NULL;
3088 assert(PyBytes_Check(seq));
3089
3090 if (it->it_index < PyBytes_GET_SIZE(seq)) {
3091 item = PyLong_FromLong(
3092 (unsigned char)seq->ob_sval[it->it_index]);
3093 if (item != NULL)
3094 ++it->it_index;
3095 return item;
3096 }
3097
3098 it->it_seq = NULL;
3099 Py_DECREF(seq);
3100 return NULL;
3101 }
3102
3103 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3104 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3105 {
3106 Py_ssize_t len = 0;
3107 if (it->it_seq)
3108 len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3109 return PyLong_FromSsize_t(len);
3110 }
3111
/* Docstring for the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
"Private method returning an estimate of len(list(it)).");
3114
3115 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3116 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3117 {
3118 _Py_IDENTIFIER(iter);
3119 if (it->it_seq != NULL) {
3120 return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3121 it->it_seq, it->it_index);
3122 } else {
3123 return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3124 }
3125 }
3126
/* Docstring for the iterator's __reduce__ method. */
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3128
3129 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3130 striter_setstate(striterobject *it, PyObject *state)
3131 {
3132 Py_ssize_t index = PyLong_AsSsize_t(state);
3133 if (index == -1 && PyErr_Occurred())
3134 return NULL;
3135 if (it->it_seq != NULL) {
3136 if (index < 0)
3137 index = 0;
3138 else if (index > PyBytes_GET_SIZE(it->it_seq))
3139 index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3140 it->it_index = index;
3141 }
3142 Py_RETURN_NONE;
3143 }
3144
/* Docstring for the iterator's __setstate__ method. */
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3146
/* Method table of the bytes iterator (tp_methods of PyBytesIter_Type):
   length hint plus the two methods that expose and restore its state. */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
     reduce_doc},
    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
     setstate_doc},
    {NULL,              NULL}           /* sentinel */
};
3156
3157 PyTypeObject PyBytesIter_Type = {
3158 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3159 "bytes_iterator", /* tp_name */
3160 sizeof(striterobject), /* tp_basicsize */
3161 0, /* tp_itemsize */
3162 /* methods */
3163 (destructor)striter_dealloc, /* tp_dealloc */
3164 0, /* tp_vectorcall_offset */
3165 0, /* tp_getattr */
3166 0, /* tp_setattr */
3167 0, /* tp_as_async */
3168 0, /* tp_repr */
3169 0, /* tp_as_number */
3170 0, /* tp_as_sequence */
3171 0, /* tp_as_mapping */
3172 0, /* tp_hash */
3173 0, /* tp_call */
3174 0, /* tp_str */
3175 PyObject_GenericGetAttr, /* tp_getattro */
3176 0, /* tp_setattro */
3177 0, /* tp_as_buffer */
3178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3179 0, /* tp_doc */
3180 (traverseproc)striter_traverse, /* tp_traverse */
3181 0, /* tp_clear */
3182 0, /* tp_richcompare */
3183 0, /* tp_weaklistoffset */
3184 PyObject_SelfIter, /* tp_iter */
3185 (iternextfunc)striter_next, /* tp_iternext */
3186 striter_methods, /* tp_methods */
3187 0,
3188 };
3189
3190 static PyObject *
bytes_iter(PyObject * seq)3191 bytes_iter(PyObject *seq)
3192 {
3193 striterobject *it;
3194
3195 if (!PyBytes_Check(seq)) {
3196 PyErr_BadInternalCall();
3197 return NULL;
3198 }
3199 it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3200 if (it == NULL)
3201 return NULL;
3202 it->it_index = 0;
3203 Py_INCREF(seq);
3204 it->it_seq = (PyBytesObject *)seq;
3205 _PyObject_GC_TRACK(it);
3206 return (PyObject *)it;
3207 }
3208
3209
3210 /* _PyBytesWriter API */
3211
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() adds size / OVERALLOCATE_FACTOR extra bytes to
   the requested size. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3219
3220 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3221 _PyBytesWriter_Init(_PyBytesWriter *writer)
3222 {
3223 /* Set all attributes before small_buffer to 0 */
3224 memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3225 #ifndef NDEBUG
3226 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3227 sizeof(writer->small_buffer));
3228 #endif
3229 }
3230
3231 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3232 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3233 {
3234 Py_CLEAR(writer->buffer);
3235 }
3236
3237 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3238 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3239 {
3240 if (writer->use_small_buffer) {
3241 assert(writer->buffer == NULL);
3242 return writer->small_buffer;
3243 }
3244 else if (writer->use_bytearray) {
3245 assert(writer->buffer != NULL);
3246 return PyByteArray_AS_STRING(writer->buffer);
3247 }
3248 else {
3249 assert(writer->buffer != NULL);
3250 return PyBytes_AS_STRING(writer->buffer);
3251 }
3252 }
3253
3254 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3255 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3256 {
3257 char *start = _PyBytesWriter_AsString(writer);
3258 assert(str != NULL);
3259 assert(str >= start);
3260 assert(str - start <= writer->allocated);
3261 return str - start;
3262 }
3263
3264 #ifndef NDEBUG
3265 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3266 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3267 {
3268 char *start, *end;
3269
3270 if (writer->use_small_buffer) {
3271 assert(writer->buffer == NULL);
3272 }
3273 else {
3274 assert(writer->buffer != NULL);
3275 if (writer->use_bytearray)
3276 assert(PyByteArray_CheckExact(writer->buffer));
3277 else
3278 assert(PyBytes_CheckExact(writer->buffer));
3279 assert(Py_REFCNT(writer->buffer) == 1);
3280 }
3281
3282 if (writer->use_bytearray) {
3283 /* bytearray has its own overallocation algorithm,
3284 writer overallocation must be disabled */
3285 assert(!writer->overallocate);
3286 }
3287
3288 assert(0 <= writer->allocated);
3289 assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3290 /* the last byte must always be null */
3291 start = _PyBytesWriter_AsString(writer);
3292 assert(start[writer->allocated] == 0);
3293
3294 end = start + writer->allocated;
3295 assert(str != NULL);
3296 assert(start <= str && str <= end);
3297 return 1;
3298 }
3299 #endif
3300
3301 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3302 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3303 {
3304 Py_ssize_t allocated, pos;
3305
3306 assert(_PyBytesWriter_CheckConsistency(writer, str));
3307 assert(writer->allocated < size);
3308
3309 allocated = size;
3310 if (writer->overallocate
3311 && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3312 /* overallocate to limit the number of realloc() */
3313 allocated += allocated / OVERALLOCATE_FACTOR;
3314 }
3315
3316 pos = _PyBytesWriter_GetSize(writer, str);
3317 if (!writer->use_small_buffer) {
3318 if (writer->use_bytearray) {
3319 if (PyByteArray_Resize(writer->buffer, allocated))
3320 goto error;
3321 /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3322 but we cannot use ob_alloc because bytes may need to be moved
3323 to use the whole buffer. bytearray uses an internal optimization
3324 to avoid moving or copying bytes when bytes are removed at the
3325 beginning (ex: del bytearray[:1]). */
3326 }
3327 else {
3328 if (_PyBytes_Resize(&writer->buffer, allocated))
3329 goto error;
3330 }
3331 }
3332 else {
3333 /* convert from stack buffer to bytes object buffer */
3334 assert(writer->buffer == NULL);
3335
3336 if (writer->use_bytearray)
3337 writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3338 else
3339 writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3340 if (writer->buffer == NULL)
3341 goto error;
3342
3343 if (pos != 0) {
3344 char *dest;
3345 if (writer->use_bytearray)
3346 dest = PyByteArray_AS_STRING(writer->buffer);
3347 else
3348 dest = PyBytes_AS_STRING(writer->buffer);
3349 memcpy(dest,
3350 writer->small_buffer,
3351 pos);
3352 }
3353
3354 writer->use_small_buffer = 0;
3355 #ifndef NDEBUG
3356 memset(writer->small_buffer, PYMEM_CLEANBYTE,
3357 sizeof(writer->small_buffer));
3358 #endif
3359 }
3360 writer->allocated = allocated;
3361
3362 str = _PyBytesWriter_AsString(writer) + pos;
3363 assert(_PyBytesWriter_CheckConsistency(writer, str));
3364 return str;
3365
3366 error:
3367 _PyBytesWriter_Dealloc(writer);
3368 return NULL;
3369 }
3370
3371 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3372 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3373 {
3374 Py_ssize_t new_min_size;
3375
3376 assert(_PyBytesWriter_CheckConsistency(writer, str));
3377 assert(size >= 0);
3378
3379 if (size == 0) {
3380 /* nothing to do */
3381 return str;
3382 }
3383
3384 if (writer->min_size > PY_SSIZE_T_MAX - size) {
3385 PyErr_NoMemory();
3386 _PyBytesWriter_Dealloc(writer);
3387 return NULL;
3388 }
3389 new_min_size = writer->min_size + size;
3390
3391 if (new_min_size > writer->allocated)
3392 str = _PyBytesWriter_Resize(writer, str, new_min_size);
3393
3394 writer->min_size = new_min_size;
3395 return str;
3396 }
3397
3398 /* Allocate the buffer to write size bytes.
3399 Return the pointer to the beginning of buffer data.
3400 Raise an exception and return NULL on error. */
3401 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3402 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3403 {
3404 /* ensure that _PyBytesWriter_Alloc() is only called once */
3405 assert(writer->min_size == 0 && writer->buffer == NULL);
3406 assert(size >= 0);
3407
3408 writer->use_small_buffer = 1;
3409 #ifndef NDEBUG
3410 writer->allocated = sizeof(writer->small_buffer) - 1;
3411 /* In debug mode, don't use the full small buffer because it is less
3412 efficient than bytes and bytearray objects to detect buffer underflow
3413 and buffer overflow. Use 10 bytes of the small buffer to test also
3414 code using the smaller buffer in debug mode.
3415
3416 Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3417 in debug mode to also be able to detect stack overflow when running
3418 tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3419 if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3420 stack overflow. */
3421 writer->allocated = Py_MIN(writer->allocated, 10);
3422 /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3423 to detect buffer overflow */
3424 writer->small_buffer[writer->allocated] = 0;
3425 #else
3426 writer->allocated = sizeof(writer->small_buffer);
3427 #endif
3428 return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3429 }
3430
3431 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3432 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3433 {
3434 Py_ssize_t size;
3435 PyObject *result;
3436
3437 assert(_PyBytesWriter_CheckConsistency(writer, str));
3438
3439 size = _PyBytesWriter_GetSize(writer, str);
3440 if (size == 0 && !writer->use_bytearray) {
3441 Py_CLEAR(writer->buffer);
3442 /* Get the empty byte string singleton */
3443 result = PyBytes_FromStringAndSize(NULL, 0);
3444 }
3445 else if (writer->use_small_buffer) {
3446 if (writer->use_bytearray) {
3447 result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3448 }
3449 else {
3450 result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3451 }
3452 }
3453 else {
3454 result = writer->buffer;
3455 writer->buffer = NULL;
3456
3457 if (size != writer->allocated) {
3458 if (writer->use_bytearray) {
3459 if (PyByteArray_Resize(result, size)) {
3460 Py_DECREF(result);
3461 return NULL;
3462 }
3463 }
3464 else {
3465 if (_PyBytes_Resize(&result, size)) {
3466 assert(result == NULL);
3467 return NULL;
3468 }
3469 }
3470 }
3471 }
3472 return result;
3473 }
3474
3475 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3476 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3477 const void *bytes, Py_ssize_t size)
3478 {
3479 char *str = (char *)ptr;
3480
3481 str = _PyBytesWriter_Prepare(writer, str, size);
3482 if (str == NULL)
3483 return NULL;
3484
3485 memcpy(str, bytes, size);
3486 str += size;
3487
3488 return str;
3489 }
3490