1 /* bytes object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include "pycore_object.h"
7 #include "pycore_pymem.h"
8 #include "pycore_pystate.h"
9 
10 #include "bytes_methods.h"
11 #include "pystrhex.h"
12 #include <stddef.h>
13 
14 /*[clinic input]
15 class bytes "PyBytesObject *" "&PyBytes_Type"
16 [clinic start generated code]*/
17 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
18 
19 #include "clinic/bytesobject.c.h"
20 
21 #ifdef COUNT_ALLOCS
22 Py_ssize_t _Py_null_strings, _Py_one_strings;
23 #endif
24 
/* Cache of one-byte bytes objects, indexed by byte value
   (`*str & UCHAR_MAX').  Slots start out NULL and are filled in lazily the
   first time a one-byte object is built from data. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty object is created. */
static PyBytesObject *nullstring;
27 
28 /* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
29    for a string of length n should request PyBytesObject_SIZE + n bytes.
30 
31    Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
32    3 bytes per string allocation on a typical system.
33 */
34 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
35 
36 /* Forward declaration */
37 Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
38                                                    char *str);
39 
40 /*
41    For PyBytes_FromString(), the parameter `str' points to a null-terminated
42    string containing exactly `size' bytes.
43 
44    For PyBytes_FromStringAndSize(), the parameter `str' is
45    either NULL or else points to a string containing at least `size' bytes.
46    For PyBytes_FromStringAndSize(), the string in the `str' parameter does
47    not have to be null-terminated.  (Therefore it is safe to construct a
48    substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
49    If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
50    bytes (setting the last byte to the null terminating character) and you can
51    fill in the data yourself.  If `str' is non-NULL then the resulting
52    PyBytes object must be treated as immutable and you must not fill in nor
53    alter the data yourself, since the strings may be shared.
54 
55    The PyObject member `op->ob_size', which denotes the number of "extra
56    items" in a variable-size object, will contain the number of bytes
57    allocated for string data, not counting the null terminating character.
58    It is therefore equal to the `size' parameter (for
59    PyBytes_FromStringAndSize()) or the length of the string in the `str'
60    parameter (for PyBytes_FromString()).
61 */
62 static PyObject *
_PyBytes_FromSize(Py_ssize_t size,int use_calloc)63 _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
64 {
65     PyBytesObject *op;
66     assert(size >= 0);
67 
68     if (size == 0 && (op = nullstring) != NULL) {
69 #ifdef COUNT_ALLOCS
70         _Py_null_strings++;
71 #endif
72         Py_INCREF(op);
73         return (PyObject *)op;
74     }
75 
76     if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
77         PyErr_SetString(PyExc_OverflowError,
78                         "byte string is too large");
79         return NULL;
80     }
81 
82     /* Inline PyObject_NewVar */
83     if (use_calloc)
84         op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
85     else
86         op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
87     if (op == NULL)
88         return PyErr_NoMemory();
89     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
90     op->ob_shash = -1;
91     if (!use_calloc)
92         op->ob_sval[size] = '\0';
93     /* empty byte string singleton */
94     if (size == 0) {
95         nullstring = op;
96         Py_INCREF(op);
97     }
98     return (PyObject *) op;
99 }
100 
101 PyObject *
PyBytes_FromStringAndSize(const char * str,Py_ssize_t size)102 PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
103 {
104     PyBytesObject *op;
105     if (size < 0) {
106         PyErr_SetString(PyExc_SystemError,
107             "Negative size passed to PyBytes_FromStringAndSize");
108         return NULL;
109     }
110     if (size == 1 && str != NULL &&
111         (op = characters[*str & UCHAR_MAX]) != NULL)
112     {
113 #ifdef COUNT_ALLOCS
114         _Py_one_strings++;
115 #endif
116         Py_INCREF(op);
117         return (PyObject *)op;
118     }
119 
120     op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
121     if (op == NULL)
122         return NULL;
123     if (str == NULL)
124         return (PyObject *) op;
125 
126     memcpy(op->ob_sval, str, size);
127     /* share short strings */
128     if (size == 1) {
129         characters[*str & UCHAR_MAX] = op;
130         Py_INCREF(op);
131     }
132     return (PyObject *) op;
133 }
134 
135 PyObject *
PyBytes_FromString(const char * str)136 PyBytes_FromString(const char *str)
137 {
138     size_t size;
139     PyBytesObject *op;
140 
141     assert(str != NULL);
142     size = strlen(str);
143     if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
144         PyErr_SetString(PyExc_OverflowError,
145             "byte string is too long");
146         return NULL;
147     }
148     if (size == 0 && (op = nullstring) != NULL) {
149 #ifdef COUNT_ALLOCS
150         _Py_null_strings++;
151 #endif
152         Py_INCREF(op);
153         return (PyObject *)op;
154     }
155     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
156 #ifdef COUNT_ALLOCS
157         _Py_one_strings++;
158 #endif
159         Py_INCREF(op);
160         return (PyObject *)op;
161     }
162 
163     /* Inline PyObject_NewVar */
164     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
165     if (op == NULL)
166         return PyErr_NoMemory();
167     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
168     op->ob_shash = -1;
169     memcpy(op->ob_sval, str, size+1);
170     /* share short strings */
171     if (size == 0) {
172         nullstring = op;
173         Py_INCREF(op);
174     } else if (size == 1) {
175         characters[*str & UCHAR_MAX] = op;
176         Py_INCREF(op);
177     }
178     return (PyObject *) op;
179 }
180 
181 PyObject *
PyBytes_FromFormatV(const char * format,va_list vargs)182 PyBytes_FromFormatV(const char *format, va_list vargs)
183 {
184     char *s;
185     const char *f;
186     const char *p;
187     Py_ssize_t prec;
188     int longflag;
189     int size_tflag;
190     /* Longest 64-bit formatted numbers:
191        - "18446744073709551615\0" (21 bytes)
192        - "-9223372036854775808\0" (21 bytes)
193        Decimal takes the most space (it isn't enough for octal.)
194 
195        Longest 64-bit pointer representation:
196        "0xffffffffffffffff\0" (19 bytes). */
197     char buffer[21];
198     _PyBytesWriter writer;
199 
200     _PyBytesWriter_Init(&writer);
201 
202     s = _PyBytesWriter_Alloc(&writer, strlen(format));
203     if (s == NULL)
204         return NULL;
205     writer.overallocate = 1;
206 
207 #define WRITE_BYTES(str) \
208     do { \
209         s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
210         if (s == NULL) \
211             goto error; \
212     } while (0)
213 
214     for (f = format; *f; f++) {
215         if (*f != '%') {
216             *s++ = *f;
217             continue;
218         }
219 
220         p = f++;
221 
222         /* ignore the width (ex: 10 in "%10s") */
223         while (Py_ISDIGIT(*f))
224             f++;
225 
226         /* parse the precision (ex: 10 in "%.10s") */
227         prec = 0;
228         if (*f == '.') {
229             f++;
230             for (; Py_ISDIGIT(*f); f++) {
231                 prec = (prec * 10) + (*f - '0');
232             }
233         }
234 
235         while (*f && *f != '%' && !Py_ISALPHA(*f))
236             f++;
237 
238         /* handle the long flag ('l'), but only for %ld and %lu.
239            others can be added when necessary. */
240         longflag = 0;
241         if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
242             longflag = 1;
243             ++f;
244         }
245 
246         /* handle the size_t flag ('z'). */
247         size_tflag = 0;
248         if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
249             size_tflag = 1;
250             ++f;
251         }
252 
253         /* subtract bytes preallocated for the format string
254            (ex: 2 for "%s") */
255         writer.min_size -= (f - p + 1);
256 
257         switch (*f) {
258         case 'c':
259         {
260             int c = va_arg(vargs, int);
261             if (c < 0 || c > 255) {
262                 PyErr_SetString(PyExc_OverflowError,
263                                 "PyBytes_FromFormatV(): %c format "
264                                 "expects an integer in range [0; 255]");
265                 goto error;
266             }
267             writer.min_size++;
268             *s++ = (unsigned char)c;
269             break;
270         }
271 
272         case 'd':
273             if (longflag)
274                 sprintf(buffer, "%ld", va_arg(vargs, long));
275             else if (size_tflag)
276                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
277                     va_arg(vargs, Py_ssize_t));
278             else
279                 sprintf(buffer, "%d", va_arg(vargs, int));
280             assert(strlen(buffer) < sizeof(buffer));
281             WRITE_BYTES(buffer);
282             break;
283 
284         case 'u':
285             if (longflag)
286                 sprintf(buffer, "%lu",
287                     va_arg(vargs, unsigned long));
288             else if (size_tflag)
289                 sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
290                     va_arg(vargs, size_t));
291             else
292                 sprintf(buffer, "%u",
293                     va_arg(vargs, unsigned int));
294             assert(strlen(buffer) < sizeof(buffer));
295             WRITE_BYTES(buffer);
296             break;
297 
298         case 'i':
299             sprintf(buffer, "%i", va_arg(vargs, int));
300             assert(strlen(buffer) < sizeof(buffer));
301             WRITE_BYTES(buffer);
302             break;
303 
304         case 'x':
305             sprintf(buffer, "%x", va_arg(vargs, int));
306             assert(strlen(buffer) < sizeof(buffer));
307             WRITE_BYTES(buffer);
308             break;
309 
310         case 's':
311         {
312             Py_ssize_t i;
313 
314             p = va_arg(vargs, const char*);
315             if (prec <= 0) {
316                 i = strlen(p);
317             }
318             else {
319                 i = 0;
320                 while (i < prec && p[i]) {
321                     i++;
322                 }
323             }
324             s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
325             if (s == NULL)
326                 goto error;
327             break;
328         }
329 
330         case 'p':
331             sprintf(buffer, "%p", va_arg(vargs, void*));
332             assert(strlen(buffer) < sizeof(buffer));
333             /* %p is ill-defined:  ensure leading 0x. */
334             if (buffer[1] == 'X')
335                 buffer[1] = 'x';
336             else if (buffer[1] != 'x') {
337                 memmove(buffer+2, buffer, strlen(buffer)+1);
338                 buffer[0] = '0';
339                 buffer[1] = 'x';
340             }
341             WRITE_BYTES(buffer);
342             break;
343 
344         case '%':
345             writer.min_size++;
346             *s++ = '%';
347             break;
348 
349         default:
350             if (*f == 0) {
351                 /* fix min_size if we reached the end of the format string */
352                 writer.min_size++;
353             }
354 
355             /* invalid format string: copy unformatted string and exit */
356             WRITE_BYTES(p);
357             return _PyBytesWriter_Finish(&writer, s);
358         }
359     }
360 
361 #undef WRITE_BYTES
362 
363     return _PyBytesWriter_Finish(&writer, s);
364 
365  error:
366     _PyBytesWriter_Dealloc(&writer);
367     return NULL;
368 }
369 
370 PyObject *
PyBytes_FromFormat(const char * format,...)371 PyBytes_FromFormat(const char *format, ...)
372 {
373     PyObject* ret;
374     va_list vargs;
375 
376 #ifdef HAVE_STDARG_PROTOTYPES
377     va_start(vargs, format);
378 #else
379     va_start(vargs);
380 #endif
381     ret = PyBytes_FromFormatV(format, vargs);
382     va_end(vargs);
383     return ret;
384 }
385 
386 /* Helpers for formatstring */
387 
388 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)389 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390 {
391     Py_ssize_t argidx = *p_argidx;
392     if (argidx < arglen) {
393         (*p_argidx)++;
394         if (arglen < 0)
395             return args;
396         else
397             return PyTuple_GetItem(args, argidx);
398     }
399     PyErr_SetString(PyExc_TypeError,
400                     "not enough arguments for format string");
401     return NULL;
402 }
403 
404 /* Format codes
405  * F_LJUST      '-'
406  * F_SIGN       '+'
407  * F_BLANK      ' '
408  * F_ALT        '#'
409  * F_ZERO       '0'
410  */
411 #define F_LJUST (1<<0)
412 #define F_SIGN  (1<<1)
413 #define F_BLANK (1<<2)
414 #define F_ALT   (1<<3)
415 #define F_ZERO  (1<<4)
416 
417 /* Returns a new reference to a PyBytes object, or NULL on failure. */
418 
419 static char*
formatfloat(PyObject * v,int flags,int prec,int type,PyObject ** p_result,_PyBytesWriter * writer,char * str)420 formatfloat(PyObject *v, int flags, int prec, int type,
421             PyObject **p_result, _PyBytesWriter *writer, char *str)
422 {
423     char *p;
424     PyObject *result;
425     double x;
426     size_t len;
427 
428     x = PyFloat_AsDouble(v);
429     if (x == -1.0 && PyErr_Occurred()) {
430         PyErr_Format(PyExc_TypeError, "float argument required, "
431                      "not %.200s", Py_TYPE(v)->tp_name);
432         return NULL;
433     }
434 
435     if (prec < 0)
436         prec = 6;
437 
438     p = PyOS_double_to_string(x, type, prec,
439                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
440 
441     if (p == NULL)
442         return NULL;
443 
444     len = strlen(p);
445     if (writer != NULL) {
446         str = _PyBytesWriter_Prepare(writer, str, len);
447         if (str == NULL)
448             return NULL;
449         memcpy(str, p, len);
450         PyMem_Free(p);
451         str += len;
452         return str;
453     }
454 
455     result = PyBytes_FromStringAndSize(p, len);
456     PyMem_Free(p);
457     *p_result = result;
458     return result != NULL ? str : NULL;
459 }
460 
461 static PyObject *
formatlong(PyObject * v,int flags,int prec,int type)462 formatlong(PyObject *v, int flags, int prec, int type)
463 {
464     PyObject *result, *iobj;
465     if (type == 'i')
466         type = 'd';
467     if (PyLong_Check(v))
468         return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
469     if (PyNumber_Check(v)) {
470         /* make sure number is a type of integer for o, x, and X */
471         if (type == 'o' || type == 'x' || type == 'X')
472             iobj = PyNumber_Index(v);
473         else
474             iobj = PyNumber_Long(v);
475         if (iobj == NULL) {
476             if (!PyErr_ExceptionMatches(PyExc_TypeError))
477                 return NULL;
478         }
479         else if (!PyLong_Check(iobj))
480             Py_CLEAR(iobj);
481         if (iobj != NULL) {
482             result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
483             Py_DECREF(iobj);
484             return result;
485         }
486     }
487     PyErr_Format(PyExc_TypeError,
488         "%%%c format: %s is required, not %.200s", type,
489         (type == 'o' || type == 'x' || type == 'X') ? "an integer"
490                                                     : "a number",
491         Py_TYPE(v)->tp_name);
492     return NULL;
493 }
494 
495 static int
byte_converter(PyObject * arg,char * p)496 byte_converter(PyObject *arg, char *p)
497 {
498     if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
499         *p = PyBytes_AS_STRING(arg)[0];
500         return 1;
501     }
502     else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
503         *p = PyByteArray_AS_STRING(arg)[0];
504         return 1;
505     }
506     else {
507         PyObject *iobj;
508         long ival;
509         int overflow;
510         /* make sure number is a type of integer */
511         if (PyLong_Check(arg)) {
512             ival = PyLong_AsLongAndOverflow(arg, &overflow);
513         }
514         else {
515             iobj = PyNumber_Index(arg);
516             if (iobj == NULL) {
517                 if (!PyErr_ExceptionMatches(PyExc_TypeError))
518                     return 0;
519                 goto onError;
520             }
521             ival = PyLong_AsLongAndOverflow(iobj, &overflow);
522             Py_DECREF(iobj);
523         }
524         if (!overflow && ival == -1 && PyErr_Occurred())
525             goto onError;
526         if (overflow || !(0 <= ival && ival <= 255)) {
527             PyErr_SetString(PyExc_OverflowError,
528                             "%c arg not in range(256)");
529             return 0;
530         }
531         *p = (char)ival;
532         return 1;
533     }
534   onError:
535     PyErr_SetString(PyExc_TypeError,
536         "%c requires an integer in range(256) or a single byte");
537     return 0;
538 }
539 
540 static PyObject *_PyBytes_FromBuffer(PyObject *x);
541 
542 static PyObject *
format_obj(PyObject * v,const char ** pbuf,Py_ssize_t * plen)543 format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
544 {
545     PyObject *func, *result;
546     _Py_IDENTIFIER(__bytes__);
547     /* is it a bytes object? */
548     if (PyBytes_Check(v)) {
549         *pbuf = PyBytes_AS_STRING(v);
550         *plen = PyBytes_GET_SIZE(v);
551         Py_INCREF(v);
552         return v;
553     }
554     if (PyByteArray_Check(v)) {
555         *pbuf = PyByteArray_AS_STRING(v);
556         *plen = PyByteArray_GET_SIZE(v);
557         Py_INCREF(v);
558         return v;
559     }
560     /* does it support __bytes__? */
561     func = _PyObject_LookupSpecial(v, &PyId___bytes__);
562     if (func != NULL) {
563         result = _PyObject_CallNoArg(func);
564         Py_DECREF(func);
565         if (result == NULL)
566             return NULL;
567         if (!PyBytes_Check(result)) {
568             PyErr_Format(PyExc_TypeError,
569                          "__bytes__ returned non-bytes (type %.200s)",
570                          Py_TYPE(result)->tp_name);
571             Py_DECREF(result);
572             return NULL;
573         }
574         *pbuf = PyBytes_AS_STRING(result);
575         *plen = PyBytes_GET_SIZE(result);
576         return result;
577     }
578     /* does it support buffer protocol? */
579     if (PyObject_CheckBuffer(v)) {
580         /* maybe we can avoid making a copy of the buffer object here? */
581         result = _PyBytes_FromBuffer(v);
582         if (result == NULL)
583             return NULL;
584         *pbuf = PyBytes_AS_STRING(result);
585         *plen = PyBytes_GET_SIZE(result);
586         return result;
587     }
588     PyErr_Format(PyExc_TypeError,
589                  "%%b requires a bytes-like object, "
590                  "or an object that implements __bytes__, not '%.100s'",
591                  Py_TYPE(v)->tp_name);
592     return NULL;
593 }
594 
595 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
596 
597 PyObject *
_PyBytes_FormatEx(const char * format,Py_ssize_t format_len,PyObject * args,int use_bytearray)598 _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
599                   PyObject *args, int use_bytearray)
600 {
601     const char *fmt;
602     char *res;
603     Py_ssize_t arglen, argidx;
604     Py_ssize_t fmtcnt;
605     int args_owned = 0;
606     PyObject *dict = NULL;
607     _PyBytesWriter writer;
608 
609     if (args == NULL) {
610         PyErr_BadInternalCall();
611         return NULL;
612     }
613     fmt = format;
614     fmtcnt = format_len;
615 
616     _PyBytesWriter_Init(&writer);
617     writer.use_bytearray = use_bytearray;
618 
619     res = _PyBytesWriter_Alloc(&writer, fmtcnt);
620     if (res == NULL)
621         return NULL;
622     if (!use_bytearray)
623         writer.overallocate = 1;
624 
625     if (PyTuple_Check(args)) {
626         arglen = PyTuple_GET_SIZE(args);
627         argidx = 0;
628     }
629     else {
630         arglen = -1;
631         argidx = -2;
632     }
633     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
634         !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
635         !PyByteArray_Check(args)) {
636             dict = args;
637     }
638 
639     while (--fmtcnt >= 0) {
640         if (*fmt != '%') {
641             Py_ssize_t len;
642             char *pos;
643 
644             pos = (char *)memchr(fmt + 1, '%', fmtcnt);
645             if (pos != NULL)
646                 len = pos - fmt;
647             else
648                 len = fmtcnt + 1;
649             assert(len != 0);
650 
651             memcpy(res, fmt, len);
652             res += len;
653             fmt += len;
654             fmtcnt -= (len - 1);
655         }
656         else {
657             /* Got a format specifier */
658             int flags = 0;
659             Py_ssize_t width = -1;
660             int prec = -1;
661             int c = '\0';
662             int fill;
663             PyObject *v = NULL;
664             PyObject *temp = NULL;
665             const char *pbuf = NULL;
666             int sign;
667             Py_ssize_t len = 0;
668             char onechar; /* For byte_converter() */
669             Py_ssize_t alloc;
670 
671             fmt++;
672             if (*fmt == '%') {
673                 *res++ = '%';
674                 fmt++;
675                 fmtcnt--;
676                 continue;
677             }
678             if (*fmt == '(') {
679                 const char *keystart;
680                 Py_ssize_t keylen;
681                 PyObject *key;
682                 int pcount = 1;
683 
684                 if (dict == NULL) {
685                     PyErr_SetString(PyExc_TypeError,
686                              "format requires a mapping");
687                     goto error;
688                 }
689                 ++fmt;
690                 --fmtcnt;
691                 keystart = fmt;
692                 /* Skip over balanced parentheses */
693                 while (pcount > 0 && --fmtcnt >= 0) {
694                     if (*fmt == ')')
695                         --pcount;
696                     else if (*fmt == '(')
697                         ++pcount;
698                     fmt++;
699                 }
700                 keylen = fmt - keystart - 1;
701                 if (fmtcnt < 0 || pcount > 0) {
702                     PyErr_SetString(PyExc_ValueError,
703                                "incomplete format key");
704                     goto error;
705                 }
706                 key = PyBytes_FromStringAndSize(keystart,
707                                                  keylen);
708                 if (key == NULL)
709                     goto error;
710                 if (args_owned) {
711                     Py_DECREF(args);
712                     args_owned = 0;
713                 }
714                 args = PyObject_GetItem(dict, key);
715                 Py_DECREF(key);
716                 if (args == NULL) {
717                     goto error;
718                 }
719                 args_owned = 1;
720                 arglen = -1;
721                 argidx = -2;
722             }
723 
724             /* Parse flags. Example: "%+i" => flags=F_SIGN. */
725             while (--fmtcnt >= 0) {
726                 switch (c = *fmt++) {
727                 case '-': flags |= F_LJUST; continue;
728                 case '+': flags |= F_SIGN; continue;
729                 case ' ': flags |= F_BLANK; continue;
730                 case '#': flags |= F_ALT; continue;
731                 case '0': flags |= F_ZERO; continue;
732                 }
733                 break;
734             }
735 
736             /* Parse width. Example: "%10s" => width=10 */
737             if (c == '*') {
738                 v = getnextarg(args, arglen, &argidx);
739                 if (v == NULL)
740                     goto error;
741                 if (!PyLong_Check(v)) {
742                     PyErr_SetString(PyExc_TypeError,
743                                     "* wants int");
744                     goto error;
745                 }
746                 width = PyLong_AsSsize_t(v);
747                 if (width == -1 && PyErr_Occurred())
748                     goto error;
749                 if (width < 0) {
750                     flags |= F_LJUST;
751                     width = -width;
752                 }
753                 if (--fmtcnt >= 0)
754                     c = *fmt++;
755             }
756             else if (c >= 0 && isdigit(c)) {
757                 width = c - '0';
758                 while (--fmtcnt >= 0) {
759                     c = Py_CHARMASK(*fmt++);
760                     if (!isdigit(c))
761                         break;
762                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
763                         PyErr_SetString(
764                             PyExc_ValueError,
765                             "width too big");
766                         goto error;
767                     }
768                     width = width*10 + (c - '0');
769                 }
770             }
771 
772             /* Parse precision. Example: "%.3f" => prec=3 */
773             if (c == '.') {
774                 prec = 0;
775                 if (--fmtcnt >= 0)
776                     c = *fmt++;
777                 if (c == '*') {
778                     v = getnextarg(args, arglen, &argidx);
779                     if (v == NULL)
780                         goto error;
781                     if (!PyLong_Check(v)) {
782                         PyErr_SetString(
783                             PyExc_TypeError,
784                             "* wants int");
785                         goto error;
786                     }
787                     prec = _PyLong_AsInt(v);
788                     if (prec == -1 && PyErr_Occurred())
789                         goto error;
790                     if (prec < 0)
791                         prec = 0;
792                     if (--fmtcnt >= 0)
793                         c = *fmt++;
794                 }
795                 else if (c >= 0 && isdigit(c)) {
796                     prec = c - '0';
797                     while (--fmtcnt >= 0) {
798                         c = Py_CHARMASK(*fmt++);
799                         if (!isdigit(c))
800                             break;
801                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
802                             PyErr_SetString(
803                                 PyExc_ValueError,
804                                 "prec too big");
805                             goto error;
806                         }
807                         prec = prec*10 + (c - '0');
808                     }
809                 }
810             } /* prec */
811             if (fmtcnt >= 0) {
812                 if (c == 'h' || c == 'l' || c == 'L') {
813                     if (--fmtcnt >= 0)
814                         c = *fmt++;
815                 }
816             }
817             if (fmtcnt < 0) {
818                 PyErr_SetString(PyExc_ValueError,
819                                 "incomplete format");
820                 goto error;
821             }
822             v = getnextarg(args, arglen, &argidx);
823             if (v == NULL)
824                 goto error;
825 
826             if (fmtcnt == 0) {
827                 /* last write: disable writer overallocation */
828                 writer.overallocate = 0;
829             }
830 
831             sign = 0;
832             fill = ' ';
833             switch (c) {
834             case 'r':
835                 // %r is only for 2/3 code; 3 only code should use %a
836             case 'a':
837                 temp = PyObject_ASCII(v);
838                 if (temp == NULL)
839                     goto error;
840                 assert(PyUnicode_IS_ASCII(temp));
841                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
842                 len = PyUnicode_GET_LENGTH(temp);
843                 if (prec >= 0 && len > prec)
844                     len = prec;
845                 break;
846 
847             case 's':
848                 // %s is only for 2/3 code; 3 only code should use %b
849             case 'b':
850                 temp = format_obj(v, &pbuf, &len);
851                 if (temp == NULL)
852                     goto error;
853                 if (prec >= 0 && len > prec)
854                     len = prec;
855                 break;
856 
857             case 'i':
858             case 'd':
859             case 'u':
860             case 'o':
861             case 'x':
862             case 'X':
863                 if (PyLong_CheckExact(v)
864                     && width == -1 && prec == -1
865                     && !(flags & (F_SIGN | F_BLANK))
866                     && c != 'X')
867                 {
868                     /* Fast path */
869                     int alternate = flags & F_ALT;
870                     int base;
871 
872                     switch(c)
873                     {
874                         default:
875                             Py_UNREACHABLE();
876                         case 'd':
877                         case 'i':
878                         case 'u':
879                             base = 10;
880                             break;
881                         case 'o':
882                             base = 8;
883                             break;
884                         case 'x':
885                         case 'X':
886                             base = 16;
887                             break;
888                     }
889 
890                     /* Fast path */
891                     writer.min_size -= 2; /* size preallocated for "%d" */
892                     res = _PyLong_FormatBytesWriter(&writer, res,
893                                                     v, base, alternate);
894                     if (res == NULL)
895                         goto error;
896                     continue;
897                 }
898 
899                 temp = formatlong(v, flags, prec, c);
900                 if (!temp)
901                     goto error;
902                 assert(PyUnicode_IS_ASCII(temp));
903                 pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
904                 len = PyUnicode_GET_LENGTH(temp);
905                 sign = 1;
906                 if (flags & F_ZERO)
907                     fill = '0';
908                 break;
909 
910             case 'e':
911             case 'E':
912             case 'f':
913             case 'F':
914             case 'g':
915             case 'G':
916                 if (width == -1 && prec == -1
917                     && !(flags & (F_SIGN | F_BLANK)))
918                 {
919                     /* Fast path */
920                     writer.min_size -= 2; /* size preallocated for "%f" */
921                     res = formatfloat(v, flags, prec, c, NULL, &writer, res);
922                     if (res == NULL)
923                         goto error;
924                     continue;
925                 }
926 
927                 if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
928                     goto error;
929                 pbuf = PyBytes_AS_STRING(temp);
930                 len = PyBytes_GET_SIZE(temp);
931                 sign = 1;
932                 if (flags & F_ZERO)
933                     fill = '0';
934                 break;
935 
936             case 'c':
937                 pbuf = &onechar;
938                 len = byte_converter(v, &onechar);
939                 if (!len)
940                     goto error;
941                 if (width == -1) {
942                     /* Fast path */
943                     *res++ = onechar;
944                     continue;
945                 }
946                 break;
947 
948             default:
949                 PyErr_Format(PyExc_ValueError,
950                   "unsupported format character '%c' (0x%x) "
951                   "at index %zd",
952                   c, c,
953                   (Py_ssize_t)(fmt - 1 - format));
954                 goto error;
955             }
956 
957             if (sign) {
958                 if (*pbuf == '-' || *pbuf == '+') {
959                     sign = *pbuf++;
960                     len--;
961                 }
962                 else if (flags & F_SIGN)
963                     sign = '+';
964                 else if (flags & F_BLANK)
965                     sign = ' ';
966                 else
967                     sign = 0;
968             }
969             if (width < len)
970                 width = len;
971 
972             alloc = width;
973             if (sign != 0 && len == width)
974                 alloc++;
975             /* 2: size preallocated for %s */
976             if (alloc > 2) {
977                 res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
978                 if (res == NULL)
979                     goto error;
980             }
981 #ifndef NDEBUG
982             char *before = res;
983 #endif
984 
985             /* Write the sign if needed */
986             if (sign) {
987                 if (fill != ' ')
988                     *res++ = sign;
989                 if (width > len)
990                     width--;
991             }
992 
993             /* Write the numeric prefix for "x", "X" and "o" formats
994                if the alternate form is used.
995                For example, write "0x" for the "%#x" format. */
996             if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
997                 assert(pbuf[0] == '0');
998                 assert(pbuf[1] == c);
999                 if (fill != ' ') {
1000                     *res++ = *pbuf++;
1001                     *res++ = *pbuf++;
1002                 }
1003                 width -= 2;
1004                 if (width < 0)
1005                     width = 0;
1006                 len -= 2;
1007             }
1008 
1009             /* Pad left with the fill character if needed */
1010             if (width > len && !(flags & F_LJUST)) {
1011                 memset(res, fill, width - len);
1012                 res += (width - len);
1013                 width = len;
1014             }
1015 
1016             /* If padding with spaces: write sign if needed and/or numeric
1017                prefix if the alternate form is used */
1018             if (fill == ' ') {
1019                 if (sign)
1020                     *res++ = sign;
1021                 if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1022                     assert(pbuf[0] == '0');
1023                     assert(pbuf[1] == c);
1024                     *res++ = *pbuf++;
1025                     *res++ = *pbuf++;
1026                 }
1027             }
1028 
1029             /* Copy bytes */
1030             memcpy(res, pbuf, len);
1031             res += len;
1032 
1033             /* Pad right with the fill character if needed */
1034             if (width > len) {
1035                 memset(res, ' ', width - len);
1036                 res += (width - len);
1037             }
1038 
1039             if (dict && (argidx < arglen)) {
1040                 PyErr_SetString(PyExc_TypeError,
1041                            "not all arguments converted during bytes formatting");
1042                 Py_XDECREF(temp);
1043                 goto error;
1044             }
1045             Py_XDECREF(temp);
1046 
1047 #ifndef NDEBUG
1048             /* check that we computed the exact size for this write */
1049             assert((res - before) == alloc);
1050 #endif
1051         } /* '%' */
1052 
1053         /* If overallocation was disabled, ensure that it was the last
1054            write. Otherwise, we missed an optimization */
1055         assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1056     } /* until end */
1057 
1058     if (argidx < arglen && !dict) {
1059         PyErr_SetString(PyExc_TypeError,
1060                         "not all arguments converted during bytes formatting");
1061         goto error;
1062     }
1063 
1064     if (args_owned) {
1065         Py_DECREF(args);
1066     }
1067     return _PyBytesWriter_Finish(&writer, res);
1068 
1069  error:
1070     _PyBytesWriter_Dealloc(&writer);
1071     if (args_owned) {
1072         Py_DECREF(args);
1073     }
1074     return NULL;
1075 }
1076 
/* _PyBytes_DecodeEscape() (defined after the helper below) unescapes a
   backslash-escaped string.  Its `unicode' argument is accepted but is not
   used by the implementation.  If recode_encoding is non-NULL, the string
   is UTF-8 encoded and its non-ASCII bytes are re-encoded in the
   specified encoding.  */
1081 
1082 static char *
_PyBytes_DecodeEscapeRecode(const char ** s,const char * end,const char * errors,const char * recode_encoding,_PyBytesWriter * writer,char * p)1083 _PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1084                             const char *errors, const char *recode_encoding,
1085                             _PyBytesWriter *writer, char *p)
1086 {
1087     PyObject *u, *w;
1088     const char* t;
1089 
1090     t = *s;
1091     /* Decode non-ASCII bytes as UTF-8. */
1092     while (t < end && (*t & 0x80))
1093         t++;
1094     u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1095     if (u == NULL)
1096         return NULL;
1097 
1098     /* Recode them in target encoding. */
1099     w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1100     Py_DECREF(u);
1101     if  (w == NULL)
1102         return NULL;
1103     assert(PyBytes_Check(w));
1104 
1105     /* Append bytes to output buffer. */
1106     writer->min_size--;   /* subtract 1 preallocated byte */
1107     p = _PyBytesWriter_WriteBytes(writer, p,
1108                                   PyBytes_AS_STRING(w),
1109                                   PyBytes_GET_SIZE(w));
1110     Py_DECREF(w);
1111     if (p == NULL)
1112         return NULL;
1113 
1114     *s = t;
1115     return p;
1116 }
1117 
/* Decode the backslash escapes found in the `len' bytes starting at `s'.

   s, len: the input buffer; it is read up to s + len.
   errors: selects what happens on a malformed \x escape.  NULL or "strict"
       raises ValueError; "replace" emits '?'; "ignore" emits nothing; any
       other value raises ValueError.  It is also passed through to
       _PyBytes_DecodeEscapeRecode().
   unicode: not referenced anywhere in this function body.
   recode_encoding: if non-NULL, bytes with the high bit set are handed to
       _PyBytes_DecodeEscapeRecode() instead of being copied verbatim.
   first_invalid_escape: set to NULL once the output buffer has been
       allocated; when an unrecognized escape is met for the first time it
       is set to point (inside the input buffer) at the character that
       follows the backslash.

   Returns the object produced by _PyBytesWriter_Finish() (presumably a
   bytes object -- the writer is defined outside this chunk), or NULL on
   failure after releasing the writer. */
PyObject *_PyBytes_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                Py_ssize_t unicode,
                                const char *recode_encoding,
                                const char **first_invalid_escape)
{
    int c;
    char *p;
    const char *end;
    _PyBytesWriter writer;

    _PyBytesWriter_Init(&writer);

    /* Start with room for `len' output bytes. */
    p = _PyBytesWriter_Alloc(&writer, len);
    if (p == NULL)
        return NULL;
    writer.overallocate = 1;

    *first_invalid_escape = NULL;

    end = s + len;
    while (s < end) {
        if (*s != '\\') {
          /* Also reached by `goto' from the default case of the switch
             below, with s pointing at the character after a backslash. */
          non_esc:
            if (!(recode_encoding && (*s & 0x80))) {
                /* Plain byte (or no recoding requested): copy it as is. */
                *p++ = *s++;
            }
            else {
                /* non-ASCII character and need to recode */
                p = _PyBytes_DecodeEscapeRecode(&s, end,
                                                errors, recode_encoding,
                                                &writer, p);
                if (p == NULL)
                    goto failed;
            }
            continue;
        }

        /* Skip the backslash; there must be at least one more character. */
        s++;
        if (s == end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }

        /* s is advanced past the escape character before the case runs. */
        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;   /* backslash-newline produces no output */
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* Octal escape: the digit just consumed plus up to two more. */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            *p++ = c;
            break;
        case 'x':
            /* Hex escape: requires two characters, both hex digits. */
            if (s+1 < end) {
                int digit1, digit2;
                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
                if (digit1 < 16 && digit2 < 16) {
                    *p++ = (unsigned char)((digit1 << 4) + digit2);
                    s += 2;
                    break;
                }
            }
            /* invalid hexadecimal digits */

            if (!errors || strcmp(errors, "strict") == 0) {
                /* (end - len) is the start of the input and s - 2 is the
                   backslash, so the reported position is the offset of
                   the backslash. */
                PyErr_Format(PyExc_ValueError,
                             "invalid \\x escape at position %zd",
                             s - 2 - (end - len));
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; unknown "
                             "error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && Py_ISXDIGIT(s[0]))
                s++; /* and a hexdigit */
            break;

        default:
            /* Unrecognized escape: remember the first one for the caller,
               keep the backslash in the output, then rewind s to the
               escape character so it is copied by the non_esc path. */
            if (*first_invalid_escape == NULL) {
                *first_invalid_escape = s-1; /* Back up one char, since we've
                                                already incremented s. */
            }
            *p++ = '\\';
            s--;
            goto non_esc; /* an arbitrary number of unescaped
                             UTF-8 bytes may follow. */
        }
    }

    return _PyBytesWriter_Finish(&writer, p);

  failed:
    _PyBytesWriter_Dealloc(&writer);
    return NULL;
}
1240 
PyBytes_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)1241 PyObject *PyBytes_DecodeEscape(const char *s,
1242                                 Py_ssize_t len,
1243                                 const char *errors,
1244                                 Py_ssize_t unicode,
1245                                 const char *recode_encoding)
1246 {
1247     const char* first_invalid_escape;
1248     PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1249                                              recode_encoding,
1250                                              &first_invalid_escape);
1251     if (result == NULL)
1252         return NULL;
1253     if (first_invalid_escape != NULL) {
1254         if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1255                              "invalid escape sequence '\\%c'",
1256                              (unsigned char)*first_invalid_escape) < 0) {
1257             Py_DECREF(result);
1258             return NULL;
1259         }
1260     }
1261     return result;
1262 
1263 }
1264 /* -------------------------------------------------------------------- */
1265 /* object api */
1266 
1267 Py_ssize_t
PyBytes_Size(PyObject * op)1268 PyBytes_Size(PyObject *op)
1269 {
1270     if (!PyBytes_Check(op)) {
1271         PyErr_Format(PyExc_TypeError,
1272              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273         return -1;
1274     }
1275     return Py_SIZE(op);
1276 }
1277 
1278 char *
PyBytes_AsString(PyObject * op)1279 PyBytes_AsString(PyObject *op)
1280 {
1281     if (!PyBytes_Check(op)) {
1282         PyErr_Format(PyExc_TypeError,
1283              "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1284         return NULL;
1285     }
1286     return ((PyBytesObject *)op)->ob_sval;
1287 }
1288 
1289 int
PyBytes_AsStringAndSize(PyObject * obj,char ** s,Py_ssize_t * len)1290 PyBytes_AsStringAndSize(PyObject *obj,
1291                          char **s,
1292                          Py_ssize_t *len)
1293 {
1294     if (s == NULL) {
1295         PyErr_BadInternalCall();
1296         return -1;
1297     }
1298 
1299     if (!PyBytes_Check(obj)) {
1300         PyErr_Format(PyExc_TypeError,
1301              "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1302         return -1;
1303     }
1304 
1305     *s = PyBytes_AS_STRING(obj);
1306     if (len != NULL)
1307         *len = PyBytes_GET_SIZE(obj);
1308     else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1309         PyErr_SetString(PyExc_ValueError,
1310                         "embedded null byte");
1311         return -1;
1312     }
1313     return 0;
1314 }
1315 
1316 /* -------------------------------------------------------------------- */
1317 /* Methods */
1318 
1319 #include "stringlib/stringdefs.h"
1320 
1321 #include "stringlib/fastsearch.h"
1322 #include "stringlib/count.h"
1323 #include "stringlib/find.h"
1324 #include "stringlib/join.h"
1325 #include "stringlib/partition.h"
1326 #include "stringlib/split.h"
1327 #include "stringlib/ctype.h"
1328 
1329 #include "stringlib/transmogrify.h"
1330 
1331 PyObject *
PyBytes_Repr(PyObject * obj,int smartquotes)1332 PyBytes_Repr(PyObject *obj, int smartquotes)
1333 {
1334     PyBytesObject* op = (PyBytesObject*) obj;
1335     Py_ssize_t i, length = Py_SIZE(op);
1336     Py_ssize_t newsize, squotes, dquotes;
1337     PyObject *v;
1338     unsigned char quote, *s, *p;
1339 
1340     /* Compute size of output string */
1341     squotes = dquotes = 0;
1342     newsize = 3; /* b'' */
1343     s = (unsigned char*)op->ob_sval;
1344     for (i = 0; i < length; i++) {
1345         Py_ssize_t incr = 1;
1346         switch(s[i]) {
1347         case '\'': squotes++; break;
1348         case '"':  dquotes++; break;
1349         case '\\': case '\t': case '\n': case '\r':
1350             incr = 2; break; /* \C */
1351         default:
1352             if (s[i] < ' ' || s[i] >= 0x7f)
1353                 incr = 4; /* \xHH */
1354         }
1355         if (newsize > PY_SSIZE_T_MAX - incr)
1356             goto overflow;
1357         newsize += incr;
1358     }
1359     quote = '\'';
1360     if (smartquotes && squotes && !dquotes)
1361         quote = '"';
1362     if (squotes && quote == '\'') {
1363         if (newsize > PY_SSIZE_T_MAX - squotes)
1364             goto overflow;
1365         newsize += squotes;
1366     }
1367 
1368     v = PyUnicode_New(newsize, 127);
1369     if (v == NULL) {
1370         return NULL;
1371     }
1372     p = PyUnicode_1BYTE_DATA(v);
1373 
1374     *p++ = 'b', *p++ = quote;
1375     for (i = 0; i < length; i++) {
1376         unsigned char c = op->ob_sval[i];
1377         if (c == quote || c == '\\')
1378             *p++ = '\\', *p++ = c;
1379         else if (c == '\t')
1380             *p++ = '\\', *p++ = 't';
1381         else if (c == '\n')
1382             *p++ = '\\', *p++ = 'n';
1383         else if (c == '\r')
1384             *p++ = '\\', *p++ = 'r';
1385         else if (c < ' ' || c >= 0x7f) {
1386             *p++ = '\\';
1387             *p++ = 'x';
1388             *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1389             *p++ = Py_hexdigits[c & 0xf];
1390         }
1391         else
1392             *p++ = c;
1393     }
1394     *p++ = quote;
1395     assert(_PyUnicode_CheckConsistency(v, 1));
1396     return v;
1397 
1398   overflow:
1399     PyErr_SetString(PyExc_OverflowError,
1400                     "bytes object is too large to make repr");
1401     return NULL;
1402 }
1403 
1404 static PyObject *
bytes_repr(PyObject * op)1405 bytes_repr(PyObject *op)
1406 {
1407     return PyBytes_Repr(op, 1);
1408 }
1409 
1410 static PyObject *
bytes_str(PyObject * op)1411 bytes_str(PyObject *op)
1412 {
1413     PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
1414     if (config->bytes_warning) {
1415         if (PyErr_WarnEx(PyExc_BytesWarning,
1416                          "str() on a bytes instance", 1)) {
1417             return NULL;
1418         }
1419     }
1420     return bytes_repr(op);
1421 }
1422 
1423 static Py_ssize_t
bytes_length(PyBytesObject * a)1424 bytes_length(PyBytesObject *a)
1425 {
1426     return Py_SIZE(a);
1427 }
1428 
1429 /* This is also used by PyBytes_Concat() */
1430 static PyObject *
bytes_concat(PyObject * a,PyObject * b)1431 bytes_concat(PyObject *a, PyObject *b)
1432 {
1433     Py_buffer va, vb;
1434     PyObject *result = NULL;
1435 
1436     va.len = -1;
1437     vb.len = -1;
1438     if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1439         PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1440         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1441                      Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1442         goto done;
1443     }
1444 
1445     /* Optimize end cases */
1446     if (va.len == 0 && PyBytes_CheckExact(b)) {
1447         result = b;
1448         Py_INCREF(result);
1449         goto done;
1450     }
1451     if (vb.len == 0 && PyBytes_CheckExact(a)) {
1452         result = a;
1453         Py_INCREF(result);
1454         goto done;
1455     }
1456 
1457     if (va.len > PY_SSIZE_T_MAX - vb.len) {
1458         PyErr_NoMemory();
1459         goto done;
1460     }
1461 
1462     result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1463     if (result != NULL) {
1464         memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1465         memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1466     }
1467 
1468   done:
1469     if (va.len != -1)
1470         PyBuffer_Release(&va);
1471     if (vb.len != -1)
1472         PyBuffer_Release(&vb);
1473     return result;
1474 }
1475 
1476 static PyObject *
bytes_repeat(PyBytesObject * a,Py_ssize_t n)1477 bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1478 {
1479     Py_ssize_t i;
1480     Py_ssize_t j;
1481     Py_ssize_t size;
1482     PyBytesObject *op;
1483     size_t nbytes;
1484     if (n < 0)
1485         n = 0;
1486     /* watch out for overflows:  the size can overflow int,
1487      * and the # of bytes needed can overflow size_t
1488      */
1489     if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1490         PyErr_SetString(PyExc_OverflowError,
1491             "repeated bytes are too long");
1492         return NULL;
1493     }
1494     size = Py_SIZE(a) * n;
1495     if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1496         Py_INCREF(a);
1497         return (PyObject *)a;
1498     }
1499     nbytes = (size_t)size;
1500     if (nbytes + PyBytesObject_SIZE <= nbytes) {
1501         PyErr_SetString(PyExc_OverflowError,
1502             "repeated bytes are too long");
1503         return NULL;
1504     }
1505     op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1506     if (op == NULL)
1507         return PyErr_NoMemory();
1508     (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1509     op->ob_shash = -1;
1510     op->ob_sval[size] = '\0';
1511     if (Py_SIZE(a) == 1 && n > 0) {
1512         memset(op->ob_sval, a->ob_sval[0] , n);
1513         return (PyObject *) op;
1514     }
1515     i = 0;
1516     if (i < size) {
1517         memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1518         i = Py_SIZE(a);
1519     }
1520     while (i < size) {
1521         j = (i <= size-i)  ?  i  :  size-i;
1522         memcpy(op->ob_sval+i, op->ob_sval, j);
1523         i += j;
1524     }
1525     return (PyObject *) op;
1526 }
1527 
1528 static int
bytes_contains(PyObject * self,PyObject * arg)1529 bytes_contains(PyObject *self, PyObject *arg)
1530 {
1531     return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1532 }
1533 
1534 static PyObject *
bytes_item(PyBytesObject * a,Py_ssize_t i)1535 bytes_item(PyBytesObject *a, Py_ssize_t i)
1536 {
1537     if (i < 0 || i >= Py_SIZE(a)) {
1538         PyErr_SetString(PyExc_IndexError, "index out of range");
1539         return NULL;
1540     }
1541     return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1542 }
1543 
1544 static int
bytes_compare_eq(PyBytesObject * a,PyBytesObject * b)1545 bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1546 {
1547     int cmp;
1548     Py_ssize_t len;
1549 
1550     len = Py_SIZE(a);
1551     if (Py_SIZE(b) != len)
1552         return 0;
1553 
1554     if (a->ob_sval[0] != b->ob_sval[0])
1555         return 0;
1556 
1557     cmp = memcmp(a->ob_sval, b->ob_sval, len);
1558     return (cmp == 0);
1559 }
1560 
1561 static PyObject*
bytes_richcompare(PyBytesObject * a,PyBytesObject * b,int op)1562 bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1563 {
1564     int c;
1565     Py_ssize_t len_a, len_b;
1566     Py_ssize_t min_len;
1567     int rc;
1568 
1569     /* Make sure both arguments are strings. */
1570     if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1571         PyConfig *config = &_PyInterpreterState_GET_UNSAFE()->config;
1572         if (config->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1573             rc = PyObject_IsInstance((PyObject*)a,
1574                                      (PyObject*)&PyUnicode_Type);
1575             if (!rc)
1576                 rc = PyObject_IsInstance((PyObject*)b,
1577                                          (PyObject*)&PyUnicode_Type);
1578             if (rc < 0)
1579                 return NULL;
1580             if (rc) {
1581                 if (PyErr_WarnEx(PyExc_BytesWarning,
1582                                  "Comparison between bytes and string", 1))
1583                     return NULL;
1584             }
1585             else {
1586                 rc = PyObject_IsInstance((PyObject*)a,
1587                                          (PyObject*)&PyLong_Type);
1588                 if (!rc)
1589                     rc = PyObject_IsInstance((PyObject*)b,
1590                                              (PyObject*)&PyLong_Type);
1591                 if (rc < 0)
1592                     return NULL;
1593                 if (rc) {
1594                     if (PyErr_WarnEx(PyExc_BytesWarning,
1595                                      "Comparison between bytes and int", 1))
1596                         return NULL;
1597                 }
1598             }
1599         }
1600         Py_RETURN_NOTIMPLEMENTED;
1601     }
1602     else if (a == b) {
1603         switch (op) {
1604         case Py_EQ:
1605         case Py_LE:
1606         case Py_GE:
1607             /* a string is equal to itself */
1608             Py_RETURN_TRUE;
1609         case Py_NE:
1610         case Py_LT:
1611         case Py_GT:
1612             Py_RETURN_FALSE;
1613         default:
1614             PyErr_BadArgument();
1615             return NULL;
1616         }
1617     }
1618     else if (op == Py_EQ || op == Py_NE) {
1619         int eq = bytes_compare_eq(a, b);
1620         eq ^= (op == Py_NE);
1621         return PyBool_FromLong(eq);
1622     }
1623     else {
1624         len_a = Py_SIZE(a);
1625         len_b = Py_SIZE(b);
1626         min_len = Py_MIN(len_a, len_b);
1627         if (min_len > 0) {
1628             c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1629             if (c == 0)
1630                 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1631         }
1632         else
1633             c = 0;
1634         if (c != 0)
1635             Py_RETURN_RICHCOMPARE(c, 0, op);
1636         Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1637     }
1638 }
1639 
1640 static Py_hash_t
bytes_hash(PyBytesObject * a)1641 bytes_hash(PyBytesObject *a)
1642 {
1643     if (a->ob_shash == -1) {
1644         /* Can't fail */
1645         a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1646     }
1647     return a->ob_shash;
1648 }
1649 
1650 static PyObject*
bytes_subscript(PyBytesObject * self,PyObject * item)1651 bytes_subscript(PyBytesObject* self, PyObject* item)
1652 {
1653     if (PyIndex_Check(item)) {
1654         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1655         if (i == -1 && PyErr_Occurred())
1656             return NULL;
1657         if (i < 0)
1658             i += PyBytes_GET_SIZE(self);
1659         if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1660             PyErr_SetString(PyExc_IndexError,
1661                             "index out of range");
1662             return NULL;
1663         }
1664         return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1665     }
1666     else if (PySlice_Check(item)) {
1667         Py_ssize_t start, stop, step, slicelength, i;
1668         size_t cur;
1669         char* source_buf;
1670         char* result_buf;
1671         PyObject* result;
1672 
1673         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1674             return NULL;
1675         }
1676         slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1677                                             &stop, step);
1678 
1679         if (slicelength <= 0) {
1680             return PyBytes_FromStringAndSize("", 0);
1681         }
1682         else if (start == 0 && step == 1 &&
1683                  slicelength == PyBytes_GET_SIZE(self) &&
1684                  PyBytes_CheckExact(self)) {
1685             Py_INCREF(self);
1686             return (PyObject *)self;
1687         }
1688         else if (step == 1) {
1689             return PyBytes_FromStringAndSize(
1690                 PyBytes_AS_STRING(self) + start,
1691                 slicelength);
1692         }
1693         else {
1694             source_buf = PyBytes_AS_STRING(self);
1695             result = PyBytes_FromStringAndSize(NULL, slicelength);
1696             if (result == NULL)
1697                 return NULL;
1698 
1699             result_buf = PyBytes_AS_STRING(result);
1700             for (cur = start, i = 0; i < slicelength;
1701                  cur += step, i++) {
1702                 result_buf[i] = source_buf[cur];
1703             }
1704 
1705             return result;
1706         }
1707     }
1708     else {
1709         PyErr_Format(PyExc_TypeError,
1710                      "byte indices must be integers or slices, not %.200s",
1711                      Py_TYPE(item)->tp_name);
1712         return NULL;
1713     }
1714 }
1715 
1716 static int
bytes_buffer_getbuffer(PyBytesObject * self,Py_buffer * view,int flags)1717 bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718 {
1719     return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720                              1, flags);
1721 }
1722 
1723 static PySequenceMethods bytes_as_sequence = {
1724     (lenfunc)bytes_length, /*sq_length*/
1725     (binaryfunc)bytes_concat, /*sq_concat*/
1726     (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1727     (ssizeargfunc)bytes_item, /*sq_item*/
1728     0,                  /*sq_slice*/
1729     0,                  /*sq_ass_item*/
1730     0,                  /*sq_ass_slice*/
1731     (objobjproc)bytes_contains /*sq_contains*/
1732 };
1733 
1734 static PyMappingMethods bytes_as_mapping = {
1735     (lenfunc)bytes_length,
1736     (binaryfunc)bytes_subscript,
1737     0,
1738 };
1739 
1740 static PyBufferProcs bytes_as_buffer = {
1741     (getbufferproc)bytes_buffer_getbuffer,
1742     NULL,
1743 };
1744 
1745 
/* NOTE(review): the code using these three constants lies outside this
   part of the file.  Judging by the names they presumably select which
   end(s) of a bytes object a strip operation trims -- confirm against
   the users before relying on this. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1749 
1750 /*[clinic input]
1751 bytes.split
1752 
1753     sep: object = None
1754         The delimiter according which to split the bytes.
1755         None (the default value) means split on ASCII whitespace characters
1756         (space, tab, return, newline, formfeed, vertical tab).
1757     maxsplit: Py_ssize_t = -1
1758         Maximum number of splits to do.
1759         -1 (the default value) means no limit.
1760 
1761 Return a list of the sections in the bytes, using sep as the delimiter.
1762 [clinic start generated code]*/
1763 
1764 static PyObject *
bytes_split_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1765 bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766 /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767 {
1768     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769     const char *s = PyBytes_AS_STRING(self), *sub;
1770     Py_buffer vsub;
1771     PyObject *list;
1772 
1773     if (maxsplit < 0)
1774         maxsplit = PY_SSIZE_T_MAX;
1775     if (sep == Py_None)
1776         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778         return NULL;
1779     sub = vsub.buf;
1780     n = vsub.len;
1781 
1782     list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783     PyBuffer_Release(&vsub);
1784     return list;
1785 }
1786 
1787 /*[clinic input]
1788 bytes.partition
1789 
1790     sep: Py_buffer
1791     /
1792 
1793 Partition the bytes into three parts using the given separator.
1794 
1795 This will search for the separator sep in the bytes. If the separator is found,
1796 returns a 3-tuple containing the part before the separator, the separator
1797 itself, and the part after it.
1798 
1799 If the separator is not found, returns a 3-tuple containing the original bytes
1800 object and two empty bytes objects.
1801 [clinic start generated code]*/
1802 
1803 static PyObject *
bytes_partition_impl(PyBytesObject * self,Py_buffer * sep)1804 bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805 /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806 {
1807     return stringlib_partition(
1808         (PyObject*) self,
1809         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810         sep->obj, (const char *)sep->buf, sep->len
1811         );
1812 }
1813 
1814 /*[clinic input]
1815 bytes.rpartition
1816 
1817     sep: Py_buffer
1818     /
1819 
1820 Partition the bytes into three parts using the given separator.
1821 
1822 This will search for the separator sep in the bytes, starting at the end. If
1823 the separator is found, returns a 3-tuple containing the part before the
1824 separator, the separator itself, and the part after it.
1825 
1826 If the separator is not found, returns a 3-tuple containing two empty bytes
1827 objects and the original bytes object.
1828 [clinic start generated code]*/
1829 
1830 static PyObject *
bytes_rpartition_impl(PyBytesObject * self,Py_buffer * sep)1831 bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832 /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1833 {
1834     return stringlib_rpartition(
1835         (PyObject*) self,
1836         PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837         sep->obj, (const char *)sep->buf, sep->len
1838         );
1839 }
1840 
1841 /*[clinic input]
1842 bytes.rsplit = bytes.split
1843 
1844 Return a list of the sections in the bytes, using sep as the delimiter.
1845 
1846 Splitting is done starting at the end of the bytes and working to the front.
1847 [clinic start generated code]*/
1848 
1849 static PyObject *
bytes_rsplit_impl(PyBytesObject * self,PyObject * sep,Py_ssize_t maxsplit)1850 bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851 /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852 {
1853     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854     const char *s = PyBytes_AS_STRING(self), *sub;
1855     Py_buffer vsub;
1856     PyObject *list;
1857 
1858     if (maxsplit < 0)
1859         maxsplit = PY_SSIZE_T_MAX;
1860     if (sep == Py_None)
1861         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862     if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863         return NULL;
1864     sub = vsub.buf;
1865     n = vsub.len;
1866 
1867     list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868     PyBuffer_Release(&vsub);
1869     return list;
1870 }
1871 
1872 
1873 /*[clinic input]
1874 bytes.join
1875 
1876     iterable_of_bytes: object
1877     /
1878 
1879 Concatenate any number of bytes objects.
1880 
1881 The bytes whose method is called is inserted in between each pair.
1882 
1883 The result is returned as a new bytes object.
1884 
1885 Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886 [clinic start generated code]*/
1887 
1888 static PyObject *
bytes_join(PyBytesObject * self,PyObject * iterable_of_bytes)1889 bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890 /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891 {
1892     return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893 }
1894 
1895 PyObject *
_PyBytes_Join(PyObject * sep,PyObject * x)1896 _PyBytes_Join(PyObject *sep, PyObject *x)
1897 {
1898     assert(sep != NULL && PyBytes_Check(sep));
1899     assert(x != NULL);
1900     return bytes_join((PyBytesObject*)sep, x);
1901 }
1902 
1903 static PyObject *
bytes_find(PyBytesObject * self,PyObject * args)1904 bytes_find(PyBytesObject *self, PyObject *args)
1905 {
1906     return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907 }
1908 
1909 static PyObject *
bytes_index(PyBytesObject * self,PyObject * args)1910 bytes_index(PyBytesObject *self, PyObject *args)
1911 {
1912     return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913 }
1914 
1915 
1916 static PyObject *
bytes_rfind(PyBytesObject * self,PyObject * args)1917 bytes_rfind(PyBytesObject *self, PyObject *args)
1918 {
1919     return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920 }
1921 
1922 
1923 static PyObject *
bytes_rindex(PyBytesObject * self,PyObject * args)1924 bytes_rindex(PyBytesObject *self, PyObject *args)
1925 {
1926     return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927 }
1928 
1929 
1930 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyBytesObject * self,int striptype,PyObject * sepobj)1931 do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1932 {
1933     Py_buffer vsep;
1934     char *s = PyBytes_AS_STRING(self);
1935     Py_ssize_t len = PyBytes_GET_SIZE(self);
1936     char *sep;
1937     Py_ssize_t seplen;
1938     Py_ssize_t i, j;
1939 
1940     if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1941         return NULL;
1942     sep = vsep.buf;
1943     seplen = vsep.len;
1944 
1945     i = 0;
1946     if (striptype != RIGHTSTRIP) {
1947         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948             i++;
1949         }
1950     }
1951 
1952     j = len;
1953     if (striptype != LEFTSTRIP) {
1954         do {
1955             j--;
1956         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957         j++;
1958     }
1959 
1960     PyBuffer_Release(&vsep);
1961 
1962     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963         Py_INCREF(self);
1964         return (PyObject*)self;
1965     }
1966     else
1967         return PyBytes_FromStringAndSize(s+i, j-i);
1968 }
1969 
1970 
1971 Py_LOCAL_INLINE(PyObject *)
do_strip(PyBytesObject * self,int striptype)1972 do_strip(PyBytesObject *self, int striptype)
1973 {
1974     char *s = PyBytes_AS_STRING(self);
1975     Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1976 
1977     i = 0;
1978     if (striptype != RIGHTSTRIP) {
1979         while (i < len && Py_ISSPACE(s[i])) {
1980             i++;
1981         }
1982     }
1983 
1984     j = len;
1985     if (striptype != LEFTSTRIP) {
1986         do {
1987             j--;
1988         } while (j >= i && Py_ISSPACE(s[j]));
1989         j++;
1990     }
1991 
1992     if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993         Py_INCREF(self);
1994         return (PyObject*)self;
1995     }
1996     else
1997         return PyBytes_FromStringAndSize(s+i, j-i);
1998 }
1999 
2000 
2001 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyBytesObject * self,int striptype,PyObject * bytes)2002 do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2003 {
2004     if (bytes != Py_None) {
2005         return do_xstrip(self, striptype, bytes);
2006     }
2007     return do_strip(self, striptype);
2008 }
2009 
2010 /*[clinic input]
2011 bytes.strip
2012 
2013     bytes: object = None
2014     /
2015 
2016 Strip leading and trailing bytes contained in the argument.
2017 
2018 If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019 [clinic start generated code]*/
2020 
2021 static PyObject *
bytes_strip_impl(PyBytesObject * self,PyObject * bytes)2022 bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
2023 /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
2024 {
2025     return do_argstrip(self, BOTHSTRIP, bytes);
2026 }
2027 
2028 /*[clinic input]
2029 bytes.lstrip
2030 
2031     bytes: object = None
2032     /
2033 
2034 Strip leading bytes contained in the argument.
2035 
2036 If the argument is omitted or None, strip leading  ASCII whitespace.
2037 [clinic start generated code]*/
2038 
2039 static PyObject *
bytes_lstrip_impl(PyBytesObject * self,PyObject * bytes)2040 bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2041 /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2042 {
2043     return do_argstrip(self, LEFTSTRIP, bytes);
2044 }
2045 
2046 /*[clinic input]
2047 bytes.rstrip
2048 
2049     bytes: object = None
2050     /
2051 
2052 Strip trailing bytes contained in the argument.
2053 
2054 If the argument is omitted or None, strip trailing ASCII whitespace.
2055 [clinic start generated code]*/
2056 
2057 static PyObject *
bytes_rstrip_impl(PyBytesObject * self,PyObject * bytes)2058 bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2059 /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2060 {
2061     return do_argstrip(self, RIGHTSTRIP, bytes);
2062 }
2063 
2064 
2065 static PyObject *
bytes_count(PyBytesObject * self,PyObject * args)2066 bytes_count(PyBytesObject *self, PyObject *args)
2067 {
2068     return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2069 }
2070 
2071 
2072 /*[clinic input]
2073 bytes.translate
2074 
2075     table: object
2076         Translation table, which must be a bytes object of length 256.
2077     /
2078     delete as deletechars: object(c_default="NULL") = b''
2079 
2080 Return a copy with each character mapped by the given translation table.
2081 
2082 All characters occurring in the optional argument delete are removed.
2083 The remaining characters are mapped through the given translation table.
2084 [clinic start generated code]*/
2085 
2086 static PyObject *
bytes_translate_impl(PyBytesObject * self,PyObject * table,PyObject * deletechars)2087 bytes_translate_impl(PyBytesObject *self, PyObject *table,
2088                      PyObject *deletechars)
2089 /*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2090 {
2091     char *input, *output;
2092     Py_buffer table_view = {NULL, NULL};
2093     Py_buffer del_table_view = {NULL, NULL};
2094     const char *table_chars;
2095     Py_ssize_t i, c, changed = 0;
2096     PyObject *input_obj = (PyObject*)self;
2097     const char *output_start, *del_table_chars=NULL;
2098     Py_ssize_t inlen, tablen, dellen = 0;
2099     PyObject *result;
2100     int trans_table[256];
2101 
2102     if (PyBytes_Check(table)) {
2103         table_chars = PyBytes_AS_STRING(table);
2104         tablen = PyBytes_GET_SIZE(table);
2105     }
2106     else if (table == Py_None) {
2107         table_chars = NULL;
2108         tablen = 256;
2109     }
2110     else {
2111         if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2112             return NULL;
2113         table_chars = table_view.buf;
2114         tablen = table_view.len;
2115     }
2116 
2117     if (tablen != 256) {
2118         PyErr_SetString(PyExc_ValueError,
2119           "translation table must be 256 characters long");
2120         PyBuffer_Release(&table_view);
2121         return NULL;
2122     }
2123 
2124     if (deletechars != NULL) {
2125         if (PyBytes_Check(deletechars)) {
2126             del_table_chars = PyBytes_AS_STRING(deletechars);
2127             dellen = PyBytes_GET_SIZE(deletechars);
2128         }
2129         else {
2130             if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2131                 PyBuffer_Release(&table_view);
2132                 return NULL;
2133             }
2134             del_table_chars = del_table_view.buf;
2135             dellen = del_table_view.len;
2136         }
2137     }
2138     else {
2139         del_table_chars = NULL;
2140         dellen = 0;
2141     }
2142 
2143     inlen = PyBytes_GET_SIZE(input_obj);
2144     result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2145     if (result == NULL) {
2146         PyBuffer_Release(&del_table_view);
2147         PyBuffer_Release(&table_view);
2148         return NULL;
2149     }
2150     output_start = output = PyBytes_AS_STRING(result);
2151     input = PyBytes_AS_STRING(input_obj);
2152 
2153     if (dellen == 0 && table_chars != NULL) {
2154         /* If no deletions are required, use faster code */
2155         for (i = inlen; --i >= 0; ) {
2156             c = Py_CHARMASK(*input++);
2157             if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2158                 changed = 1;
2159         }
2160         if (!changed && PyBytes_CheckExact(input_obj)) {
2161             Py_INCREF(input_obj);
2162             Py_DECREF(result);
2163             result = input_obj;
2164         }
2165         PyBuffer_Release(&del_table_view);
2166         PyBuffer_Release(&table_view);
2167         return result;
2168     }
2169 
2170     if (table_chars == NULL) {
2171         for (i = 0; i < 256; i++)
2172             trans_table[i] = Py_CHARMASK(i);
2173     } else {
2174         for (i = 0; i < 256; i++)
2175             trans_table[i] = Py_CHARMASK(table_chars[i]);
2176     }
2177     PyBuffer_Release(&table_view);
2178 
2179     for (i = 0; i < dellen; i++)
2180         trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2181     PyBuffer_Release(&del_table_view);
2182 
2183     for (i = inlen; --i >= 0; ) {
2184         c = Py_CHARMASK(*input++);
2185         if (trans_table[c] != -1)
2186             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187                 continue;
2188         changed = 1;
2189     }
2190     if (!changed && PyBytes_CheckExact(input_obj)) {
2191         Py_DECREF(result);
2192         Py_INCREF(input_obj);
2193         return input_obj;
2194     }
2195     /* Fix the size of the resulting string */
2196     if (inlen > 0)
2197         _PyBytes_Resize(&result, output - output_start);
2198     return result;
2199 }
2200 
2201 
2202 /*[clinic input]
2203 
2204 @staticmethod
2205 bytes.maketrans
2206 
2207     frm: Py_buffer
2208     to: Py_buffer
2209     /
2210 
2211 Return a translation table useable for the bytes or bytearray translate method.
2212 
2213 The returned table will be one where each byte in frm is mapped to the byte at
2214 the same position in to.
2215 
2216 The bytes objects frm and to must be of the same length.
2217 [clinic start generated code]*/
2218 
2219 static PyObject *
bytes_maketrans_impl(Py_buffer * frm,Py_buffer * to)2220 bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2221 /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2222 {
2223     return _Py_bytes_maketrans(frm, to);
2224 }
2225 
2226 
2227 /*[clinic input]
2228 bytes.replace
2229 
2230     old: Py_buffer
2231     new: Py_buffer
2232     count: Py_ssize_t = -1
2233         Maximum number of occurrences to replace.
2234         -1 (the default value) means replace all occurrences.
2235     /
2236 
2237 Return a copy with all occurrences of substring old replaced by new.
2238 
2239 If the optional argument count is given, only the first count occurrences are
2240 replaced.
2241 [clinic start generated code]*/
2242 
2243 static PyObject *
bytes_replace_impl(PyBytesObject * self,Py_buffer * old,Py_buffer * new,Py_ssize_t count)2244 bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2245                    Py_ssize_t count)
2246 /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2247 {
2248     return stringlib_replace((PyObject *)self,
2249                              (const char *)old->buf, old->len,
2250                              (const char *)new->buf, new->len, count);
2251 }
2252 
2253 /** End DALKE **/
2254 
2255 
2256 static PyObject *
bytes_startswith(PyBytesObject * self,PyObject * args)2257 bytes_startswith(PyBytesObject *self, PyObject *args)
2258 {
2259     return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2260 }
2261 
2262 static PyObject *
bytes_endswith(PyBytesObject * self,PyObject * args)2263 bytes_endswith(PyBytesObject *self, PyObject *args)
2264 {
2265     return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2266 }
2267 
2268 
2269 /*[clinic input]
2270 bytes.decode
2271 
2272     encoding: str(c_default="NULL") = 'utf-8'
2273         The encoding with which to decode the bytes.
2274     errors: str(c_default="NULL") = 'strict'
2275         The error handling scheme to use for the handling of decoding errors.
2276         The default is 'strict' meaning that decoding errors raise a
2277         UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278         as well as any other name registered with codecs.register_error that
2279         can handle UnicodeDecodeErrors.
2280 
2281 Decode the bytes using the codec registered for encoding.
2282 [clinic start generated code]*/
2283 
2284 static PyObject *
bytes_decode_impl(PyBytesObject * self,const char * encoding,const char * errors)2285 bytes_decode_impl(PyBytesObject *self, const char *encoding,
2286                   const char *errors)
2287 /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2288 {
2289     return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2290 }
2291 
2292 
2293 /*[clinic input]
2294 bytes.splitlines
2295 
2296     keepends: bool(accept={int}) = False
2297 
2298 Return a list of the lines in the bytes, breaking at line boundaries.
2299 
2300 Line breaks are not included in the resulting list unless keepends is given and
2301 true.
2302 [clinic start generated code]*/
2303 
2304 static PyObject *
bytes_splitlines_impl(PyBytesObject * self,int keepends)2305 bytes_splitlines_impl(PyBytesObject *self, int keepends)
2306 /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2307 {
2308     return stringlib_splitlines(
2309         (PyObject*) self, PyBytes_AS_STRING(self),
2310         PyBytes_GET_SIZE(self), keepends
2311         );
2312 }
2313 
2314 /*[clinic input]
2315 @classmethod
2316 bytes.fromhex
2317 
2318     string: unicode
2319     /
2320 
2321 Create a bytes object from a string of hexadecimal numbers.
2322 
2323 Spaces between two numbers are accepted.
2324 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325 [clinic start generated code]*/
2326 
2327 static PyObject *
bytes_fromhex_impl(PyTypeObject * type,PyObject * string)2328 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330 {
2331     PyObject *result = _PyBytes_FromHex(string, 0);
2332     if (type != &PyBytes_Type && result != NULL) {
2333         Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334                                                        result, NULL));
2335     }
2336     return result;
2337 }
2338 
2339 PyObject*
_PyBytes_FromHex(PyObject * string,int use_bytearray)2340 _PyBytes_FromHex(PyObject *string, int use_bytearray)
2341 {
2342     char *buf;
2343     Py_ssize_t hexlen, invalid_char;
2344     unsigned int top, bot;
2345     Py_UCS1 *str, *end;
2346     _PyBytesWriter writer;
2347 
2348     _PyBytesWriter_Init(&writer);
2349     writer.use_bytearray = use_bytearray;
2350 
2351     assert(PyUnicode_Check(string));
2352     if (PyUnicode_READY(string))
2353         return NULL;
2354     hexlen = PyUnicode_GET_LENGTH(string);
2355 
2356     if (!PyUnicode_IS_ASCII(string)) {
2357         void *data = PyUnicode_DATA(string);
2358         unsigned int kind = PyUnicode_KIND(string);
2359         Py_ssize_t i;
2360 
2361         /* search for the first non-ASCII character */
2362         for (i = 0; i < hexlen; i++) {
2363             if (PyUnicode_READ(kind, data, i) >= 128)
2364                 break;
2365         }
2366         invalid_char = i;
2367         goto error;
2368     }
2369 
2370     assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371     str = PyUnicode_1BYTE_DATA(string);
2372 
2373     /* This overestimates if there are spaces */
2374     buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375     if (buf == NULL)
2376         return NULL;
2377 
2378     end = str + hexlen;
2379     while (str < end) {
2380         /* skip over spaces in the input */
2381         if (Py_ISSPACE(*str)) {
2382             do {
2383                 str++;
2384             } while (Py_ISSPACE(*str));
2385             if (str >= end)
2386                 break;
2387         }
2388 
2389         top = _PyLong_DigitValue[*str];
2390         if (top >= 16) {
2391             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2392             goto error;
2393         }
2394         str++;
2395 
2396         bot = _PyLong_DigitValue[*str];
2397         if (bot >= 16) {
2398             invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399             goto error;
2400         }
2401         str++;
2402 
2403         *buf++ = (unsigned char)((top << 4) + bot);
2404     }
2405 
2406     return _PyBytesWriter_Finish(&writer, buf);
2407 
2408   error:
2409     PyErr_Format(PyExc_ValueError,
2410                  "non-hexadecimal number found in "
2411                  "fromhex() arg at position %zd", invalid_char);
2412     _PyBytesWriter_Dealloc(&writer);
2413     return NULL;
2414 }
2415 
2416 /*[clinic input]
2417 bytes.hex
2418 
2419     sep: object = NULL
2420         An optional single character or byte to separate hex bytes.
2421     bytes_per_sep: int = 1
2422         How many bytes between separators.  Positive values count from the
2423         right, negative values count from the left.
2424 
2425 Create a str of hexadecimal numbers from a bytes object.
2426 
2427 Example:
2428 >>> value = b'\xb9\x01\xef'
2429 >>> value.hex()
2430 'b901ef'
2431 >>> value.hex(':')
2432 'b9:01:ef'
2433 >>> value.hex(':', 2)
2434 'b9:01ef'
2435 >>> value.hex(':', -2)
2436 'b901:ef'
2437 [clinic start generated code]*/
2438 
2439 static PyObject *
bytes_hex_impl(PyBytesObject * self,PyObject * sep,int bytes_per_sep)2440 bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2441 /*[clinic end generated code: output=1f134da504064139 input=f1238d3455990218]*/
2442 {
2443     char* argbuf = PyBytes_AS_STRING(self);
2444     Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2445     return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2446 }
2447 
2448 static PyObject *
bytes_getnewargs(PyBytesObject * v,PyObject * Py_UNUSED (ignored))2449 bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2450 {
2451     return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2452 }
2453 
2454 
2455 static PyMethodDef
2456 bytes_methods[] = {
2457     {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
2458     {"capitalize", stringlib_capitalize, METH_NOARGS,
2459      _Py_capitalize__doc__},
2460     STRINGLIB_CENTER_METHODDEF
2461     {"count", (PyCFunction)bytes_count, METH_VARARGS,
2462      _Py_count__doc__},
2463     BYTES_DECODE_METHODDEF
2464     {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2465      _Py_endswith__doc__},
2466     STRINGLIB_EXPANDTABS_METHODDEF
2467     {"find", (PyCFunction)bytes_find, METH_VARARGS,
2468      _Py_find__doc__},
2469     BYTES_FROMHEX_METHODDEF
2470     BYTES_HEX_METHODDEF
2471     {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2472     {"isalnum", stringlib_isalnum, METH_NOARGS,
2473      _Py_isalnum__doc__},
2474     {"isalpha", stringlib_isalpha, METH_NOARGS,
2475      _Py_isalpha__doc__},
2476     {"isascii", stringlib_isascii, METH_NOARGS,
2477      _Py_isascii__doc__},
2478     {"isdigit", stringlib_isdigit, METH_NOARGS,
2479      _Py_isdigit__doc__},
2480     {"islower", stringlib_islower, METH_NOARGS,
2481      _Py_islower__doc__},
2482     {"isspace", stringlib_isspace, METH_NOARGS,
2483      _Py_isspace__doc__},
2484     {"istitle", stringlib_istitle, METH_NOARGS,
2485      _Py_istitle__doc__},
2486     {"isupper", stringlib_isupper, METH_NOARGS,
2487      _Py_isupper__doc__},
2488     BYTES_JOIN_METHODDEF
2489     STRINGLIB_LJUST_METHODDEF
2490     {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2491     BYTES_LSTRIP_METHODDEF
2492     BYTES_MAKETRANS_METHODDEF
2493     BYTES_PARTITION_METHODDEF
2494     BYTES_REPLACE_METHODDEF
2495     {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2496     {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2497     STRINGLIB_RJUST_METHODDEF
2498     BYTES_RPARTITION_METHODDEF
2499     BYTES_RSPLIT_METHODDEF
2500     BYTES_RSTRIP_METHODDEF
2501     BYTES_SPLIT_METHODDEF
2502     BYTES_SPLITLINES_METHODDEF
2503     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2504      _Py_startswith__doc__},
2505     BYTES_STRIP_METHODDEF
2506     {"swapcase", stringlib_swapcase, METH_NOARGS,
2507      _Py_swapcase__doc__},
2508     {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2509     BYTES_TRANSLATE_METHODDEF
2510     {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2511     STRINGLIB_ZFILL_METHODDEF
2512     {NULL,     NULL}                         /* sentinel */
2513 };
2514 
2515 static PyObject *
bytes_mod(PyObject * self,PyObject * arg)2516 bytes_mod(PyObject *self, PyObject *arg)
2517 {
2518     if (!PyBytes_Check(self)) {
2519         Py_RETURN_NOTIMPLEMENTED;
2520     }
2521     return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2522                              arg, 0);
2523 }
2524 
2525 static PyNumberMethods bytes_as_number = {
2526     0,              /*nb_add*/
2527     0,              /*nb_subtract*/
2528     0,              /*nb_multiply*/
2529     bytes_mod,      /*nb_remainder*/
2530 };
2531 
2532 static PyObject *
2533 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2534 
2535 static PyObject *
bytes_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2536 bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2537 {
2538     PyObject *x = NULL;
2539     const char *encoding = NULL;
2540     const char *errors = NULL;
2541     PyObject *new = NULL;
2542     PyObject *func;
2543     Py_ssize_t size;
2544     static char *kwlist[] = {"source", "encoding", "errors", 0};
2545     _Py_IDENTIFIER(__bytes__);
2546 
2547     if (type != &PyBytes_Type)
2548         return bytes_subtype_new(type, args, kwds);
2549     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2550                                      &encoding, &errors))
2551         return NULL;
2552     if (x == NULL) {
2553         if (encoding != NULL || errors != NULL) {
2554             PyErr_SetString(PyExc_TypeError,
2555                             encoding != NULL ?
2556                             "encoding without a string argument" :
2557                             "errors without a string argument");
2558             return NULL;
2559         }
2560         return PyBytes_FromStringAndSize(NULL, 0);
2561     }
2562 
2563     if (encoding != NULL) {
2564         /* Encode via the codec registry */
2565         if (!PyUnicode_Check(x)) {
2566             PyErr_SetString(PyExc_TypeError,
2567                             "encoding without a string argument");
2568             return NULL;
2569         }
2570         new = PyUnicode_AsEncodedString(x, encoding, errors);
2571         if (new == NULL)
2572             return NULL;
2573         assert(PyBytes_Check(new));
2574         return new;
2575     }
2576 
2577     if (errors != NULL) {
2578         PyErr_SetString(PyExc_TypeError,
2579                         PyUnicode_Check(x) ?
2580                         "string argument without an encoding" :
2581                         "errors without a string argument");
2582         return NULL;
2583     }
2584 
2585     /* We'd like to call PyObject_Bytes here, but we need to check for an
2586        integer argument before deferring to PyBytes_FromObject, something
2587        PyObject_Bytes doesn't do. */
2588     func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2589     if (func != NULL) {
2590         new = _PyObject_CallNoArg(func);
2591         Py_DECREF(func);
2592         if (new == NULL)
2593             return NULL;
2594         if (!PyBytes_Check(new)) {
2595             PyErr_Format(PyExc_TypeError,
2596                          "__bytes__ returned non-bytes (type %.200s)",
2597                          Py_TYPE(new)->tp_name);
2598             Py_DECREF(new);
2599             return NULL;
2600         }
2601         return new;
2602     }
2603     else if (PyErr_Occurred())
2604         return NULL;
2605 
2606     if (PyUnicode_Check(x)) {
2607         PyErr_SetString(PyExc_TypeError,
2608                         "string argument without an encoding");
2609         return NULL;
2610     }
2611     /* Is it an integer? */
2612     if (PyIndex_Check(x)) {
2613         size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2614         if (size == -1 && PyErr_Occurred()) {
2615             if (!PyErr_ExceptionMatches(PyExc_TypeError))
2616                 return NULL;
2617             PyErr_Clear();  /* fall through */
2618         }
2619         else {
2620             if (size < 0) {
2621                 PyErr_SetString(PyExc_ValueError, "negative count");
2622                 return NULL;
2623             }
2624             new = _PyBytes_FromSize(size, 1);
2625             if (new == NULL)
2626                 return NULL;
2627             return new;
2628         }
2629     }
2630 
2631     return PyBytes_FromObject(x);
2632 }
2633 
2634 static PyObject*
_PyBytes_FromBuffer(PyObject * x)2635 _PyBytes_FromBuffer(PyObject *x)
2636 {
2637     PyObject *new;
2638     Py_buffer view;
2639 
2640     if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2641         return NULL;
2642 
2643     new = PyBytes_FromStringAndSize(NULL, view.len);
2644     if (!new)
2645         goto fail;
2646     if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2647                 &view, view.len, 'C') < 0)
2648         goto fail;
2649     PyBuffer_Release(&view);
2650     return new;
2651 
2652 fail:
2653     Py_XDECREF(new);
2654     PyBuffer_Release(&view);
2655     return NULL;
2656 }
2657 
2658 static PyObject*
_PyBytes_FromList(PyObject * x)2659 _PyBytes_FromList(PyObject *x)
2660 {
2661     Py_ssize_t i, size = PyList_GET_SIZE(x);
2662     Py_ssize_t value;
2663     char *str;
2664     PyObject *item;
2665     _PyBytesWriter writer;
2666 
2667     _PyBytesWriter_Init(&writer);
2668     str = _PyBytesWriter_Alloc(&writer, size);
2669     if (str == NULL)
2670         return NULL;
2671     writer.overallocate = 1;
2672     size = writer.allocated;
2673 
2674     for (i = 0; i < PyList_GET_SIZE(x); i++) {
2675         item = PyList_GET_ITEM(x, i);
2676         Py_INCREF(item);
2677         value = PyNumber_AsSsize_t(item, NULL);
2678         Py_DECREF(item);
2679         if (value == -1 && PyErr_Occurred())
2680             goto error;
2681 
2682         if (value < 0 || value >= 256) {
2683             PyErr_SetString(PyExc_ValueError,
2684                             "bytes must be in range(0, 256)");
2685             goto error;
2686         }
2687 
2688         if (i >= size) {
2689             str = _PyBytesWriter_Resize(&writer, str, size+1);
2690             if (str == NULL)
2691                 return NULL;
2692             size = writer.allocated;
2693         }
2694         *str++ = (char) value;
2695     }
2696     return _PyBytesWriter_Finish(&writer, str);
2697 
2698   error:
2699     _PyBytesWriter_Dealloc(&writer);
2700     return NULL;
2701 }
2702 
2703 static PyObject*
_PyBytes_FromTuple(PyObject * x)2704 _PyBytes_FromTuple(PyObject *x)
2705 {
2706     PyObject *bytes;
2707     Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2708     Py_ssize_t value;
2709     char *str;
2710     PyObject *item;
2711 
2712     bytes = PyBytes_FromStringAndSize(NULL, size);
2713     if (bytes == NULL)
2714         return NULL;
2715     str = ((PyBytesObject *)bytes)->ob_sval;
2716 
2717     for (i = 0; i < size; i++) {
2718         item = PyTuple_GET_ITEM(x, i);
2719         value = PyNumber_AsSsize_t(item, NULL);
2720         if (value == -1 && PyErr_Occurred())
2721             goto error;
2722 
2723         if (value < 0 || value >= 256) {
2724             PyErr_SetString(PyExc_ValueError,
2725                             "bytes must be in range(0, 256)");
2726             goto error;
2727         }
2728         *str++ = (char) value;
2729     }
2730     return bytes;
2731 
2732   error:
2733     Py_DECREF(bytes);
2734     return NULL;
2735 }
2736 
2737 static PyObject *
_PyBytes_FromIterator(PyObject * it,PyObject * x)2738 _PyBytes_FromIterator(PyObject *it, PyObject *x)
2739 {
2740     char *str;
2741     Py_ssize_t i, size;
2742     _PyBytesWriter writer;
2743 
2744     /* For iterator version, create a string object and resize as needed */
2745     size = PyObject_LengthHint(x, 64);
2746     if (size == -1 && PyErr_Occurred())
2747         return NULL;
2748 
2749     _PyBytesWriter_Init(&writer);
2750     str = _PyBytesWriter_Alloc(&writer, size);
2751     if (str == NULL)
2752         return NULL;
2753     writer.overallocate = 1;
2754     size = writer.allocated;
2755 
2756     /* Run the iterator to exhaustion */
2757     for (i = 0; ; i++) {
2758         PyObject *item;
2759         Py_ssize_t value;
2760 
2761         /* Get the next item */
2762         item = PyIter_Next(it);
2763         if (item == NULL) {
2764             if (PyErr_Occurred())
2765                 goto error;
2766             break;
2767         }
2768 
2769         /* Interpret it as an int (__index__) */
2770         value = PyNumber_AsSsize_t(item, NULL);
2771         Py_DECREF(item);
2772         if (value == -1 && PyErr_Occurred())
2773             goto error;
2774 
2775         /* Range check */
2776         if (value < 0 || value >= 256) {
2777             PyErr_SetString(PyExc_ValueError,
2778                             "bytes must be in range(0, 256)");
2779             goto error;
2780         }
2781 
2782         /* Append the byte */
2783         if (i >= size) {
2784             str = _PyBytesWriter_Resize(&writer, str, size+1);
2785             if (str == NULL)
2786                 return NULL;
2787             size = writer.allocated;
2788         }
2789         *str++ = (char) value;
2790     }
2791 
2792     return _PyBytesWriter_Finish(&writer, str);
2793 
2794   error:
2795     _PyBytesWriter_Dealloc(&writer);
2796     return NULL;
2797 }
2798 
2799 PyObject *
PyBytes_FromObject(PyObject * x)2800 PyBytes_FromObject(PyObject *x)
2801 {
2802     PyObject *it, *result;
2803 
2804     if (x == NULL) {
2805         PyErr_BadInternalCall();
2806         return NULL;
2807     }
2808 
2809     if (PyBytes_CheckExact(x)) {
2810         Py_INCREF(x);
2811         return x;
2812     }
2813 
2814     /* Use the modern buffer interface */
2815     if (PyObject_CheckBuffer(x))
2816         return _PyBytes_FromBuffer(x);
2817 
2818     if (PyList_CheckExact(x))
2819         return _PyBytes_FromList(x);
2820 
2821     if (PyTuple_CheckExact(x))
2822         return _PyBytes_FromTuple(x);
2823 
2824     if (!PyUnicode_Check(x)) {
2825         it = PyObject_GetIter(x);
2826         if (it != NULL) {
2827             result = _PyBytes_FromIterator(it, x);
2828             Py_DECREF(it);
2829             return result;
2830         }
2831         if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2832             return NULL;
2833         }
2834     }
2835 
2836     PyErr_Format(PyExc_TypeError,
2837                  "cannot convert '%.200s' object to bytes",
2838                  x->ob_type->tp_name);
2839     return NULL;
2840 }
2841 
2842 static PyObject *
bytes_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2843 bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2844 {
2845     PyObject *tmp, *pnew;
2846     Py_ssize_t n;
2847 
2848     assert(PyType_IsSubtype(type, &PyBytes_Type));
2849     tmp = bytes_new(&PyBytes_Type, args, kwds);
2850     if (tmp == NULL)
2851         return NULL;
2852     assert(PyBytes_Check(tmp));
2853     n = PyBytes_GET_SIZE(tmp);
2854     pnew = type->tp_alloc(type, n);
2855     if (pnew != NULL) {
2856         memcpy(PyBytes_AS_STRING(pnew),
2857                   PyBytes_AS_STRING(tmp), n+1);
2858         ((PyBytesObject *)pnew)->ob_shash =
2859             ((PyBytesObject *)tmp)->ob_shash;
2860     }
2861     Py_DECREF(tmp);
2862     return pnew;
2863 }
2864 
/* Docstring of the bytes type: lists the accepted constructor signatures
   and the kinds of argument a bytes object can be built from. */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
2877 
2878 static PyObject *bytes_iter(PyObject *seq);
2879 
2880 PyTypeObject PyBytes_Type = {
2881     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2882     "bytes",
2883     PyBytesObject_SIZE,
2884     sizeof(char),
2885     0,                                          /* tp_dealloc */
2886     0,                                          /* tp_vectorcall_offset */
2887     0,                                          /* tp_getattr */
2888     0,                                          /* tp_setattr */
2889     0,                                          /* tp_as_async */
2890     (reprfunc)bytes_repr,                       /* tp_repr */
2891     &bytes_as_number,                           /* tp_as_number */
2892     &bytes_as_sequence,                         /* tp_as_sequence */
2893     &bytes_as_mapping,                          /* tp_as_mapping */
2894     (hashfunc)bytes_hash,                       /* tp_hash */
2895     0,                                          /* tp_call */
2896     bytes_str,                                  /* tp_str */
2897     PyObject_GenericGetAttr,                    /* tp_getattro */
2898     0,                                          /* tp_setattro */
2899     &bytes_as_buffer,                           /* tp_as_buffer */
2900     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2901         Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
2902     bytes_doc,                                  /* tp_doc */
2903     0,                                          /* tp_traverse */
2904     0,                                          /* tp_clear */
2905     (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
2906     0,                                          /* tp_weaklistoffset */
2907     bytes_iter,                                 /* tp_iter */
2908     0,                                          /* tp_iternext */
2909     bytes_methods,                              /* tp_methods */
2910     0,                                          /* tp_members */
2911     0,                                          /* tp_getset */
2912     &PyBaseObject_Type,                         /* tp_base */
2913     0,                                          /* tp_dict */
2914     0,                                          /* tp_descr_get */
2915     0,                                          /* tp_descr_set */
2916     0,                                          /* tp_dictoffset */
2917     0,                                          /* tp_init */
2918     0,                                          /* tp_alloc */
2919     bytes_new,                                  /* tp_new */
2920     PyObject_Del,                               /* tp_free */
2921 };
2922 
2923 void
PyBytes_Concat(PyObject ** pv,PyObject * w)2924 PyBytes_Concat(PyObject **pv, PyObject *w)
2925 {
2926     assert(pv != NULL);
2927     if (*pv == NULL)
2928         return;
2929     if (w == NULL) {
2930         Py_CLEAR(*pv);
2931         return;
2932     }
2933 
2934     if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2935         /* Only one reference, so we can resize in place */
2936         Py_ssize_t oldsize;
2937         Py_buffer wb;
2938 
2939         if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2940             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2941                          Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2942             Py_CLEAR(*pv);
2943             return;
2944         }
2945 
2946         oldsize = PyBytes_GET_SIZE(*pv);
2947         if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2948             PyErr_NoMemory();
2949             goto error;
2950         }
2951         if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2952             goto error;
2953 
2954         memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2955         PyBuffer_Release(&wb);
2956         return;
2957 
2958       error:
2959         PyBuffer_Release(&wb);
2960         Py_CLEAR(*pv);
2961         return;
2962     }
2963 
2964     else {
2965         /* Multiple references, need to create new object */
2966         PyObject *v;
2967         v = bytes_concat(*pv, w);
2968         Py_SETREF(*pv, v);
2969     }
2970 }
2971 
2972 void
PyBytes_ConcatAndDel(PyObject ** pv,PyObject * w)2973 PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2974 {
2975     PyBytes_Concat(pv, w);
2976     Py_XDECREF(w);
2977 }
2978 
2979 
2980 /* The following function breaks the notion that bytes are immutable:
2981    it changes the size of a bytes object.  We get away with this only if there
2982    is only one module referencing the object.  You can also think of it
2983    as creating a new bytes object and destroying the old one, only
2984    more efficiently.  In any case, don't use this if the bytes object may
2985    already be known to some other part of the code...
2986    Note that if there's not enough memory to resize the bytes object, the
2987    original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2988    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2989    returned, and the value in *pv may or may not be the same as on input.
2990    As always, an extra byte is allocated for a trailing \0 byte (newsize
2991    does *not* include that), and a trailing \0 byte is stored.
2992 */
2993 
2994 int
_PyBytes_Resize(PyObject ** pv,Py_ssize_t newsize)2995 _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2996 {
2997     PyObject *v;
2998     PyBytesObject *sv;
2999     v = *pv;
3000     if (!PyBytes_Check(v) || newsize < 0) {
3001         goto error;
3002     }
3003     if (Py_SIZE(v) == newsize) {
3004         /* return early if newsize equals to v->ob_size */
3005         return 0;
3006     }
3007     if (Py_SIZE(v) == 0) {
3008         if (newsize == 0) {
3009             return 0;
3010         }
3011         *pv = _PyBytes_FromSize(newsize, 0);
3012         Py_DECREF(v);
3013         return (*pv == NULL) ? -1 : 0;
3014     }
3015     if (Py_REFCNT(v) != 1) {
3016         goto error;
3017     }
3018     if (newsize == 0) {
3019         *pv = _PyBytes_FromSize(0, 0);
3020         Py_DECREF(v);
3021         return (*pv == NULL) ? -1 : 0;
3022     }
3023     /* XXX UNREF/NEWREF interface should be more symmetrical */
3024     _Py_DEC_REFTOTAL;
3025     _Py_ForgetReference(v);
3026     *pv = (PyObject *)
3027         PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
3028     if (*pv == NULL) {
3029         PyObject_Del(v);
3030         PyErr_NoMemory();
3031         return -1;
3032     }
3033     _Py_NewReference(*pv);
3034     sv = (PyBytesObject *) *pv;
3035     Py_SIZE(sv) = newsize;
3036     sv->ob_sval[newsize] = '\0';
3037     sv->ob_shash = -1;          /* invalidate cached hash value */
3038     return 0;
3039 error:
3040     *pv = 0;
3041     Py_DECREF(v);
3042     PyErr_BadInternalCall();
3043     return -1;
3044 }
3045 
3046 void
PyBytes_Fini(void)3047 PyBytes_Fini(void)
3048 {
3049     int i;
3050     for (i = 0; i < UCHAR_MAX + 1; i++)
3051         Py_CLEAR(characters[i]);
3052     Py_CLEAR(nullstring);
3053 }
3054 
3055 /*********************** Bytes Iterator ****************************/
3056 
/* State of an iterator over a bytes object. */
typedef struct {
    PyObject_HEAD
    /* Offset into it_seq of the next byte to return. */
    Py_ssize_t it_index;
    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
3062 
3063 static void
striter_dealloc(striterobject * it)3064 striter_dealloc(striterobject *it)
3065 {
3066     _PyObject_GC_UNTRACK(it);
3067     Py_XDECREF(it->it_seq);
3068     PyObject_GC_Del(it);
3069 }
3070 
3071 static int
striter_traverse(striterobject * it,visitproc visit,void * arg)3072 striter_traverse(striterobject *it, visitproc visit, void *arg)
3073 {
3074     Py_VISIT(it->it_seq);
3075     return 0;
3076 }
3077 
3078 static PyObject *
striter_next(striterobject * it)3079 striter_next(striterobject *it)
3080 {
3081     PyBytesObject *seq;
3082     PyObject *item;
3083 
3084     assert(it != NULL);
3085     seq = it->it_seq;
3086     if (seq == NULL)
3087         return NULL;
3088     assert(PyBytes_Check(seq));
3089 
3090     if (it->it_index < PyBytes_GET_SIZE(seq)) {
3091         item = PyLong_FromLong(
3092             (unsigned char)seq->ob_sval[it->it_index]);
3093         if (item != NULL)
3094             ++it->it_index;
3095         return item;
3096     }
3097 
3098     it->it_seq = NULL;
3099     Py_DECREF(seq);
3100     return NULL;
3101 }
3102 
3103 static PyObject *
striter_len(striterobject * it,PyObject * Py_UNUSED (ignored))3104 striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3105 {
3106     Py_ssize_t len = 0;
3107     if (it->it_seq)
3108         len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3109     return PyLong_FromSsize_t(len);
3110 }
3111 
3112 PyDoc_STRVAR(length_hint_doc,
3113              "Private method returning an estimate of len(list(it)).");
3114 
3115 static PyObject *
striter_reduce(striterobject * it,PyObject * Py_UNUSED (ignored))3116 striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3117 {
3118     _Py_IDENTIFIER(iter);
3119     if (it->it_seq != NULL) {
3120         return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
3121                              it->it_seq, it->it_index);
3122     } else {
3123         return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
3124     }
3125 }
3126 
3127 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3128 
3129 static PyObject *
striter_setstate(striterobject * it,PyObject * state)3130 striter_setstate(striterobject *it, PyObject *state)
3131 {
3132     Py_ssize_t index = PyLong_AsSsize_t(state);
3133     if (index == -1 && PyErr_Occurred())
3134         return NULL;
3135     if (it->it_seq != NULL) {
3136         if (index < 0)
3137             index = 0;
3138         else if (index > PyBytes_GET_SIZE(it->it_seq))
3139             index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3140         it->it_index = index;
3141     }
3142     Py_RETURN_NONE;
3143 }
3144 
3145 PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3146 
3147 static PyMethodDef striter_methods[] = {
3148     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3149      length_hint_doc},
3150     {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3151      reduce_doc},
3152     {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3153      setstate_doc},
3154     {NULL,              NULL}           /* sentinel */
3155 };
3156 
3157 PyTypeObject PyBytesIter_Type = {
3158     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3159     "bytes_iterator",                           /* tp_name */
3160     sizeof(striterobject),                      /* tp_basicsize */
3161     0,                                          /* tp_itemsize */
3162     /* methods */
3163     (destructor)striter_dealloc,                /* tp_dealloc */
3164     0,                                          /* tp_vectorcall_offset */
3165     0,                                          /* tp_getattr */
3166     0,                                          /* tp_setattr */
3167     0,                                          /* tp_as_async */
3168     0,                                          /* tp_repr */
3169     0,                                          /* tp_as_number */
3170     0,                                          /* tp_as_sequence */
3171     0,                                          /* tp_as_mapping */
3172     0,                                          /* tp_hash */
3173     0,                                          /* tp_call */
3174     0,                                          /* tp_str */
3175     PyObject_GenericGetAttr,                    /* tp_getattro */
3176     0,                                          /* tp_setattro */
3177     0,                                          /* tp_as_buffer */
3178     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3179     0,                                          /* tp_doc */
3180     (traverseproc)striter_traverse,     /* tp_traverse */
3181     0,                                          /* tp_clear */
3182     0,                                          /* tp_richcompare */
3183     0,                                          /* tp_weaklistoffset */
3184     PyObject_SelfIter,                          /* tp_iter */
3185     (iternextfunc)striter_next,                 /* tp_iternext */
3186     striter_methods,                            /* tp_methods */
3187     0,
3188 };
3189 
3190 static PyObject *
bytes_iter(PyObject * seq)3191 bytes_iter(PyObject *seq)
3192 {
3193     striterobject *it;
3194 
3195     if (!PyBytes_Check(seq)) {
3196         PyErr_BadInternalCall();
3197         return NULL;
3198     }
3199     it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3200     if (it == NULL)
3201         return NULL;
3202     it->it_index = 0;
3203     Py_INCREF(seq);
3204     it->it_seq = (PyBytesObject *)seq;
3205     _PyObject_GC_TRACK(it);
3206     return (PyObject *)it;
3207 }
3208 
3209 
3210 /* _PyBytesWriter API */
3211 
/* OVERALLOCATE_FACTOR is a divisor: when overallocation is enabled,
   _PyBytesWriter_Resize() requests size + size / OVERALLOCATE_FACTOR
   bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3219 
3220 void
_PyBytesWriter_Init(_PyBytesWriter * writer)3221 _PyBytesWriter_Init(_PyBytesWriter *writer)
3222 {
3223     /* Set all attributes before small_buffer to 0 */
3224     memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3225 #ifndef NDEBUG
3226     memset(writer->small_buffer, PYMEM_CLEANBYTE,
3227            sizeof(writer->small_buffer));
3228 #endif
3229 }
3230 
3231 void
_PyBytesWriter_Dealloc(_PyBytesWriter * writer)3232 _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3233 {
3234     Py_CLEAR(writer->buffer);
3235 }
3236 
3237 Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter * writer)3238 _PyBytesWriter_AsString(_PyBytesWriter *writer)
3239 {
3240     if (writer->use_small_buffer) {
3241         assert(writer->buffer == NULL);
3242         return writer->small_buffer;
3243     }
3244     else if (writer->use_bytearray) {
3245         assert(writer->buffer != NULL);
3246         return PyByteArray_AS_STRING(writer->buffer);
3247     }
3248     else {
3249         assert(writer->buffer != NULL);
3250         return PyBytes_AS_STRING(writer->buffer);
3251     }
3252 }
3253 
3254 Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter * writer,char * str)3255 _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3256 {
3257     char *start = _PyBytesWriter_AsString(writer);
3258     assert(str != NULL);
3259     assert(str >= start);
3260     assert(str - start <= writer->allocated);
3261     return str - start;
3262 }
3263 
3264 #ifndef NDEBUG
3265 Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter * writer,char * str)3266 _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3267 {
3268     char *start, *end;
3269 
3270     if (writer->use_small_buffer) {
3271         assert(writer->buffer == NULL);
3272     }
3273     else {
3274         assert(writer->buffer != NULL);
3275         if (writer->use_bytearray)
3276             assert(PyByteArray_CheckExact(writer->buffer));
3277         else
3278             assert(PyBytes_CheckExact(writer->buffer));
3279         assert(Py_REFCNT(writer->buffer) == 1);
3280     }
3281 
3282     if (writer->use_bytearray) {
3283         /* bytearray has its own overallocation algorithm,
3284            writer overallocation must be disabled */
3285         assert(!writer->overallocate);
3286     }
3287 
3288     assert(0 <= writer->allocated);
3289     assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3290     /* the last byte must always be null */
3291     start = _PyBytesWriter_AsString(writer);
3292     assert(start[writer->allocated] == 0);
3293 
3294     end = start + writer->allocated;
3295     assert(str != NULL);
3296     assert(start <= str && str <= end);
3297     return 1;
3298 }
3299 #endif
3300 
3301 void*
_PyBytesWriter_Resize(_PyBytesWriter * writer,void * str,Py_ssize_t size)3302 _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3303 {
3304     Py_ssize_t allocated, pos;
3305 
3306     assert(_PyBytesWriter_CheckConsistency(writer, str));
3307     assert(writer->allocated < size);
3308 
3309     allocated = size;
3310     if (writer->overallocate
3311         && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3312         /* overallocate to limit the number of realloc() */
3313         allocated += allocated / OVERALLOCATE_FACTOR;
3314     }
3315 
3316     pos = _PyBytesWriter_GetSize(writer, str);
3317     if (!writer->use_small_buffer) {
3318         if (writer->use_bytearray) {
3319             if (PyByteArray_Resize(writer->buffer, allocated))
3320                 goto error;
3321             /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3322                but we cannot use ob_alloc because bytes may need to be moved
3323                to use the whole buffer. bytearray uses an internal optimization
3324                to avoid moving or copying bytes when bytes are removed at the
3325                beginning (ex: del bytearray[:1]). */
3326         }
3327         else {
3328             if (_PyBytes_Resize(&writer->buffer, allocated))
3329                 goto error;
3330         }
3331     }
3332     else {
3333         /* convert from stack buffer to bytes object buffer */
3334         assert(writer->buffer == NULL);
3335 
3336         if (writer->use_bytearray)
3337             writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3338         else
3339             writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3340         if (writer->buffer == NULL)
3341             goto error;
3342 
3343         if (pos != 0) {
3344             char *dest;
3345             if (writer->use_bytearray)
3346                 dest = PyByteArray_AS_STRING(writer->buffer);
3347             else
3348                 dest = PyBytes_AS_STRING(writer->buffer);
3349             memcpy(dest,
3350                       writer->small_buffer,
3351                       pos);
3352         }
3353 
3354         writer->use_small_buffer = 0;
3355 #ifndef NDEBUG
3356         memset(writer->small_buffer, PYMEM_CLEANBYTE,
3357                sizeof(writer->small_buffer));
3358 #endif
3359     }
3360     writer->allocated = allocated;
3361 
3362     str = _PyBytesWriter_AsString(writer) + pos;
3363     assert(_PyBytesWriter_CheckConsistency(writer, str));
3364     return str;
3365 
3366 error:
3367     _PyBytesWriter_Dealloc(writer);
3368     return NULL;
3369 }
3370 
3371 void*
_PyBytesWriter_Prepare(_PyBytesWriter * writer,void * str,Py_ssize_t size)3372 _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3373 {
3374     Py_ssize_t new_min_size;
3375 
3376     assert(_PyBytesWriter_CheckConsistency(writer, str));
3377     assert(size >= 0);
3378 
3379     if (size == 0) {
3380         /* nothing to do */
3381         return str;
3382     }
3383 
3384     if (writer->min_size > PY_SSIZE_T_MAX - size) {
3385         PyErr_NoMemory();
3386         _PyBytesWriter_Dealloc(writer);
3387         return NULL;
3388     }
3389     new_min_size = writer->min_size + size;
3390 
3391     if (new_min_size > writer->allocated)
3392         str = _PyBytesWriter_Resize(writer, str, new_min_size);
3393 
3394     writer->min_size = new_min_size;
3395     return str;
3396 }
3397 
3398 /* Allocate the buffer to write size bytes.
3399    Return the pointer to the beginning of buffer data.
3400    Raise an exception and return NULL on error. */
3401 void*
_PyBytesWriter_Alloc(_PyBytesWriter * writer,Py_ssize_t size)3402 _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3403 {
3404     /* ensure that _PyBytesWriter_Alloc() is only called once */
3405     assert(writer->min_size == 0 && writer->buffer == NULL);
3406     assert(size >= 0);
3407 
3408     writer->use_small_buffer = 1;
3409 #ifndef NDEBUG
3410     writer->allocated = sizeof(writer->small_buffer) - 1;
3411     /* In debug mode, don't use the full small buffer because it is less
3412        efficient than bytes and bytearray objects to detect buffer underflow
3413        and buffer overflow. Use 10 bytes of the small buffer to test also
3414        code using the smaller buffer in debug mode.
3415 
3416        Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3417        in debug mode to also be able to detect stack overflow when running
3418        tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3419        if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3420        stack overflow. */
3421     writer->allocated = Py_MIN(writer->allocated, 10);
3422     /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3423        to detect buffer overflow */
3424     writer->small_buffer[writer->allocated] = 0;
3425 #else
3426     writer->allocated = sizeof(writer->small_buffer);
3427 #endif
3428     return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3429 }
3430 
3431 PyObject *
_PyBytesWriter_Finish(_PyBytesWriter * writer,void * str)3432 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3433 {
3434     Py_ssize_t size;
3435     PyObject *result;
3436 
3437     assert(_PyBytesWriter_CheckConsistency(writer, str));
3438 
3439     size = _PyBytesWriter_GetSize(writer, str);
3440     if (size == 0 && !writer->use_bytearray) {
3441         Py_CLEAR(writer->buffer);
3442         /* Get the empty byte string singleton */
3443         result = PyBytes_FromStringAndSize(NULL, 0);
3444     }
3445     else if (writer->use_small_buffer) {
3446         if (writer->use_bytearray) {
3447             result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3448         }
3449         else {
3450             result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3451         }
3452     }
3453     else {
3454         result = writer->buffer;
3455         writer->buffer = NULL;
3456 
3457         if (size != writer->allocated) {
3458             if (writer->use_bytearray) {
3459                 if (PyByteArray_Resize(result, size)) {
3460                     Py_DECREF(result);
3461                     return NULL;
3462                 }
3463             }
3464             else {
3465                 if (_PyBytes_Resize(&result, size)) {
3466                     assert(result == NULL);
3467                     return NULL;
3468                 }
3469             }
3470         }
3471     }
3472     return result;
3473 }
3474 
3475 void*
_PyBytesWriter_WriteBytes(_PyBytesWriter * writer,void * ptr,const void * bytes,Py_ssize_t size)3476 _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3477                           const void *bytes, Py_ssize_t size)
3478 {
3479     char *str = (char *)ptr;
3480 
3481     str = _PyBytesWriter_Prepare(writer, str, size);
3482     if (str == NULL)
3483         return NULL;
3484 
3485     memcpy(str, bytes, size);
3486     str += size;
3487 
3488     return str;
3489 }
3490