/* PyByteArray (bytearray) implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
7 
/* Shared, statically allocated empty C string (a single NUL byte).
   NOTE(review): presumably this is what PyByteArray_AS_STRING() hands out
   for arrays without a buffer -- confirm against the macro in the header. */
char _PyByteArray_empty_string[] = "";
9 
/* Counterpart of PyByteArray_Init(); intentionally does nothing. */
void
PyByteArray_Fini(void)
{
    /* Nothing to release. */
    return;
}
14 
/* Type-level initialisation hook.  There is nothing to set up, so this
   always reports success by returning 1. */
int
PyByteArray_Init(void)
{
    const int status = 1;

    return status;
}
20 
21 /* end nullbytes support */
22 
23 /* Helpers */
24 
25 static int
_getbytevalue(PyObject * arg,int * value)26 _getbytevalue(PyObject* arg, int *value)
27 {
28     long face_value;
29 
30     if (PyBytes_CheckExact(arg)) {
31         if (Py_SIZE(arg) != 1) {
32             PyErr_SetString(PyExc_ValueError, "string must be of size 1");
33             return 0;
34         }
35         *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
36         return 1;
37     }
38     else if (_PyAnyInt_Check(arg)) {
39         face_value = PyLong_AsLong(arg);
40     }
41     else {
42         PyObject *index = PyNumber_Index(arg);
43         if (index == NULL) {
44             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
45                 PyErr_Format(PyExc_TypeError,
46                              "an integer or string of size 1 is required");
47             }
48             return 0;
49         }
50         face_value = PyLong_AsLong(index);
51         Py_DECREF(index);
52     }
53 
54     if (face_value < 0 || face_value >= 256) {
55         /* this includes the OverflowError in case the long is too large */
56         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
57         return 0;
58     }
59 
60     *value = face_value;
61     return 1;
62 }
63 
64 static Py_ssize_t
bytearray_buffer_getreadbuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)65 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
66 {
67     if ( index != 0 ) {
68         PyErr_SetString(PyExc_SystemError,
69                 "accessing non-existent bytes segment");
70         return -1;
71     }
72     *ptr = (void *)PyByteArray_AS_STRING(self);
73     return Py_SIZE(self);
74 }
75 
76 static Py_ssize_t
bytearray_buffer_getwritebuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)77 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
78 {
79     if ( index != 0 ) {
80         PyErr_SetString(PyExc_SystemError,
81                 "accessing non-existent bytes segment");
82         return -1;
83     }
84     *ptr = (void *)PyByteArray_AS_STRING(self);
85     return Py_SIZE(self);
86 }
87 
88 static Py_ssize_t
bytearray_buffer_getsegcount(PyByteArrayObject * self,Py_ssize_t * lenp)89 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
90 {
91     if ( lenp )
92         *lenp = Py_SIZE(self);
93     return 1;
94 }
95 
96 static Py_ssize_t
bytearray_buffer_getcharbuf(PyByteArrayObject * self,Py_ssize_t index,const char ** ptr)97 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
98 {
99     if ( index != 0 ) {
100         PyErr_SetString(PyExc_SystemError,
101                 "accessing non-existent bytes segment");
102         return -1;
103     }
104     *ptr = PyByteArray_AS_STRING(self);
105     return Py_SIZE(self);
106 }
107 
108 static int
bytearray_getbuffer(PyByteArrayObject * obj,Py_buffer * view,int flags)109 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
110 {
111     int ret;
112     void *ptr;
113     if (view == NULL) {
114         obj->ob_exports++;
115         return 0;
116     }
117     ptr = (void *) PyByteArray_AS_STRING(obj);
118     ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
119     if (ret >= 0) {
120         obj->ob_exports++;
121     }
122     return ret;
123 }
124 
125 static void
bytearray_releasebuffer(PyByteArrayObject * obj,Py_buffer * view)126 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
127 {
128     obj->ob_exports--;
129 }
130 
131 static Py_ssize_t
_getbuffer(PyObject * obj,Py_buffer * view)132 _getbuffer(PyObject *obj, Py_buffer *view)
133 {
134     PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
135 
136     if (buffer == NULL || buffer->bf_getbuffer == NULL)
137     {
138         PyErr_Format(PyExc_TypeError,
139                      "Type %.100s doesn't support the buffer API",
140                      Py_TYPE(obj)->tp_name);
141         return -1;
142     }
143 
144     if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
145             return -1;
146     return view->len;
147 }
148 
149 static int
_canresize(PyByteArrayObject * self)150 _canresize(PyByteArrayObject *self)
151 {
152     if (self->ob_exports > 0) {
153         PyErr_SetString(PyExc_BufferError,
154                 "Existing exports of data: object cannot be re-sized");
155         return 0;
156     }
157     return 1;
158 }
159 
160 /* Direct API functions */
161 
162 PyObject *
PyByteArray_FromObject(PyObject * input)163 PyByteArray_FromObject(PyObject *input)
164 {
165     return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
166                                         input, NULL);
167 }
168 
169 static PyObject *
_PyByteArray_FromBufferObject(PyObject * obj)170 _PyByteArray_FromBufferObject(PyObject *obj)
171 {
172     PyObject *result;
173     Py_buffer view;
174 
175     if (PyObject_GetBuffer(obj, &view, PyBUF_FULL_RO) < 0) {
176         return NULL;
177     }
178     result = PyByteArray_FromStringAndSize(NULL, view.len);
179     if (result != NULL &&
180         PyBuffer_ToContiguous(PyByteArray_AS_STRING(result),
181                               &view, view.len, 'C') < 0)
182     {
183         Py_CLEAR(result);
184     }
185     PyBuffer_Release(&view);
186     return result;
187 }
188 
189 PyObject *
PyByteArray_FromStringAndSize(const char * bytes,Py_ssize_t size)190 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
191 {
192     PyByteArrayObject *new;
193     Py_ssize_t alloc;
194 
195     if (size < 0) {
196         PyErr_SetString(PyExc_SystemError,
197             "Negative size passed to PyByteArray_FromStringAndSize");
198         return NULL;
199     }
200 
201     new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
202     if (new == NULL)
203         return NULL;
204 
205     if (size == 0) {
206         new->ob_bytes = NULL;
207         alloc = 0;
208     }
209     else {
210         alloc = size + 1;
211         new->ob_bytes = PyMem_Malloc(alloc);
212         if (new->ob_bytes == NULL) {
213             Py_DECREF(new);
214             return PyErr_NoMemory();
215         }
216         if (bytes != NULL && size > 0)
217             memcpy(new->ob_bytes, bytes, size);
218         new->ob_bytes[size] = '\0';  /* Trailing null byte */
219     }
220     Py_SIZE(new) = size;
221     new->ob_alloc = alloc;
222     new->ob_exports = 0;
223 
224     return (PyObject *)new;
225 }
226 
227 Py_ssize_t
PyByteArray_Size(PyObject * self)228 PyByteArray_Size(PyObject *self)
229 {
230     assert(self != NULL);
231     assert(PyByteArray_Check(self));
232 
233     return PyByteArray_GET_SIZE(self);
234 }
235 
236 char  *
PyByteArray_AsString(PyObject * self)237 PyByteArray_AsString(PyObject *self)
238 {
239     assert(self != NULL);
240     assert(PyByteArray_Check(self));
241 
242     return PyByteArray_AS_STRING(self);
243 }
244 
245 int
PyByteArray_Resize(PyObject * self,Py_ssize_t size)246 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
247 {
248     void *sval;
249     Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
250 
251     assert(self != NULL);
252     assert(PyByteArray_Check(self));
253     assert(size >= 0);
254 
255     if (size == Py_SIZE(self)) {
256         return 0;
257     }
258     if (!_canresize((PyByteArrayObject *)self)) {
259         return -1;
260     }
261 
262     if (size < alloc / 2) {
263         /* Major downsize; resize down to exact size */
264         alloc = size + 1;
265     }
266     else if (size < alloc) {
267         /* Within allocated size; quick exit */
268         Py_SIZE(self) = size;
269         ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
270         return 0;
271     }
272     else if (size <= alloc * 1.125) {
273         /* Moderate upsize; overallocate similar to list_resize() */
274         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
275     }
276     else {
277         /* Major upsize; resize up to exact size */
278         alloc = size + 1;
279     }
280 
281     sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
282     if (sval == NULL) {
283         PyErr_NoMemory();
284         return -1;
285     }
286 
287     ((PyByteArrayObject *)self)->ob_bytes = sval;
288     Py_SIZE(self) = size;
289     ((PyByteArrayObject *)self)->ob_alloc = alloc;
290     ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
291 
292     return 0;
293 }
294 
295 PyObject *
PyByteArray_Concat(PyObject * a,PyObject * b)296 PyByteArray_Concat(PyObject *a, PyObject *b)
297 {
298     Py_buffer va, vb;
299     PyByteArrayObject *result = NULL;
300 
301     va.len = -1;
302     vb.len = -1;
303     if (_getbuffer(a, &va) < 0  ||
304         _getbuffer(b, &vb) < 0) {
305             PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
306                          Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
307             goto done;
308     }
309 
310     if (va.len > PY_SSIZE_T_MAX - vb.len) {
311         PyErr_NoMemory();
312         goto done;
313     }
314 
315     result = (PyByteArrayObject *) \
316         PyByteArray_FromStringAndSize(NULL, va.len + vb.len);
317     if (result != NULL) {
318         memcpy(result->ob_bytes, va.buf, va.len);
319         memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
320     }
321 
322   done:
323     if (va.len != -1)
324         PyBuffer_Release(&va);
325     if (vb.len != -1)
326         PyBuffer_Release(&vb);
327     return (PyObject *)result;
328 }
329 
330 /* Functions stuffed into the type object */
331 
332 static Py_ssize_t
bytearray_length(PyByteArrayObject * self)333 bytearray_length(PyByteArrayObject *self)
334 {
335     return Py_SIZE(self);
336 }
337 
338 static PyObject *
bytearray_iconcat(PyByteArrayObject * self,PyObject * other)339 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
340 {
341     Py_ssize_t mysize;
342     Py_ssize_t size;
343     Py_buffer vo;
344 
345     if (_getbuffer(other, &vo) < 0) {
346         PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
347                      Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
348         return NULL;
349     }
350 
351     mysize = Py_SIZE(self);
352     if (mysize > PY_SSIZE_T_MAX - vo.len) {
353         PyBuffer_Release(&vo);
354         return PyErr_NoMemory();
355     }
356     size = mysize + vo.len;
357     if (size < self->ob_alloc) {
358         Py_SIZE(self) = size;
359         self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
360     }
361     else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
362         PyBuffer_Release(&vo);
363         return NULL;
364     }
365     memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
366     PyBuffer_Release(&vo);
367     Py_INCREF(self);
368     return (PyObject *)self;
369 }
370 
371 static PyObject *
bytearray_repeat(PyByteArrayObject * self,Py_ssize_t count)372 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
373 {
374     PyByteArrayObject *result;
375     Py_ssize_t mysize;
376     Py_ssize_t size;
377 
378     if (count < 0)
379         count = 0;
380     mysize = Py_SIZE(self);
381     if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
382         return PyErr_NoMemory();
383     size = mysize * count;
384     result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
385     if (result != NULL && size != 0) {
386         if (mysize == 1)
387             memset(result->ob_bytes, self->ob_bytes[0], size);
388         else {
389             Py_ssize_t i;
390             for (i = 0; i < count; i++)
391                 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
392         }
393     }
394     return (PyObject *)result;
395 }
396 
397 static PyObject *
bytearray_irepeat(PyByteArrayObject * self,Py_ssize_t count)398 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
399 {
400     Py_ssize_t mysize;
401     Py_ssize_t size;
402 
403     if (count < 0)
404         count = 0;
405     mysize = Py_SIZE(self);
406     if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
407         return PyErr_NoMemory();
408     size = mysize * count;
409     if (size < self->ob_alloc) {
410         Py_SIZE(self) = size;
411         self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
412     }
413     else if (PyByteArray_Resize((PyObject *)self, size) < 0)
414         return NULL;
415 
416     if (mysize == 1)
417         memset(self->ob_bytes, self->ob_bytes[0], size);
418     else {
419         Py_ssize_t i;
420         for (i = 1; i < count; i++)
421             memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
422     }
423 
424     Py_INCREF(self);
425     return (PyObject *)self;
426 }
427 
428 static PyObject *
bytearray_getitem(PyByteArrayObject * self,Py_ssize_t i)429 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
430 {
431     if (i < 0)
432         i += Py_SIZE(self);
433     if (i < 0 || i >= Py_SIZE(self)) {
434         PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
435         return NULL;
436     }
437     return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
438 }
439 
440 static PyObject *
bytearray_subscript(PyByteArrayObject * self,PyObject * index)441 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
442 {
443     if (PyIndex_Check(index)) {
444         Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
445 
446         if (i == -1 && PyErr_Occurred())
447             return NULL;
448 
449         if (i < 0)
450             i += PyByteArray_GET_SIZE(self);
451 
452         if (i < 0 || i >= Py_SIZE(self)) {
453             PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
454             return NULL;
455         }
456         return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
457     }
458     else if (PySlice_Check(index)) {
459         Py_ssize_t start, stop, step, slicelength, cur, i;
460         if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
461             return NULL;
462         }
463         slicelength = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self),
464                                             &start, &stop, step);
465 
466         if (slicelength <= 0)
467             return PyByteArray_FromStringAndSize("", 0);
468         else if (step == 1) {
469             return PyByteArray_FromStringAndSize(self->ob_bytes + start,
470                                              slicelength);
471         }
472         else {
473             char *source_buf = PyByteArray_AS_STRING(self);
474             char *result_buf = (char *)PyMem_Malloc(slicelength);
475             PyObject *result;
476 
477             if (result_buf == NULL)
478                 return PyErr_NoMemory();
479 
480             for (cur = start, i = 0; i < slicelength;
481                  cur += step, i++) {
482                      result_buf[i] = source_buf[cur];
483             }
484             result = PyByteArray_FromStringAndSize(result_buf, slicelength);
485             PyMem_Free(result_buf);
486             return result;
487         }
488     }
489     else {
490         PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
491         return NULL;
492     }
493 }
494 
495 static int
bytearray_setslice(PyByteArrayObject * self,Py_ssize_t lo,Py_ssize_t hi,PyObject * values)496 bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
497                PyObject *values)
498 {
499     Py_ssize_t avail, needed;
500     void *bytes;
501     Py_buffer vbytes;
502     int res = 0;
503 
504     vbytes.len = -1;
505     if (values == (PyObject *)self) {
506         /* Make a copy and call this function recursively */
507         int err;
508         values = PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(values),
509                                                PyByteArray_GET_SIZE(values));
510         if (values == NULL)
511             return -1;
512         err = bytearray_setslice(self, lo, hi, values);
513         Py_DECREF(values);
514         return err;
515     }
516     if (values == NULL) {
517         /* del b[lo:hi] */
518         bytes = NULL;
519         needed = 0;
520     }
521     else {
522             if (_getbuffer(values, &vbytes) < 0) {
523                     PyErr_Format(PyExc_TypeError,
524                                  "can't set bytearray slice from %.100s",
525                                  Py_TYPE(values)->tp_name);
526                     return -1;
527             }
528             needed = vbytes.len;
529             bytes = vbytes.buf;
530     }
531 
532     if (lo < 0)
533         lo = 0;
534     if (hi < lo)
535         hi = lo;
536     if (hi > Py_SIZE(self))
537         hi = Py_SIZE(self);
538 
539     avail = hi - lo;
540     if (avail < 0)
541         lo = hi = avail = 0;
542 
543     if (avail != needed) {
544         if (avail > needed) {
545             if (!_canresize(self)) {
546                 res = -1;
547                 goto finish;
548             }
549             /*
550               0   lo               hi               old_size
551               |   |<----avail----->|<-----tomove------>|
552               |   |<-needed->|<-----tomove------>|
553               0   lo      new_hi              new_size
554             */
555             memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
556                     Py_SIZE(self) - hi);
557         }
558         /* XXX(nnorwitz): need to verify this can't overflow! */
559         if (PyByteArray_Resize((PyObject *)self,
560                            Py_SIZE(self) + needed - avail) < 0) {
561                 res = -1;
562                 goto finish;
563         }
564         if (avail < needed) {
565             /*
566               0   lo        hi               old_size
567               |   |<-avail->|<-----tomove------>|
568               |   |<----needed---->|<-----tomove------>|
569               0   lo            new_hi              new_size
570              */
571             memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
572                     Py_SIZE(self) - lo - needed);
573         }
574     }
575 
576     if (needed > 0)
577         memcpy(self->ob_bytes + lo, bytes, needed);
578 
579 
580  finish:
581     if (vbytes.len != -1)
582             PyBuffer_Release(&vbytes);
583     return res;
584 }
585 
586 static int
bytearray_setitem(PyByteArrayObject * self,Py_ssize_t i,PyObject * value)587 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
588 {
589     int ival;
590 
591     if (i < 0)
592         i += Py_SIZE(self);
593 
594     if (i < 0 || i >= Py_SIZE(self)) {
595         PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
596         return -1;
597     }
598 
599     if (value == NULL)
600         return bytearray_setslice(self, i, i+1, NULL);
601 
602     if (!_getbytevalue(value, &ival))
603         return -1;
604 
605     self->ob_bytes[i] = ival;
606     return 0;
607 }
608 
609 static int
bytearray_ass_subscript(PyByteArrayObject * self,PyObject * index,PyObject * values)610 bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
611 {
612     Py_ssize_t start, stop, step, slicelen, needed;
613     char *bytes;
614 
615     if (PyIndex_Check(index)) {
616         Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
617 
618         if (i == -1 && PyErr_Occurred())
619             return -1;
620 
621         if (i < 0)
622             i += PyByteArray_GET_SIZE(self);
623 
624         if (i < 0 || i >= Py_SIZE(self)) {
625             PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
626             return -1;
627         }
628 
629         if (values == NULL) {
630             /* Fall through to slice assignment */
631             start = i;
632             stop = i + 1;
633             step = 1;
634             slicelen = 1;
635         }
636         else {
637             int ival;
638             if (!_getbytevalue(values, &ival))
639                 return -1;
640             self->ob_bytes[i] = (char)ival;
641             return 0;
642         }
643     }
644     else if (PySlice_Check(index)) {
645         if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
646             return -1;
647         }
648         slicelen = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self), &start,
649                                          &stop, step);
650     }
651     else {
652         PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
653         return -1;
654     }
655 
656     if (values == NULL) {
657         bytes = NULL;
658         needed = 0;
659     }
660     else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
661         int err;
662         if (PyNumber_Check(values) || PyUnicode_Check(values)) {
663             PyErr_SetString(PyExc_TypeError,
664                             "can assign only bytes, buffers, or iterables "
665                             "of ints in range(0, 256)");
666             return -1;
667         }
668         /* Make a copy and call this function recursively */
669         values = PyByteArray_FromObject(values);
670         if (values == NULL)
671             return -1;
672         err = bytearray_ass_subscript(self, index, values);
673         Py_DECREF(values);
674         return err;
675     }
676     else {
677         assert(PyByteArray_Check(values));
678         bytes = ((PyByteArrayObject *)values)->ob_bytes;
679         needed = Py_SIZE(values);
680     }
681     /* Make sure b[5:2] = ... inserts before 5, not before 2. */
682     if ((step < 0 && start < stop) ||
683         (step > 0 && start > stop))
684         stop = start;
685     if (step == 1) {
686         if (slicelen != needed) {
687             if (!_canresize(self))
688                 return -1;
689             if (slicelen > needed) {
690                 /*
691                   0   start           stop              old_size
692                   |   |<---slicelen--->|<-----tomove------>|
693                   |   |<-needed->|<-----tomove------>|
694                   0   lo      new_hi              new_size
695                 */
696                 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
697                         Py_SIZE(self) - stop);
698             }
699             if (PyByteArray_Resize((PyObject *)self,
700                                Py_SIZE(self) + needed - slicelen) < 0)
701                 return -1;
702             if (slicelen < needed) {
703                 /*
704                   0   lo        hi               old_size
705                   |   |<-avail->|<-----tomove------>|
706                   |   |<----needed---->|<-----tomove------>|
707                   0   lo            new_hi              new_size
708                  */
709                 memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
710                         Py_SIZE(self) - start - needed);
711             }
712         }
713 
714         if (needed > 0)
715             memcpy(self->ob_bytes + start, bytes, needed);
716 
717         return 0;
718     }
719     else {
720         if (needed == 0) {
721             /* Delete slice */
722             size_t cur;
723             Py_ssize_t i;
724 
725             if (!_canresize(self))
726                 return -1;
727             if (step < 0) {
728                 stop = start + 1;
729                 start = stop + step * (slicelen - 1) - 1;
730                 step = -step;
731             }
732             for (cur = start, i = 0;
733                  i < slicelen; cur += step, i++) {
734                 Py_ssize_t lim = step - 1;
735 
736                 if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
737                     lim = PyByteArray_GET_SIZE(self) - cur - 1;
738 
739                 memmove(self->ob_bytes + cur - i,
740                         self->ob_bytes + cur + 1, lim);
741             }
742             /* Move the tail of the bytes, in one chunk */
743             cur = start + slicelen*step;
744             if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
745                 memmove(self->ob_bytes + cur - slicelen,
746                         self->ob_bytes + cur,
747                         PyByteArray_GET_SIZE(self) - cur);
748             }
749             if (PyByteArray_Resize((PyObject *)self,
750                                PyByteArray_GET_SIZE(self) - slicelen) < 0)
751                 return -1;
752 
753             return 0;
754         }
755         else {
756             /* Assign slice */
757             Py_ssize_t cur, i;
758 
759             if (needed != slicelen) {
760                 PyErr_Format(PyExc_ValueError,
761                              "attempt to assign bytes of size %zd "
762                              "to extended slice of size %zd",
763                              needed, slicelen);
764                 return -1;
765             }
766             for (cur = start, i = 0; i < slicelen; cur += step, i++)
767                 self->ob_bytes[cur] = bytes[i];
768             return 0;
769         }
770     }
771 }
772 
773 static int
bytearray_init(PyByteArrayObject * self,PyObject * args,PyObject * kwds)774 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
775 {
776     static char *kwlist[] = {"source", "encoding", "errors", 0};
777     PyObject *arg = NULL;
778     const char *encoding = NULL;
779     const char *errors = NULL;
780     Py_ssize_t count;
781     PyObject *it;
782     PyObject *(*iternext)(PyObject *);
783 
784     if (Py_SIZE(self) != 0) {
785         /* Empty previous contents (yes, do this first of all!) */
786         if (PyByteArray_Resize((PyObject *)self, 0) < 0)
787             return -1;
788     }
789 
790     /* Parse arguments */
791     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
792                                      &arg, &encoding, &errors))
793         return -1;
794 
795     /* Make a quick exit if no first argument */
796     if (arg == NULL) {
797         if (encoding != NULL || errors != NULL) {
798             PyErr_SetString(PyExc_TypeError,
799                             "encoding or errors without sequence argument");
800             return -1;
801         }
802         return 0;
803     }
804 
805     if (PyBytes_Check(arg)) {
806         PyObject *new, *encoded;
807         if (encoding != NULL) {
808             encoded = _PyCodec_EncodeText(arg, encoding, errors);
809             if (encoded == NULL)
810                 return -1;
811             assert(PyBytes_Check(encoded));
812         }
813         else {
814             encoded = arg;
815             Py_INCREF(arg);
816         }
817         new = bytearray_iconcat(self, arg);
818         Py_DECREF(encoded);
819         if (new == NULL)
820             return -1;
821         Py_DECREF(new);
822         return 0;
823     }
824 
825 #ifdef Py_USING_UNICODE
826     if (PyUnicode_Check(arg)) {
827         /* Encode via the codec registry */
828         PyObject *encoded, *new;
829         if (encoding == NULL) {
830             PyErr_SetString(PyExc_TypeError,
831                             "unicode argument without an encoding");
832             return -1;
833         }
834         encoded = _PyCodec_EncodeText(arg, encoding, errors);
835         if (encoded == NULL)
836             return -1;
837         assert(PyBytes_Check(encoded));
838         new = bytearray_iconcat(self, encoded);
839         Py_DECREF(encoded);
840         if (new == NULL)
841             return -1;
842         Py_DECREF(new);
843         return 0;
844     }
845 #endif
846 
847     /* If it's not unicode, there can't be encoding or errors */
848     if (encoding != NULL || errors != NULL) {
849         PyErr_SetString(PyExc_TypeError,
850                         "encoding or errors without a string argument");
851         return -1;
852     }
853 
854     /* Is it an int? */
855     count = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
856     if (count == -1 && PyErr_Occurred()) {
857         if (!PyErr_ExceptionMatches(PyExc_TypeError))
858             return -1;
859         PyErr_Clear();
860     }
861     else if (count < 0) {
862         PyErr_SetString(PyExc_ValueError, "negative count");
863         return -1;
864     }
865     else {
866         if (count > 0) {
867             if (PyByteArray_Resize((PyObject *)self, count))
868                 return -1;
869             memset(self->ob_bytes, 0, count);
870         }
871         return 0;
872     }
873 
874     /* Use the buffer API */
875     if (PyObject_CheckBuffer(arg)) {
876         Py_ssize_t size;
877         Py_buffer view;
878         if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
879             return -1;
880         size = view.len;
881         if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
882         if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
883                 goto fail;
884         PyBuffer_Release(&view);
885         return 0;
886     fail:
887         PyBuffer_Release(&view);
888         return -1;
889     }
890 
891     /* XXX Optimize this if the arguments is a list, tuple */
892 
893     /* Get the iterator */
894     it = PyObject_GetIter(arg);
895     if (it == NULL)
896         return -1;
897     iternext = *Py_TYPE(it)->tp_iternext;
898 
899     /* Run the iterator to exhaustion */
900     for (;;) {
901         PyObject *item;
902         int rc, value;
903 
904         /* Get the next item */
905         item = iternext(it);
906         if (item == NULL) {
907             if (PyErr_Occurred()) {
908                 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
909                     goto error;
910                 PyErr_Clear();
911             }
912             break;
913         }
914 
915         /* Interpret it as an int (__index__) */
916         rc = _getbytevalue(item, &value);
917         Py_DECREF(item);
918         if (!rc)
919             goto error;
920 
921         /* Append the byte */
922         if (Py_SIZE(self) + 1 < self->ob_alloc) {
923             Py_SIZE(self)++;
924             PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0';
925         }
926         else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
927             goto error;
928         self->ob_bytes[Py_SIZE(self)-1] = value;
929     }
930 
931     /* Clean up and return success */
932     Py_DECREF(it);
933     return 0;
934 
935  error:
936     /* Error handling when it != NULL */
937     Py_DECREF(it);
938     return -1;
939 }
940 
941 /* Mostly copied from string_repr, but without the
942    "smart quote" functionality. */
943 static PyObject *
bytearray_repr(PyByteArrayObject * self)944 bytearray_repr(PyByteArrayObject *self)
945 {
946     static const char *hexdigits = "0123456789abcdef";
947     const char *quote_prefix = "bytearray(b";
948     const char *quote_postfix = ")";
949     Py_ssize_t length = Py_SIZE(self);
950     /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
951     size_t newsize;
952     PyObject *v;
953     if (length > (PY_SSIZE_T_MAX - 14) / 4) {
954         PyErr_SetString(PyExc_OverflowError,
955             "bytearray object is too large to make repr");
956         return NULL;
957     }
958     newsize = 14 + 4 * length;
959     v = PyString_FromStringAndSize(NULL, newsize);
960     if (v == NULL) {
961         return NULL;
962     }
963     else {
964         register Py_ssize_t i;
965         register char c;
966         register char *p;
967         int quote;
968 
969         /* Figure out which quote to use; single is preferred */
970         quote = '\'';
971         {
972             char *test, *start;
973             start = PyByteArray_AS_STRING(self);
974             for (test = start; test < start+length; ++test) {
975                 if (*test == '"') {
976                     quote = '\''; /* back to single */
977                     goto decided;
978                 }
979                 else if (*test == '\'')
980                     quote = '"';
981             }
982           decided:
983             ;
984         }
985 
986         p = PyString_AS_STRING(v);
987         while (*quote_prefix)
988             *p++ = *quote_prefix++;
989         *p++ = quote;
990 
991         for (i = 0; i < length; i++) {
992             /* There's at least enough room for a hex escape
993                and a closing quote. */
994             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
995             c = self->ob_bytes[i];
996             if (c == '\'' || c == '\\')
997                 *p++ = '\\', *p++ = c;
998             else if (c == '\t')
999                 *p++ = '\\', *p++ = 't';
1000             else if (c == '\n')
1001                 *p++ = '\\', *p++ = 'n';
1002             else if (c == '\r')
1003                 *p++ = '\\', *p++ = 'r';
1004             else if (c == 0)
1005                 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
1006             else if (c < ' ' || c >= 0x7f) {
1007                 *p++ = '\\';
1008                 *p++ = 'x';
1009                 *p++ = hexdigits[(c & 0xf0) >> 4];
1010                 *p++ = hexdigits[c & 0xf];
1011             }
1012             else
1013                 *p++ = c;
1014         }
1015         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
1016         *p++ = quote;
1017         while (*quote_postfix) {
1018            *p++ = *quote_postfix++;
1019         }
1020         *p = '\0';
1021         /* v is cleared on error */
1022         (void)_PyString_Resize(&v, (p - PyString_AS_STRING(v)));
1023         return v;
1024     }
1025 }
1026 
1027 static PyObject *
bytearray_str(PyObject * op)1028 bytearray_str(PyObject *op)
1029 {
1030     return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1031 }
1032 
1033 static PyObject *
bytearray_richcompare(PyObject * self,PyObject * other,int op)1034 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1035 {
1036     Py_ssize_t self_size, other_size;
1037     Py_buffer self_bytes, other_bytes;
1038     PyObject *res;
1039     Py_ssize_t minsize;
1040     int cmp, rc;
1041 
1042     /* Bytes can be compared to anything that supports the (binary)
1043        buffer API.  Except that a comparison with Unicode is always an
1044        error, even if the comparison is for equality. */
1045 #ifdef Py_USING_UNICODE
1046     rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type);
1047     if (!rc)
1048         rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type);
1049     if (rc < 0)
1050         return NULL;
1051     if (rc) {
1052         if (Py_BytesWarningFlag && op == Py_EQ) {
1053             if (PyErr_WarnEx(PyExc_BytesWarning,
1054                             "Comparison between bytearray and unicode", 1))
1055                 return NULL;
1056         }
1057 
1058         Py_INCREF(Py_NotImplemented);
1059         return Py_NotImplemented;
1060     }
1061 #endif
1062 
1063     self_size = _getbuffer(self, &self_bytes);
1064     if (self_size < 0) {
1065         PyErr_Clear();
1066         Py_INCREF(Py_NotImplemented);
1067         return Py_NotImplemented;
1068     }
1069 
1070     other_size = _getbuffer(other, &other_bytes);
1071     if (other_size < 0) {
1072         PyErr_Clear();
1073         PyBuffer_Release(&self_bytes);
1074         Py_INCREF(Py_NotImplemented);
1075         return Py_NotImplemented;
1076     }
1077 
1078     if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1079         /* Shortcut: if the lengths differ, the objects differ */
1080         cmp = (op == Py_NE);
1081     }
1082     else {
1083         minsize = self_size;
1084         if (other_size < minsize)
1085             minsize = other_size;
1086 
1087         cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1088         /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1089 
1090         if (cmp == 0) {
1091             if (self_size < other_size)
1092                 cmp = -1;
1093             else if (self_size > other_size)
1094                 cmp = 1;
1095         }
1096 
1097         switch (op) {
1098         case Py_LT: cmp = cmp <  0; break;
1099         case Py_LE: cmp = cmp <= 0; break;
1100         case Py_EQ: cmp = cmp == 0; break;
1101         case Py_NE: cmp = cmp != 0; break;
1102         case Py_GT: cmp = cmp >  0; break;
1103         case Py_GE: cmp = cmp >= 0; break;
1104         }
1105     }
1106 
1107     res = cmp ? Py_True : Py_False;
1108     PyBuffer_Release(&self_bytes);
1109     PyBuffer_Release(&other_bytes);
1110     Py_INCREF(res);
1111     return res;
1112 }
1113 
1114 static void
bytearray_dealloc(PyByteArrayObject * self)1115 bytearray_dealloc(PyByteArrayObject *self)
1116 {
1117     if (self->ob_exports > 0) {
1118         PyErr_SetString(PyExc_SystemError,
1119                         "deallocated bytearray object has exported buffers");
1120         PyErr_Print();
1121     }
1122     if (self->ob_bytes != 0) {
1123         PyMem_Free(self->ob_bytes);
1124     }
1125     Py_TYPE(self)->tp_free((PyObject *)self);
1126 }
1127 
1128 
1129 /* -------------------------------------------------------------------- */
1130 /* Methods */
1131 
/* Configuration macros defined ahead of the stringlib headers included
   just below.  They bind stringlib's generic names to bytearray: the
   element type, the length/data accessors, the constructor, the
   whitespace and line-break predicates, and the exact-type check.
   NOTE(review): presumably the stringlib headers consume these as
   template parameters, and STRINGLIB_MUTABLE marks the type as mutable --
   confirm against the stringlib sources. */
#define STRINGLIB_CHAR char
#define STRINGLIB_LEN PyByteArray_GET_SIZE
#define STRINGLIB_STR PyByteArray_AS_STRING
#define STRINGLIB_NEW PyByteArray_FromStringAndSize
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
#define STRINGLIB_MUTABLE 1
1140 
1141 #include "stringlib/fastsearch.h"
1142 #include "stringlib/count.h"
1143 #include "stringlib/find.h"
1144 #include "stringlib/partition.h"
1145 #include "stringlib/split.h"
1146 #include "stringlib/ctype.h"
1147 #include "stringlib/transmogrify.h"
1148 
1149 
1150 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1151 were copied from the old char* style string object. */
1152 
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when it exceeds it; a negative end is taken
   relative to len and floored at 0.  A negative start is likewise taken
   relative to len and floored at 0; a start beyond len is left untouched.

   start and end must be modifiable lvalues.  len is evaluated more than
   once, so it must be free of side effects.  The body is wrapped in
   do { } while (0) so that the macro behaves as a single statement (safe
   inside an unbraced if/else); invoke it with a trailing semicolon.
   Arguments are parenthesized so that expressions such as "a ? b : c"
   may be passed for len. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1167 
1168 Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject * self,PyObject * args,int dir)1169 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1170 {
1171     PyObject *subobj;
1172     Py_buffer subbuf;
1173     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1174     Py_ssize_t res;
1175 
1176     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1177                                     args, &subobj, &start, &end))
1178         return -2;
1179     if (_getbuffer(subobj, &subbuf) < 0)
1180         return -2;
1181     if (dir > 0)
1182         res = stringlib_find_slice(
1183             PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1184             subbuf.buf, subbuf.len, start, end);
1185     else
1186         res = stringlib_rfind_slice(
1187             PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1188             subbuf.buf, subbuf.len, start, end);
1189     PyBuffer_Release(&subbuf);
1190     return res;
1191 }
1192 
1193 PyDoc_STRVAR(find__doc__,
1194 "B.find(sub [,start [,end]]) -> int\n\
1195 \n\
1196 Return the lowest index in B where subsection sub is found,\n\
1197 such that sub is contained within B[start,end].  Optional\n\
1198 arguments start and end are interpreted as in slice notation.\n\
1199 \n\
1200 Return -1 on failure.");
1201 
1202 static PyObject *
bytearray_find(PyByteArrayObject * self,PyObject * args)1203 bytearray_find(PyByteArrayObject *self, PyObject *args)
1204 {
1205     Py_ssize_t result = bytearray_find_internal(self, args, +1);
1206     if (result == -2)
1207         return NULL;
1208     return PyInt_FromSsize_t(result);
1209 }
1210 
1211 PyDoc_STRVAR(count__doc__,
1212 "B.count(sub [,start [,end]]) -> int\n\
1213 \n\
1214 Return the number of non-overlapping occurrences of subsection sub in\n\
1215 bytes B[start:end].  Optional arguments start and end are interpreted\n\
1216 as in slice notation.");
1217 
1218 static PyObject *
bytearray_count(PyByteArrayObject * self,PyObject * args)1219 bytearray_count(PyByteArrayObject *self, PyObject *args)
1220 {
1221     PyObject *sub_obj;
1222     const char *str = PyByteArray_AS_STRING(self);
1223     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1224     Py_buffer vsub;
1225     PyObject *count_obj;
1226 
1227     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
1228         return NULL;
1229 
1230     if (_getbuffer(sub_obj, &vsub) < 0)
1231         return NULL;
1232 
1233     ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
1234 
1235     count_obj = PyInt_FromSsize_t(
1236         stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
1237         );
1238     PyBuffer_Release(&vsub);
1239     return count_obj;
1240 }
1241 
1242 
1243 PyDoc_STRVAR(index__doc__,
1244 "B.index(sub [,start [,end]]) -> int\n\
1245 \n\
1246 Like B.find() but raise ValueError when the subsection is not found.");
1247 
1248 static PyObject *
bytearray_index(PyByteArrayObject * self,PyObject * args)1249 bytearray_index(PyByteArrayObject *self, PyObject *args)
1250 {
1251     Py_ssize_t result = bytearray_find_internal(self, args, +1);
1252     if (result == -2)
1253         return NULL;
1254     if (result == -1) {
1255         PyErr_SetString(PyExc_ValueError,
1256                         "subsection not found");
1257         return NULL;
1258     }
1259     return PyInt_FromSsize_t(result);
1260 }
1261 
1262 
1263 PyDoc_STRVAR(rfind__doc__,
1264 "B.rfind(sub [,start [,end]]) -> int\n\
1265 \n\
1266 Return the highest index in B where subsection sub is found,\n\
1267 such that sub is contained within B[start,end].  Optional\n\
1268 arguments start and end are interpreted as in slice notation.\n\
1269 \n\
1270 Return -1 on failure.");
1271 
1272 static PyObject *
bytearray_rfind(PyByteArrayObject * self,PyObject * args)1273 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1274 {
1275     Py_ssize_t result = bytearray_find_internal(self, args, -1);
1276     if (result == -2)
1277         return NULL;
1278     return PyInt_FromSsize_t(result);
1279 }
1280 
1281 
1282 PyDoc_STRVAR(rindex__doc__,
1283 "B.rindex(sub [,start [,end]]) -> int\n\
1284 \n\
1285 Like B.rfind() but raise ValueError when the subsection is not found.");
1286 
1287 static PyObject *
bytearray_rindex(PyByteArrayObject * self,PyObject * args)1288 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1289 {
1290     Py_ssize_t result = bytearray_find_internal(self, args, -1);
1291     if (result == -2)
1292         return NULL;
1293     if (result == -1) {
1294         PyErr_SetString(PyExc_ValueError,
1295                         "subsection not found");
1296         return NULL;
1297     }
1298     return PyInt_FromSsize_t(result);
1299 }
1300 
1301 
1302 static int
bytearray_contains(PyObject * self,PyObject * arg)1303 bytearray_contains(PyObject *self, PyObject *arg)
1304 {
1305     Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1306     if (ival == -1 && PyErr_Occurred()) {
1307         Py_buffer varg;
1308         int pos;
1309         PyErr_Clear();
1310         if (_getbuffer(arg, &varg) < 0)
1311             return -1;
1312         pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1313                              varg.buf, varg.len, 0);
1314         PyBuffer_Release(&varg);
1315         return pos >= 0;
1316     }
1317     if (ival < 0 || ival >= 256) {
1318         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1319         return -1;
1320     }
1321 
1322     return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1323 }
1324 
1325 
1326 /* Matches the end (direction >= 0) or start (direction < 0) of self
1327  * against substr, using the start and end arguments. Returns
1328  * -1 on error, 0 if not found and 1 if found.
1329  */
1330 Py_LOCAL(int)
_bytearray_tailmatch(PyByteArrayObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)1331 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1332                  Py_ssize_t end, int direction)
1333 {
1334     Py_ssize_t len = PyByteArray_GET_SIZE(self);
1335     const char* str;
1336     Py_buffer vsubstr;
1337     int rv = 0;
1338 
1339     str = PyByteArray_AS_STRING(self);
1340 
1341     if (_getbuffer(substr, &vsubstr) < 0)
1342         return -1;
1343 
1344     ADJUST_INDICES(start, end, len);
1345 
1346     if (direction < 0) {
1347         /* startswith */
1348         if (start+vsubstr.len > len) {
1349             goto done;
1350         }
1351     } else {
1352         /* endswith */
1353         if (end-start < vsubstr.len || start > len) {
1354             goto done;
1355         }
1356 
1357         if (end-vsubstr.len > start)
1358             start = end - vsubstr.len;
1359     }
1360     if (end-start >= vsubstr.len)
1361         rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1362 
1363 done:
1364     PyBuffer_Release(&vsubstr);
1365     return rv;
1366 }
1367 
1368 
1369 PyDoc_STRVAR(startswith__doc__,
1370 "B.startswith(prefix [,start [,end]]) -> bool\n\
1371 \n\
1372 Return True if B starts with the specified prefix, False otherwise.\n\
1373 With optional start, test B beginning at that position.\n\
1374 With optional end, stop comparing B at that position.\n\
1375 prefix can also be a tuple of strings to try.");
1376 
1377 static PyObject *
bytearray_startswith(PyByteArrayObject * self,PyObject * args)1378 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1379 {
1380     Py_ssize_t start = 0;
1381     Py_ssize_t end = PY_SSIZE_T_MAX;
1382     PyObject *subobj;
1383     int result;
1384 
1385     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
1386         return NULL;
1387     if (PyTuple_Check(subobj)) {
1388         Py_ssize_t i;
1389         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1390             result = _bytearray_tailmatch(self,
1391                                       PyTuple_GET_ITEM(subobj, i),
1392                                       start, end, -1);
1393             if (result == -1)
1394                 return NULL;
1395             else if (result) {
1396                 Py_RETURN_TRUE;
1397             }
1398         }
1399         Py_RETURN_FALSE;
1400     }
1401     result = _bytearray_tailmatch(self, subobj, start, end, -1);
1402     if (result == -1)
1403         return NULL;
1404     else
1405         return PyBool_FromLong(result);
1406 }
1407 
1408 PyDoc_STRVAR(endswith__doc__,
1409 "B.endswith(suffix [,start [,end]]) -> bool\n\
1410 \n\
1411 Return True if B ends with the specified suffix, False otherwise.\n\
1412 With optional start, test B beginning at that position.\n\
1413 With optional end, stop comparing B at that position.\n\
1414 suffix can also be a tuple of strings to try.");
1415 
1416 static PyObject *
bytearray_endswith(PyByteArrayObject * self,PyObject * args)1417 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1418 {
1419     Py_ssize_t start = 0;
1420     Py_ssize_t end = PY_SSIZE_T_MAX;
1421     PyObject *subobj;
1422     int result;
1423 
1424     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
1425         return NULL;
1426     if (PyTuple_Check(subobj)) {
1427         Py_ssize_t i;
1428         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1429             result = _bytearray_tailmatch(self,
1430                                       PyTuple_GET_ITEM(subobj, i),
1431                                       start, end, +1);
1432             if (result == -1)
1433                 return NULL;
1434             else if (result) {
1435                 Py_RETURN_TRUE;
1436             }
1437         }
1438         Py_RETURN_FALSE;
1439     }
1440     result = _bytearray_tailmatch(self, subobj, start, end, +1);
1441     if (result == -1)
1442         return NULL;
1443     else
1444         return PyBool_FromLong(result);
1445 }
1446 
1447 
1448 PyDoc_STRVAR(translate__doc__,
1449 "B.translate(table[, deletechars]) -> bytearray\n\
1450 \n\
1451 Return a copy of B, where all characters occurring in the\n\
1452 optional argument deletechars are removed, and the remaining\n\
1453 characters have been mapped through the given translation\n\
1454 table, which must be a bytes object of length 256.");
1455 
1456 static PyObject *
bytearray_translate(PyByteArrayObject * self,PyObject * args)1457 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1458 {
1459     register char *input, *output;
1460     register const char *table;
1461     register Py_ssize_t i, c;
1462     PyObject *input_obj = (PyObject*)self;
1463     const char *output_start;
1464     Py_ssize_t inlen;
1465     PyObject *result = NULL;
1466     int trans_table[256];
1467     PyObject *tableobj = NULL, *delobj = NULL;
1468     Py_buffer vtable, vdel;
1469 
1470     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1471                            &tableobj, &delobj))
1472           return NULL;
1473 
1474     if (tableobj == Py_None) {
1475         table = NULL;
1476         tableobj = NULL;
1477     } else if (_getbuffer(tableobj, &vtable) < 0) {
1478         return NULL;
1479     } else {
1480         if (vtable.len != 256) {
1481             PyErr_SetString(PyExc_ValueError,
1482                             "translation table must be 256 characters long");
1483             PyBuffer_Release(&vtable);
1484             return NULL;
1485         }
1486         table = (const char*)vtable.buf;
1487     }
1488 
1489     if (delobj != NULL) {
1490         if (_getbuffer(delobj, &vdel) < 0) {
1491             if (tableobj != NULL)
1492                 PyBuffer_Release(&vtable);
1493             return NULL;
1494         }
1495     }
1496     else {
1497         vdel.buf = NULL;
1498         vdel.len = 0;
1499     }
1500 
1501     inlen = PyByteArray_GET_SIZE(input_obj);
1502     result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1503     if (result == NULL)
1504         goto done;
1505     output_start = output = PyByteArray_AsString(result);
1506     input = PyByteArray_AS_STRING(input_obj);
1507 
1508     if (vdel.len == 0 && table != NULL) {
1509         /* If no deletions are required, use faster code */
1510         for (i = inlen; --i >= 0; ) {
1511             c = Py_CHARMASK(*input++);
1512             *output++ = table[c];
1513         }
1514         goto done;
1515     }
1516 
1517     if (table == NULL) {
1518         for (i = 0; i < 256; i++)
1519             trans_table[i] = Py_CHARMASK(i);
1520     } else {
1521         for (i = 0; i < 256; i++)
1522             trans_table[i] = Py_CHARMASK(table[i]);
1523     }
1524 
1525     for (i = 0; i < vdel.len; i++)
1526         trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1527 
1528     for (i = inlen; --i >= 0; ) {
1529         c = Py_CHARMASK(*input++);
1530         if (trans_table[c] != -1)
1531             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1532                     continue;
1533     }
1534     /* Fix the size of the resulting string */
1535     if (inlen > 0)
1536         PyByteArray_Resize(result, output - output_start);
1537 
1538 done:
1539     if (tableobj != NULL)
1540         PyBuffer_Release(&vtable);
1541     if (delobj != NULL)
1542         PyBuffer_Release(&vdel);
1543     return result;
1544 }
1545 
1546 
1547 /* find and count characters and substrings */
1548 
/* Locate the first occurrence of byte c in the first target_len bytes of
   target; evaluates to a char * to the match, or NULL when absent. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1551 
1552 
1553 /* Bytes ops must return a string, create a copy */
1554 Py_LOCAL(PyByteArrayObject *)
return_self(PyByteArrayObject * self)1555 return_self(PyByteArrayObject *self)
1556 {
1557     return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1558             PyByteArray_AS_STRING(self),
1559             PyByteArray_GET_SIZE(self));
1560 }
1561 
1562 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)1563 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1564 {
1565     Py_ssize_t count=0;
1566     const char *start=target;
1567     const char *end=target+target_len;
1568 
1569     while ( (start=findchar(start, end-start, c)) != NULL ) {
1570         count++;
1571         if (count >= maxcount)
1572             break;
1573         start += 1;
1574     }
1575     return count;
1576 }
1577 
1578 
1579 /* Algorithms for different cases of string replacement */
1580 
1581 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1582 Py_LOCAL(PyByteArrayObject *)
replace_interleave(PyByteArrayObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1583 replace_interleave(PyByteArrayObject *self,
1584                    const char *to_s, Py_ssize_t to_len,
1585                    Py_ssize_t maxcount)
1586 {
1587     char *self_s, *result_s;
1588     Py_ssize_t self_len, result_len;
1589     Py_ssize_t count, i;
1590     PyByteArrayObject *result;
1591 
1592     self_len = PyByteArray_GET_SIZE(self);
1593 
1594     /* 1 at the end plus 1 after every character;
1595        count = min(maxcount, self_len + 1) */
1596     if (maxcount <= self_len) {
1597         count = maxcount;
1598     }
1599     else {
1600         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1601         count = self_len + 1;
1602     }
1603 
1604     /* Check for overflow */
1605     /*   result_len = count * to_len + self_len; */
1606     assert(count > 0);
1607     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
1608         PyErr_SetString(PyExc_OverflowError,
1609                         "replace bytes is too long");
1610         return NULL;
1611     }
1612     result_len = count * to_len + self_len;
1613     if (! (result = (PyByteArrayObject *)
1614                      PyByteArray_FromStringAndSize(NULL, result_len)) )
1615         return NULL;
1616 
1617     self_s = PyByteArray_AS_STRING(self);
1618     result_s = PyByteArray_AS_STRING(result);
1619 
1620     /* TODO: special case single character, which doesn't need memcpy */
1621 
1622     /* Lay the first one down (guaranteed this will occur) */
1623     Py_MEMCPY(result_s, to_s, to_len);
1624     result_s += to_len;
1625     count -= 1;
1626 
1627     for (i=0; i<count; i++) {
1628         *result_s++ = *self_s++;
1629         Py_MEMCPY(result_s, to_s, to_len);
1630         result_s += to_len;
1631     }
1632 
1633     /* Copy the rest of the original string */
1634     Py_MEMCPY(result_s, self_s, self_len-i);
1635 
1636     return result;
1637 }
1638 
1639 /* Special case for deleting a single character */
1640 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1641 Py_LOCAL(PyByteArrayObject *)
replace_delete_single_character(PyByteArrayObject * self,char from_c,Py_ssize_t maxcount)1642 replace_delete_single_character(PyByteArrayObject *self,
1643                                 char from_c, Py_ssize_t maxcount)
1644 {
1645     char *self_s, *result_s;
1646     char *start, *next, *end;
1647     Py_ssize_t self_len, result_len;
1648     Py_ssize_t count;
1649     PyByteArrayObject *result;
1650 
1651     self_len = PyByteArray_GET_SIZE(self);
1652     self_s = PyByteArray_AS_STRING(self);
1653 
1654     count = countchar(self_s, self_len, from_c, maxcount);
1655     if (count == 0) {
1656         return return_self(self);
1657     }
1658 
1659     result_len = self_len - count;  /* from_len == 1 */
1660     assert(result_len>=0);
1661 
1662     if ( (result = (PyByteArrayObject *)
1663                     PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1664         return NULL;
1665     result_s = PyByteArray_AS_STRING(result);
1666 
1667     start = self_s;
1668     end = self_s + self_len;
1669     while (count-- > 0) {
1670         next = findchar(start, end-start, from_c);
1671         if (next == NULL)
1672             break;
1673         Py_MEMCPY(result_s, start, next-start);
1674         result_s += (next-start);
1675         start = next+1;
1676     }
1677     Py_MEMCPY(result_s, start, end-start);
1678 
1679     return result;
1680 }
1681 
1682 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1683 
1684 Py_LOCAL(PyByteArrayObject *)
replace_delete_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)1685 replace_delete_substring(PyByteArrayObject *self,
1686                          const char *from_s, Py_ssize_t from_len,
1687                          Py_ssize_t maxcount)
1688 {
1689     char *self_s, *result_s;
1690     char *start, *next, *end;
1691     Py_ssize_t self_len, result_len;
1692     Py_ssize_t count, offset;
1693     PyByteArrayObject *result;
1694 
1695     self_len = PyByteArray_GET_SIZE(self);
1696     self_s = PyByteArray_AS_STRING(self);
1697 
1698     count = stringlib_count(self_s, self_len,
1699                             from_s, from_len,
1700                             maxcount);
1701 
1702     if (count == 0) {
1703         /* no matches */
1704         return return_self(self);
1705     }
1706 
1707     result_len = self_len - (count * from_len);
1708     assert (result_len>=0);
1709 
1710     if ( (result = (PyByteArrayObject *)
1711         PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1712             return NULL;
1713 
1714     result_s = PyByteArray_AS_STRING(result);
1715 
1716     start = self_s;
1717     end = self_s + self_len;
1718     while (count-- > 0) {
1719         offset = stringlib_find(start, end-start,
1720                                 from_s, from_len,
1721                                 0);
1722         if (offset == -1)
1723             break;
1724         next = start + offset;
1725 
1726         Py_MEMCPY(result_s, start, next-start);
1727 
1728         result_s += (next-start);
1729         start = next+from_len;
1730     }
1731     Py_MEMCPY(result_s, start, end-start);
1732     return result;
1733 }
1734 
1735 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1736 Py_LOCAL(PyByteArrayObject *)
replace_single_character_in_place(PyByteArrayObject * self,char from_c,char to_c,Py_ssize_t maxcount)1737 replace_single_character_in_place(PyByteArrayObject *self,
1738                                   char from_c, char to_c,
1739                                   Py_ssize_t maxcount)
1740 {
1741     char *self_s, *result_s, *start, *end, *next;
1742     Py_ssize_t self_len;
1743     PyByteArrayObject *result;
1744 
1745     /* The result string will be the same size */
1746     self_s = PyByteArray_AS_STRING(self);
1747     self_len = PyByteArray_GET_SIZE(self);
1748 
1749     next = findchar(self_s, self_len, from_c);
1750 
1751     if (next == NULL) {
1752         /* No matches; return the original bytes */
1753         return return_self(self);
1754     }
1755 
1756     /* Need to make a new bytes */
1757     result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1758     if (result == NULL)
1759         return NULL;
1760     result_s = PyByteArray_AS_STRING(result);
1761     Py_MEMCPY(result_s, self_s, self_len);
1762 
1763     /* change everything in-place, starting with this one */
1764     start =  result_s + (next-self_s);
1765     *start = to_c;
1766     start++;
1767     end = result_s + self_len;
1768 
1769     while (--maxcount > 0) {
1770         next = findchar(start, end-start, from_c);
1771         if (next == NULL)
1772             break;
1773         *next = to_c;
1774         start = next+1;
1775     }
1776 
1777     return result;
1778 }
1779 
1780 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1781 Py_LOCAL(PyByteArrayObject *)
replace_substring_in_place(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1782 replace_substring_in_place(PyByteArrayObject *self,
1783                            const char *from_s, Py_ssize_t from_len,
1784                            const char *to_s, Py_ssize_t to_len,
1785                            Py_ssize_t maxcount)
1786 {
1787     char *result_s, *start, *end;
1788     char *self_s;
1789     Py_ssize_t self_len, offset;
1790     PyByteArrayObject *result;
1791 
1792     /* The result bytes will be the same size */
1793 
1794     self_s = PyByteArray_AS_STRING(self);
1795     self_len = PyByteArray_GET_SIZE(self);
1796 
1797     offset = stringlib_find(self_s, self_len,
1798                             from_s, from_len,
1799                             0);
1800     if (offset == -1) {
1801         /* No matches; return the original bytes */
1802         return return_self(self);
1803     }
1804 
1805     /* Need to make a new bytes */
1806     result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1807     if (result == NULL)
1808         return NULL;
1809     result_s = PyByteArray_AS_STRING(result);
1810     Py_MEMCPY(result_s, self_s, self_len);
1811 
1812     /* change everything in-place, starting with this one */
1813     start =  result_s + offset;
1814     Py_MEMCPY(start, to_s, from_len);
1815     start += from_len;
1816     end = result_s + self_len;
1817 
1818     while ( --maxcount > 0) {
1819         offset = stringlib_find(start, end-start,
1820                                 from_s, from_len,
1821                                 0);
1822         if (offset==-1)
1823             break;
1824         Py_MEMCPY(start+offset, to_s, from_len);
1825         start += offset+from_len;
1826     }
1827 
1828     return result;
1829 }
1830 
1831 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1832 Py_LOCAL(PyByteArrayObject *)
replace_single_character(PyByteArrayObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1833 replace_single_character(PyByteArrayObject *self,
1834                          char from_c,
1835                          const char *to_s, Py_ssize_t to_len,
1836                          Py_ssize_t maxcount)
1837 {
1838     char *self_s, *result_s;
1839     char *start, *next, *end;
1840     Py_ssize_t self_len, result_len;
1841     Py_ssize_t count;
1842     PyByteArrayObject *result;
1843 
1844     self_s = PyByteArray_AS_STRING(self);
1845     self_len = PyByteArray_GET_SIZE(self);
1846 
1847     count = countchar(self_s, self_len, from_c, maxcount);
1848     if (count == 0) {
1849         /* no matches, return unchanged */
1850         return return_self(self);
1851     }
1852 
1853     /* use the difference between current and new, hence the "-1" */
1854     /*   result_len = self_len + count * (to_len-1)  */
1855     assert(count > 0);
1856     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
1857         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1858         return NULL;
1859     }
1860     result_len = self_len + count * (to_len - 1);
1861 
1862     if ( (result = (PyByteArrayObject *)
1863           PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1864             return NULL;
1865     result_s = PyByteArray_AS_STRING(result);
1866 
1867     start = self_s;
1868     end = self_s + self_len;
1869     while (count-- > 0) {
1870         next = findchar(start, end-start, from_c);
1871         if (next == NULL)
1872             break;
1873 
1874         if (next == start) {
1875             /* replace with the 'to' */
1876             Py_MEMCPY(result_s, to_s, to_len);
1877             result_s += to_len;
1878             start += 1;
1879         } else {
1880             /* copy the unchanged old then the 'to' */
1881             Py_MEMCPY(result_s, start, next-start);
1882             result_s += (next-start);
1883             Py_MEMCPY(result_s, to_s, to_len);
1884             result_s += to_len;
1885             start = next+1;
1886         }
1887     }
1888     /* Copy the remainder of the remaining bytes */
1889     Py_MEMCPY(result_s, start, end-start);
1890 
1891     return result;
1892 }
1893 
1894 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1895 Py_LOCAL(PyByteArrayObject *)
replace_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1896 replace_substring(PyByteArrayObject *self,
1897                   const char *from_s, Py_ssize_t from_len,
1898                   const char *to_s, Py_ssize_t to_len,
1899                   Py_ssize_t maxcount)
1900 {
1901     char *self_s, *result_s;
1902     char *start, *next, *end;
1903     Py_ssize_t self_len, result_len;
1904     Py_ssize_t count, offset;
1905     PyByteArrayObject *result;
1906 
1907     self_s = PyByteArray_AS_STRING(self);
1908     self_len = PyByteArray_GET_SIZE(self);
1909 
1910     count = stringlib_count(self_s, self_len,
1911                             from_s, from_len,
1912                             maxcount);
1913 
1914     if (count == 0) {
1915         /* no matches, return unchanged */
1916         return return_self(self);
1917     }
1918 
1919     /* Check for overflow */
1920     /*    result_len = self_len + count * (to_len-from_len) */
1921     assert(count > 0);
1922     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
1923         PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1924         return NULL;
1925     }
1926     result_len = self_len + count * (to_len - from_len);
1927 
1928     if ( (result = (PyByteArrayObject *)
1929           PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1930         return NULL;
1931     result_s = PyByteArray_AS_STRING(result);
1932 
1933     start = self_s;
1934     end = self_s + self_len;
1935     while (count-- > 0) {
1936         offset = stringlib_find(start, end-start,
1937                                 from_s, from_len,
1938                                 0);
1939         if (offset == -1)
1940             break;
1941         next = start+offset;
1942         if (next == start) {
1943             /* replace with the 'to' */
1944             Py_MEMCPY(result_s, to_s, to_len);
1945             result_s += to_len;
1946             start += from_len;
1947         } else {
1948             /* copy the unchanged old then the 'to' */
1949             Py_MEMCPY(result_s, start, next-start);
1950             result_s += (next-start);
1951             Py_MEMCPY(result_s, to_s, to_len);
1952             result_s += to_len;
1953             start = next+from_len;
1954         }
1955     }
1956     /* Copy the remainder of the remaining bytes */
1957     Py_MEMCPY(result_s, start, end-start);
1958 
1959     return result;
1960 }
1961 
1962 
1963 Py_LOCAL(PyByteArrayObject *)
replace(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1964 replace(PyByteArrayObject *self,
1965         const char *from_s, Py_ssize_t from_len,
1966         const char *to_s, Py_ssize_t to_len,
1967         Py_ssize_t maxcount)
1968 {
1969     if (maxcount < 0) {
1970         maxcount = PY_SSIZE_T_MAX;
1971     } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1972         /* nothing to do; return the original bytes */
1973         return return_self(self);
1974     }
1975 
1976     if (maxcount == 0 ||
1977         (from_len == 0 && to_len == 0)) {
1978         /* nothing to do; return the original bytes */
1979         return return_self(self);
1980     }
1981 
1982     /* Handle zero-length special cases */
1983 
1984     if (from_len == 0) {
1985         /* insert the 'to' bytes everywhere.   */
1986         /*    >>> "Python".replace("", ".")     */
1987         /*    '.P.y.t.h.o.n.'                   */
1988         return replace_interleave(self, to_s, to_len, maxcount);
1989     }
1990 
1991     /* Except for "".replace("", "A") == "A" there is no way beyond this */
1992     /* point for an empty self bytes to generate a non-empty bytes */
1993     /* Special case so the remaining code always gets a non-empty bytes */
1994     if (PyByteArray_GET_SIZE(self) == 0) {
1995         return return_self(self);
1996     }
1997 
1998     if (to_len == 0) {
1999         /* delete all occurrences of 'from' bytes */
2000         if (from_len == 1) {
2001             return replace_delete_single_character(
2002                     self, from_s[0], maxcount);
2003         } else {
2004             return replace_delete_substring(self, from_s, from_len, maxcount);
2005         }
2006     }
2007 
2008     /* Handle special case where both bytes have the same length */
2009 
2010     if (from_len == to_len) {
2011         if (from_len == 1) {
2012             return replace_single_character_in_place(
2013                     self,
2014                     from_s[0],
2015                     to_s[0],
2016                     maxcount);
2017         } else {
2018             return replace_substring_in_place(
2019                 self, from_s, from_len, to_s, to_len, maxcount);
2020         }
2021     }
2022 
2023     /* Otherwise use the more generic algorithms */
2024     if (from_len == 1) {
2025         return replace_single_character(self, from_s[0],
2026                                         to_s, to_len, maxcount);
2027     } else {
2028         /* len('from')>=2, len('to')>=1 */
2029         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2030     }
2031 }
2032 
2033 
2034 PyDoc_STRVAR(replace__doc__,
2035 "B.replace(old, new[, count]) -> bytes\n\
2036 \n\
2037 Return a copy of B with all occurrences of subsection\n\
2038 old replaced by new.  If the optional argument count is\n\
2039 given, only the first count occurrences are replaced.");
2040 
2041 static PyObject *
bytearray_replace(PyByteArrayObject * self,PyObject * args)2042 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2043 {
2044     Py_ssize_t count = -1;
2045     PyObject *from, *to, *res;
2046     Py_buffer vfrom, vto;
2047 
2048     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2049         return NULL;
2050 
2051     if (_getbuffer(from, &vfrom) < 0)
2052         return NULL;
2053     if (_getbuffer(to, &vto) < 0) {
2054         PyBuffer_Release(&vfrom);
2055         return NULL;
2056     }
2057 
2058     res = (PyObject *)replace((PyByteArrayObject *) self,
2059                               vfrom.buf, vfrom.len,
2060                               vto.buf, vto.len, count);
2061 
2062     PyBuffer_Release(&vfrom);
2063     PyBuffer_Release(&vto);
2064     return res;
2065 }
2066 
2067 PyDoc_STRVAR(split__doc__,
2068 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2069 \n\
2070 Return a list of the sections in B, using sep as the delimiter.\n\
2071 If sep is not given, B is split on ASCII whitespace characters\n\
2072 (space, tab, return, newline, formfeed, vertical tab).\n\
2073 If maxsplit is given, at most maxsplit splits are done.");
2074 
2075 static PyObject *
bytearray_split(PyByteArrayObject * self,PyObject * args)2076 bytearray_split(PyByteArrayObject *self, PyObject *args)
2077 {
2078     Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2079     Py_ssize_t maxsplit = -1;
2080     const char *s = PyByteArray_AS_STRING(self), *sub;
2081     PyObject *list, *subobj = Py_None;
2082     Py_buffer vsub;
2083 
2084     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2085         return NULL;
2086     if (maxsplit < 0)
2087         maxsplit = PY_SSIZE_T_MAX;
2088 
2089     if (subobj == Py_None)
2090         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
2091 
2092     if (_getbuffer(subobj, &vsub) < 0)
2093         return NULL;
2094     sub = vsub.buf;
2095     n = vsub.len;
2096 
2097     list = stringlib_split(
2098         (PyObject*) self, s, len, sub, n, maxsplit
2099         );
2100     PyBuffer_Release(&vsub);
2101     return list;
2102 }
2103 
2104 PyDoc_STRVAR(partition__doc__,
2105 "B.partition(sep) -> (head, sep, tail)\n\
2106 \n\
2107 Searches for the separator sep in B, and returns the part before it,\n\
2108 the separator itself, and the part after it.  If the separator is not\n\
2109 found, returns B and two empty bytearray objects.");
2110 
2111 static PyObject *
bytearray_partition(PyByteArrayObject * self,PyObject * sep_obj)2112 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2113 {
2114     PyObject *bytesep, *result;
2115 
2116     bytesep = _PyByteArray_FromBufferObject(sep_obj);
2117     if (! bytesep)
2118         return NULL;
2119 
2120     result = stringlib_partition(
2121             (PyObject*) self,
2122             PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2123             bytesep,
2124             PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2125             );
2126 
2127     Py_DECREF(bytesep);
2128     return result;
2129 }
2130 
2131 PyDoc_STRVAR(rpartition__doc__,
2132 "B.rpartition(sep) -> (head, sep, tail)\n\
2133 \n\
2134 Searches for the separator sep in B, starting at the end of B,\n\
2135 and returns the part before it, the separator itself, and the\n\
2136 part after it.  If the separator is not found, returns two empty\n\
2137 bytearray objects and B.");
2138 
2139 static PyObject *
bytearray_rpartition(PyByteArrayObject * self,PyObject * sep_obj)2140 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2141 {
2142     PyObject *bytesep, *result;
2143 
2144     bytesep = _PyByteArray_FromBufferObject(sep_obj);
2145     if (! bytesep)
2146         return NULL;
2147 
2148     result = stringlib_rpartition(
2149             (PyObject*) self,
2150             PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2151             bytesep,
2152             PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2153             );
2154 
2155     Py_DECREF(bytesep);
2156     return result;
2157 }
2158 
2159 PyDoc_STRVAR(rsplit__doc__,
2160 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2161 \n\
2162 Return a list of the sections in B, using sep as the delimiter,\n\
2163 starting at the end of B and working to the front.\n\
2164 If sep is not given, B is split on ASCII whitespace characters\n\
2165 (space, tab, return, newline, formfeed, vertical tab).\n\
2166 If maxsplit is given, at most maxsplit splits are done.");
2167 
2168 static PyObject *
bytearray_rsplit(PyByteArrayObject * self,PyObject * args)2169 bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
2170 {
2171     Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2172     Py_ssize_t maxsplit = -1;
2173     const char *s = PyByteArray_AS_STRING(self), *sub;
2174     PyObject *list, *subobj = Py_None;
2175     Py_buffer vsub;
2176 
2177     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2178         return NULL;
2179     if (maxsplit < 0)
2180         maxsplit = PY_SSIZE_T_MAX;
2181 
2182     if (subobj == Py_None)
2183         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
2184 
2185     if (_getbuffer(subobj, &vsub) < 0)
2186         return NULL;
2187     sub = vsub.buf;
2188     n = vsub.len;
2189 
2190     list = stringlib_rsplit(
2191         (PyObject*) self, s, len, sub, n, maxsplit
2192         );
2193     PyBuffer_Release(&vsub);
2194     return list;
2195 }
2196 
2197 PyDoc_STRVAR(reverse__doc__,
2198 "B.reverse() -> None\n\
2199 \n\
2200 Reverse the order of the values in B in place.");
2201 static PyObject *
bytearray_reverse(PyByteArrayObject * self,PyObject * unused)2202 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2203 {
2204     char swap, *head, *tail;
2205     Py_ssize_t i, j, n = Py_SIZE(self);
2206 
2207     j = n / 2;
2208     head = self->ob_bytes;
2209     tail = head + n - 1;
2210     for (i = 0; i < j; i++) {
2211         swap = *head;
2212         *head++ = *tail;
2213         *tail-- = swap;
2214     }
2215 
2216     Py_RETURN_NONE;
2217 }
2218 
2219 PyDoc_STRVAR(insert__doc__,
2220 "B.insert(index, int) -> None\n\
2221 \n\
2222 Insert a single item into the bytearray before the given index.");
2223 static PyObject *
bytearray_insert(PyByteArrayObject * self,PyObject * args)2224 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2225 {
2226     PyObject *value;
2227     int ival;
2228     Py_ssize_t where, n = Py_SIZE(self);
2229 
2230     if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2231         return NULL;
2232 
2233     if (n == PY_SSIZE_T_MAX) {
2234         PyErr_SetString(PyExc_OverflowError,
2235                         "cannot add more objects to bytearray");
2236         return NULL;
2237     }
2238     if (!_getbytevalue(value, &ival))
2239         return NULL;
2240     if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2241         return NULL;
2242 
2243     if (where < 0) {
2244         where += n;
2245         if (where < 0)
2246             where = 0;
2247     }
2248     if (where > n)
2249         where = n;
2250     memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2251     self->ob_bytes[where] = ival;
2252 
2253     Py_RETURN_NONE;
2254 }
2255 
2256 PyDoc_STRVAR(append__doc__,
2257 "B.append(int) -> None\n\
2258 \n\
2259 Append a single item to the end of B.");
2260 static PyObject *
bytearray_append(PyByteArrayObject * self,PyObject * arg)2261 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2262 {
2263     int value;
2264     Py_ssize_t n = Py_SIZE(self);
2265 
2266     if (! _getbytevalue(arg, &value))
2267         return NULL;
2268     if (n == PY_SSIZE_T_MAX) {
2269         PyErr_SetString(PyExc_OverflowError,
2270                         "cannot add more objects to bytearray");
2271         return NULL;
2272     }
2273     if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2274         return NULL;
2275 
2276     self->ob_bytes[n] = value;
2277 
2278     Py_RETURN_NONE;
2279 }
2280 
2281 PyDoc_STRVAR(extend__doc__,
2282 "B.extend(iterable int) -> None\n\
2283 \n\
2284 Append all the elements from the iterator or sequence to the\n\
2285 end of B.");
2286 static PyObject *
bytearray_extend(PyByteArrayObject * self,PyObject * arg)2287 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2288 {
2289     PyObject *it, *item, *bytearray_obj;
2290     Py_ssize_t buf_size = 0, len = 0;
2291     int value;
2292     char *buf;
2293 
2294     /* bytearray_setslice code only accepts something supporting PEP 3118. */
2295     if (PyObject_CheckBuffer(arg)) {
2296         if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2297             return NULL;
2298 
2299         Py_RETURN_NONE;
2300     }
2301 
2302     it = PyObject_GetIter(arg);
2303     if (it == NULL)
2304         return NULL;
2305 
2306     /* Try to determine the length of the argument. 32 is arbitrary. */
2307     buf_size = _PyObject_LengthHint(arg, 32);
2308     if (buf_size == -1) {
2309         Py_DECREF(it);
2310         return NULL;
2311     }
2312 
2313     bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2314     if (bytearray_obj == NULL) {
2315         Py_DECREF(it);
2316         return NULL;
2317     }
2318     buf = PyByteArray_AS_STRING(bytearray_obj);
2319 
2320     while ((item = PyIter_Next(it)) != NULL) {
2321         if (! _getbytevalue(item, &value)) {
2322             Py_DECREF(item);
2323             Py_DECREF(it);
2324             Py_DECREF(bytearray_obj);
2325             return NULL;
2326         }
2327         buf[len++] = value;
2328         Py_DECREF(item);
2329 
2330         if (len >= buf_size) {
2331             Py_ssize_t addition;
2332             if (len == PY_SSIZE_T_MAX) {
2333                 Py_DECREF(it);
2334                 Py_DECREF(bytearray_obj);
2335                 return PyErr_NoMemory();
2336             }
2337             addition = len >> 1;
2338             if (addition > PY_SSIZE_T_MAX - len - 1)
2339                 buf_size = PY_SSIZE_T_MAX;
2340             else
2341                 buf_size = len + addition + 1;
2342             if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2343                 Py_DECREF(it);
2344                 Py_DECREF(bytearray_obj);
2345                 return NULL;
2346             }
2347             /* Recompute the `buf' pointer, since the resizing operation may
2348                have invalidated it. */
2349             buf = PyByteArray_AS_STRING(bytearray_obj);
2350         }
2351     }
2352     Py_DECREF(it);
2353 
2354     /* Resize down to exact size. */
2355     if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2356         Py_DECREF(bytearray_obj);
2357         return NULL;
2358     }
2359 
2360     if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1) {
2361         Py_DECREF(bytearray_obj);
2362         return NULL;
2363     }
2364     Py_DECREF(bytearray_obj);
2365 
2366     Py_RETURN_NONE;
2367 }
2368 
2369 PyDoc_STRVAR(pop__doc__,
2370 "B.pop([index]) -> int\n\
2371 \n\
2372 Remove and return a single item from B. If no index\n\
2373 argument is given, will pop the last value.");
2374 static PyObject *
bytearray_pop(PyByteArrayObject * self,PyObject * args)2375 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2376 {
2377     int value;
2378     Py_ssize_t where = -1, n = Py_SIZE(self);
2379 
2380     if (!PyArg_ParseTuple(args, "|n:pop", &where))
2381         return NULL;
2382 
2383     if (n == 0) {
2384         PyErr_SetString(PyExc_IndexError,
2385                         "pop from empty bytearray");
2386         return NULL;
2387     }
2388     if (where < 0)
2389         where += Py_SIZE(self);
2390     if (where < 0 || where >= Py_SIZE(self)) {
2391         PyErr_SetString(PyExc_IndexError, "pop index out of range");
2392         return NULL;
2393     }
2394     if (!_canresize(self))
2395         return NULL;
2396 
2397     value = self->ob_bytes[where];
2398     memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2399     if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2400         return NULL;
2401 
2402     return PyInt_FromLong((unsigned char)value);
2403 }
2404 
2405 PyDoc_STRVAR(remove__doc__,
2406 "B.remove(int) -> None\n\
2407 \n\
2408 Remove the first occurrence of a value in B.");
2409 static PyObject *
bytearray_remove(PyByteArrayObject * self,PyObject * arg)2410 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2411 {
2412     int value;
2413     Py_ssize_t n = Py_SIZE(self);
2414     char *where;
2415 
2416     if (! _getbytevalue(arg, &value))
2417         return NULL;
2418 
2419     where = memchr(self->ob_bytes, value, n);
2420     if (!where) {
2421         PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2422         return NULL;
2423     }
2424     if (!_canresize(self))
2425         return NULL;
2426 
2427     memmove(where, where + 1, self->ob_bytes + n - where);
2428     if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2429         return NULL;
2430 
2431     Py_RETURN_NONE;
2432 }
2433 
2434 /* XXX These two helpers could be optimized if argsize == 1 */
2435 
2436 static Py_ssize_t
lstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2437 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2438               void *argptr, Py_ssize_t argsize)
2439 {
2440     Py_ssize_t i = 0;
2441     while (i < mysize && memchr(argptr, myptr[i], argsize))
2442         i++;
2443     return i;
2444 }
2445 
2446 static Py_ssize_t
rstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2447 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2448               void *argptr, Py_ssize_t argsize)
2449 {
2450     Py_ssize_t i = mysize - 1;
2451     while (i >= 0 && memchr(argptr, myptr[i], argsize))
2452         i--;
2453     return i + 1;
2454 }
2455 
2456 PyDoc_STRVAR(strip__doc__,
2457 "B.strip([bytes]) -> bytearray\n\
2458 \n\
2459 Strip leading and trailing bytes contained in the argument.\n\
2460 If the argument is omitted, strip ASCII whitespace.");
2461 static PyObject *
bytearray_strip(PyByteArrayObject * self,PyObject * args)2462 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2463 {
2464     Py_ssize_t left, right, mysize, argsize;
2465     void *myptr, *argptr;
2466     PyObject *arg = Py_None;
2467     Py_buffer varg;
2468     if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2469         return NULL;
2470     if (arg == Py_None) {
2471         argptr = "\t\n\r\f\v ";
2472         argsize = 6;
2473     }
2474     else {
2475         if (_getbuffer(arg, &varg) < 0)
2476             return NULL;
2477         argptr = varg.buf;
2478         argsize = varg.len;
2479     }
2480     myptr = self->ob_bytes;
2481     mysize = Py_SIZE(self);
2482     left = lstrip_helper(myptr, mysize, argptr, argsize);
2483     if (left == mysize)
2484         right = left;
2485     else
2486         right = rstrip_helper(myptr, mysize, argptr, argsize);
2487     if (arg != Py_None)
2488         PyBuffer_Release(&varg);
2489     return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2490 }
2491 
2492 PyDoc_STRVAR(lstrip__doc__,
2493 "B.lstrip([bytes]) -> bytearray\n\
2494 \n\
2495 Strip leading bytes contained in the argument.\n\
2496 If the argument is omitted, strip leading ASCII whitespace.");
2497 static PyObject *
bytearray_lstrip(PyByteArrayObject * self,PyObject * args)2498 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2499 {
2500     Py_ssize_t left, right, mysize, argsize;
2501     void *myptr, *argptr;
2502     PyObject *arg = Py_None;
2503     Py_buffer varg;
2504     if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2505         return NULL;
2506     if (arg == Py_None) {
2507         argptr = "\t\n\r\f\v ";
2508         argsize = 6;
2509     }
2510     else {
2511         if (_getbuffer(arg, &varg) < 0)
2512             return NULL;
2513         argptr = varg.buf;
2514         argsize = varg.len;
2515     }
2516     myptr = self->ob_bytes;
2517     mysize = Py_SIZE(self);
2518     left = lstrip_helper(myptr, mysize, argptr, argsize);
2519     right = mysize;
2520     if (arg != Py_None)
2521         PyBuffer_Release(&varg);
2522     return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2523 }
2524 
2525 PyDoc_STRVAR(rstrip__doc__,
2526 "B.rstrip([bytes]) -> bytearray\n\
2527 \n\
2528 Strip trailing bytes contained in the argument.\n\
2529 If the argument is omitted, strip trailing ASCII whitespace.");
2530 static PyObject *
bytearray_rstrip(PyByteArrayObject * self,PyObject * args)2531 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2532 {
2533     Py_ssize_t left, right, mysize, argsize;
2534     void *myptr, *argptr;
2535     PyObject *arg = Py_None;
2536     Py_buffer varg;
2537     if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2538         return NULL;
2539     if (arg == Py_None) {
2540         argptr = "\t\n\r\f\v ";
2541         argsize = 6;
2542     }
2543     else {
2544         if (_getbuffer(arg, &varg) < 0)
2545             return NULL;
2546         argptr = varg.buf;
2547         argsize = varg.len;
2548     }
2549     myptr = self->ob_bytes;
2550     mysize = Py_SIZE(self);
2551     left = 0;
2552     right = rstrip_helper(myptr, mysize, argptr, argsize);
2553     if (arg != Py_None)
2554         PyBuffer_Release(&varg);
2555     return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2556 }
2557 
2558 PyDoc_STRVAR(decode_doc,
2559 "B.decode([encoding[, errors]]) -> unicode object.\n\
2560 \n\
2561 Decodes B using the codec registered for encoding. encoding defaults\n\
2562 to the default encoding. errors may be given to set a different error\n\
2563 handling scheme.  Default is 'strict' meaning that encoding errors raise\n\
2564 a UnicodeDecodeError.  Other possible values are 'ignore' and 'replace'\n\
2565 as well as any other name registered with codecs.register_error that is\n\
2566 able to handle UnicodeDecodeErrors.");
2567 
2568 static PyObject *
bytearray_decode(PyObject * self,PyObject * args,PyObject * kwargs)2569 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2570 {
2571     const char *encoding = NULL;
2572     const char *errors = NULL;
2573     static char *kwlist[] = {"encoding", "errors", 0};
2574 
2575     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2576         return NULL;
2577     if (encoding == NULL) {
2578 #ifdef Py_USING_UNICODE
2579         encoding = PyUnicode_GetDefaultEncoding();
2580 #else
2581         PyErr_SetString(PyExc_ValueError, "no encoding specified");
2582         return NULL;
2583 #endif
2584     }
2585     return _PyCodec_DecodeText(self, encoding, errors);
2586 }
2587 
2588 PyDoc_STRVAR(alloc_doc,
2589 "B.__alloc__() -> int\n\
2590 \n\
2591 Returns the number of bytes actually allocated.");
2592 
2593 static PyObject *
bytearray_alloc(PyByteArrayObject * self)2594 bytearray_alloc(PyByteArrayObject *self)
2595 {
2596     return PyInt_FromSsize_t(self->ob_alloc);
2597 }
2598 
2599 PyDoc_STRVAR(join_doc,
2600 "B.join(iterable_of_bytes) -> bytes\n\
2601 \n\
2602 Concatenates any number of bytearray objects, with B in between each pair.");
2603 
2604 static PyObject *
bytearray_join(PyByteArrayObject * self,PyObject * it)2605 bytearray_join(PyByteArrayObject *self, PyObject *it)
2606 {
2607     PyObject *seq;
2608     Py_ssize_t mysize = Py_SIZE(self);
2609     Py_ssize_t i;
2610     Py_ssize_t n;
2611     PyObject **items;
2612     Py_ssize_t totalsize = 0;
2613     PyObject *result;
2614     char *dest;
2615 
2616     seq = PySequence_Fast(it, "can only join an iterable");
2617     if (seq == NULL)
2618         return NULL;
2619     n = PySequence_Fast_GET_SIZE(seq);
2620     items = PySequence_Fast_ITEMS(seq);
2621 
2622     /* Compute the total size, and check that they are all bytes */
2623     /* XXX Shouldn't we use _getbuffer() on these items instead? */
2624     for (i = 0; i < n; i++) {
2625         PyObject *obj = items[i];
2626         if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2627             PyErr_Format(PyExc_TypeError,
2628                          "can only join an iterable of bytes "
2629                          "(item %ld has type '%.100s')",
2630                          /* XXX %ld isn't right on Win64 */
2631                          (long)i, Py_TYPE(obj)->tp_name);
2632             goto error;
2633         }
2634         if (i > 0)
2635             totalsize += mysize;
2636         totalsize += Py_SIZE(obj);
2637         if (totalsize < 0) {
2638             PyErr_NoMemory();
2639             goto error;
2640         }
2641     }
2642 
2643     /* Allocate the result, and copy the bytes */
2644     result = PyByteArray_FromStringAndSize(NULL, totalsize);
2645     if (result == NULL)
2646         goto error;
2647     dest = PyByteArray_AS_STRING(result);
2648     for (i = 0; i < n; i++) {
2649         PyObject *obj = items[i];
2650         Py_ssize_t size = Py_SIZE(obj);
2651         char *buf;
2652         if (PyByteArray_Check(obj))
2653            buf = PyByteArray_AS_STRING(obj);
2654         else
2655            buf = PyBytes_AS_STRING(obj);
2656         if (i) {
2657             memcpy(dest, self->ob_bytes, mysize);
2658             dest += mysize;
2659         }
2660         memcpy(dest, buf, size);
2661         dest += size;
2662     }
2663 
2664     /* Done */
2665     Py_DECREF(seq);
2666     return result;
2667 
2668     /* Error handling */
2669   error:
2670     Py_DECREF(seq);
2671     return NULL;
2672 }
2673 
2674 PyDoc_STRVAR(splitlines__doc__,
2675 "B.splitlines(keepends=False) -> list of lines\n\
2676 \n\
2677 Return a list of the lines in B, breaking at line boundaries.\n\
2678 Line breaks are not included in the resulting list unless keepends\n\
2679 is given and true.");
2680 
2681 static PyObject*
bytearray_splitlines(PyObject * self,PyObject * args)2682 bytearray_splitlines(PyObject *self, PyObject *args)
2683 {
2684     int keepends = 0;
2685 
2686     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2687         return NULL;
2688 
2689     return stringlib_splitlines(
2690         (PyObject*) self, PyByteArray_AS_STRING(self),
2691         PyByteArray_GET_SIZE(self), keepends
2692         );
2693 }
2694 
2695 PyDoc_STRVAR(fromhex_doc,
2696 "bytearray.fromhex(string) -> bytearray\n\
2697 \n\
2698 Create a bytearray object from a string of hexadecimal numbers.\n\
2699 Spaces between two numbers are accepted.\n\
2700 Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2701 
2702 static int
hex_digit_to_int(char c)2703 hex_digit_to_int(char c)
2704 {
2705     if (Py_ISDIGIT(c))
2706         return c - '0';
2707     else {
2708         if (Py_ISUPPER(c))
2709             c = Py_TOLOWER(c);
2710         if (c >= 'a' && c <= 'f')
2711             return c - 'a' + 10;
2712     }
2713     return -1;
2714 }
2715 
2716 static PyObject *
bytearray_fromhex(PyObject * cls,PyObject * args)2717 bytearray_fromhex(PyObject *cls, PyObject *args)
2718 {
2719     PyObject *newbytes;
2720     char *buf;
2721     char *hex;
2722     Py_ssize_t hexlen, byteslen, i, j;
2723     int top, bot;
2724 
2725     if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
2726         return NULL;
2727     byteslen = hexlen/2; /* This overestimates if there are spaces */
2728     newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
2729     if (!newbytes)
2730         return NULL;
2731     buf = PyByteArray_AS_STRING(newbytes);
2732     for (i = j = 0; i < hexlen; i += 2) {
2733         /* skip over spaces in the input */
2734         while (hex[i] == ' ')
2735             i++;
2736         if (i >= hexlen)
2737             break;
2738         top = hex_digit_to_int(hex[i]);
2739         bot = hex_digit_to_int(hex[i+1]);
2740         if (top == -1 || bot == -1) {
2741             PyErr_Format(PyExc_ValueError,
2742                          "non-hexadecimal number found in "
2743                          "fromhex() arg at position %zd", i);
2744             goto error;
2745         }
2746         buf[j++] = (top << 4) + bot;
2747     }
2748     if (PyByteArray_Resize(newbytes, j) < 0)
2749         goto error;
2750     return newbytes;
2751 
2752   error:
2753     Py_DECREF(newbytes);
2754     return NULL;
2755 }
2756 
2757 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2758 
2759 static PyObject *
bytearray_reduce(PyByteArrayObject * self)2760 bytearray_reduce(PyByteArrayObject *self)
2761 {
2762     PyObject *latin1, *dict;
2763     if (self->ob_bytes)
2764 #ifdef Py_USING_UNICODE
2765         latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2766                                         Py_SIZE(self), NULL);
2767 #else
2768         latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self));
2769 #endif
2770     else
2771 #ifdef Py_USING_UNICODE
2772         latin1 = PyUnicode_FromString("");
2773 #else
2774         latin1 = PyString_FromString("");
2775 #endif
2776 
2777     dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
2778     if (dict == NULL) {
2779         PyErr_Clear();
2780         dict = Py_None;
2781         Py_INCREF(dict);
2782     }
2783 
2784     return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
2785 }
2786 
2787 PyDoc_STRVAR(sizeof_doc,
2788 "B.__sizeof__() -> int\n\
2789  \n\
2790 Returns the size of B in memory, in bytes");
2791 static PyObject *
bytearray_sizeof(PyByteArrayObject * self)2792 bytearray_sizeof(PyByteArrayObject *self)
2793 {
2794     Py_ssize_t res;
2795 
2796     res = _PyObject_SIZE(Py_TYPE(self)) + self->ob_alloc * sizeof(char);
2797     return PyInt_FromSsize_t(res);
2798 }
2799 
2800 static PySequenceMethods bytearray_as_sequence = {
2801     (lenfunc)bytearray_length,              /* sq_length */
2802     (binaryfunc)PyByteArray_Concat,         /* sq_concat */
2803     (ssizeargfunc)bytearray_repeat,         /* sq_repeat */
2804     (ssizeargfunc)bytearray_getitem,        /* sq_item */
2805     0,                                      /* sq_slice */
2806     (ssizeobjargproc)bytearray_setitem,     /* sq_ass_item */
2807     0,                                      /* sq_ass_slice */
2808     (objobjproc)bytearray_contains,         /* sq_contains */
2809     (binaryfunc)bytearray_iconcat,          /* sq_inplace_concat */
2810     (ssizeargfunc)bytearray_irepeat,        /* sq_inplace_repeat */
2811 };
2812 
2813 static PyMappingMethods bytearray_as_mapping = {
2814     (lenfunc)bytearray_length,
2815     (binaryfunc)bytearray_subscript,
2816     (objobjargproc)bytearray_ass_subscript,
2817 };
2818 
2819 static PyBufferProcs bytearray_as_buffer = {
2820     (readbufferproc)bytearray_buffer_getreadbuf,
2821     (writebufferproc)bytearray_buffer_getwritebuf,
2822     (segcountproc)bytearray_buffer_getsegcount,
2823     (charbufferproc)bytearray_buffer_getcharbuf,
2824     (getbufferproc)bytearray_getbuffer,
2825     (releasebufferproc)bytearray_releasebuffer,
2826 };
2827 
2828 static PyMethodDef
2829 bytearray_methods[] = {
2830     {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
2831     {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
2832     {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
2833     {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
2834     {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
2835      _Py_capitalize__doc__},
2836     {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
2837     {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
2838     {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
2839     {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
2840     {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
2841      expandtabs__doc__},
2842     {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
2843     {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
2844     {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
2845      fromhex_doc},
2846     {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
2847     {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
2848     {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
2849      _Py_isalnum__doc__},
2850     {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
2851      _Py_isalpha__doc__},
2852     {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
2853      _Py_isdigit__doc__},
2854     {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
2855      _Py_islower__doc__},
2856     {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
2857      _Py_isspace__doc__},
2858     {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
2859      _Py_istitle__doc__},
2860     {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
2861      _Py_isupper__doc__},
2862     {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
2863     {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
2864     {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2865     {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
2866     {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
2867     {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
2868     {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
2869     {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
2870     {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
2871     {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
2872     {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
2873     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
2874     {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
2875     {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
2876     {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
2877     {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
2878     {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
2879      splitlines__doc__},
2880     {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
2881      startswith__doc__},
2882     {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
2883     {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
2884      _Py_swapcase__doc__},
2885     {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
2886     {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
2887      translate__doc__},
2888     {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2889     {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
2890     {NULL}
2891 };
2892 
2893 PyDoc_STRVAR(bytearray_doc,
2894 "bytearray(iterable_of_ints) -> bytearray.\n\
2895 bytearray(string, encoding[, errors]) -> bytearray.\n\
2896 bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
2897 bytearray(memory_view) -> bytearray.\n\
2898 \n\
2899 Construct a mutable bytearray object from:\n\
2900   - an iterable yielding integers in range(256)\n\
2901   - a text string encoded using the specified encoding\n\
2902   - a bytes or a bytearray object\n\
2903   - any object implementing the buffer API.\n\
2904 \n\
2905 bytearray(int) -> bytearray.\n\
2906 \n\
2907 Construct a zero-initialized bytearray of the given length.");
2908 
2909 
2910 static PyObject *bytearray_iter(PyObject *seq);
2911 
2912 PyTypeObject PyByteArray_Type = {
2913     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2914     "bytearray",
2915     sizeof(PyByteArrayObject),
2916     0,
2917     (destructor)bytearray_dealloc,       /* tp_dealloc */
2918     0,                                  /* tp_print */
2919     0,                                  /* tp_getattr */
2920     0,                                  /* tp_setattr */
2921     0,                                  /* tp_compare */
2922     (reprfunc)bytearray_repr,           /* tp_repr */
2923     0,                                  /* tp_as_number */
2924     &bytearray_as_sequence,             /* tp_as_sequence */
2925     &bytearray_as_mapping,              /* tp_as_mapping */
2926     0,                                  /* tp_hash */
2927     0,                                  /* tp_call */
2928     bytearray_str,                      /* tp_str */
2929     PyObject_GenericGetAttr,            /* tp_getattro */
2930     0,                                  /* tp_setattro */
2931     &bytearray_as_buffer,               /* tp_as_buffer */
2932     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2933     Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
2934     bytearray_doc,                      /* tp_doc */
2935     0,                                  /* tp_traverse */
2936     0,                                  /* tp_clear */
2937     (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
2938     0,                                  /* tp_weaklistoffset */
2939     bytearray_iter,                     /* tp_iter */
2940     0,                                  /* tp_iternext */
2941     bytearray_methods,                  /* tp_methods */
2942     0,                                  /* tp_members */
2943     0,                                  /* tp_getset */
2944     0,                                  /* tp_base */
2945     0,                                  /* tp_dict */
2946     0,                                  /* tp_descr_get */
2947     0,                                  /* tp_descr_set */
2948     0,                                  /* tp_dictoffset */
2949     (initproc)bytearray_init,           /* tp_init */
2950     PyType_GenericAlloc,                /* tp_alloc */
2951     PyType_GenericNew,                  /* tp_new */
2952     PyObject_Del,                       /* tp_free */
2953 };
2954 
2955 /*********************** Bytes Iterator ****************************/
2956 
2957 typedef struct {
2958     PyObject_HEAD
2959     Py_ssize_t it_index;
2960     PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
2961 } bytesiterobject;
2962 
2963 static void
bytearrayiter_dealloc(bytesiterobject * it)2964 bytearrayiter_dealloc(bytesiterobject *it)
2965 {
2966     _PyObject_GC_UNTRACK(it);
2967     Py_XDECREF(it->it_seq);
2968     PyObject_GC_Del(it);
2969 }
2970 
2971 static int
bytearrayiter_traverse(bytesiterobject * it,visitproc visit,void * arg)2972 bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
2973 {
2974     Py_VISIT(it->it_seq);
2975     return 0;
2976 }
2977 
2978 static PyObject *
bytearrayiter_next(bytesiterobject * it)2979 bytearrayiter_next(bytesiterobject *it)
2980 {
2981     PyByteArrayObject *seq;
2982     PyObject *item;
2983 
2984     assert(it != NULL);
2985     seq = it->it_seq;
2986     if (seq == NULL)
2987         return NULL;
2988     assert(PyByteArray_Check(seq));
2989 
2990     if (it->it_index < PyByteArray_GET_SIZE(seq)) {
2991         item = PyInt_FromLong(
2992             (unsigned char)seq->ob_bytes[it->it_index]);
2993         if (item != NULL)
2994             ++it->it_index;
2995         return item;
2996     }
2997 
2998     it->it_seq = NULL;
2999     Py_DECREF(seq);
3000     return NULL;
3001 }
3002 
3003 static PyObject *
bytesarrayiter_length_hint(bytesiterobject * it)3004 bytesarrayiter_length_hint(bytesiterobject *it)
3005 {
3006     Py_ssize_t len = 0;
3007     if (it->it_seq)
3008         len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3009     return PyInt_FromSsize_t(len);
3010 }
3011 
3012 PyDoc_STRVAR(length_hint_doc,
3013     "Private method returning an estimate of len(list(it)).");
3014 
3015 static PyMethodDef bytearrayiter_methods[] = {
3016     {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
3017      length_hint_doc},
3018     {NULL, NULL} /* sentinel */
3019 };
3020 
3021 PyTypeObject PyByteArrayIter_Type = {
3022     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3023     "bytearray_iterator",              /* tp_name */
3024     sizeof(bytesiterobject),           /* tp_basicsize */
3025     0,                                 /* tp_itemsize */
3026     /* methods */
3027     (destructor)bytearrayiter_dealloc, /* tp_dealloc */
3028     0,                                 /* tp_print */
3029     0,                                 /* tp_getattr */
3030     0,                                 /* tp_setattr */
3031     0,                                 /* tp_compare */
3032     0,                                 /* tp_repr */
3033     0,                                 /* tp_as_number */
3034     0,                                 /* tp_as_sequence */
3035     0,                                 /* tp_as_mapping */
3036     0,                                 /* tp_hash */
3037     0,                                 /* tp_call */
3038     0,                                 /* tp_str */
3039     PyObject_GenericGetAttr,           /* tp_getattro */
3040     0,                                 /* tp_setattro */
3041     0,                                 /* tp_as_buffer */
3042     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3043     0,                                 /* tp_doc */
3044     (traverseproc)bytearrayiter_traverse,  /* tp_traverse */
3045     0,                                 /* tp_clear */
3046     0,                                 /* tp_richcompare */
3047     0,                                 /* tp_weaklistoffset */
3048     PyObject_SelfIter,                 /* tp_iter */
3049     (iternextfunc)bytearrayiter_next,  /* tp_iternext */
3050     bytearrayiter_methods,             /* tp_methods */
3051     0,
3052 };
3053 
3054 static PyObject *
bytearray_iter(PyObject * seq)3055 bytearray_iter(PyObject *seq)
3056 {
3057     bytesiterobject *it;
3058 
3059     if (!PyByteArray_Check(seq)) {
3060         PyErr_BadInternalCall();
3061         return NULL;
3062     }
3063     it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3064     if (it == NULL)
3065         return NULL;
3066     it->it_index = 0;
3067     Py_INCREF(seq);
3068     it->it_seq = (PyByteArrayObject *)seq;
3069     _PyObject_GC_TRACK(it);
3070     return (PyObject *)it;
3071 }
3072