1 /* PyBytes (bytearray) implementation */
2
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
6 #include "bytes_methods.h"
7
8 char _PyByteArray_empty_string[] = "";
9
/* Teardown hook for the bytearray implementation.
   The body is empty: this function performs no work. */
void
PyByteArray_Fini(void)
{
    /* Intentionally empty. */
    return;
}
14
/* Setup hook for the bytearray implementation.
   Performs no work and unconditionally returns 1
   (presumably the "success" value expected by the caller -- confirm). */
int
PyByteArray_Init(void)
{
    const int status = 1;

    return status;
}
20
21 /* end nullbytes support */
22
23 /* Helpers */
24
25 static int
_getbytevalue(PyObject * arg,int * value)26 _getbytevalue(PyObject* arg, int *value)
27 {
28 long face_value;
29
30 if (PyBytes_CheckExact(arg)) {
31 if (Py_SIZE(arg) != 1) {
32 PyErr_SetString(PyExc_ValueError, "string must be of size 1");
33 return 0;
34 }
35 *value = Py_CHARMASK(((PyBytesObject*)arg)->ob_sval[0]);
36 return 1;
37 }
38 else if (_PyAnyInt_Check(arg)) {
39 face_value = PyLong_AsLong(arg);
40 }
41 else {
42 PyObject *index = PyNumber_Index(arg);
43 if (index == NULL) {
44 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
45 PyErr_Format(PyExc_TypeError,
46 "an integer or string of size 1 is required");
47 }
48 return 0;
49 }
50 face_value = PyLong_AsLong(index);
51 Py_DECREF(index);
52 }
53
54 if (face_value < 0 || face_value >= 256) {
55 /* this includes the OverflowError in case the long is too large */
56 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
57 return 0;
58 }
59
60 *value = face_value;
61 return 1;
62 }
63
64 static Py_ssize_t
bytearray_buffer_getreadbuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)65 bytearray_buffer_getreadbuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
66 {
67 if ( index != 0 ) {
68 PyErr_SetString(PyExc_SystemError,
69 "accessing non-existent bytes segment");
70 return -1;
71 }
72 *ptr = (void *)PyByteArray_AS_STRING(self);
73 return Py_SIZE(self);
74 }
75
76 static Py_ssize_t
bytearray_buffer_getwritebuf(PyByteArrayObject * self,Py_ssize_t index,const void ** ptr)77 bytearray_buffer_getwritebuf(PyByteArrayObject *self, Py_ssize_t index, const void **ptr)
78 {
79 if ( index != 0 ) {
80 PyErr_SetString(PyExc_SystemError,
81 "accessing non-existent bytes segment");
82 return -1;
83 }
84 *ptr = (void *)PyByteArray_AS_STRING(self);
85 return Py_SIZE(self);
86 }
87
88 static Py_ssize_t
bytearray_buffer_getsegcount(PyByteArrayObject * self,Py_ssize_t * lenp)89 bytearray_buffer_getsegcount(PyByteArrayObject *self, Py_ssize_t *lenp)
90 {
91 if ( lenp )
92 *lenp = Py_SIZE(self);
93 return 1;
94 }
95
96 static Py_ssize_t
bytearray_buffer_getcharbuf(PyByteArrayObject * self,Py_ssize_t index,const char ** ptr)97 bytearray_buffer_getcharbuf(PyByteArrayObject *self, Py_ssize_t index, const char **ptr)
98 {
99 if ( index != 0 ) {
100 PyErr_SetString(PyExc_SystemError,
101 "accessing non-existent bytes segment");
102 return -1;
103 }
104 *ptr = PyByteArray_AS_STRING(self);
105 return Py_SIZE(self);
106 }
107
108 static int
bytearray_getbuffer(PyByteArrayObject * obj,Py_buffer * view,int flags)109 bytearray_getbuffer(PyByteArrayObject *obj, Py_buffer *view, int flags)
110 {
111 int ret;
112 void *ptr;
113 if (view == NULL) {
114 obj->ob_exports++;
115 return 0;
116 }
117 ptr = (void *) PyByteArray_AS_STRING(obj);
118 ret = PyBuffer_FillInfo(view, (PyObject*)obj, ptr, Py_SIZE(obj), 0, flags);
119 if (ret >= 0) {
120 obj->ob_exports++;
121 }
122 return ret;
123 }
124
125 static void
bytearray_releasebuffer(PyByteArrayObject * obj,Py_buffer * view)126 bytearray_releasebuffer(PyByteArrayObject *obj, Py_buffer *view)
127 {
128 obj->ob_exports--;
129 }
130
131 static Py_ssize_t
_getbuffer(PyObject * obj,Py_buffer * view)132 _getbuffer(PyObject *obj, Py_buffer *view)
133 {
134 PyBufferProcs *buffer = Py_TYPE(obj)->tp_as_buffer;
135
136 if (buffer == NULL || buffer->bf_getbuffer == NULL)
137 {
138 PyErr_Format(PyExc_TypeError,
139 "Type %.100s doesn't support the buffer API",
140 Py_TYPE(obj)->tp_name);
141 return -1;
142 }
143
144 if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
145 return -1;
146 return view->len;
147 }
148
149 static int
_canresize(PyByteArrayObject * self)150 _canresize(PyByteArrayObject *self)
151 {
152 if (self->ob_exports > 0) {
153 PyErr_SetString(PyExc_BufferError,
154 "Existing exports of data: object cannot be re-sized");
155 return 0;
156 }
157 return 1;
158 }
159
160 /* Direct API functions */
161
162 PyObject *
PyByteArray_FromObject(PyObject * input)163 PyByteArray_FromObject(PyObject *input)
164 {
165 return PyObject_CallFunctionObjArgs((PyObject *)&PyByteArray_Type,
166 input, NULL);
167 }
168
169 static PyObject *
_PyByteArray_FromBufferObject(PyObject * obj)170 _PyByteArray_FromBufferObject(PyObject *obj)
171 {
172 PyObject *result;
173 Py_buffer view;
174
175 if (PyObject_GetBuffer(obj, &view, PyBUF_FULL_RO) < 0) {
176 return NULL;
177 }
178 result = PyByteArray_FromStringAndSize(NULL, view.len);
179 if (result != NULL &&
180 PyBuffer_ToContiguous(PyByteArray_AS_STRING(result),
181 &view, view.len, 'C') < 0)
182 {
183 Py_CLEAR(result);
184 }
185 PyBuffer_Release(&view);
186 return result;
187 }
188
189 PyObject *
PyByteArray_FromStringAndSize(const char * bytes,Py_ssize_t size)190 PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size)
191 {
192 PyByteArrayObject *new;
193 Py_ssize_t alloc;
194
195 if (size < 0) {
196 PyErr_SetString(PyExc_SystemError,
197 "Negative size passed to PyByteArray_FromStringAndSize");
198 return NULL;
199 }
200
201 new = PyObject_New(PyByteArrayObject, &PyByteArray_Type);
202 if (new == NULL)
203 return NULL;
204
205 if (size == 0) {
206 new->ob_bytes = NULL;
207 alloc = 0;
208 }
209 else {
210 alloc = size + 1;
211 new->ob_bytes = PyMem_Malloc(alloc);
212 if (new->ob_bytes == NULL) {
213 Py_DECREF(new);
214 return PyErr_NoMemory();
215 }
216 if (bytes != NULL && size > 0)
217 memcpy(new->ob_bytes, bytes, size);
218 new->ob_bytes[size] = '\0'; /* Trailing null byte */
219 }
220 Py_SIZE(new) = size;
221 new->ob_alloc = alloc;
222 new->ob_exports = 0;
223
224 return (PyObject *)new;
225 }
226
227 Py_ssize_t
PyByteArray_Size(PyObject * self)228 PyByteArray_Size(PyObject *self)
229 {
230 assert(self != NULL);
231 assert(PyByteArray_Check(self));
232
233 return PyByteArray_GET_SIZE(self);
234 }
235
236 char *
PyByteArray_AsString(PyObject * self)237 PyByteArray_AsString(PyObject *self)
238 {
239 assert(self != NULL);
240 assert(PyByteArray_Check(self));
241
242 return PyByteArray_AS_STRING(self);
243 }
244
245 int
PyByteArray_Resize(PyObject * self,Py_ssize_t size)246 PyByteArray_Resize(PyObject *self, Py_ssize_t size)
247 {
248 void *sval;
249 Py_ssize_t alloc = ((PyByteArrayObject *)self)->ob_alloc;
250
251 assert(self != NULL);
252 assert(PyByteArray_Check(self));
253 assert(size >= 0);
254
255 if (size == Py_SIZE(self)) {
256 return 0;
257 }
258 if (!_canresize((PyByteArrayObject *)self)) {
259 return -1;
260 }
261
262 if (size < alloc / 2) {
263 /* Major downsize; resize down to exact size */
264 alloc = size + 1;
265 }
266 else if (size < alloc) {
267 /* Within allocated size; quick exit */
268 Py_SIZE(self) = size;
269 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
270 return 0;
271 }
272 else if (size <= alloc * 1.125) {
273 /* Moderate upsize; overallocate similar to list_resize() */
274 alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
275 }
276 else {
277 /* Major upsize; resize up to exact size */
278 alloc = size + 1;
279 }
280
281 sval = PyMem_Realloc(((PyByteArrayObject *)self)->ob_bytes, alloc);
282 if (sval == NULL) {
283 PyErr_NoMemory();
284 return -1;
285 }
286
287 ((PyByteArrayObject *)self)->ob_bytes = sval;
288 Py_SIZE(self) = size;
289 ((PyByteArrayObject *)self)->ob_alloc = alloc;
290 ((PyByteArrayObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
291
292 return 0;
293 }
294
295 PyObject *
PyByteArray_Concat(PyObject * a,PyObject * b)296 PyByteArray_Concat(PyObject *a, PyObject *b)
297 {
298 Py_buffer va, vb;
299 PyByteArrayObject *result = NULL;
300
301 va.len = -1;
302 vb.len = -1;
303 if (_getbuffer(a, &va) < 0 ||
304 _getbuffer(b, &vb) < 0) {
305 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
306 Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
307 goto done;
308 }
309
310 if (va.len > PY_SSIZE_T_MAX - vb.len) {
311 PyErr_NoMemory();
312 goto done;
313 }
314
315 result = (PyByteArrayObject *) \
316 PyByteArray_FromStringAndSize(NULL, va.len + vb.len);
317 if (result != NULL) {
318 memcpy(result->ob_bytes, va.buf, va.len);
319 memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
320 }
321
322 done:
323 if (va.len != -1)
324 PyBuffer_Release(&va);
325 if (vb.len != -1)
326 PyBuffer_Release(&vb);
327 return (PyObject *)result;
328 }
329
330 /* Functions stuffed into the type object */
331
332 static Py_ssize_t
bytearray_length(PyByteArrayObject * self)333 bytearray_length(PyByteArrayObject *self)
334 {
335 return Py_SIZE(self);
336 }
337
338 static PyObject *
bytearray_iconcat(PyByteArrayObject * self,PyObject * other)339 bytearray_iconcat(PyByteArrayObject *self, PyObject *other)
340 {
341 Py_ssize_t mysize;
342 Py_ssize_t size;
343 Py_buffer vo;
344
345 if (_getbuffer(other, &vo) < 0) {
346 PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
347 Py_TYPE(other)->tp_name, Py_TYPE(self)->tp_name);
348 return NULL;
349 }
350
351 mysize = Py_SIZE(self);
352 if (mysize > PY_SSIZE_T_MAX - vo.len) {
353 PyBuffer_Release(&vo);
354 return PyErr_NoMemory();
355 }
356 size = mysize + vo.len;
357 if (size < self->ob_alloc) {
358 Py_SIZE(self) = size;
359 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
360 }
361 else if (PyByteArray_Resize((PyObject *)self, size) < 0) {
362 PyBuffer_Release(&vo);
363 return NULL;
364 }
365 memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
366 PyBuffer_Release(&vo);
367 Py_INCREF(self);
368 return (PyObject *)self;
369 }
370
371 static PyObject *
bytearray_repeat(PyByteArrayObject * self,Py_ssize_t count)372 bytearray_repeat(PyByteArrayObject *self, Py_ssize_t count)
373 {
374 PyByteArrayObject *result;
375 Py_ssize_t mysize;
376 Py_ssize_t size;
377
378 if (count < 0)
379 count = 0;
380 mysize = Py_SIZE(self);
381 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
382 return PyErr_NoMemory();
383 size = mysize * count;
384 result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
385 if (result != NULL && size != 0) {
386 if (mysize == 1)
387 memset(result->ob_bytes, self->ob_bytes[0], size);
388 else {
389 Py_ssize_t i;
390 for (i = 0; i < count; i++)
391 memcpy(result->ob_bytes + i*mysize, self->ob_bytes, mysize);
392 }
393 }
394 return (PyObject *)result;
395 }
396
397 static PyObject *
bytearray_irepeat(PyByteArrayObject * self,Py_ssize_t count)398 bytearray_irepeat(PyByteArrayObject *self, Py_ssize_t count)
399 {
400 Py_ssize_t mysize;
401 Py_ssize_t size;
402
403 if (count < 0)
404 count = 0;
405 mysize = Py_SIZE(self);
406 if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
407 return PyErr_NoMemory();
408 size = mysize * count;
409 if (size < self->ob_alloc) {
410 Py_SIZE(self) = size;
411 self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
412 }
413 else if (PyByteArray_Resize((PyObject *)self, size) < 0)
414 return NULL;
415
416 if (mysize == 1)
417 memset(self->ob_bytes, self->ob_bytes[0], size);
418 else {
419 Py_ssize_t i;
420 for (i = 1; i < count; i++)
421 memcpy(self->ob_bytes + i*mysize, self->ob_bytes, mysize);
422 }
423
424 Py_INCREF(self);
425 return (PyObject *)self;
426 }
427
428 static PyObject *
bytearray_getitem(PyByteArrayObject * self,Py_ssize_t i)429 bytearray_getitem(PyByteArrayObject *self, Py_ssize_t i)
430 {
431 if (i < 0)
432 i += Py_SIZE(self);
433 if (i < 0 || i >= Py_SIZE(self)) {
434 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
435 return NULL;
436 }
437 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
438 }
439
440 static PyObject *
bytearray_subscript(PyByteArrayObject * self,PyObject * index)441 bytearray_subscript(PyByteArrayObject *self, PyObject *index)
442 {
443 if (PyIndex_Check(index)) {
444 Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);
445
446 if (i == -1 && PyErr_Occurred())
447 return NULL;
448
449 if (i < 0)
450 i += PyByteArray_GET_SIZE(self);
451
452 if (i < 0 || i >= Py_SIZE(self)) {
453 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
454 return NULL;
455 }
456 return PyInt_FromLong((unsigned char)(self->ob_bytes[i]));
457 }
458 else if (PySlice_Check(index)) {
459 Py_ssize_t start, stop, step, slicelength, cur, i;
460 if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
461 return NULL;
462 }
463 slicelength = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self),
464 &start, &stop, step);
465
466 if (slicelength <= 0)
467 return PyByteArray_FromStringAndSize("", 0);
468 else if (step == 1) {
469 return PyByteArray_FromStringAndSize(self->ob_bytes + start,
470 slicelength);
471 }
472 else {
473 char *source_buf = PyByteArray_AS_STRING(self);
474 char *result_buf = (char *)PyMem_Malloc(slicelength);
475 PyObject *result;
476
477 if (result_buf == NULL)
478 return PyErr_NoMemory();
479
480 for (cur = start, i = 0; i < slicelength;
481 cur += step, i++) {
482 result_buf[i] = source_buf[cur];
483 }
484 result = PyByteArray_FromStringAndSize(result_buf, slicelength);
485 PyMem_Free(result_buf);
486 return result;
487 }
488 }
489 else {
490 PyErr_SetString(PyExc_TypeError, "bytearray indices must be integers");
491 return NULL;
492 }
493 }
494
/* Replace the bytes self[lo:hi] with the buffer contents of values, or
   delete them when values is NULL.

   self    the bytearray to modify.
   lo, hi  slice bounds; clamped below (see the clamping code) so that
           0 <= lo <= hi <= len(self).
   values  NULL to delete, self (a temporary copy is made), or any object
           providing a buffer through _getbuffer().

   Returns 0 on success and -1 with an exception set on failure
   (TypeError when values has no buffer, BufferError when a shrinking
   resize is blocked by exports, or whatever PyByteArray_Resize() set). */
static int
bytearray_setslice(PyByteArrayObject *self, Py_ssize_t lo, Py_ssize_t hi,
               PyObject *values)
{
    Py_ssize_t avail, needed;
    void *bytes;
    Py_buffer vbytes;
    int res = 0;

    /* A length of -1 marks "no view acquired"; checked at finish. */
    vbytes.len = -1;
    if (values == (PyObject *)self) {
        /* Make a copy and call this function recursively */
        int err;
        values = PyByteArray_FromStringAndSize(PyByteArray_AS_STRING(values),
                                               PyByteArray_GET_SIZE(values));
        if (values == NULL)
            return -1;
        err = bytearray_setslice(self, lo, hi, values);
        Py_DECREF(values);
        return err;
    }
    if (values == NULL) {
        /* del b[lo:hi] */
        bytes = NULL;
        needed = 0;
    }
    else {
        if (_getbuffer(values, &vbytes) < 0) {
                PyErr_Format(PyExc_TypeError,
                             "can't set bytearray slice from %.100s",
                             Py_TYPE(values)->tp_name);
                return -1;
        }
        needed = vbytes.len;
        bytes = vbytes.buf;
    }

    /* Clamp the bounds into the current contents. */
    if (lo < 0)
        lo = 0;
    if (hi < lo)
        hi = lo;
    if (hi > Py_SIZE(self))
        hi = Py_SIZE(self);

    /* avail is the number of existing bytes being replaced.  It can only
       be negative here when lo was beyond the end (hi was clamped below
       lo); in that case the slice collapses to the empty slice at 0. */
    avail = hi - lo;
    if (avail < 0)
        lo = hi = avail = 0;

    if (avail != needed) {
        if (avail > needed) {
            /* Shrinking: the tail is moved down BEFORE the buffer is
               resized, so the export check is done explicitly first. */
            if (!_canresize(self)) {
                res = -1;
                goto finish;
            }
            /*
              0   lo               hi               old_size
              |   |<----avail----->|<-----tomove------>|
              |   |<-needed->|<-----tomove------>|
              0   lo      new_hi              new_size
            */
            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
                    Py_SIZE(self) - hi);
        }
        /* XXX(nnorwitz): need to verify this can't overflow! */
        if (PyByteArray_Resize((PyObject *)self,
                           Py_SIZE(self) + needed - avail) < 0) {
                res = -1;
                goto finish;
        }
        if (avail < needed) {
            /* Growing: the buffer was enlarged above, so Py_SIZE(self) is
               already the new size; now move the tail up. */
            /*
              0   lo        hi               old_size
              |   |<-avail->|<-----tomove------>|
              |   |<----needed---->|<-----tomove------>|
              0   lo            new_hi              new_size
             */
            memmove(self->ob_bytes + lo + needed, self->ob_bytes + hi,
                    Py_SIZE(self) - lo - needed);
        }
    }

    /* The gap at lo is now exactly needed bytes wide; fill it. */
    if (needed > 0)
        memcpy(self->ob_bytes + lo, bytes, needed);


 finish:
    /* Release the view only if one was acquired. */
    if (vbytes.len != -1)
        PyBuffer_Release(&vbytes);
    return res;
}
585
586 static int
bytearray_setitem(PyByteArrayObject * self,Py_ssize_t i,PyObject * value)587 bytearray_setitem(PyByteArrayObject *self, Py_ssize_t i, PyObject *value)
588 {
589 int ival;
590
591 if (i < 0)
592 i += Py_SIZE(self);
593
594 if (i < 0 || i >= Py_SIZE(self)) {
595 PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
596 return -1;
597 }
598
599 if (value == NULL)
600 return bytearray_setslice(self, i, i+1, NULL);
601
602 if (!_getbytevalue(value, &ival))
603 return -1;
604
605 self->ob_bytes[i] = ival;
606 return 0;
607 }
608
/* Subscript assignment/deletion: self[index] = values, or del self[index]
   when values is NULL.

   index   an index-like object or a slice object; anything else raises
           TypeError.
   values  NULL (delete), a byte value for a single index, or for slices
           a bytearray / any object PyByteArray_FromObject() accepts.

   Returns 0 on success and -1 with an exception set on failure. */
static int
bytearray_ass_subscript(PyByteArrayObject *self, PyObject *index, PyObject *values)
{
    Py_ssize_t start, stop, step, slicelen, needed;
    char *bytes;

    if (PyIndex_Check(index)) {
        Py_ssize_t i = PyNumber_AsSsize_t(index, PyExc_IndexError);

        if (i == -1 && PyErr_Occurred())
            return -1;

        if (i < 0)
            i += PyByteArray_GET_SIZE(self);

        if (i < 0 || i >= Py_SIZE(self)) {
            PyErr_SetString(PyExc_IndexError, "bytearray index out of range");
            return -1;
        }

        if (values == NULL) {
            /* Fall through to slice assignment */
            start = i;
            stop = i + 1;
            step = 1;
            slicelen = 1;
        }
        else {
            /* Plain item store: no resizing involved. */
            int ival;
            if (!_getbytevalue(values, &ival))
                return -1;
            self->ob_bytes[i] = (char)ival;
            return 0;
        }
    }
    else if (PySlice_Check(index)) {
        if (_PySlice_Unpack(index, &start, &stop, &step) < 0) {
            return -1;
        }
        slicelen = _PySlice_AdjustIndices(PyByteArray_GET_SIZE(self), &start,
                                         &stop, step);
    }
    else {
        PyErr_SetString(PyExc_TypeError, "bytearray indices must be integer");
        return -1;
    }

    /* From here on: a slice operation described by start/stop/step/slicelen. */
    if (values == NULL) {
        bytes = NULL;
        needed = 0;
    }
    else if (values == (PyObject *)self || !PyByteArray_Check(values)) {
        int err;
        /* Numbers and unicode objects are rejected up front instead of
           being passed to the bytearray constructor. */
        if (PyNumber_Check(values) || PyUnicode_Check(values)) {
            PyErr_SetString(PyExc_TypeError,
                            "can assign only bytes, buffers, or iterables "
                            "of ints in range(0, 256)");
            return -1;
        }
        /* Make a copy and call this function recursively */
        values = PyByteArray_FromObject(values);
        if (values == NULL)
            return -1;
        err = bytearray_ass_subscript(self, index, values);
        Py_DECREF(values);
        return err;
    }
    else {
        assert(PyByteArray_Check(values));
        bytes = ((PyByteArrayObject *)values)->ob_bytes;
        needed = Py_SIZE(values);
    }
    /* Make sure b[5:2] = ... inserts before 5, not before 2. */
    if ((step < 0 && start < stop) ||
        (step > 0 && start > stop))
        stop = start;
    if (step == 1) {
        /* Contiguous slice: may change the length of self. */
        if (slicelen != needed) {
            if (!_canresize(self))
                return -1;
            if (slicelen > needed) {
                /* Shrinking: move the tail down before resizing. */
                /*
                  0   start           stop              old_size
                  |   |<---slicelen--->|<-----tomove------>|
                  |   |<-needed->|<-----tomove------>|
                  0   lo      new_hi              new_size
                */
                memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
                        Py_SIZE(self) - stop);
            }
            if (PyByteArray_Resize((PyObject *)self,
                               Py_SIZE(self) + needed - slicelen) < 0)
                return -1;
            if (slicelen < needed) {
                /* Growing: Py_SIZE(self) is already the new size here;
                   move the tail up into the enlarged buffer. */
                /*
                  0   lo        hi               old_size
                  |   |<-avail->|<-----tomove------>|
                  |   |<----needed---->|<-----tomove------>|
                  0   lo            new_hi              new_size
                 */
                memmove(self->ob_bytes + start + needed, self->ob_bytes + stop,
                        Py_SIZE(self) - start - needed);
            }
        }

        if (needed > 0)
            memcpy(self->ob_bytes + start, bytes, needed);

        return 0;
    }
    else {
        /* Extended slice (step != 1). */
        if (needed == 0) {
            /* Delete slice */
            /* NOTE(review): this branch is also taken when values is a
               non-NULL but empty bytearray, since only needed is tested. */
            size_t cur;
            Py_ssize_t i;

            if (!_canresize(self))
                return -1;
            if (step < 0) {
                /* Normalise to an equivalent forward walk over the same
                   positions. */
                stop = start + 1;
                start = stop + step * (slicelen - 1) - 1;
                step = -step;
            }
            /* For each deleted position, shift the run of kept bytes that
               follows it down by the number of bytes deleted so far. */
            for (cur = start, i = 0;
                 i < slicelen; cur += step, i++) {
                Py_ssize_t lim = step - 1;

                /* The last run may be cut short by the end of the data. */
                if (cur + step >= (size_t)PyByteArray_GET_SIZE(self))
                    lim = PyByteArray_GET_SIZE(self) - cur - 1;

                memmove(self->ob_bytes + cur - i,
                        self->ob_bytes + cur + 1, lim);
            }
            /* Move the tail of the bytes, in one chunk */
            cur = start + slicelen*step;
            if (cur < (size_t)PyByteArray_GET_SIZE(self)) {
                memmove(self->ob_bytes + cur - slicelen,
                        self->ob_bytes + cur,
                        PyByteArray_GET_SIZE(self) - cur);
            }
            if (PyByteArray_Resize((PyObject *)self,
                               PyByteArray_GET_SIZE(self) - slicelen) < 0)
                return -1;

            return 0;
        }
        else {
            /* Assign slice */
            /* Extended-slice assignment never changes the length, so the
               replacement must have exactly slicelen bytes. */
            Py_ssize_t cur, i;

            if (needed != slicelen) {
                PyErr_Format(PyExc_ValueError,
                             "attempt to assign bytes of size %zd "
                             "to extended slice of size %zd",
                             needed, slicelen);
                return -1;
            }
            for (cur = start, i = 0; i < slicelen; cur += step, i++)
                self->ob_bytes[cur] = bytes[i];
            return 0;
        }
    }
}
772
773 static int
bytearray_init(PyByteArrayObject * self,PyObject * args,PyObject * kwds)774 bytearray_init(PyByteArrayObject *self, PyObject *args, PyObject *kwds)
775 {
776 static char *kwlist[] = {"source", "encoding", "errors", 0};
777 PyObject *arg = NULL;
778 const char *encoding = NULL;
779 const char *errors = NULL;
780 Py_ssize_t count;
781 PyObject *it;
782 PyObject *(*iternext)(PyObject *);
783
784 if (Py_SIZE(self) != 0) {
785 /* Empty previous contents (yes, do this first of all!) */
786 if (PyByteArray_Resize((PyObject *)self, 0) < 0)
787 return -1;
788 }
789
790 /* Parse arguments */
791 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytearray", kwlist,
792 &arg, &encoding, &errors))
793 return -1;
794
795 /* Make a quick exit if no first argument */
796 if (arg == NULL) {
797 if (encoding != NULL || errors != NULL) {
798 PyErr_SetString(PyExc_TypeError,
799 "encoding or errors without sequence argument");
800 return -1;
801 }
802 return 0;
803 }
804
805 if (PyBytes_Check(arg)) {
806 PyObject *new, *encoded;
807 if (encoding != NULL) {
808 encoded = _PyCodec_EncodeText(arg, encoding, errors);
809 if (encoded == NULL)
810 return -1;
811 assert(PyBytes_Check(encoded));
812 }
813 else {
814 encoded = arg;
815 Py_INCREF(arg);
816 }
817 new = bytearray_iconcat(self, arg);
818 Py_DECREF(encoded);
819 if (new == NULL)
820 return -1;
821 Py_DECREF(new);
822 return 0;
823 }
824
825 #ifdef Py_USING_UNICODE
826 if (PyUnicode_Check(arg)) {
827 /* Encode via the codec registry */
828 PyObject *encoded, *new;
829 if (encoding == NULL) {
830 PyErr_SetString(PyExc_TypeError,
831 "unicode argument without an encoding");
832 return -1;
833 }
834 encoded = _PyCodec_EncodeText(arg, encoding, errors);
835 if (encoded == NULL)
836 return -1;
837 assert(PyBytes_Check(encoded));
838 new = bytearray_iconcat(self, encoded);
839 Py_DECREF(encoded);
840 if (new == NULL)
841 return -1;
842 Py_DECREF(new);
843 return 0;
844 }
845 #endif
846
847 /* If it's not unicode, there can't be encoding or errors */
848 if (encoding != NULL || errors != NULL) {
849 PyErr_SetString(PyExc_TypeError,
850 "encoding or errors without a string argument");
851 return -1;
852 }
853
854 /* Is it an int? */
855 count = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
856 if (count == -1 && PyErr_Occurred()) {
857 if (!PyErr_ExceptionMatches(PyExc_TypeError))
858 return -1;
859 PyErr_Clear();
860 }
861 else if (count < 0) {
862 PyErr_SetString(PyExc_ValueError, "negative count");
863 return -1;
864 }
865 else {
866 if (count > 0) {
867 if (PyByteArray_Resize((PyObject *)self, count))
868 return -1;
869 memset(self->ob_bytes, 0, count);
870 }
871 return 0;
872 }
873
874 /* Use the buffer API */
875 if (PyObject_CheckBuffer(arg)) {
876 Py_ssize_t size;
877 Py_buffer view;
878 if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
879 return -1;
880 size = view.len;
881 if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
882 if (PyBuffer_ToContiguous(self->ob_bytes, &view, size, 'C') < 0)
883 goto fail;
884 PyBuffer_Release(&view);
885 return 0;
886 fail:
887 PyBuffer_Release(&view);
888 return -1;
889 }
890
891 /* XXX Optimize this if the arguments is a list, tuple */
892
893 /* Get the iterator */
894 it = PyObject_GetIter(arg);
895 if (it == NULL)
896 return -1;
897 iternext = *Py_TYPE(it)->tp_iternext;
898
899 /* Run the iterator to exhaustion */
900 for (;;) {
901 PyObject *item;
902 int rc, value;
903
904 /* Get the next item */
905 item = iternext(it);
906 if (item == NULL) {
907 if (PyErr_Occurred()) {
908 if (!PyErr_ExceptionMatches(PyExc_StopIteration))
909 goto error;
910 PyErr_Clear();
911 }
912 break;
913 }
914
915 /* Interpret it as an int (__index__) */
916 rc = _getbytevalue(item, &value);
917 Py_DECREF(item);
918 if (!rc)
919 goto error;
920
921 /* Append the byte */
922 if (Py_SIZE(self) + 1 < self->ob_alloc) {
923 Py_SIZE(self)++;
924 PyByteArray_AS_STRING(self)[Py_SIZE(self)] = '\0';
925 }
926 else if (PyByteArray_Resize((PyObject *)self, Py_SIZE(self)+1) < 0)
927 goto error;
928 self->ob_bytes[Py_SIZE(self)-1] = value;
929 }
930
931 /* Clean up and return success */
932 Py_DECREF(it);
933 return 0;
934
935 error:
936 /* Error handling when it != NULL */
937 Py_DECREF(it);
938 return -1;
939 }
940
941 /* Mostly copied from string_repr, but without the
942 "smart quote" functionality. */
943 static PyObject *
bytearray_repr(PyByteArrayObject * self)944 bytearray_repr(PyByteArrayObject *self)
945 {
946 static const char *hexdigits = "0123456789abcdef";
947 const char *quote_prefix = "bytearray(b";
948 const char *quote_postfix = ")";
949 Py_ssize_t length = Py_SIZE(self);
950 /* 14 == strlen(quote_prefix) + 2 + strlen(quote_postfix) */
951 size_t newsize;
952 PyObject *v;
953 if (length > (PY_SSIZE_T_MAX - 14) / 4) {
954 PyErr_SetString(PyExc_OverflowError,
955 "bytearray object is too large to make repr");
956 return NULL;
957 }
958 newsize = 14 + 4 * length;
959 v = PyString_FromStringAndSize(NULL, newsize);
960 if (v == NULL) {
961 return NULL;
962 }
963 else {
964 register Py_ssize_t i;
965 register char c;
966 register char *p;
967 int quote;
968
969 /* Figure out which quote to use; single is preferred */
970 quote = '\'';
971 {
972 char *test, *start;
973 start = PyByteArray_AS_STRING(self);
974 for (test = start; test < start+length; ++test) {
975 if (*test == '"') {
976 quote = '\''; /* back to single */
977 goto decided;
978 }
979 else if (*test == '\'')
980 quote = '"';
981 }
982 decided:
983 ;
984 }
985
986 p = PyString_AS_STRING(v);
987 while (*quote_prefix)
988 *p++ = *quote_prefix++;
989 *p++ = quote;
990
991 for (i = 0; i < length; i++) {
992 /* There's at least enough room for a hex escape
993 and a closing quote. */
994 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
995 c = self->ob_bytes[i];
996 if (c == '\'' || c == '\\')
997 *p++ = '\\', *p++ = c;
998 else if (c == '\t')
999 *p++ = '\\', *p++ = 't';
1000 else if (c == '\n')
1001 *p++ = '\\', *p++ = 'n';
1002 else if (c == '\r')
1003 *p++ = '\\', *p++ = 'r';
1004 else if (c == 0)
1005 *p++ = '\\', *p++ = 'x', *p++ = '0', *p++ = '0';
1006 else if (c < ' ' || c >= 0x7f) {
1007 *p++ = '\\';
1008 *p++ = 'x';
1009 *p++ = hexdigits[(c & 0xf0) >> 4];
1010 *p++ = hexdigits[c & 0xf];
1011 }
1012 else
1013 *p++ = c;
1014 }
1015 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
1016 *p++ = quote;
1017 while (*quote_postfix) {
1018 *p++ = *quote_postfix++;
1019 }
1020 *p = '\0';
1021 /* v is cleared on error */
1022 (void)_PyString_Resize(&v, (p - PyString_AS_STRING(v)));
1023 return v;
1024 }
1025 }
1026
1027 static PyObject *
bytearray_str(PyObject * op)1028 bytearray_str(PyObject *op)
1029 {
1030 return PyBytes_FromStringAndSize(((PyByteArrayObject*)op)->ob_bytes, Py_SIZE(op));
1031 }
1032
1033 static PyObject *
bytearray_richcompare(PyObject * self,PyObject * other,int op)1034 bytearray_richcompare(PyObject *self, PyObject *other, int op)
1035 {
1036 Py_ssize_t self_size, other_size;
1037 Py_buffer self_bytes, other_bytes;
1038 PyObject *res;
1039 Py_ssize_t minsize;
1040 int cmp, rc;
1041
1042 /* Bytes can be compared to anything that supports the (binary)
1043 buffer API. Except that a comparison with Unicode is always an
1044 error, even if the comparison is for equality. */
1045 #ifdef Py_USING_UNICODE
1046 rc = PyObject_IsInstance(self, (PyObject*)&PyUnicode_Type);
1047 if (!rc)
1048 rc = PyObject_IsInstance(other, (PyObject*)&PyUnicode_Type);
1049 if (rc < 0)
1050 return NULL;
1051 if (rc) {
1052 if (Py_BytesWarningFlag && op == Py_EQ) {
1053 if (PyErr_WarnEx(PyExc_BytesWarning,
1054 "Comparison between bytearray and unicode", 1))
1055 return NULL;
1056 }
1057
1058 Py_INCREF(Py_NotImplemented);
1059 return Py_NotImplemented;
1060 }
1061 #endif
1062
1063 self_size = _getbuffer(self, &self_bytes);
1064 if (self_size < 0) {
1065 PyErr_Clear();
1066 Py_INCREF(Py_NotImplemented);
1067 return Py_NotImplemented;
1068 }
1069
1070 other_size = _getbuffer(other, &other_bytes);
1071 if (other_size < 0) {
1072 PyErr_Clear();
1073 PyBuffer_Release(&self_bytes);
1074 Py_INCREF(Py_NotImplemented);
1075 return Py_NotImplemented;
1076 }
1077
1078 if (self_size != other_size && (op == Py_EQ || op == Py_NE)) {
1079 /* Shortcut: if the lengths differ, the objects differ */
1080 cmp = (op == Py_NE);
1081 }
1082 else {
1083 minsize = self_size;
1084 if (other_size < minsize)
1085 minsize = other_size;
1086
1087 cmp = memcmp(self_bytes.buf, other_bytes.buf, minsize);
1088 /* In ISO C, memcmp() guarantees to use unsigned bytes! */
1089
1090 if (cmp == 0) {
1091 if (self_size < other_size)
1092 cmp = -1;
1093 else if (self_size > other_size)
1094 cmp = 1;
1095 }
1096
1097 switch (op) {
1098 case Py_LT: cmp = cmp < 0; break;
1099 case Py_LE: cmp = cmp <= 0; break;
1100 case Py_EQ: cmp = cmp == 0; break;
1101 case Py_NE: cmp = cmp != 0; break;
1102 case Py_GT: cmp = cmp > 0; break;
1103 case Py_GE: cmp = cmp >= 0; break;
1104 }
1105 }
1106
1107 res = cmp ? Py_True : Py_False;
1108 PyBuffer_Release(&self_bytes);
1109 PyBuffer_Release(&other_bytes);
1110 Py_INCREF(res);
1111 return res;
1112 }
1113
1114 static void
bytearray_dealloc(PyByteArrayObject * self)1115 bytearray_dealloc(PyByteArrayObject *self)
1116 {
1117 if (self->ob_exports > 0) {
1118 PyErr_SetString(PyExc_SystemError,
1119 "deallocated bytearray object has exported buffers");
1120 PyErr_Print();
1121 }
1122 if (self->ob_bytes != 0) {
1123 PyMem_Free(self->ob_bytes);
1124 }
1125 Py_TYPE(self)->tp_free((PyObject *)self);
1126 }
1127
1128
1129 /* -------------------------------------------------------------------- */
1130 /* Methods */
1131
1132 #define STRINGLIB_CHAR char
1133 #define STRINGLIB_LEN PyByteArray_GET_SIZE
1134 #define STRINGLIB_STR PyByteArray_AS_STRING
1135 #define STRINGLIB_NEW PyByteArray_FromStringAndSize
1136 #define STRINGLIB_ISSPACE Py_ISSPACE
1137 #define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
1138 #define STRINGLIB_CHECK_EXACT PyByteArray_CheckExact
1139 #define STRINGLIB_MUTABLE 1
1140
1141 #include "stringlib/fastsearch.h"
1142 #include "stringlib/count.h"
1143 #include "stringlib/find.h"
1144 #include "stringlib/partition.h"
1145 #include "stringlib/split.h"
1146 #include "stringlib/ctype.h"
1147 #include "stringlib/transmogrify.h"
1148
1149
1150 /* The following Py_LOCAL_INLINE and Py_LOCAL functions
1151 were copied from the old char* style string object. */
1152
/* helper macro to fixup start/end slice values

   Normalises start and end in place against a sequence of length len:
     - end greater than len is reduced to len;
     - negative end has len added and is then floored at 0;
     - negative start has len added and is then floored at 0.
   start is NOT clamped to len, so after the macro start may still exceed
   end (or len).

   The expansion is a bare sequence of if statements (not wrapped in
   do { } while (0)) and evaluates its arguments more than once; start
   and end must be modifiable lvalues. */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1167
1168 Py_LOCAL_INLINE(Py_ssize_t)
bytearray_find_internal(PyByteArrayObject * self,PyObject * args,int dir)1169 bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
1170 {
1171 PyObject *subobj;
1172 Py_buffer subbuf;
1173 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1174 Py_ssize_t res;
1175
1176 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1177 args, &subobj, &start, &end))
1178 return -2;
1179 if (_getbuffer(subobj, &subbuf) < 0)
1180 return -2;
1181 if (dir > 0)
1182 res = stringlib_find_slice(
1183 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1184 subbuf.buf, subbuf.len, start, end);
1185 else
1186 res = stringlib_rfind_slice(
1187 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
1188 subbuf.buf, subbuf.len, start, end);
1189 PyBuffer_Release(&subbuf);
1190 return res;
1191 }
1192
1193 PyDoc_STRVAR(find__doc__,
1194 "B.find(sub [,start [,end]]) -> int\n\
1195 \n\
1196 Return the lowest index in B where subsection sub is found,\n\
1197 such that sub is contained within B[start,end]. Optional\n\
1198 arguments start and end are interpreted as in slice notation.\n\
1199 \n\
1200 Return -1 on failure.");
1201
1202 static PyObject *
bytearray_find(PyByteArrayObject * self,PyObject * args)1203 bytearray_find(PyByteArrayObject *self, PyObject *args)
1204 {
1205 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1206 if (result == -2)
1207 return NULL;
1208 return PyInt_FromSsize_t(result);
1209 }
1210
1211 PyDoc_STRVAR(count__doc__,
1212 "B.count(sub [,start [,end]]) -> int\n\
1213 \n\
1214 Return the number of non-overlapping occurrences of subsection sub in\n\
1215 bytes B[start:end]. Optional arguments start and end are interpreted\n\
1216 as in slice notation.");
1217
1218 static PyObject *
bytearray_count(PyByteArrayObject * self,PyObject * args)1219 bytearray_count(PyByteArrayObject *self, PyObject *args)
1220 {
1221 PyObject *sub_obj;
1222 const char *str = PyByteArray_AS_STRING(self);
1223 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
1224 Py_buffer vsub;
1225 PyObject *count_obj;
1226
1227 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
1228 return NULL;
1229
1230 if (_getbuffer(sub_obj, &vsub) < 0)
1231 return NULL;
1232
1233 ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self));
1234
1235 count_obj = PyInt_FromSsize_t(
1236 stringlib_count(str + start, end - start, vsub.buf, vsub.len, PY_SSIZE_T_MAX)
1237 );
1238 PyBuffer_Release(&vsub);
1239 return count_obj;
1240 }
1241
1242
1243 PyDoc_STRVAR(index__doc__,
1244 "B.index(sub [,start [,end]]) -> int\n\
1245 \n\
1246 Like B.find() but raise ValueError when the subsection is not found.");
1247
1248 static PyObject *
bytearray_index(PyByteArrayObject * self,PyObject * args)1249 bytearray_index(PyByteArrayObject *self, PyObject *args)
1250 {
1251 Py_ssize_t result = bytearray_find_internal(self, args, +1);
1252 if (result == -2)
1253 return NULL;
1254 if (result == -1) {
1255 PyErr_SetString(PyExc_ValueError,
1256 "subsection not found");
1257 return NULL;
1258 }
1259 return PyInt_FromSsize_t(result);
1260 }
1261
1262
1263 PyDoc_STRVAR(rfind__doc__,
1264 "B.rfind(sub [,start [,end]]) -> int\n\
1265 \n\
1266 Return the highest index in B where subsection sub is found,\n\
1267 such that sub is contained within B[start,end]. Optional\n\
1268 arguments start and end are interpreted as in slice notation.\n\
1269 \n\
1270 Return -1 on failure.");
1271
1272 static PyObject *
bytearray_rfind(PyByteArrayObject * self,PyObject * args)1273 bytearray_rfind(PyByteArrayObject *self, PyObject *args)
1274 {
1275 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1276 if (result == -2)
1277 return NULL;
1278 return PyInt_FromSsize_t(result);
1279 }
1280
1281
1282 PyDoc_STRVAR(rindex__doc__,
1283 "B.rindex(sub [,start [,end]]) -> int\n\
1284 \n\
1285 Like B.rfind() but raise ValueError when the subsection is not found.");
1286
1287 static PyObject *
bytearray_rindex(PyByteArrayObject * self,PyObject * args)1288 bytearray_rindex(PyByteArrayObject *self, PyObject *args)
1289 {
1290 Py_ssize_t result = bytearray_find_internal(self, args, -1);
1291 if (result == -2)
1292 return NULL;
1293 if (result == -1) {
1294 PyErr_SetString(PyExc_ValueError,
1295 "subsection not found");
1296 return NULL;
1297 }
1298 return PyInt_FromSsize_t(result);
1299 }
1300
1301
1302 static int
bytearray_contains(PyObject * self,PyObject * arg)1303 bytearray_contains(PyObject *self, PyObject *arg)
1304 {
1305 Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError);
1306 if (ival == -1 && PyErr_Occurred()) {
1307 Py_buffer varg;
1308 int pos;
1309 PyErr_Clear();
1310 if (_getbuffer(arg, &varg) < 0)
1311 return -1;
1312 pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self),
1313 varg.buf, varg.len, 0);
1314 PyBuffer_Release(&varg);
1315 return pos >= 0;
1316 }
1317 if (ival < 0 || ival >= 256) {
1318 PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
1319 return -1;
1320 }
1321
1322 return memchr(PyByteArray_AS_STRING(self), ival, Py_SIZE(self)) != NULL;
1323 }
1324
1325
1326 /* Matches the end (direction >= 0) or start (direction < 0) of self
1327 * against substr, using the start and end arguments. Returns
1328 * -1 on error, 0 if not found and 1 if found.
1329 */
1330 Py_LOCAL(int)
_bytearray_tailmatch(PyByteArrayObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)1331 _bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start,
1332 Py_ssize_t end, int direction)
1333 {
1334 Py_ssize_t len = PyByteArray_GET_SIZE(self);
1335 const char* str;
1336 Py_buffer vsubstr;
1337 int rv = 0;
1338
1339 str = PyByteArray_AS_STRING(self);
1340
1341 if (_getbuffer(substr, &vsubstr) < 0)
1342 return -1;
1343
1344 ADJUST_INDICES(start, end, len);
1345
1346 if (direction < 0) {
1347 /* startswith */
1348 if (start+vsubstr.len > len) {
1349 goto done;
1350 }
1351 } else {
1352 /* endswith */
1353 if (end-start < vsubstr.len || start > len) {
1354 goto done;
1355 }
1356
1357 if (end-vsubstr.len > start)
1358 start = end - vsubstr.len;
1359 }
1360 if (end-start >= vsubstr.len)
1361 rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len);
1362
1363 done:
1364 PyBuffer_Release(&vsubstr);
1365 return rv;
1366 }
1367
1368
1369 PyDoc_STRVAR(startswith__doc__,
1370 "B.startswith(prefix [,start [,end]]) -> bool\n\
1371 \n\
1372 Return True if B starts with the specified prefix, False otherwise.\n\
1373 With optional start, test B beginning at that position.\n\
1374 With optional end, stop comparing B at that position.\n\
1375 prefix can also be a tuple of strings to try.");
1376
1377 static PyObject *
bytearray_startswith(PyByteArrayObject * self,PyObject * args)1378 bytearray_startswith(PyByteArrayObject *self, PyObject *args)
1379 {
1380 Py_ssize_t start = 0;
1381 Py_ssize_t end = PY_SSIZE_T_MAX;
1382 PyObject *subobj;
1383 int result;
1384
1385 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
1386 return NULL;
1387 if (PyTuple_Check(subobj)) {
1388 Py_ssize_t i;
1389 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1390 result = _bytearray_tailmatch(self,
1391 PyTuple_GET_ITEM(subobj, i),
1392 start, end, -1);
1393 if (result == -1)
1394 return NULL;
1395 else if (result) {
1396 Py_RETURN_TRUE;
1397 }
1398 }
1399 Py_RETURN_FALSE;
1400 }
1401 result = _bytearray_tailmatch(self, subobj, start, end, -1);
1402 if (result == -1)
1403 return NULL;
1404 else
1405 return PyBool_FromLong(result);
1406 }
1407
1408 PyDoc_STRVAR(endswith__doc__,
1409 "B.endswith(suffix [,start [,end]]) -> bool\n\
1410 \n\
1411 Return True if B ends with the specified suffix, False otherwise.\n\
1412 With optional start, test B beginning at that position.\n\
1413 With optional end, stop comparing B at that position.\n\
1414 suffix can also be a tuple of strings to try.");
1415
1416 static PyObject *
bytearray_endswith(PyByteArrayObject * self,PyObject * args)1417 bytearray_endswith(PyByteArrayObject *self, PyObject *args)
1418 {
1419 Py_ssize_t start = 0;
1420 Py_ssize_t end = PY_SSIZE_T_MAX;
1421 PyObject *subobj;
1422 int result;
1423
1424 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
1425 return NULL;
1426 if (PyTuple_Check(subobj)) {
1427 Py_ssize_t i;
1428 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
1429 result = _bytearray_tailmatch(self,
1430 PyTuple_GET_ITEM(subobj, i),
1431 start, end, +1);
1432 if (result == -1)
1433 return NULL;
1434 else if (result) {
1435 Py_RETURN_TRUE;
1436 }
1437 }
1438 Py_RETURN_FALSE;
1439 }
1440 result = _bytearray_tailmatch(self, subobj, start, end, +1);
1441 if (result == -1)
1442 return NULL;
1443 else
1444 return PyBool_FromLong(result);
1445 }
1446
1447
1448 PyDoc_STRVAR(translate__doc__,
1449 "B.translate(table[, deletechars]) -> bytearray\n\
1450 \n\
1451 Return a copy of B, where all characters occurring in the\n\
1452 optional argument deletechars are removed, and the remaining\n\
1453 characters have been mapped through the given translation\n\
1454 table, which must be a bytes object of length 256.");
1455
1456 static PyObject *
bytearray_translate(PyByteArrayObject * self,PyObject * args)1457 bytearray_translate(PyByteArrayObject *self, PyObject *args)
1458 {
1459 register char *input, *output;
1460 register const char *table;
1461 register Py_ssize_t i, c;
1462 PyObject *input_obj = (PyObject*)self;
1463 const char *output_start;
1464 Py_ssize_t inlen;
1465 PyObject *result = NULL;
1466 int trans_table[256];
1467 PyObject *tableobj = NULL, *delobj = NULL;
1468 Py_buffer vtable, vdel;
1469
1470 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
1471 &tableobj, &delobj))
1472 return NULL;
1473
1474 if (tableobj == Py_None) {
1475 table = NULL;
1476 tableobj = NULL;
1477 } else if (_getbuffer(tableobj, &vtable) < 0) {
1478 return NULL;
1479 } else {
1480 if (vtable.len != 256) {
1481 PyErr_SetString(PyExc_ValueError,
1482 "translation table must be 256 characters long");
1483 PyBuffer_Release(&vtable);
1484 return NULL;
1485 }
1486 table = (const char*)vtable.buf;
1487 }
1488
1489 if (delobj != NULL) {
1490 if (_getbuffer(delobj, &vdel) < 0) {
1491 if (tableobj != NULL)
1492 PyBuffer_Release(&vtable);
1493 return NULL;
1494 }
1495 }
1496 else {
1497 vdel.buf = NULL;
1498 vdel.len = 0;
1499 }
1500
1501 inlen = PyByteArray_GET_SIZE(input_obj);
1502 result = PyByteArray_FromStringAndSize((char *)NULL, inlen);
1503 if (result == NULL)
1504 goto done;
1505 output_start = output = PyByteArray_AsString(result);
1506 input = PyByteArray_AS_STRING(input_obj);
1507
1508 if (vdel.len == 0 && table != NULL) {
1509 /* If no deletions are required, use faster code */
1510 for (i = inlen; --i >= 0; ) {
1511 c = Py_CHARMASK(*input++);
1512 *output++ = table[c];
1513 }
1514 goto done;
1515 }
1516
1517 if (table == NULL) {
1518 for (i = 0; i < 256; i++)
1519 trans_table[i] = Py_CHARMASK(i);
1520 } else {
1521 for (i = 0; i < 256; i++)
1522 trans_table[i] = Py_CHARMASK(table[i]);
1523 }
1524
1525 for (i = 0; i < vdel.len; i++)
1526 trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1;
1527
1528 for (i = inlen; --i >= 0; ) {
1529 c = Py_CHARMASK(*input++);
1530 if (trans_table[c] != -1)
1531 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1532 continue;
1533 }
1534 /* Fix the size of the resulting string */
1535 if (inlen > 0)
1536 PyByteArray_Resize(result, output - output_start);
1537
1538 done:
1539 if (tableobj != NULL)
1540 PyBuffer_Release(&vtable);
1541 if (delobj != NULL)
1542 PyBuffer_Release(&vdel);
1543 return result;
1544 }
1545
1546
1547 /* find and count characters and substrings */
1548
/* Find the first byte equal to c within the target_len bytes at target.
   Evaluates to a char * to the match, or NULL when memchr() finds none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
1551
1552
1553 /* Bytes ops must return a string, create a copy */
1554 Py_LOCAL(PyByteArrayObject *)
return_self(PyByteArrayObject * self)1555 return_self(PyByteArrayObject *self)
1556 {
1557 return (PyByteArrayObject *)PyByteArray_FromStringAndSize(
1558 PyByteArray_AS_STRING(self),
1559 PyByteArray_GET_SIZE(self));
1560 }
1561
1562 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)1563 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
1564 {
1565 Py_ssize_t count=0;
1566 const char *start=target;
1567 const char *end=target+target_len;
1568
1569 while ( (start=findchar(start, end-start, c)) != NULL ) {
1570 count++;
1571 if (count >= maxcount)
1572 break;
1573 start += 1;
1574 }
1575 return count;
1576 }
1577
1578
1579 /* Algorithms for different cases of string replacement */
1580
1581 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
1582 Py_LOCAL(PyByteArrayObject *)
replace_interleave(PyByteArrayObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1583 replace_interleave(PyByteArrayObject *self,
1584 const char *to_s, Py_ssize_t to_len,
1585 Py_ssize_t maxcount)
1586 {
1587 char *self_s, *result_s;
1588 Py_ssize_t self_len, result_len;
1589 Py_ssize_t count, i;
1590 PyByteArrayObject *result;
1591
1592 self_len = PyByteArray_GET_SIZE(self);
1593
1594 /* 1 at the end plus 1 after every character;
1595 count = min(maxcount, self_len + 1) */
1596 if (maxcount <= self_len) {
1597 count = maxcount;
1598 }
1599 else {
1600 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
1601 count = self_len + 1;
1602 }
1603
1604 /* Check for overflow */
1605 /* result_len = count * to_len + self_len; */
1606 assert(count > 0);
1607 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
1608 PyErr_SetString(PyExc_OverflowError,
1609 "replace bytes is too long");
1610 return NULL;
1611 }
1612 result_len = count * to_len + self_len;
1613 if (! (result = (PyByteArrayObject *)
1614 PyByteArray_FromStringAndSize(NULL, result_len)) )
1615 return NULL;
1616
1617 self_s = PyByteArray_AS_STRING(self);
1618 result_s = PyByteArray_AS_STRING(result);
1619
1620 /* TODO: special case single character, which doesn't need memcpy */
1621
1622 /* Lay the first one down (guaranteed this will occur) */
1623 Py_MEMCPY(result_s, to_s, to_len);
1624 result_s += to_len;
1625 count -= 1;
1626
1627 for (i=0; i<count; i++) {
1628 *result_s++ = *self_s++;
1629 Py_MEMCPY(result_s, to_s, to_len);
1630 result_s += to_len;
1631 }
1632
1633 /* Copy the rest of the original string */
1634 Py_MEMCPY(result_s, self_s, self_len-i);
1635
1636 return result;
1637 }
1638
1639 /* Special case for deleting a single character */
1640 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
1641 Py_LOCAL(PyByteArrayObject *)
replace_delete_single_character(PyByteArrayObject * self,char from_c,Py_ssize_t maxcount)1642 replace_delete_single_character(PyByteArrayObject *self,
1643 char from_c, Py_ssize_t maxcount)
1644 {
1645 char *self_s, *result_s;
1646 char *start, *next, *end;
1647 Py_ssize_t self_len, result_len;
1648 Py_ssize_t count;
1649 PyByteArrayObject *result;
1650
1651 self_len = PyByteArray_GET_SIZE(self);
1652 self_s = PyByteArray_AS_STRING(self);
1653
1654 count = countchar(self_s, self_len, from_c, maxcount);
1655 if (count == 0) {
1656 return return_self(self);
1657 }
1658
1659 result_len = self_len - count; /* from_len == 1 */
1660 assert(result_len>=0);
1661
1662 if ( (result = (PyByteArrayObject *)
1663 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1664 return NULL;
1665 result_s = PyByteArray_AS_STRING(result);
1666
1667 start = self_s;
1668 end = self_s + self_len;
1669 while (count-- > 0) {
1670 next = findchar(start, end-start, from_c);
1671 if (next == NULL)
1672 break;
1673 Py_MEMCPY(result_s, start, next-start);
1674 result_s += (next-start);
1675 start = next+1;
1676 }
1677 Py_MEMCPY(result_s, start, end-start);
1678
1679 return result;
1680 }
1681
1682 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
1683
1684 Py_LOCAL(PyByteArrayObject *)
replace_delete_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)1685 replace_delete_substring(PyByteArrayObject *self,
1686 const char *from_s, Py_ssize_t from_len,
1687 Py_ssize_t maxcount)
1688 {
1689 char *self_s, *result_s;
1690 char *start, *next, *end;
1691 Py_ssize_t self_len, result_len;
1692 Py_ssize_t count, offset;
1693 PyByteArrayObject *result;
1694
1695 self_len = PyByteArray_GET_SIZE(self);
1696 self_s = PyByteArray_AS_STRING(self);
1697
1698 count = stringlib_count(self_s, self_len,
1699 from_s, from_len,
1700 maxcount);
1701
1702 if (count == 0) {
1703 /* no matches */
1704 return return_self(self);
1705 }
1706
1707 result_len = self_len - (count * from_len);
1708 assert (result_len>=0);
1709
1710 if ( (result = (PyByteArrayObject *)
1711 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL )
1712 return NULL;
1713
1714 result_s = PyByteArray_AS_STRING(result);
1715
1716 start = self_s;
1717 end = self_s + self_len;
1718 while (count-- > 0) {
1719 offset = stringlib_find(start, end-start,
1720 from_s, from_len,
1721 0);
1722 if (offset == -1)
1723 break;
1724 next = start + offset;
1725
1726 Py_MEMCPY(result_s, start, next-start);
1727
1728 result_s += (next-start);
1729 start = next+from_len;
1730 }
1731 Py_MEMCPY(result_s, start, end-start);
1732 return result;
1733 }
1734
1735 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
1736 Py_LOCAL(PyByteArrayObject *)
replace_single_character_in_place(PyByteArrayObject * self,char from_c,char to_c,Py_ssize_t maxcount)1737 replace_single_character_in_place(PyByteArrayObject *self,
1738 char from_c, char to_c,
1739 Py_ssize_t maxcount)
1740 {
1741 char *self_s, *result_s, *start, *end, *next;
1742 Py_ssize_t self_len;
1743 PyByteArrayObject *result;
1744
1745 /* The result string will be the same size */
1746 self_s = PyByteArray_AS_STRING(self);
1747 self_len = PyByteArray_GET_SIZE(self);
1748
1749 next = findchar(self_s, self_len, from_c);
1750
1751 if (next == NULL) {
1752 /* No matches; return the original bytes */
1753 return return_self(self);
1754 }
1755
1756 /* Need to make a new bytes */
1757 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1758 if (result == NULL)
1759 return NULL;
1760 result_s = PyByteArray_AS_STRING(result);
1761 Py_MEMCPY(result_s, self_s, self_len);
1762
1763 /* change everything in-place, starting with this one */
1764 start = result_s + (next-self_s);
1765 *start = to_c;
1766 start++;
1767 end = result_s + self_len;
1768
1769 while (--maxcount > 0) {
1770 next = findchar(start, end-start, from_c);
1771 if (next == NULL)
1772 break;
1773 *next = to_c;
1774 start = next+1;
1775 }
1776
1777 return result;
1778 }
1779
1780 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
1781 Py_LOCAL(PyByteArrayObject *)
replace_substring_in_place(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1782 replace_substring_in_place(PyByteArrayObject *self,
1783 const char *from_s, Py_ssize_t from_len,
1784 const char *to_s, Py_ssize_t to_len,
1785 Py_ssize_t maxcount)
1786 {
1787 char *result_s, *start, *end;
1788 char *self_s;
1789 Py_ssize_t self_len, offset;
1790 PyByteArrayObject *result;
1791
1792 /* The result bytes will be the same size */
1793
1794 self_s = PyByteArray_AS_STRING(self);
1795 self_len = PyByteArray_GET_SIZE(self);
1796
1797 offset = stringlib_find(self_s, self_len,
1798 from_s, from_len,
1799 0);
1800 if (offset == -1) {
1801 /* No matches; return the original bytes */
1802 return return_self(self);
1803 }
1804
1805 /* Need to make a new bytes */
1806 result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len);
1807 if (result == NULL)
1808 return NULL;
1809 result_s = PyByteArray_AS_STRING(result);
1810 Py_MEMCPY(result_s, self_s, self_len);
1811
1812 /* change everything in-place, starting with this one */
1813 start = result_s + offset;
1814 Py_MEMCPY(start, to_s, from_len);
1815 start += from_len;
1816 end = result_s + self_len;
1817
1818 while ( --maxcount > 0) {
1819 offset = stringlib_find(start, end-start,
1820 from_s, from_len,
1821 0);
1822 if (offset==-1)
1823 break;
1824 Py_MEMCPY(start+offset, to_s, from_len);
1825 start += offset+from_len;
1826 }
1827
1828 return result;
1829 }
1830
1831 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
1832 Py_LOCAL(PyByteArrayObject *)
replace_single_character(PyByteArrayObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1833 replace_single_character(PyByteArrayObject *self,
1834 char from_c,
1835 const char *to_s, Py_ssize_t to_len,
1836 Py_ssize_t maxcount)
1837 {
1838 char *self_s, *result_s;
1839 char *start, *next, *end;
1840 Py_ssize_t self_len, result_len;
1841 Py_ssize_t count;
1842 PyByteArrayObject *result;
1843
1844 self_s = PyByteArray_AS_STRING(self);
1845 self_len = PyByteArray_GET_SIZE(self);
1846
1847 count = countchar(self_s, self_len, from_c, maxcount);
1848 if (count == 0) {
1849 /* no matches, return unchanged */
1850 return return_self(self);
1851 }
1852
1853 /* use the difference between current and new, hence the "-1" */
1854 /* result_len = self_len + count * (to_len-1) */
1855 assert(count > 0);
1856 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
1857 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1858 return NULL;
1859 }
1860 result_len = self_len + count * (to_len - 1);
1861
1862 if ( (result = (PyByteArrayObject *)
1863 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1864 return NULL;
1865 result_s = PyByteArray_AS_STRING(result);
1866
1867 start = self_s;
1868 end = self_s + self_len;
1869 while (count-- > 0) {
1870 next = findchar(start, end-start, from_c);
1871 if (next == NULL)
1872 break;
1873
1874 if (next == start) {
1875 /* replace with the 'to' */
1876 Py_MEMCPY(result_s, to_s, to_len);
1877 result_s += to_len;
1878 start += 1;
1879 } else {
1880 /* copy the unchanged old then the 'to' */
1881 Py_MEMCPY(result_s, start, next-start);
1882 result_s += (next-start);
1883 Py_MEMCPY(result_s, to_s, to_len);
1884 result_s += to_len;
1885 start = next+1;
1886 }
1887 }
1888 /* Copy the remainder of the remaining bytes */
1889 Py_MEMCPY(result_s, start, end-start);
1890
1891 return result;
1892 }
1893
1894 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
1895 Py_LOCAL(PyByteArrayObject *)
replace_substring(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1896 replace_substring(PyByteArrayObject *self,
1897 const char *from_s, Py_ssize_t from_len,
1898 const char *to_s, Py_ssize_t to_len,
1899 Py_ssize_t maxcount)
1900 {
1901 char *self_s, *result_s;
1902 char *start, *next, *end;
1903 Py_ssize_t self_len, result_len;
1904 Py_ssize_t count, offset;
1905 PyByteArrayObject *result;
1906
1907 self_s = PyByteArray_AS_STRING(self);
1908 self_len = PyByteArray_GET_SIZE(self);
1909
1910 count = stringlib_count(self_s, self_len,
1911 from_s, from_len,
1912 maxcount);
1913
1914 if (count == 0) {
1915 /* no matches, return unchanged */
1916 return return_self(self);
1917 }
1918
1919 /* Check for overflow */
1920 /* result_len = self_len + count * (to_len-from_len) */
1921 assert(count > 0);
1922 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
1923 PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
1924 return NULL;
1925 }
1926 result_len = self_len + count * (to_len - from_len);
1927
1928 if ( (result = (PyByteArrayObject *)
1929 PyByteArray_FromStringAndSize(NULL, result_len)) == NULL)
1930 return NULL;
1931 result_s = PyByteArray_AS_STRING(result);
1932
1933 start = self_s;
1934 end = self_s + self_len;
1935 while (count-- > 0) {
1936 offset = stringlib_find(start, end-start,
1937 from_s, from_len,
1938 0);
1939 if (offset == -1)
1940 break;
1941 next = start+offset;
1942 if (next == start) {
1943 /* replace with the 'to' */
1944 Py_MEMCPY(result_s, to_s, to_len);
1945 result_s += to_len;
1946 start += from_len;
1947 } else {
1948 /* copy the unchanged old then the 'to' */
1949 Py_MEMCPY(result_s, start, next-start);
1950 result_s += (next-start);
1951 Py_MEMCPY(result_s, to_s, to_len);
1952 result_s += to_len;
1953 start = next+from_len;
1954 }
1955 }
1956 /* Copy the remainder of the remaining bytes */
1957 Py_MEMCPY(result_s, start, end-start);
1958
1959 return result;
1960 }
1961
1962
1963 Py_LOCAL(PyByteArrayObject *)
replace(PyByteArrayObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)1964 replace(PyByteArrayObject *self,
1965 const char *from_s, Py_ssize_t from_len,
1966 const char *to_s, Py_ssize_t to_len,
1967 Py_ssize_t maxcount)
1968 {
1969 if (maxcount < 0) {
1970 maxcount = PY_SSIZE_T_MAX;
1971 } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) {
1972 /* nothing to do; return the original bytes */
1973 return return_self(self);
1974 }
1975
1976 if (maxcount == 0 ||
1977 (from_len == 0 && to_len == 0)) {
1978 /* nothing to do; return the original bytes */
1979 return return_self(self);
1980 }
1981
1982 /* Handle zero-length special cases */
1983
1984 if (from_len == 0) {
1985 /* insert the 'to' bytes everywhere. */
1986 /* >>> "Python".replace("", ".") */
1987 /* '.P.y.t.h.o.n.' */
1988 return replace_interleave(self, to_s, to_len, maxcount);
1989 }
1990
1991 /* Except for "".replace("", "A") == "A" there is no way beyond this */
1992 /* point for an empty self bytes to generate a non-empty bytes */
1993 /* Special case so the remaining code always gets a non-empty bytes */
1994 if (PyByteArray_GET_SIZE(self) == 0) {
1995 return return_self(self);
1996 }
1997
1998 if (to_len == 0) {
1999 /* delete all occurrences of 'from' bytes */
2000 if (from_len == 1) {
2001 return replace_delete_single_character(
2002 self, from_s[0], maxcount);
2003 } else {
2004 return replace_delete_substring(self, from_s, from_len, maxcount);
2005 }
2006 }
2007
2008 /* Handle special case where both bytes have the same length */
2009
2010 if (from_len == to_len) {
2011 if (from_len == 1) {
2012 return replace_single_character_in_place(
2013 self,
2014 from_s[0],
2015 to_s[0],
2016 maxcount);
2017 } else {
2018 return replace_substring_in_place(
2019 self, from_s, from_len, to_s, to_len, maxcount);
2020 }
2021 }
2022
2023 /* Otherwise use the more generic algorithms */
2024 if (from_len == 1) {
2025 return replace_single_character(self, from_s[0],
2026 to_s, to_len, maxcount);
2027 } else {
2028 /* len('from')>=2, len('to')>=1 */
2029 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2030 }
2031 }
2032
2033
2034 PyDoc_STRVAR(replace__doc__,
2035 "B.replace(old, new[, count]) -> bytes\n\
2036 \n\
2037 Return a copy of B with all occurrences of subsection\n\
2038 old replaced by new. If the optional argument count is\n\
2039 given, only the first count occurrences are replaced.");
2040
2041 static PyObject *
bytearray_replace(PyByteArrayObject * self,PyObject * args)2042 bytearray_replace(PyByteArrayObject *self, PyObject *args)
2043 {
2044 Py_ssize_t count = -1;
2045 PyObject *from, *to, *res;
2046 Py_buffer vfrom, vto;
2047
2048 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2049 return NULL;
2050
2051 if (_getbuffer(from, &vfrom) < 0)
2052 return NULL;
2053 if (_getbuffer(to, &vto) < 0) {
2054 PyBuffer_Release(&vfrom);
2055 return NULL;
2056 }
2057
2058 res = (PyObject *)replace((PyByteArrayObject *) self,
2059 vfrom.buf, vfrom.len,
2060 vto.buf, vto.len, count);
2061
2062 PyBuffer_Release(&vfrom);
2063 PyBuffer_Release(&vto);
2064 return res;
2065 }
2066
2067 PyDoc_STRVAR(split__doc__,
2068 "B.split([sep[, maxsplit]]) -> list of bytearray\n\
2069 \n\
2070 Return a list of the sections in B, using sep as the delimiter.\n\
2071 If sep is not given, B is split on ASCII whitespace characters\n\
2072 (space, tab, return, newline, formfeed, vertical tab).\n\
2073 If maxsplit is given, at most maxsplit splits are done.");
2074
2075 static PyObject *
bytearray_split(PyByteArrayObject * self,PyObject * args)2076 bytearray_split(PyByteArrayObject *self, PyObject *args)
2077 {
2078 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2079 Py_ssize_t maxsplit = -1;
2080 const char *s = PyByteArray_AS_STRING(self), *sub;
2081 PyObject *list, *subobj = Py_None;
2082 Py_buffer vsub;
2083
2084 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
2085 return NULL;
2086 if (maxsplit < 0)
2087 maxsplit = PY_SSIZE_T_MAX;
2088
2089 if (subobj == Py_None)
2090 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
2091
2092 if (_getbuffer(subobj, &vsub) < 0)
2093 return NULL;
2094 sub = vsub.buf;
2095 n = vsub.len;
2096
2097 list = stringlib_split(
2098 (PyObject*) self, s, len, sub, n, maxsplit
2099 );
2100 PyBuffer_Release(&vsub);
2101 return list;
2102 }
2103
2104 PyDoc_STRVAR(partition__doc__,
2105 "B.partition(sep) -> (head, sep, tail)\n\
2106 \n\
2107 Searches for the separator sep in B, and returns the part before it,\n\
2108 the separator itself, and the part after it. If the separator is not\n\
2109 found, returns B and two empty bytearray objects.");
2110
2111 static PyObject *
bytearray_partition(PyByteArrayObject * self,PyObject * sep_obj)2112 bytearray_partition(PyByteArrayObject *self, PyObject *sep_obj)
2113 {
2114 PyObject *bytesep, *result;
2115
2116 bytesep = _PyByteArray_FromBufferObject(sep_obj);
2117 if (! bytesep)
2118 return NULL;
2119
2120 result = stringlib_partition(
2121 (PyObject*) self,
2122 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2123 bytesep,
2124 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2125 );
2126
2127 Py_DECREF(bytesep);
2128 return result;
2129 }
2130
2131 PyDoc_STRVAR(rpartition__doc__,
2132 "B.rpartition(sep) -> (head, sep, tail)\n\
2133 \n\
2134 Searches for the separator sep in B, starting at the end of B,\n\
2135 and returns the part before it, the separator itself, and the\n\
2136 part after it. If the separator is not found, returns two empty\n\
2137 bytearray objects and B.");
2138
2139 static PyObject *
bytearray_rpartition(PyByteArrayObject * self,PyObject * sep_obj)2140 bytearray_rpartition(PyByteArrayObject *self, PyObject *sep_obj)
2141 {
2142 PyObject *bytesep, *result;
2143
2144 bytesep = _PyByteArray_FromBufferObject(sep_obj);
2145 if (! bytesep)
2146 return NULL;
2147
2148 result = stringlib_rpartition(
2149 (PyObject*) self,
2150 PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
2151 bytesep,
2152 PyByteArray_AS_STRING(bytesep), PyByteArray_GET_SIZE(bytesep)
2153 );
2154
2155 Py_DECREF(bytesep);
2156 return result;
2157 }
2158
2159 PyDoc_STRVAR(rsplit__doc__,
2160 "B.rsplit(sep[, maxsplit]) -> list of bytearray\n\
2161 \n\
2162 Return a list of the sections in B, using sep as the delimiter,\n\
2163 starting at the end of B and working to the front.\n\
2164 If sep is not given, B is split on ASCII whitespace characters\n\
2165 (space, tab, return, newline, formfeed, vertical tab).\n\
2166 If maxsplit is given, at most maxsplit splits are done.");
2167
2168 static PyObject *
bytearray_rsplit(PyByteArrayObject * self,PyObject * args)2169 bytearray_rsplit(PyByteArrayObject *self, PyObject *args)
2170 {
2171 Py_ssize_t len = PyByteArray_GET_SIZE(self), n;
2172 Py_ssize_t maxsplit = -1;
2173 const char *s = PyByteArray_AS_STRING(self), *sub;
2174 PyObject *list, *subobj = Py_None;
2175 Py_buffer vsub;
2176
2177 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
2178 return NULL;
2179 if (maxsplit < 0)
2180 maxsplit = PY_SSIZE_T_MAX;
2181
2182 if (subobj == Py_None)
2183 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
2184
2185 if (_getbuffer(subobj, &vsub) < 0)
2186 return NULL;
2187 sub = vsub.buf;
2188 n = vsub.len;
2189
2190 list = stringlib_rsplit(
2191 (PyObject*) self, s, len, sub, n, maxsplit
2192 );
2193 PyBuffer_Release(&vsub);
2194 return list;
2195 }
2196
2197 PyDoc_STRVAR(reverse__doc__,
2198 "B.reverse() -> None\n\
2199 \n\
2200 Reverse the order of the values in B in place.");
2201 static PyObject *
bytearray_reverse(PyByteArrayObject * self,PyObject * unused)2202 bytearray_reverse(PyByteArrayObject *self, PyObject *unused)
2203 {
2204 char swap, *head, *tail;
2205 Py_ssize_t i, j, n = Py_SIZE(self);
2206
2207 j = n / 2;
2208 head = self->ob_bytes;
2209 tail = head + n - 1;
2210 for (i = 0; i < j; i++) {
2211 swap = *head;
2212 *head++ = *tail;
2213 *tail-- = swap;
2214 }
2215
2216 Py_RETURN_NONE;
2217 }
2218
2219 PyDoc_STRVAR(insert__doc__,
2220 "B.insert(index, int) -> None\n\
2221 \n\
2222 Insert a single item into the bytearray before the given index.");
2223 static PyObject *
bytearray_insert(PyByteArrayObject * self,PyObject * args)2224 bytearray_insert(PyByteArrayObject *self, PyObject *args)
2225 {
2226 PyObject *value;
2227 int ival;
2228 Py_ssize_t where, n = Py_SIZE(self);
2229
2230 if (!PyArg_ParseTuple(args, "nO:insert", &where, &value))
2231 return NULL;
2232
2233 if (n == PY_SSIZE_T_MAX) {
2234 PyErr_SetString(PyExc_OverflowError,
2235 "cannot add more objects to bytearray");
2236 return NULL;
2237 }
2238 if (!_getbytevalue(value, &ival))
2239 return NULL;
2240 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2241 return NULL;
2242
2243 if (where < 0) {
2244 where += n;
2245 if (where < 0)
2246 where = 0;
2247 }
2248 if (where > n)
2249 where = n;
2250 memmove(self->ob_bytes + where + 1, self->ob_bytes + where, n - where);
2251 self->ob_bytes[where] = ival;
2252
2253 Py_RETURN_NONE;
2254 }
2255
2256 PyDoc_STRVAR(append__doc__,
2257 "B.append(int) -> None\n\
2258 \n\
2259 Append a single item to the end of B.");
2260 static PyObject *
bytearray_append(PyByteArrayObject * self,PyObject * arg)2261 bytearray_append(PyByteArrayObject *self, PyObject *arg)
2262 {
2263 int value;
2264 Py_ssize_t n = Py_SIZE(self);
2265
2266 if (! _getbytevalue(arg, &value))
2267 return NULL;
2268 if (n == PY_SSIZE_T_MAX) {
2269 PyErr_SetString(PyExc_OverflowError,
2270 "cannot add more objects to bytearray");
2271 return NULL;
2272 }
2273 if (PyByteArray_Resize((PyObject *)self, n + 1) < 0)
2274 return NULL;
2275
2276 self->ob_bytes[n] = value;
2277
2278 Py_RETURN_NONE;
2279 }
2280
2281 PyDoc_STRVAR(extend__doc__,
2282 "B.extend(iterable int) -> None\n\
2283 \n\
2284 Append all the elements from the iterator or sequence to the\n\
2285 end of B.");
2286 static PyObject *
bytearray_extend(PyByteArrayObject * self,PyObject * arg)2287 bytearray_extend(PyByteArrayObject *self, PyObject *arg)
2288 {
2289 PyObject *it, *item, *bytearray_obj;
2290 Py_ssize_t buf_size = 0, len = 0;
2291 int value;
2292 char *buf;
2293
2294 /* bytearray_setslice code only accepts something supporting PEP 3118. */
2295 if (PyObject_CheckBuffer(arg)) {
2296 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), arg) == -1)
2297 return NULL;
2298
2299 Py_RETURN_NONE;
2300 }
2301
2302 it = PyObject_GetIter(arg);
2303 if (it == NULL)
2304 return NULL;
2305
2306 /* Try to determine the length of the argument. 32 is arbitrary. */
2307 buf_size = _PyObject_LengthHint(arg, 32);
2308 if (buf_size == -1) {
2309 Py_DECREF(it);
2310 return NULL;
2311 }
2312
2313 bytearray_obj = PyByteArray_FromStringAndSize(NULL, buf_size);
2314 if (bytearray_obj == NULL) {
2315 Py_DECREF(it);
2316 return NULL;
2317 }
2318 buf = PyByteArray_AS_STRING(bytearray_obj);
2319
2320 while ((item = PyIter_Next(it)) != NULL) {
2321 if (! _getbytevalue(item, &value)) {
2322 Py_DECREF(item);
2323 Py_DECREF(it);
2324 Py_DECREF(bytearray_obj);
2325 return NULL;
2326 }
2327 buf[len++] = value;
2328 Py_DECREF(item);
2329
2330 if (len >= buf_size) {
2331 Py_ssize_t addition;
2332 if (len == PY_SSIZE_T_MAX) {
2333 Py_DECREF(it);
2334 Py_DECREF(bytearray_obj);
2335 return PyErr_NoMemory();
2336 }
2337 addition = len >> 1;
2338 if (addition > PY_SSIZE_T_MAX - len - 1)
2339 buf_size = PY_SSIZE_T_MAX;
2340 else
2341 buf_size = len + addition + 1;
2342 if (PyByteArray_Resize((PyObject *)bytearray_obj, buf_size) < 0) {
2343 Py_DECREF(it);
2344 Py_DECREF(bytearray_obj);
2345 return NULL;
2346 }
2347 /* Recompute the `buf' pointer, since the resizing operation may
2348 have invalidated it. */
2349 buf = PyByteArray_AS_STRING(bytearray_obj);
2350 }
2351 }
2352 Py_DECREF(it);
2353
2354 /* Resize down to exact size. */
2355 if (PyByteArray_Resize((PyObject *)bytearray_obj, len) < 0) {
2356 Py_DECREF(bytearray_obj);
2357 return NULL;
2358 }
2359
2360 if (bytearray_setslice(self, Py_SIZE(self), Py_SIZE(self), bytearray_obj) == -1) {
2361 Py_DECREF(bytearray_obj);
2362 return NULL;
2363 }
2364 Py_DECREF(bytearray_obj);
2365
2366 Py_RETURN_NONE;
2367 }
2368
2369 PyDoc_STRVAR(pop__doc__,
2370 "B.pop([index]) -> int\n\
2371 \n\
2372 Remove and return a single item from B. If no index\n\
2373 argument is given, will pop the last value.");
2374 static PyObject *
bytearray_pop(PyByteArrayObject * self,PyObject * args)2375 bytearray_pop(PyByteArrayObject *self, PyObject *args)
2376 {
2377 int value;
2378 Py_ssize_t where = -1, n = Py_SIZE(self);
2379
2380 if (!PyArg_ParseTuple(args, "|n:pop", &where))
2381 return NULL;
2382
2383 if (n == 0) {
2384 PyErr_SetString(PyExc_IndexError,
2385 "pop from empty bytearray");
2386 return NULL;
2387 }
2388 if (where < 0)
2389 where += Py_SIZE(self);
2390 if (where < 0 || where >= Py_SIZE(self)) {
2391 PyErr_SetString(PyExc_IndexError, "pop index out of range");
2392 return NULL;
2393 }
2394 if (!_canresize(self))
2395 return NULL;
2396
2397 value = self->ob_bytes[where];
2398 memmove(self->ob_bytes + where, self->ob_bytes + where + 1, n - where);
2399 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2400 return NULL;
2401
2402 return PyInt_FromLong((unsigned char)value);
2403 }
2404
2405 PyDoc_STRVAR(remove__doc__,
2406 "B.remove(int) -> None\n\
2407 \n\
2408 Remove the first occurrence of a value in B.");
2409 static PyObject *
bytearray_remove(PyByteArrayObject * self,PyObject * arg)2410 bytearray_remove(PyByteArrayObject *self, PyObject *arg)
2411 {
2412 int value;
2413 Py_ssize_t n = Py_SIZE(self);
2414 char *where;
2415
2416 if (! _getbytevalue(arg, &value))
2417 return NULL;
2418
2419 where = memchr(self->ob_bytes, value, n);
2420 if (!where) {
2421 PyErr_SetString(PyExc_ValueError, "value not found in bytearray");
2422 return NULL;
2423 }
2424 if (!_canresize(self))
2425 return NULL;
2426
2427 memmove(where, where + 1, self->ob_bytes + n - where);
2428 if (PyByteArray_Resize((PyObject *)self, n - 1) < 0)
2429 return NULL;
2430
2431 Py_RETURN_NONE;
2432 }
2433
2434 /* XXX These two helpers could be optimized if argsize == 1 */
2435
2436 static Py_ssize_t
lstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2437 lstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2438 void *argptr, Py_ssize_t argsize)
2439 {
2440 Py_ssize_t i = 0;
2441 while (i < mysize && memchr(argptr, myptr[i], argsize))
2442 i++;
2443 return i;
2444 }
2445
2446 static Py_ssize_t
rstrip_helper(unsigned char * myptr,Py_ssize_t mysize,void * argptr,Py_ssize_t argsize)2447 rstrip_helper(unsigned char *myptr, Py_ssize_t mysize,
2448 void *argptr, Py_ssize_t argsize)
2449 {
2450 Py_ssize_t i = mysize - 1;
2451 while (i >= 0 && memchr(argptr, myptr[i], argsize))
2452 i--;
2453 return i + 1;
2454 }
2455
2456 PyDoc_STRVAR(strip__doc__,
2457 "B.strip([bytes]) -> bytearray\n\
2458 \n\
2459 Strip leading and trailing bytes contained in the argument.\n\
2460 If the argument is omitted, strip ASCII whitespace.");
2461 static PyObject *
bytearray_strip(PyByteArrayObject * self,PyObject * args)2462 bytearray_strip(PyByteArrayObject *self, PyObject *args)
2463 {
2464 Py_ssize_t left, right, mysize, argsize;
2465 void *myptr, *argptr;
2466 PyObject *arg = Py_None;
2467 Py_buffer varg;
2468 if (!PyArg_ParseTuple(args, "|O:strip", &arg))
2469 return NULL;
2470 if (arg == Py_None) {
2471 argptr = "\t\n\r\f\v ";
2472 argsize = 6;
2473 }
2474 else {
2475 if (_getbuffer(arg, &varg) < 0)
2476 return NULL;
2477 argptr = varg.buf;
2478 argsize = varg.len;
2479 }
2480 myptr = self->ob_bytes;
2481 mysize = Py_SIZE(self);
2482 left = lstrip_helper(myptr, mysize, argptr, argsize);
2483 if (left == mysize)
2484 right = left;
2485 else
2486 right = rstrip_helper(myptr, mysize, argptr, argsize);
2487 if (arg != Py_None)
2488 PyBuffer_Release(&varg);
2489 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2490 }
2491
2492 PyDoc_STRVAR(lstrip__doc__,
2493 "B.lstrip([bytes]) -> bytearray\n\
2494 \n\
2495 Strip leading bytes contained in the argument.\n\
2496 If the argument is omitted, strip leading ASCII whitespace.");
2497 static PyObject *
bytearray_lstrip(PyByteArrayObject * self,PyObject * args)2498 bytearray_lstrip(PyByteArrayObject *self, PyObject *args)
2499 {
2500 Py_ssize_t left, right, mysize, argsize;
2501 void *myptr, *argptr;
2502 PyObject *arg = Py_None;
2503 Py_buffer varg;
2504 if (!PyArg_ParseTuple(args, "|O:lstrip", &arg))
2505 return NULL;
2506 if (arg == Py_None) {
2507 argptr = "\t\n\r\f\v ";
2508 argsize = 6;
2509 }
2510 else {
2511 if (_getbuffer(arg, &varg) < 0)
2512 return NULL;
2513 argptr = varg.buf;
2514 argsize = varg.len;
2515 }
2516 myptr = self->ob_bytes;
2517 mysize = Py_SIZE(self);
2518 left = lstrip_helper(myptr, mysize, argptr, argsize);
2519 right = mysize;
2520 if (arg != Py_None)
2521 PyBuffer_Release(&varg);
2522 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2523 }
2524
2525 PyDoc_STRVAR(rstrip__doc__,
2526 "B.rstrip([bytes]) -> bytearray\n\
2527 \n\
2528 Strip trailing bytes contained in the argument.\n\
2529 If the argument is omitted, strip trailing ASCII whitespace.");
2530 static PyObject *
bytearray_rstrip(PyByteArrayObject * self,PyObject * args)2531 bytearray_rstrip(PyByteArrayObject *self, PyObject *args)
2532 {
2533 Py_ssize_t left, right, mysize, argsize;
2534 void *myptr, *argptr;
2535 PyObject *arg = Py_None;
2536 Py_buffer varg;
2537 if (!PyArg_ParseTuple(args, "|O:rstrip", &arg))
2538 return NULL;
2539 if (arg == Py_None) {
2540 argptr = "\t\n\r\f\v ";
2541 argsize = 6;
2542 }
2543 else {
2544 if (_getbuffer(arg, &varg) < 0)
2545 return NULL;
2546 argptr = varg.buf;
2547 argsize = varg.len;
2548 }
2549 myptr = self->ob_bytes;
2550 mysize = Py_SIZE(self);
2551 left = 0;
2552 right = rstrip_helper(myptr, mysize, argptr, argsize);
2553 if (arg != Py_None)
2554 PyBuffer_Release(&varg);
2555 return PyByteArray_FromStringAndSize(self->ob_bytes + left, right - left);
2556 }
2557
2558 PyDoc_STRVAR(decode_doc,
2559 "B.decode([encoding[, errors]]) -> unicode object.\n\
2560 \n\
2561 Decodes B using the codec registered for encoding. encoding defaults\n\
2562 to the default encoding. errors may be given to set a different error\n\
2563 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2564 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2565 as well as any other name registered with codecs.register_error that is\n\
2566 able to handle UnicodeDecodeErrors.");
2567
2568 static PyObject *
bytearray_decode(PyObject * self,PyObject * args,PyObject * kwargs)2569 bytearray_decode(PyObject *self, PyObject *args, PyObject *kwargs)
2570 {
2571 const char *encoding = NULL;
2572 const char *errors = NULL;
2573 static char *kwlist[] = {"encoding", "errors", 0};
2574
2575 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", kwlist, &encoding, &errors))
2576 return NULL;
2577 if (encoding == NULL) {
2578 #ifdef Py_USING_UNICODE
2579 encoding = PyUnicode_GetDefaultEncoding();
2580 #else
2581 PyErr_SetString(PyExc_ValueError, "no encoding specified");
2582 return NULL;
2583 #endif
2584 }
2585 return _PyCodec_DecodeText(self, encoding, errors);
2586 }
2587
2588 PyDoc_STRVAR(alloc_doc,
2589 "B.__alloc__() -> int\n\
2590 \n\
2591 Returns the number of bytes actually allocated.");
2592
2593 static PyObject *
bytearray_alloc(PyByteArrayObject * self)2594 bytearray_alloc(PyByteArrayObject *self)
2595 {
2596 return PyInt_FromSsize_t(self->ob_alloc);
2597 }
2598
2599 PyDoc_STRVAR(join_doc,
2600 "B.join(iterable_of_bytes) -> bytes\n\
2601 \n\
2602 Concatenates any number of bytearray objects, with B in between each pair.");
2603
2604 static PyObject *
bytearray_join(PyByteArrayObject * self,PyObject * it)2605 bytearray_join(PyByteArrayObject *self, PyObject *it)
2606 {
2607 PyObject *seq;
2608 Py_ssize_t mysize = Py_SIZE(self);
2609 Py_ssize_t i;
2610 Py_ssize_t n;
2611 PyObject **items;
2612 Py_ssize_t totalsize = 0;
2613 PyObject *result;
2614 char *dest;
2615
2616 seq = PySequence_Fast(it, "can only join an iterable");
2617 if (seq == NULL)
2618 return NULL;
2619 n = PySequence_Fast_GET_SIZE(seq);
2620 items = PySequence_Fast_ITEMS(seq);
2621
2622 /* Compute the total size, and check that they are all bytes */
2623 /* XXX Shouldn't we use _getbuffer() on these items instead? */
2624 for (i = 0; i < n; i++) {
2625 PyObject *obj = items[i];
2626 if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
2627 PyErr_Format(PyExc_TypeError,
2628 "can only join an iterable of bytes "
2629 "(item %ld has type '%.100s')",
2630 /* XXX %ld isn't right on Win64 */
2631 (long)i, Py_TYPE(obj)->tp_name);
2632 goto error;
2633 }
2634 if (i > 0)
2635 totalsize += mysize;
2636 totalsize += Py_SIZE(obj);
2637 if (totalsize < 0) {
2638 PyErr_NoMemory();
2639 goto error;
2640 }
2641 }
2642
2643 /* Allocate the result, and copy the bytes */
2644 result = PyByteArray_FromStringAndSize(NULL, totalsize);
2645 if (result == NULL)
2646 goto error;
2647 dest = PyByteArray_AS_STRING(result);
2648 for (i = 0; i < n; i++) {
2649 PyObject *obj = items[i];
2650 Py_ssize_t size = Py_SIZE(obj);
2651 char *buf;
2652 if (PyByteArray_Check(obj))
2653 buf = PyByteArray_AS_STRING(obj);
2654 else
2655 buf = PyBytes_AS_STRING(obj);
2656 if (i) {
2657 memcpy(dest, self->ob_bytes, mysize);
2658 dest += mysize;
2659 }
2660 memcpy(dest, buf, size);
2661 dest += size;
2662 }
2663
2664 /* Done */
2665 Py_DECREF(seq);
2666 return result;
2667
2668 /* Error handling */
2669 error:
2670 Py_DECREF(seq);
2671 return NULL;
2672 }
2673
2674 PyDoc_STRVAR(splitlines__doc__,
2675 "B.splitlines(keepends=False) -> list of lines\n\
2676 \n\
2677 Return a list of the lines in B, breaking at line boundaries.\n\
2678 Line breaks are not included in the resulting list unless keepends\n\
2679 is given and true.");
2680
2681 static PyObject*
bytearray_splitlines(PyObject * self,PyObject * args)2682 bytearray_splitlines(PyObject *self, PyObject *args)
2683 {
2684 int keepends = 0;
2685
2686 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2687 return NULL;
2688
2689 return stringlib_splitlines(
2690 (PyObject*) self, PyByteArray_AS_STRING(self),
2691 PyByteArray_GET_SIZE(self), keepends
2692 );
2693 }
2694
/* Docstring for the "fromhex" entry of the bytearray method table, which
   is registered there as a class method. */
PyDoc_STRVAR(fromhex_doc,
"bytearray.fromhex(string) -> bytearray\n\
\n\
Create a bytearray object from a string of hexadecimal numbers.\n\
Spaces between two numbers are accepted.\n\
Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef').");
2701
/* Map one hexadecimal digit character to its numeric value.
   Returns 0..9 for decimal digits, 10..15 for 'a'..'f' in either case,
   and -1 for any other character. */
static int
hex_digit_to_int(char c)
{
    if (Py_ISDIGIT(c))
        return c - '0';

    /* Fold upper case onto lower case so one range test covers both. */
    if (Py_ISUPPER(c))
        c = Py_TOLOWER(c);

    return (c >= 'a' && c <= 'f') ? c - 'a' + 10 : -1;
}
2715
2716 static PyObject *
bytearray_fromhex(PyObject * cls,PyObject * args)2717 bytearray_fromhex(PyObject *cls, PyObject *args)
2718 {
2719 PyObject *newbytes;
2720 char *buf;
2721 char *hex;
2722 Py_ssize_t hexlen, byteslen, i, j;
2723 int top, bot;
2724
2725 if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &hexlen))
2726 return NULL;
2727 byteslen = hexlen/2; /* This overestimates if there are spaces */
2728 newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
2729 if (!newbytes)
2730 return NULL;
2731 buf = PyByteArray_AS_STRING(newbytes);
2732 for (i = j = 0; i < hexlen; i += 2) {
2733 /* skip over spaces in the input */
2734 while (hex[i] == ' ')
2735 i++;
2736 if (i >= hexlen)
2737 break;
2738 top = hex_digit_to_int(hex[i]);
2739 bot = hex_digit_to_int(hex[i+1]);
2740 if (top == -1 || bot == -1) {
2741 PyErr_Format(PyExc_ValueError,
2742 "non-hexadecimal number found in "
2743 "fromhex() arg at position %zd", i);
2744 goto error;
2745 }
2746 buf[j++] = (top << 4) + bot;
2747 }
2748 if (PyByteArray_Resize(newbytes, j) < 0)
2749 goto error;
2750 return newbytes;
2751
2752 error:
2753 Py_DECREF(newbytes);
2754 return NULL;
2755 }
2756
2757 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
2758
2759 static PyObject *
bytearray_reduce(PyByteArrayObject * self)2760 bytearray_reduce(PyByteArrayObject *self)
2761 {
2762 PyObject *latin1, *dict;
2763 if (self->ob_bytes)
2764 #ifdef Py_USING_UNICODE
2765 latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
2766 Py_SIZE(self), NULL);
2767 #else
2768 latin1 = PyString_FromStringAndSize(self->ob_bytes, Py_SIZE(self));
2769 #endif
2770 else
2771 #ifdef Py_USING_UNICODE
2772 latin1 = PyUnicode_FromString("");
2773 #else
2774 latin1 = PyString_FromString("");
2775 #endif
2776
2777 dict = PyObject_GetAttrString((PyObject *)self, "__dict__");
2778 if (dict == NULL) {
2779 PyErr_Clear();
2780 dict = Py_None;
2781 Py_INCREF(dict);
2782 }
2783
2784 return Py_BuildValue("(O(Ns)N)", Py_TYPE(self), latin1, "latin-1", dict);
2785 }
2786
2787 PyDoc_STRVAR(sizeof_doc,
2788 "B.__sizeof__() -> int\n\
2789 \n\
2790 Returns the size of B in memory, in bytes");
2791 static PyObject *
bytearray_sizeof(PyByteArrayObject * self)2792 bytearray_sizeof(PyByteArrayObject *self)
2793 {
2794 Py_ssize_t res;
2795
2796 res = _PyObject_SIZE(Py_TYPE(self)) + self->ob_alloc * sizeof(char);
2797 return PyInt_FromSsize_t(res);
2798 }
2799
/* Sequence protocol slots of bytearray; installed as tp_as_sequence in
   PyByteArray_Type.  The two slice slots are left empty. */
static PySequenceMethods bytearray_as_sequence = {
    (lenfunc)bytearray_length,              /* sq_length */
    (binaryfunc)PyByteArray_Concat,         /* sq_concat */
    (ssizeargfunc)bytearray_repeat,         /* sq_repeat */
    (ssizeargfunc)bytearray_getitem,        /* sq_item */
    0,                                      /* sq_slice */
    (ssizeobjargproc)bytearray_setitem,     /* sq_ass_item */
    0,                                      /* sq_ass_slice */
    (objobjproc)bytearray_contains,         /* sq_contains */
    (binaryfunc)bytearray_iconcat,          /* sq_inplace_concat */
    (ssizeargfunc)bytearray_irepeat,        /* sq_inplace_repeat */
};
2812
/* Mapping protocol slots of bytearray; installed as tp_as_mapping in
   PyByteArray_Type. */
static PyMappingMethods bytearray_as_mapping = {
    (lenfunc)bytearray_length,              /* mp_length */
    (binaryfunc)bytearray_subscript,        /* mp_subscript */
    (objobjargproc)bytearray_ass_subscript, /* mp_ass_subscript */
};
2818
/* Buffer protocol slots of bytearray; installed as tp_as_buffer in
   PyByteArray_Type.  The first four entries are the old-style segment
   based interface, the last two the new-style getbuffer/releasebuffer
   pair. */
static PyBufferProcs bytearray_as_buffer = {
    (readbufferproc)bytearray_buffer_getreadbuf,    /* bf_getreadbuffer */
    (writebufferproc)bytearray_buffer_getwritebuf,  /* bf_getwritebuffer */
    (segcountproc)bytearray_buffer_getsegcount,     /* bf_getsegcount */
    (charbufferproc)bytearray_buffer_getcharbuf,    /* bf_getcharbuffer */
    (getbufferproc)bytearray_getbuffer,             /* bf_getbuffer */
    (releasebufferproc)bytearray_releasebuffer,     /* bf_releasebuffer */
};
2827
/* Method table of bytearray; installed as tp_methods in PyByteArray_Type.
   Each entry gives the Python-visible name, the C implementation, the
   calling-convention flags and the docstring.  Entries are kept in
   alphabetical order and the table ends with a {NULL} sentinel. */
static PyMethodDef
bytearray_methods[] = {
    {"__alloc__", (PyCFunction)bytearray_alloc, METH_NOARGS, alloc_doc},
    {"__reduce__", (PyCFunction)bytearray_reduce, METH_NOARGS, reduce_doc},
    {"__sizeof__", (PyCFunction)bytearray_sizeof, METH_NOARGS, sizeof_doc},
    {"append", (PyCFunction)bytearray_append, METH_O, append__doc__},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__},
    {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__},
    {"decode", (PyCFunction)bytearray_decode, METH_VARARGS | METH_KEYWORDS, decode_doc},
    {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"extend", (PyCFunction)bytearray_extend, METH_O, extend__doc__},
    {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__},
    /* The only class method in the table. */
    {"fromhex", (PyCFunction)bytearray_fromhex, METH_VARARGS|METH_CLASS,
     fromhex_doc},
    {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__},
    {"insert", (PyCFunction)bytearray_insert, METH_VARARGS, insert__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    {"join", (PyCFunction)bytearray_join, METH_O, join_doc},
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    {"lstrip", (PyCFunction)bytearray_lstrip, METH_VARARGS, lstrip__doc__},
    {"partition", (PyCFunction)bytearray_partition, METH_O, partition__doc__},
    {"pop", (PyCFunction)bytearray_pop, METH_VARARGS, pop__doc__},
    {"remove", (PyCFunction)bytearray_remove, METH_O, remove__doc__},
    {"replace", (PyCFunction)bytearray_replace, METH_VARARGS, replace__doc__},
    {"reverse", (PyCFunction)bytearray_reverse, METH_NOARGS, reverse__doc__},
    {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
    {"rpartition", (PyCFunction)bytearray_rpartition, METH_O, rpartition__doc__},
    {"rsplit", (PyCFunction)bytearray_rsplit, METH_VARARGS, rsplit__doc__},
    {"rstrip", (PyCFunction)bytearray_rstrip, METH_VARARGS, rstrip__doc__},
    {"split", (PyCFunction)bytearray_split, METH_VARARGS, split__doc__},
    {"splitlines", (PyCFunction)bytearray_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS ,
     startswith__doc__},
    {"strip", (PyCFunction)bytearray_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    {"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
     translate__doc__},
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
    {NULL}
};
2892
/* Docstring of the bytearray type itself; installed as tp_doc in
   PyByteArray_Type. */
PyDoc_STRVAR(bytearray_doc,
"bytearray(iterable_of_ints) -> bytearray.\n\
bytearray(string, encoding[, errors]) -> bytearray.\n\
bytearray(bytes_or_bytearray) -> mutable copy of bytes_or_bytearray.\n\
bytearray(memory_view) -> bytearray.\n\
\n\
Construct a mutable bytearray object from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - a bytes or a bytearray object\n\
  - any object implementing the buffer API.\n\
\n\
bytearray(int) -> bytearray.\n\
\n\
Construct a zero-initialized bytearray of the given length.");
2908
2909
/* Forward declaration: the iterator constructor is defined after the
   iterator type further down, but the type object below needs it for
   its tp_iter slot. */
static PyObject *bytearray_iter(PyObject *seq);

/* Type object for bytearray.  It wires up the sequence, mapping and
   buffer slot tables and the method table defined above; hashing is left
   unset (tp_hash is 0) and subclassing is allowed (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyByteArray_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray",                        /* tp_name */
    sizeof(PyByteArrayObject),          /* tp_basicsize */
    0,                                  /* tp_itemsize */
    (destructor)bytearray_dealloc,      /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)bytearray_repr,           /* tp_repr */
    0,                                  /* tp_as_number */
    &bytearray_as_sequence,             /* tp_as_sequence */
    &bytearray_as_mapping,              /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    bytearray_str,                      /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    &bytearray_as_buffer,               /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_NEWBUFFER,          /* tp_flags */
    bytearray_doc,                      /* tp_doc */
    0,                                  /* tp_traverse */
    0,                                  /* tp_clear */
    (richcmpfunc)bytearray_richcompare, /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    bytearray_iter,                     /* tp_iter */
    0,                                  /* tp_iternext */
    bytearray_methods,                  /* tp_methods */
    0,                                  /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)bytearray_init,           /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_Del,                       /* tp_free */
};
2954
2955 /*********************** Bytes Iterator ****************************/
2956
/* State of a bytearray iterator: the array being walked and the index of
   the next byte to hand out. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;        /* Index of the next byte to return */
    PyByteArrayObject *it_seq; /* Set to NULL when iterator is exhausted */
} bytesiterobject;
2962
/* Destructor of the bytearray iterator: remove it from GC tracking, drop
   the reference to the underlying bytearray and free the object. */
static void
bytearrayiter_dealloc(bytesiterobject *it)
{
    _PyObject_GC_UNTRACK(it);
    /* it_seq is NULL once the iterator has been exhausted, hence the
       NULL-tolerant XDECREF. */
    Py_XDECREF(it->it_seq);
    PyObject_GC_Del(it);
}
2970
/* GC traversal hook of the bytearray iterator: the only object reference
   it holds is it_seq.  Returns 0 unless Py_VISIT returns early with the
   visitor's non-zero result. */
static int
bytearrayiter_traverse(bytesiterobject *it, visitproc visit, void *arg)
{
    Py_VISIT(it->it_seq);
    return 0;
}
2977
2978 static PyObject *
bytearrayiter_next(bytesiterobject * it)2979 bytearrayiter_next(bytesiterobject *it)
2980 {
2981 PyByteArrayObject *seq;
2982 PyObject *item;
2983
2984 assert(it != NULL);
2985 seq = it->it_seq;
2986 if (seq == NULL)
2987 return NULL;
2988 assert(PyByteArray_Check(seq));
2989
2990 if (it->it_index < PyByteArray_GET_SIZE(seq)) {
2991 item = PyInt_FromLong(
2992 (unsigned char)seq->ob_bytes[it->it_index]);
2993 if (item != NULL)
2994 ++it->it_index;
2995 return item;
2996 }
2997
2998 it->it_seq = NULL;
2999 Py_DECREF(seq);
3000 return NULL;
3001 }
3002
3003 static PyObject *
bytesarrayiter_length_hint(bytesiterobject * it)3004 bytesarrayiter_length_hint(bytesiterobject *it)
3005 {
3006 Py_ssize_t len = 0;
3007 if (it->it_seq)
3008 len = PyByteArray_GET_SIZE(it->it_seq) - it->it_index;
3009 return PyInt_FromSsize_t(len);
3010 }
3011
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");

/* Method table of the bytearray iterator; installed as tp_methods in
   PyByteArrayIter_Type.  Its only entry is __length_hint__. */
static PyMethodDef bytearrayiter_methods[] = {
    {"__length_hint__", (PyCFunction)bytesarrayiter_length_hint, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL} /* sentinel */
};
3020
/* Type object of the iterator returned by bytearray_iter.  Instances take
   part in cyclic GC (Py_TPFLAGS_HAVE_GC with a traverse hook) and are
   their own iterator (tp_iter is PyObject_SelfIter). */
PyTypeObject PyByteArrayIter_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytearray_iterator",              /* tp_name */
    sizeof(bytesiterobject),           /* tp_basicsize */
    0,                                 /* tp_itemsize */
    /* methods */
    (destructor)bytearrayiter_dealloc, /* tp_dealloc */
    0,                                 /* tp_print */
    0,                                 /* tp_getattr */
    0,                                 /* tp_setattr */
    0,                                 /* tp_compare */
    0,                                 /* tp_repr */
    0,                                 /* tp_as_number */
    0,                                 /* tp_as_sequence */
    0,                                 /* tp_as_mapping */
    0,                                 /* tp_hash */
    0,                                 /* tp_call */
    0,                                 /* tp_str */
    PyObject_GenericGetAttr,           /* tp_getattro */
    0,                                 /* tp_setattro */
    0,                                 /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                 /* tp_doc */
    (traverseproc)bytearrayiter_traverse,  /* tp_traverse */
    0,                                 /* tp_clear */
    0,                                 /* tp_richcompare */
    0,                                 /* tp_weaklistoffset */
    PyObject_SelfIter,                 /* tp_iter */
    (iternextfunc)bytearrayiter_next,  /* tp_iternext */
    bytearrayiter_methods,             /* tp_methods */
    0,                                 /* tp_members */
};
3053
3054 static PyObject *
bytearray_iter(PyObject * seq)3055 bytearray_iter(PyObject *seq)
3056 {
3057 bytesiterobject *it;
3058
3059 if (!PyByteArray_Check(seq)) {
3060 PyErr_BadInternalCall();
3061 return NULL;
3062 }
3063 it = PyObject_GC_New(bytesiterobject, &PyByteArrayIter_Type);
3064 if (it == NULL)
3065 return NULL;
3066 it->it_index = 0;
3067 Py_INCREF(seq);
3068 it->it_seq = (PyByteArrayObject *)seq;
3069 _PyObject_GC_TRACK(it);
3070 return (PyObject *)it;
3071 }
3072