1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "bytes_methods.h"
4 
5 PyDoc_STRVAR_shared(_Py_isspace__doc__,
6 "B.isspace() -> bool\n\
7 \n\
8 Return True if all characters in B are whitespace\n\
9 and there is at least one character in B, False otherwise.");
10 
11 PyObject*
_Py_bytes_isspace(const char * cptr,Py_ssize_t len)12 _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
13 {
14     const unsigned char *p
15         = (const unsigned char *) cptr;
16     const unsigned char *e;
17 
18     /* Shortcut for single character strings */
19     if (len == 1 && Py_ISSPACE(*p))
20         Py_RETURN_TRUE;
21 
22     /* Special case for empty strings */
23     if (len == 0)
24         Py_RETURN_FALSE;
25 
26     e = p + len;
27     for (; p < e; p++) {
28         if (!Py_ISSPACE(*p))
29             Py_RETURN_FALSE;
30     }
31     Py_RETURN_TRUE;
32 }
33 
34 
35 PyDoc_STRVAR_shared(_Py_isalpha__doc__,
36 "B.isalpha() -> bool\n\
37 \n\
38 Return True if all characters in B are alphabetic\n\
39 and there is at least one character in B, False otherwise.");
40 
41 PyObject*
_Py_bytes_isalpha(const char * cptr,Py_ssize_t len)42 _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
43 {
44     const unsigned char *p
45         = (const unsigned char *) cptr;
46     const unsigned char *e;
47 
48     /* Shortcut for single character strings */
49     if (len == 1 && Py_ISALPHA(*p))
50         Py_RETURN_TRUE;
51 
52     /* Special case for empty strings */
53     if (len == 0)
54         Py_RETURN_FALSE;
55 
56     e = p + len;
57     for (; p < e; p++) {
58         if (!Py_ISALPHA(*p))
59             Py_RETURN_FALSE;
60     }
61     Py_RETURN_TRUE;
62 }
63 
64 
65 PyDoc_STRVAR_shared(_Py_isalnum__doc__,
66 "B.isalnum() -> bool\n\
67 \n\
68 Return True if all characters in B are alphanumeric\n\
69 and there is at least one character in B, False otherwise.");
70 
71 PyObject*
_Py_bytes_isalnum(const char * cptr,Py_ssize_t len)72 _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
73 {
74     const unsigned char *p
75         = (const unsigned char *) cptr;
76     const unsigned char *e;
77 
78     /* Shortcut for single character strings */
79     if (len == 1 && Py_ISALNUM(*p))
80         Py_RETURN_TRUE;
81 
82     /* Special case for empty strings */
83     if (len == 0)
84         Py_RETURN_FALSE;
85 
86     e = p + len;
87     for (; p < e; p++) {
88         if (!Py_ISALNUM(*p))
89             Py_RETURN_FALSE;
90     }
91     Py_RETURN_TRUE;
92 }
93 
94 
95 PyDoc_STRVAR_shared(_Py_isascii__doc__,
96 "B.isascii() -> bool\n\
97 \n\
98 Return True if B is empty or all characters in B are ASCII,\n\
99 False otherwise.");
100 
101 // Optimization is copied from ascii_decode in unicodeobject.c
102 /* Mask to quickly check whether a C 'long' contains a
103    non-ASCII, UTF8-encoded char. */
104 #if (SIZEOF_LONG == 8)
105 # define ASCII_CHAR_MASK 0x8080808080808080UL
106 #elif (SIZEOF_LONG == 4)
107 # define ASCII_CHAR_MASK 0x80808080UL
108 #else
109 # error C 'long' size should be either 4 or 8!
110 #endif
111 
112 PyObject*
_Py_bytes_isascii(const char * cptr,Py_ssize_t len)113 _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
114 {
115     const char *p = cptr;
116     const char *end = p + len;
117     const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
118 
119     while (p < end) {
120         /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121            for an explanation. */
122         if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
123             /* Help allocation */
124             const char *_p = p;
125             while (_p < aligned_end) {
126                 unsigned long value = *(const unsigned long *) _p;
127                 if (value & ASCII_CHAR_MASK) {
128                     Py_RETURN_FALSE;
129                 }
130                 _p += SIZEOF_LONG;
131             }
132             p = _p;
133             if (_p == end)
134                 break;
135         }
136         if ((unsigned char)*p & 0x80) {
137             Py_RETURN_FALSE;
138         }
139         p++;
140     }
141     Py_RETURN_TRUE;
142 }
143 
144 #undef ASCII_CHAR_MASK
145 
146 
147 PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148 "B.isdigit() -> bool\n\
149 \n\
150 Return True if all characters in B are digits\n\
151 and there is at least one character in B, False otherwise.");
152 
153 PyObject*
_Py_bytes_isdigit(const char * cptr,Py_ssize_t len)154 _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155 {
156     const unsigned char *p
157         = (const unsigned char *) cptr;
158     const unsigned char *e;
159 
160     /* Shortcut for single character strings */
161     if (len == 1 && Py_ISDIGIT(*p))
162         Py_RETURN_TRUE;
163 
164     /* Special case for empty strings */
165     if (len == 0)
166         Py_RETURN_FALSE;
167 
168     e = p + len;
169     for (; p < e; p++) {
170         if (!Py_ISDIGIT(*p))
171             Py_RETURN_FALSE;
172     }
173     Py_RETURN_TRUE;
174 }
175 
176 
177 PyDoc_STRVAR_shared(_Py_islower__doc__,
178 "B.islower() -> bool\n\
179 \n\
180 Return True if all cased characters in B are lowercase and there is\n\
181 at least one cased character in B, False otherwise.");
182 
183 PyObject*
_Py_bytes_islower(const char * cptr,Py_ssize_t len)184 _Py_bytes_islower(const char *cptr, Py_ssize_t len)
185 {
186     const unsigned char *p
187         = (const unsigned char *) cptr;
188     const unsigned char *e;
189     int cased;
190 
191     /* Shortcut for single character strings */
192     if (len == 1)
193         return PyBool_FromLong(Py_ISLOWER(*p));
194 
195     /* Special case for empty strings */
196     if (len == 0)
197         Py_RETURN_FALSE;
198 
199     e = p + len;
200     cased = 0;
201     for (; p < e; p++) {
202         if (Py_ISUPPER(*p))
203             Py_RETURN_FALSE;
204         else if (!cased && Py_ISLOWER(*p))
205             cased = 1;
206     }
207     return PyBool_FromLong(cased);
208 }
209 
210 
211 PyDoc_STRVAR_shared(_Py_isupper__doc__,
212 "B.isupper() -> bool\n\
213 \n\
214 Return True if all cased characters in B are uppercase and there is\n\
215 at least one cased character in B, False otherwise.");
216 
217 PyObject*
_Py_bytes_isupper(const char * cptr,Py_ssize_t len)218 _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219 {
220     const unsigned char *p
221         = (const unsigned char *) cptr;
222     const unsigned char *e;
223     int cased;
224 
225     /* Shortcut for single character strings */
226     if (len == 1)
227         return PyBool_FromLong(Py_ISUPPER(*p));
228 
229     /* Special case for empty strings */
230     if (len == 0)
231         Py_RETURN_FALSE;
232 
233     e = p + len;
234     cased = 0;
235     for (; p < e; p++) {
236         if (Py_ISLOWER(*p))
237             Py_RETURN_FALSE;
238         else if (!cased && Py_ISUPPER(*p))
239             cased = 1;
240     }
241     return PyBool_FromLong(cased);
242 }
243 
244 
245 PyDoc_STRVAR_shared(_Py_istitle__doc__,
246 "B.istitle() -> bool\n\
247 \n\
248 Return True if B is a titlecased string and there is at least one\n\
249 character in B, i.e. uppercase characters may only follow uncased\n\
250 characters and lowercase characters only cased ones. Return False\n\
251 otherwise.");
252 
253 PyObject*
_Py_bytes_istitle(const char * cptr,Py_ssize_t len)254 _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255 {
256     const unsigned char *p
257         = (const unsigned char *) cptr;
258     const unsigned char *e;
259     int cased, previous_is_cased;
260 
261     /* Shortcut for single character strings */
262     if (len == 1)
263         return PyBool_FromLong(Py_ISUPPER(*p));
264 
265     /* Special case for empty strings */
266     if (len == 0)
267         Py_RETURN_FALSE;
268 
269     e = p + len;
270     cased = 0;
271     previous_is_cased = 0;
272     for (; p < e; p++) {
273         const unsigned char ch = *p;
274 
275         if (Py_ISUPPER(ch)) {
276             if (previous_is_cased)
277                 Py_RETURN_FALSE;
278             previous_is_cased = 1;
279             cased = 1;
280         }
281         else if (Py_ISLOWER(ch)) {
282             if (!previous_is_cased)
283                 Py_RETURN_FALSE;
284             previous_is_cased = 1;
285             cased = 1;
286         }
287         else
288             previous_is_cased = 0;
289     }
290     return PyBool_FromLong(cased);
291 }
292 
293 
294 PyDoc_STRVAR_shared(_Py_lower__doc__,
295 "B.lower() -> copy of B\n\
296 \n\
297 Return a copy of B with all ASCII characters converted to lowercase.");
298 
299 void
_Py_bytes_lower(char * result,const char * cptr,Py_ssize_t len)300 _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301 {
302     Py_ssize_t i;
303 
304     for (i = 0; i < len; i++) {
305         result[i] = Py_TOLOWER((unsigned char) cptr[i]);
306     }
307 }
308 
309 
310 PyDoc_STRVAR_shared(_Py_upper__doc__,
311 "B.upper() -> copy of B\n\
312 \n\
313 Return a copy of B with all ASCII characters converted to uppercase.");
314 
315 void
_Py_bytes_upper(char * result,const char * cptr,Py_ssize_t len)316 _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317 {
318     Py_ssize_t i;
319 
320     for (i = 0; i < len; i++) {
321         result[i] = Py_TOUPPER((unsigned char) cptr[i]);
322     }
323 }
324 
325 
326 PyDoc_STRVAR_shared(_Py_title__doc__,
327 "B.title() -> copy of B\n\
328 \n\
329 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330 characters, all remaining cased characters have lowercase.");
331 
332 void
_Py_bytes_title(char * result,const char * s,Py_ssize_t len)333 _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
334 {
335     Py_ssize_t i;
336     int previous_is_cased = 0;
337 
338     for (i = 0; i < len; i++) {
339         int c = Py_CHARMASK(*s++);
340         if (Py_ISLOWER(c)) {
341             if (!previous_is_cased)
342                 c = Py_TOUPPER(c);
343             previous_is_cased = 1;
344         } else if (Py_ISUPPER(c)) {
345             if (previous_is_cased)
346                 c = Py_TOLOWER(c);
347             previous_is_cased = 1;
348         } else
349             previous_is_cased = 0;
350         *result++ = c;
351     }
352 }
353 
354 
355 PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356 "B.capitalize() -> copy of B\n\
357 \n\
358 Return a copy of B with only its first character capitalized (ASCII)\n\
359 and the rest lower-cased.");
360 
361 void
_Py_bytes_capitalize(char * result,const char * s,Py_ssize_t len)362 _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
363 {
364     if (len > 0) {
365         *result = Py_TOUPPER(*s);
366         _Py_bytes_lower(result + 1, s + 1, len - 1);
367     }
368 }
369 
370 
371 PyDoc_STRVAR_shared(_Py_swapcase__doc__,
372 "B.swapcase() -> copy of B\n\
373 \n\
374 Return a copy of B with uppercase ASCII characters converted\n\
375 to lowercase ASCII and vice versa.");
376 
377 void
_Py_bytes_swapcase(char * result,const char * s,Py_ssize_t len)378 _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
379 {
380     Py_ssize_t i;
381 
382     for (i = 0; i < len; i++) {
383         int c = Py_CHARMASK(*s++);
384         if (Py_ISLOWER(c)) {
385             *result = Py_TOUPPER(c);
386         }
387         else if (Py_ISUPPER(c)) {
388             *result = Py_TOLOWER(c);
389         }
390         else
391             *result = c;
392         result++;
393     }
394 }
395 
396 
397 PyDoc_STRVAR_shared(_Py_maketrans__doc__,
398 "B.maketrans(frm, to) -> translation table\n\
399 \n\
400 Return a translation table (a bytes object of length 256) suitable\n\
401 for use in the bytes or bytearray translate method where each byte\n\
402 in frm is mapped to the byte at the same position in to.\n\
403 The bytes objects frm and to must be of the same length.");
404 
405 PyObject *
_Py_bytes_maketrans(Py_buffer * frm,Py_buffer * to)406 _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
407 {
408     PyObject *res = NULL;
409     Py_ssize_t i;
410     char *p;
411 
412     if (frm->len != to->len) {
413         PyErr_Format(PyExc_ValueError,
414                      "maketrans arguments must have same length");
415         return NULL;
416     }
417     res = PyBytes_FromStringAndSize(NULL, 256);
418     if (!res)
419         return NULL;
420     p = PyBytes_AS_STRING(res);
421     for (i = 0; i < 256; i++)
422         p[i] = (char) i;
423     for (i = 0; i < frm->len; i++) {
424         p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
425     }
426 
427     return res;
428 }
429 
430 #define FASTSEARCH fastsearch
431 #define STRINGLIB(F) stringlib_##F
432 #define STRINGLIB_CHAR char
433 #define STRINGLIB_SIZEOF_CHAR 1
434 
435 #include "stringlib/fastsearch.h"
436 #include "stringlib/count.h"
437 #include "stringlib/find.h"
438 
439 /*
440 Wraps stringlib_parse_args_finds() and additionally checks the first
441 argument type.
442 
443 In case the first argument is a bytes-like object, sets it to subobj,
444 and doesn't touch the byte parameter.
445 In case it is an integer in range(0, 256), writes the integer value
446 to byte, and sets subobj to NULL.
447 
448 The other parameters are similar to those of
449 stringlib_parse_args_finds().
450 */
451 
452 Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char * function_name,PyObject * args,PyObject ** subobj,char * byte,Py_ssize_t * start,Py_ssize_t * end)453 parse_args_finds_byte(const char *function_name, PyObject *args,
454                       PyObject **subobj, char *byte,
455                       Py_ssize_t *start, Py_ssize_t *end)
456 {
457     PyObject *tmp_subobj;
458     Py_ssize_t ival;
459 
460     if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
461                                    start, end))
462         return 0;
463 
464     if (PyObject_CheckBuffer(tmp_subobj)) {
465         *subobj = tmp_subobj;
466         return 1;
467     }
468 
469     if (!PyIndex_Check(tmp_subobj)) {
470         PyErr_Format(PyExc_TypeError,
471                      "argument should be integer or bytes-like object, "
472                      "not '%.200s'",
473                      Py_TYPE(tmp_subobj)->tp_name);
474         return 0;
475     }
476 
477     ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
478     if (ival == -1 && PyErr_Occurred()) {
479         return 0;
480     }
481     if (ival < 0 || ival > 255) {
482         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
483         return 0;
484     }
485 
486     *subobj = NULL;
487     *byte = (char)ival;
488     return 1;
489 }
490 
/* Helper macro to fix up start/end slice values against a buffer of
   length len:
     - end greater than len is clamped to len;
     - negative end has len added and is then clamped to 0;
     - negative start has len added and is then clamped to 0.

   start and end must be modifiable lvalues; every argument is evaluated
   more than once, so pass only side-effect-free expressions.

   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and can safely be used as the body of an unbraced
   if/else.  Invoke it with a trailing semicolon. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
505 
506 Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int dir)507 find_internal(const char *str, Py_ssize_t len,
508               const char *function_name, PyObject *args, int dir)
509 {
510     PyObject *subobj;
511     char byte;
512     Py_buffer subbuf;
513     const char *sub;
514     Py_ssize_t sub_len;
515     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
516     Py_ssize_t res;
517 
518     if (!parse_args_finds_byte(function_name, args,
519                                &subobj, &byte, &start, &end))
520         return -2;
521 
522     if (subobj) {
523         if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
524             return -2;
525 
526         sub = subbuf.buf;
527         sub_len = subbuf.len;
528     }
529     else {
530         sub = &byte;
531         sub_len = 1;
532     }
533 
534     ADJUST_INDICES(start, end, len);
535     if (end - start < sub_len)
536         res = -1;
537     else if (sub_len == 1) {
538         if (dir > 0)
539             res = stringlib_find_char(
540                 str + start, end - start,
541                 *sub);
542         else
543             res = stringlib_rfind_char(
544                 str + start, end - start,
545                 *sub);
546         if (res >= 0)
547             res += start;
548     }
549     else {
550         if (dir > 0)
551             res = stringlib_find_slice(
552                 str, len,
553                 sub, sub_len, start, end);
554         else
555             res = stringlib_rfind_slice(
556                 str, len,
557                 sub, sub_len, start, end);
558     }
559 
560     if (subobj)
561         PyBuffer_Release(&subbuf);
562 
563     return res;
564 }
565 
566 PyDoc_STRVAR_shared(_Py_find__doc__,
567 "B.find(sub[, start[, end]]) -> int\n\
568 \n\
569 Return the lowest index in B where subsection sub is found,\n\
570 such that sub is contained within B[start,end].  Optional\n\
571 arguments start and end are interpreted as in slice notation.\n\
572 \n\
573 Return -1 on failure.");
574 
575 PyObject *
_Py_bytes_find(const char * str,Py_ssize_t len,PyObject * args)576 _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
577 {
578     Py_ssize_t result = find_internal(str, len, "find", args, +1);
579     if (result == -2)
580         return NULL;
581     return PyLong_FromSsize_t(result);
582 }
583 
584 PyDoc_STRVAR_shared(_Py_index__doc__,
585 "B.index(sub[, start[, end]]) -> int\n\
586 \n\
587 Return the lowest index in B where subsection sub is found,\n\
588 such that sub is contained within B[start,end].  Optional\n\
589 arguments start and end are interpreted as in slice notation.\n\
590 \n\
591 Raises ValueError when the subsection is not found.");
592 
593 PyObject *
_Py_bytes_index(const char * str,Py_ssize_t len,PyObject * args)594 _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
595 {
596     Py_ssize_t result = find_internal(str, len, "index", args, +1);
597     if (result == -2)
598         return NULL;
599     if (result == -1) {
600         PyErr_SetString(PyExc_ValueError,
601                         "subsection not found");
602         return NULL;
603     }
604     return PyLong_FromSsize_t(result);
605 }
606 
607 PyDoc_STRVAR_shared(_Py_rfind__doc__,
608 "B.rfind(sub[, start[, end]]) -> int\n\
609 \n\
610 Return the highest index in B where subsection sub is found,\n\
611 such that sub is contained within B[start,end].  Optional\n\
612 arguments start and end are interpreted as in slice notation.\n\
613 \n\
614 Return -1 on failure.");
615 
616 PyObject *
_Py_bytes_rfind(const char * str,Py_ssize_t len,PyObject * args)617 _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
618 {
619     Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
620     if (result == -2)
621         return NULL;
622     return PyLong_FromSsize_t(result);
623 }
624 
625 PyDoc_STRVAR_shared(_Py_rindex__doc__,
626 "B.rindex(sub[, start[, end]]) -> int\n\
627 \n\
628 Return the highest index in B where subsection sub is found,\n\
629 such that sub is contained within B[start,end].  Optional\n\
630 arguments start and end are interpreted as in slice notation.\n\
631 \n\
632 Raise ValueError when the subsection is not found.");
633 
634 PyObject *
_Py_bytes_rindex(const char * str,Py_ssize_t len,PyObject * args)635 _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
636 {
637     Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
638     if (result == -2)
639         return NULL;
640     if (result == -1) {
641         PyErr_SetString(PyExc_ValueError,
642                         "subsection not found");
643         return NULL;
644     }
645     return PyLong_FromSsize_t(result);
646 }
647 
648 PyDoc_STRVAR_shared(_Py_count__doc__,
649 "B.count(sub[, start[, end]]) -> int\n\
650 \n\
651 Return the number of non-overlapping occurrences of subsection sub in\n\
652 bytes B[start:end].  Optional arguments start and end are interpreted\n\
653 as in slice notation.");
654 
655 PyObject *
_Py_bytes_count(const char * str,Py_ssize_t len,PyObject * args)656 _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
657 {
658     PyObject *sub_obj;
659     const char *sub;
660     Py_ssize_t sub_len;
661     char byte;
662     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
663 
664     Py_buffer vsub;
665     PyObject *count_obj;
666 
667     if (!parse_args_finds_byte("count", args,
668                                &sub_obj, &byte, &start, &end))
669         return NULL;
670 
671     if (sub_obj) {
672         if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
673             return NULL;
674 
675         sub = vsub.buf;
676         sub_len = vsub.len;
677     }
678     else {
679         sub = &byte;
680         sub_len = 1;
681     }
682 
683     ADJUST_INDICES(start, end, len);
684 
685     count_obj = PyLong_FromSsize_t(
686         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
687         );
688 
689     if (sub_obj)
690         PyBuffer_Release(&vsub);
691 
692     return count_obj;
693 }
694 
695 int
_Py_bytes_contains(const char * str,Py_ssize_t len,PyObject * arg)696 _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
697 {
698     Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
699     if (ival == -1 && PyErr_Occurred()) {
700         Py_buffer varg;
701         Py_ssize_t pos;
702         PyErr_Clear();
703         if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
704             return -1;
705         pos = stringlib_find(str, len,
706                              varg.buf, varg.len, 0);
707         PyBuffer_Release(&varg);
708         return pos >= 0;
709     }
710     if (ival < 0 || ival >= 256) {
711         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
712         return -1;
713     }
714 
715     return memchr(str, (int) ival, len) != NULL;
716 }
717 
718 
719 /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
720  * against substr, using the start and end arguments. Returns
721  * -1 on error, 0 if not found and 1 if found.
722  */
723 static int
tailmatch(const char * str,Py_ssize_t len,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)724 tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
725           Py_ssize_t start, Py_ssize_t end, int direction)
726 {
727     Py_buffer sub_view = {NULL, NULL};
728     const char *sub;
729     Py_ssize_t slen;
730 
731     if (PyBytes_Check(substr)) {
732         sub = PyBytes_AS_STRING(substr);
733         slen = PyBytes_GET_SIZE(substr);
734     }
735     else {
736         if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
737             return -1;
738         sub = sub_view.buf;
739         slen = sub_view.len;
740     }
741 
742     ADJUST_INDICES(start, end, len);
743 
744     if (direction < 0) {
745         /* startswith */
746         if (start > len - slen)
747             goto notfound;
748     } else {
749         /* endswith */
750         if (end - start < slen || start > len)
751             goto notfound;
752 
753         if (end - slen > start)
754             start = end - slen;
755     }
756     if (end - start < slen)
757         goto notfound;
758     if (memcmp(str + start, sub, slen) != 0)
759         goto notfound;
760 
761     PyBuffer_Release(&sub_view);
762     return 1;
763 
764 notfound:
765     PyBuffer_Release(&sub_view);
766     return 0;
767 }
768 
769 static PyObject *
_Py_bytes_tailmatch(const char * str,Py_ssize_t len,const char * function_name,PyObject * args,int direction)770 _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
771                     const char *function_name, PyObject *args,
772                     int direction)
773 {
774     Py_ssize_t start = 0;
775     Py_ssize_t end = PY_SSIZE_T_MAX;
776     PyObject *subobj;
777     int result;
778 
779     if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
780         return NULL;
781     if (PyTuple_Check(subobj)) {
782         Py_ssize_t i;
783         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
784             result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
785                                start, end, direction);
786             if (result == -1)
787                 return NULL;
788             else if (result) {
789                 Py_RETURN_TRUE;
790             }
791         }
792         Py_RETURN_FALSE;
793     }
794     result = tailmatch(str, len, subobj, start, end, direction);
795     if (result == -1) {
796         if (PyErr_ExceptionMatches(PyExc_TypeError))
797             PyErr_Format(PyExc_TypeError,
798                          "%s first arg must be bytes or a tuple of bytes, "
799                          "not %s",
800                          function_name, Py_TYPE(subobj)->tp_name);
801         return NULL;
802     }
803     else
804         return PyBool_FromLong(result);
805 }
806 
807 PyDoc_STRVAR_shared(_Py_startswith__doc__,
808 "B.startswith(prefix[, start[, end]]) -> bool\n\
809 \n\
810 Return True if B starts with the specified prefix, False otherwise.\n\
811 With optional start, test B beginning at that position.\n\
812 With optional end, stop comparing B at that position.\n\
813 prefix can also be a tuple of bytes to try.");
814 
815 PyObject *
_Py_bytes_startswith(const char * str,Py_ssize_t len,PyObject * args)816 _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
817 {
818     return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
819 }
820 
821 PyDoc_STRVAR_shared(_Py_endswith__doc__,
822 "B.endswith(suffix[, start[, end]]) -> bool\n\
823 \n\
824 Return True if B ends with the specified suffix, False otherwise.\n\
825 With optional start, test B beginning at that position.\n\
826 With optional end, stop comparing B at that position.\n\
827 suffix can also be a tuple of bytes to try.");
828 
829 PyObject *
_Py_bytes_endswith(const char * str,Py_ssize_t len,PyObject * args)830 _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
831 {
832     return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
833 }
834