1 /* String (str/bytes) object implementation */
2 
3 #define PY_SSIZE_T_CLEAN
4 
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8 
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
12 
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
15 
16 /* This dictionary holds all interned strings.  Note that references to
17    strings in this dictionary are *not* counted in the string's ob_refcnt.
18    When the interned string reaches a refcnt of 0 the string deallocation
19    function will delete the reference from this dictionary.
20 
21    Another way to look at this is that to say that the actual reference
22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23 */
24 static PyObject *interned;
25 
/* PyStringObject_SIZE is the basic size of a string object: a memory
   request for a string of length n should ask for PyStringObject_SIZE + n
   bytes.

   It is used in place of sizeof(PyStringObject), which saves 3 bytes per
   string allocation on a typical system.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33 
34 /*
35    For PyString_FromString(), the parameter `str' points to a null-terminated
36    string containing exactly `size' bytes.
37 
38    For PyString_FromStringAndSize(), the parameter `str' is
39    either NULL or else points to a string containing at least `size' bytes.
40    For PyString_FromStringAndSize(), the string in the `str' parameter does
41    not have to be null-terminated.  (Therefore it is safe to construct a
42    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44    bytes (setting the last byte to the null terminating character) and you can
45    fill in the data yourself.  If `str' is non-NULL then the resulting
46    PyString object must be treated as immutable and you must not fill in nor
47    alter the data yourself, since the strings may be shared.
48 
49    The PyObject member `op->ob_size', which denotes the number of "extra
50    items" in a variable-size object, will contain the number of bytes
51    allocated for string data, not counting the null terminating character.
52    It is therefore equal to the `size' parameter (for
53    PyString_FromStringAndSize()) or the length of the string in the `str'
54    parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59     register PyStringObject *op;
60     if (size < 0) {
61         PyErr_SetString(PyExc_SystemError,
62             "Negative size passed to PyString_FromStringAndSize");
63         return NULL;
64     }
65     if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67         null_strings++;
68 #endif
69         Py_INCREF(op);
70         return (PyObject *)op;
71     }
72     if (size == 1 && str != NULL &&
73         (op = characters[*str & UCHAR_MAX]) != NULL)
74     {
75 #ifdef COUNT_ALLOCS
76         one_strings++;
77 #endif
78         Py_INCREF(op);
79         return (PyObject *)op;
80     }
81 
82     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83         PyErr_SetString(PyExc_OverflowError, "string is too large");
84         return NULL;
85     }
86 
87     /* Inline PyObject_NewVar */
88     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89     if (op == NULL)
90         return PyErr_NoMemory();
91     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92     op->ob_shash = -1;
93     op->ob_sstate = SSTATE_NOT_INTERNED;
94     if (str != NULL)
95         Py_MEMCPY(op->ob_sval, str, size);
96     op->ob_sval[size] = '\0';
97     /* share short strings */
98     if (size == 0) {
99         PyObject *t = (PyObject *)op;
100         PyString_InternInPlace(&t);
101         op = (PyStringObject *)t;
102         nullstring = op;
103         Py_INCREF(op);
104     } else if (size == 1 && str != NULL) {
105         PyObject *t = (PyObject *)op;
106         PyString_InternInPlace(&t);
107         op = (PyStringObject *)t;
108         characters[*str & UCHAR_MAX] = op;
109         Py_INCREF(op);
110     }
111     return (PyObject *) op;
112 }
113 
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117     register size_t size;
118     register PyStringObject *op;
119 
120     assert(str != NULL);
121     size = strlen(str);
122     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123         PyErr_SetString(PyExc_OverflowError,
124             "string is too long for a Python string");
125         return NULL;
126     }
127     if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129         null_strings++;
130 #endif
131         Py_INCREF(op);
132         return (PyObject *)op;
133     }
134     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136         one_strings++;
137 #endif
138         Py_INCREF(op);
139         return (PyObject *)op;
140     }
141 
142     /* Inline PyObject_NewVar */
143     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144     if (op == NULL)
145         return PyErr_NoMemory();
146     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147     op->ob_shash = -1;
148     op->ob_sstate = SSTATE_NOT_INTERNED;
149     Py_MEMCPY(op->ob_sval, str, size+1);
150     /* share short strings */
151     if (size == 0) {
152         PyObject *t = (PyObject *)op;
153         PyString_InternInPlace(&t);
154         op = (PyStringObject *)t;
155         nullstring = op;
156         Py_INCREF(op);
157     } else if (size == 1) {
158         PyObject *t = (PyObject *)op;
159         PyString_InternInPlace(&t);
160         op = (PyStringObject *)t;
161         characters[*str & UCHAR_MAX] = op;
162         Py_INCREF(op);
163     }
164     return (PyObject *) op;
165 }
166 
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170     va_list count;
171     Py_ssize_t n = 0;
172     const char* f;
173     char *s;
174     PyObject* string;
175 
176 #ifdef VA_LIST_IS_ARRAY
177     Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef  __va_copy
180     __va_copy(count, vargs);
181 #else
182     count = vargs;
183 #endif
184 #endif
185     /* step 1: figure out how large a buffer we need */
186     for (f = format; *f; f++) {
187         if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189             int longlongflag = 0;
190 #endif
191             const char* p = f;
192             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193                 ;
194 
195             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196              * they don't affect the amount of space we reserve.
197              */
198             if (*f == 'l') {
199                 if (f[1] == 'd' || f[1] == 'u') {
200                     ++f;
201                 }
202 #ifdef HAVE_LONG_LONG
203                 else if (f[1] == 'l' &&
204                          (f[2] == 'd' || f[2] == 'u')) {
205                     longlongflag = 1;
206                     f += 2;
207                 }
208 #endif
209             }
210             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211                 ++f;
212             }
213 
214             switch (*f) {
215             case 'c':
216                 (void)va_arg(count, int);
217                 /* fall through... */
218             case '%':
219                 n++;
220                 break;
221             case 'd': case 'u': case 'i': case 'x':
222                 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224                 /* Need at most
225                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226                    plus 1 for the sign.  53/22 is an upper
227                    bound for log10(256). */
228                 if (longlongflag)
229                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230                 else
231 #endif
232                     /* 20 bytes is enough to hold a 64-bit
233                        integer.  Decimal takes the most
234                        space.  This isn't enough for
235                        octal. */
236                     n += 20;
237 
238                 break;
239             case 's':
240                 s = va_arg(count, char*);
241                 n += strlen(s);
242                 break;
243             case 'p':
244                 (void) va_arg(count, int);
245                 /* maximum 64-bit pointer representation:
246                  * 0xffffffffffffffff
247                  * so 19 characters is enough.
248                  * XXX I count 18 -- what's the extra for?
249                  */
250                 n += 19;
251                 break;
252             default:
253                 /* if we stumble upon an unknown
254                    formatting code, copy the rest of
255                    the format string to the output
256                    string. (we cannot just skip the
257                    code, since there's no way to know
258                    what's in the argument list) */
259                 n += strlen(p);
260                 goto expand;
261             }
262         } else
263             n++;
264     }
265  expand:
266     /* step 2: fill the buffer */
267     /* Since we've analyzed how much space we need for the worst case,
268        use sprintf directly instead of the slower PyOS_snprintf. */
269     string = PyString_FromStringAndSize(NULL, n);
270     if (!string)
271         return NULL;
272 
273     s = PyString_AsString(string);
274 
275     for (f = format; *f; f++) {
276         if (*f == '%') {
277             const char* p = f++;
278             Py_ssize_t i;
279             int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281             int longlongflag = 0;
282 #endif
283             int size_tflag = 0;
284             /* parse the width.precision part (we're only
285                interested in the precision value, if any) */
286             n = 0;
287             while (isdigit(Py_CHARMASK(*f)))
288                 n = (n*10) + *f++ - '0';
289             if (*f == '.') {
290                 f++;
291                 n = 0;
292                 while (isdigit(Py_CHARMASK(*f)))
293                     n = (n*10) + *f++ - '0';
294             }
295             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296                 f++;
297             /* Handle %ld, %lu, %lld and %llu. */
298             if (*f == 'l') {
299                 if (f[1] == 'd' || f[1] == 'u') {
300                     longflag = 1;
301                     ++f;
302                 }
303 #ifdef HAVE_LONG_LONG
304                 else if (f[1] == 'l' &&
305                          (f[2] == 'd' || f[2] == 'u')) {
306                     longlongflag = 1;
307                     f += 2;
308                 }
309 #endif
310             }
311             /* handle the size_t flag. */
312             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313                 size_tflag = 1;
314                 ++f;
315             }
316 
317             switch (*f) {
318             case 'c':
319                 *s++ = va_arg(vargs, int);
320                 break;
321             case 'd':
322                 if (longflag)
323                     sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325                 else if (longlongflag)
326                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327                         va_arg(vargs, PY_LONG_LONG));
328 #endif
329                 else if (size_tflag)
330                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331                         va_arg(vargs, Py_ssize_t));
332                 else
333                     sprintf(s, "%d", va_arg(vargs, int));
334                 s += strlen(s);
335                 break;
336             case 'u':
337                 if (longflag)
338                     sprintf(s, "%lu",
339                         va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341                 else if (longlongflag)
342                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343                         va_arg(vargs, PY_LONG_LONG));
344 #endif
345                 else if (size_tflag)
346                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347                         va_arg(vargs, size_t));
348                 else
349                     sprintf(s, "%u",
350                         va_arg(vargs, unsigned int));
351                 s += strlen(s);
352                 break;
353             case 'i':
354                 sprintf(s, "%i", va_arg(vargs, int));
355                 s += strlen(s);
356                 break;
357             case 'x':
358                 sprintf(s, "%x", va_arg(vargs, int));
359                 s += strlen(s);
360                 break;
361             case 's':
362                 p = va_arg(vargs, char*);
363                 if (n <= 0) {
364                     i = strlen(p);
365                 }
366                 else {
367                     i = 0;
368                     while (i < n && p[i]) {
369                         i++;
370                     }
371                 }
372                 Py_MEMCPY(s, p, i);
373                 s += i;
374                 break;
375             case 'p':
376                 sprintf(s, "%p", va_arg(vargs, void*));
377                 /* %p is ill-defined:  ensure leading 0x. */
378                 if (s[1] == 'X')
379                     s[1] = 'x';
380                 else if (s[1] != 'x') {
381                     memmove(s+2, s, strlen(s)+1);
382                     s[0] = '0';
383                     s[1] = 'x';
384                 }
385                 s += strlen(s);
386                 break;
387             case '%':
388                 *s++ = '%';
389                 break;
390             default:
391                 strcpy(s, p);
392                 s += strlen(s);
393                 goto end;
394             }
395         } else
396             *s++ = *f;
397     }
398 
399  end:
400     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
401         return NULL;
402     return string;
403 }
404 
405 PyObject *
PyString_FromFormat(const char * format,...)406 PyString_FromFormat(const char *format, ...)
407 {
408     PyObject* ret;
409     va_list vargs;
410 
411 #ifdef HAVE_STDARG_PROTOTYPES
412     va_start(vargs, format);
413 #else
414     va_start(vargs);
415 #endif
416     ret = PyString_FromFormatV(format, vargs);
417     va_end(vargs);
418     return ret;
419 }
420 
421 
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)422 PyObject *PyString_Decode(const char *s,
423                           Py_ssize_t size,
424                           const char *encoding,
425                           const char *errors)
426 {
427     PyObject *v, *str;
428 
429     str = PyString_FromStringAndSize(s, size);
430     if (str == NULL)
431         return NULL;
432     v = PyString_AsDecodedString(str, encoding, errors);
433     Py_DECREF(str);
434     return v;
435 }
436 
/* Decode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding (or is an error when Unicode support is compiled out).
   Returns NULL with an exception set if `str' is not a string or decoding
   fails. */
PyObject *
PyString_AsDecodedObject(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; a NULL result is passed through. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
467 
/* Like PyString_AsDecodedObject(), but the result must be a string: a
   Unicode result is converted with the default encoding, and any other
   non-string result raises TypeError.  Returns NULL on error. */
PyObject *
PyString_AsDecodedString(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
501 
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)502 PyObject *PyString_Encode(const char *s,
503                           Py_ssize_t size,
504                           const char *encoding,
505                           const char *errors)
506 {
507     PyObject *v, *str;
508 
509     str = PyString_FromStringAndSize(s, size);
510     if (str == NULL)
511         return NULL;
512     v = PyString_AsEncodedString(str, encoding, errors);
513     Py_DECREF(str);
514     return v;
515 }
516 
/* Encode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding (or is an error when Unicode support is compiled out).
   Returns NULL with an exception set if `str' is not a string or encoding
   fails. */
PyObject *
PyString_AsEncodedObject(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; a NULL result is passed through. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
547 
/* Like PyString_AsEncodedObject(), but the result must be a string: a
   Unicode result is converted with the default encoding, and any other
   non-string result raises TypeError.  Returns NULL on error. */
PyObject *
PyString_AsEncodedString(PyObject *str,
                         const char *encoding,
                         const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *unicode = result;
        result = PyUnicode_AsEncodedString(unicode, NULL, NULL);
        Py_DECREF(unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
581 
582 static void
string_dealloc(PyObject * op)583 string_dealloc(PyObject *op)
584 {
585     switch (PyString_CHECK_INTERNED(op)) {
586         case SSTATE_NOT_INTERNED:
587             break;
588 
589         case SSTATE_INTERNED_MORTAL:
590             /* revive dead object temporarily for DelItem */
591             Py_REFCNT(op) = 3;
592             if (PyDict_DelItem(interned, op) != 0)
593                 Py_FatalError(
594                     "deletion of interned string failed");
595             break;
596 
597         case SSTATE_INTERNED_IMMORTAL:
598             Py_FatalError("Immortal interned string died.");
599 
600         default:
601             Py_FatalError("Inconsistent interned string state.");
602     }
603     Py_TYPE(op)->tp_free(op);
604 }
605 
606 /* Unescape a backslash-escaped string. If unicode is non-zero,
607    the string is a u-literal. If recode_encoding is non-zero,
608    the string is UTF-8 encoded and should be re-encoded in the
609    specified encoding.  */
610 
PyString_DecodeEscape(const char * s,Py_ssize_t len,const char * errors,Py_ssize_t unicode,const char * recode_encoding)611 PyObject *PyString_DecodeEscape(const char *s,
612                                 Py_ssize_t len,
613                                 const char *errors,
614                                 Py_ssize_t unicode,
615                                 const char *recode_encoding)
616 {
617     int c;
618     char *p, *buf;
619     const char *end;
620     PyObject *v;
621     Py_ssize_t newlen;
622     /* Check for integer overflow */
623     if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) {
624         PyErr_SetString(PyExc_OverflowError, "string is too large");
625         return NULL;
626     }
627     newlen = recode_encoding ? 4*len:len;
628     v = PyString_FromStringAndSize((char *)NULL, newlen);
629     if (v == NULL)
630         return NULL;
631     p = buf = PyString_AsString(v);
632     end = s + len;
633     while (s < end) {
634         if (*s != '\\') {
635           non_esc:
636 #ifdef Py_USING_UNICODE
637             if (recode_encoding && (*s & 0x80)) {
638                 PyObject *u, *w;
639                 char *r;
640                 const char* t;
641                 Py_ssize_t rn;
642                 t = s;
643                 /* Decode non-ASCII bytes as UTF-8. */
644                 while (t < end && (*t & 0x80)) t++;
645                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
646                 if(!u) goto failed;
647 
648                 /* Recode them in target encoding. */
649                 w = PyUnicode_AsEncodedString(
650                     u, recode_encoding, errors);
651                 Py_DECREF(u);
652                 if (!w)                 goto failed;
653 
654                 /* Append bytes to output buffer. */
655                 assert(PyString_Check(w));
656                 r = PyString_AS_STRING(w);
657                 rn = PyString_GET_SIZE(w);
658                 Py_MEMCPY(p, r, rn);
659                 p += rn;
660                 Py_DECREF(w);
661                 s = t;
662             } else {
663                 *p++ = *s++;
664             }
665 #else
666             *p++ = *s++;
667 #endif
668             continue;
669         }
670         s++;
671         if (s==end) {
672             PyErr_SetString(PyExc_ValueError,
673                             "Trailing \\ in string");
674             goto failed;
675         }
676         switch (*s++) {
677         /* XXX This assumes ASCII! */
678         case '\n': break;
679         case '\\': *p++ = '\\'; break;
680         case '\'': *p++ = '\''; break;
681         case '\"': *p++ = '\"'; break;
682         case 'b': *p++ = '\b'; break;
683         case 'f': *p++ = '\014'; break; /* FF */
684         case 't': *p++ = '\t'; break;
685         case 'n': *p++ = '\n'; break;
686         case 'r': *p++ = '\r'; break;
687         case 'v': *p++ = '\013'; break; /* VT */
688         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
689         case '0': case '1': case '2': case '3':
690         case '4': case '5': case '6': case '7':
691             c = s[-1] - '0';
692             if (s < end && '0' <= *s && *s <= '7') {
693                 c = (c<<3) + *s++ - '0';
694                 if (s < end && '0' <= *s && *s <= '7')
695                     c = (c<<3) + *s++ - '0';
696             }
697             *p++ = c;
698             break;
699         case 'x':
700             if (s+1 < end &&
701                 isxdigit(Py_CHARMASK(s[0])) &&
702                 isxdigit(Py_CHARMASK(s[1])))
703             {
704                 unsigned int x = 0;
705                 c = Py_CHARMASK(*s);
706                 s++;
707                 if (isdigit(c))
708                     x = c - '0';
709                 else if (islower(c))
710                     x = 10 + c - 'a';
711                 else
712                     x = 10 + c - 'A';
713                 x = x << 4;
714                 c = Py_CHARMASK(*s);
715                 s++;
716                 if (isdigit(c))
717                     x += c - '0';
718                 else if (islower(c))
719                     x += 10 + c - 'a';
720                 else
721                     x += 10 + c - 'A';
722                 *p++ = x;
723                 break;
724             }
725             if (!errors || strcmp(errors, "strict") == 0) {
726                 PyErr_SetString(PyExc_ValueError,
727                                 "invalid \\x escape");
728                 goto failed;
729             }
730             if (strcmp(errors, "replace") == 0) {
731                 *p++ = '?';
732             } else if (strcmp(errors, "ignore") == 0)
733                 /* do nothing */;
734             else {
735                 PyErr_Format(PyExc_ValueError,
736                              "decoding error; "
737                              "unknown error handling code: %.400s",
738                              errors);
739                 goto failed;
740             }
741             /* skip \x */
742             if (s < end && isxdigit(Py_CHARMASK(s[0])))
743                 s++; /* and a hexdigit */
744             break;
745 #ifndef Py_USING_UNICODE
746         case 'u':
747         case 'U':
748         case 'N':
749             if (unicode) {
750                 PyErr_SetString(PyExc_ValueError,
751                           "Unicode escapes not legal "
752                           "when Unicode disabled");
753                 goto failed;
754             }
755 #endif
756         default:
757             *p++ = '\\';
758             s--;
759             goto non_esc; /* an arbitrary number of unescaped
760                              UTF-8 bytes may follow. */
761         }
762     }
763     if (p-buf < newlen)
764         _PyString_Resize(&v, p - buf); /* v is cleared on error */
765     return v;
766   failed:
767     Py_DECREF(v);
768     return NULL;
769 }
770 
771 /* -------------------------------------------------------------------- */
772 /* object api */
773 
774 static Py_ssize_t
string_getsize(register PyObject * op)775 string_getsize(register PyObject *op)
776 {
777     char *s;
778     Py_ssize_t len;
779     if (PyString_AsStringAndSize(op, &s, &len))
780         return -1;
781     return len;
782 }
783 
784 static /*const*/ char *
string_getbuffer(register PyObject * op)785 string_getbuffer(register PyObject *op)
786 {
787     char *s;
788     Py_ssize_t len;
789     if (PyString_AsStringAndSize(op, &s, &len))
790         return NULL;
791     return s;
792 }
793 
794 Py_ssize_t
PyString_Size(register PyObject * op)795 PyString_Size(register PyObject *op)
796 {
797     if (!PyString_Check(op))
798         return string_getsize(op);
799     return Py_SIZE(op);
800 }
801 
802 /*const*/ char *
PyString_AsString(register PyObject * op)803 PyString_AsString(register PyObject *op)
804 {
805     if (!PyString_Check(op))
806         return string_getbuffer(op);
807     return ((PyStringObject *)op) -> ob_sval;
808 }
809 
810 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)811 PyString_AsStringAndSize(register PyObject *obj,
812                          register char **s,
813                          register Py_ssize_t *len)
814 {
815     if (s == NULL) {
816         PyErr_BadInternalCall();
817         return -1;
818     }
819 
820     if (!PyString_Check(obj)) {
821 #ifdef Py_USING_UNICODE
822         if (PyUnicode_Check(obj)) {
823             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
824             if (obj == NULL)
825                 return -1;
826         }
827         else
828 #endif
829         {
830             PyErr_Format(PyExc_TypeError,
831                          "expected string or Unicode object, "
832                          "%.200s found", Py_TYPE(obj)->tp_name);
833             return -1;
834         }
835     }
836 
837     *s = PyString_AS_STRING(obj);
838     if (len != NULL)
839         *len = PyString_GET_SIZE(obj);
840     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
841         PyErr_SetString(PyExc_TypeError,
842                         "expected string without null bytes");
843         return -1;
844     }
845     return 0;
846 }
847 
848 /* -------------------------------------------------------------------- */
849 /* Methods */
850 
851 #include "stringlib/stringdefs.h"
852 #include "stringlib/fastsearch.h"
853 
854 #include "stringlib/count.h"
855 #include "stringlib/find.h"
856 #include "stringlib/partition.h"
857 #include "stringlib/split.h"
858 
859 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
860 #include "stringlib/localeutil.h"
861 
862 
863 
864 static int
string_print(PyStringObject * op,FILE * fp,int flags)865 string_print(PyStringObject *op, FILE *fp, int flags)
866 {
867     Py_ssize_t i, str_len;
868     char c;
869     int quote;
870 
871     /* XXX Ought to check for interrupts when writing long strings */
872     if (! PyString_CheckExact(op)) {
873         int ret;
874         /* A str subclass may have its own __str__ method. */
875         op = (PyStringObject *) PyObject_Str((PyObject *)op);
876         if (op == NULL)
877             return -1;
878         ret = string_print(op, fp, flags);
879         Py_DECREF(op);
880         return ret;
881     }
882     if (flags & Py_PRINT_RAW) {
883         char *data = op->ob_sval;
884         Py_ssize_t size = Py_SIZE(op);
885         Py_BEGIN_ALLOW_THREADS
886         while (size > INT_MAX) {
887             /* Very long strings cannot be written atomically.
888              * But don't write exactly INT_MAX bytes at a time
889              * to avoid memory aligment issues.
890              */
891             const int chunk_size = INT_MAX & ~0x3FFF;
892             fwrite(data, 1, chunk_size, fp);
893             data += chunk_size;
894             size -= chunk_size;
895         }
896 #ifdef __VMS
897         if (size) fwrite(data, (size_t)size, 1, fp);
898 #else
899         fwrite(data, 1, (size_t)size, fp);
900 #endif
901         Py_END_ALLOW_THREADS
902         return 0;
903     }
904 
905     /* figure out which quote to use; single is preferred */
906     quote = '\'';
907     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
908         !memchr(op->ob_sval, '"', Py_SIZE(op)))
909         quote = '"';
910 
911     str_len = Py_SIZE(op);
912     Py_BEGIN_ALLOW_THREADS
913     fputc(quote, fp);
914     for (i = 0; i < str_len; i++) {
915         /* Since strings are immutable and the caller should have a
916         reference, accessing the internal buffer should not be an issue
917         with the GIL released. */
918         c = op->ob_sval[i];
919         if (c == quote || c == '\\')
920             fprintf(fp, "\\%c", c);
921         else if (c == '\t')
922             fprintf(fp, "\\t");
923         else if (c == '\n')
924             fprintf(fp, "\\n");
925         else if (c == '\r')
926             fprintf(fp, "\\r");
927         else if (c < ' ' || c >= 0x7f)
928             fprintf(fp, "\\x%02x", c & 0xff);
929         else
930             fputc(c, fp);
931     }
932     fputc(quote, fp);
933     Py_END_ALLOW_THREADS
934     return 0;
935 }
936 
937 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)938 PyString_Repr(PyObject *obj, int smartquotes)
939 {
940     register PyStringObject* op = (PyStringObject*) obj;
941     size_t newsize;
942     PyObject *v;
943     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
944         PyErr_SetString(PyExc_OverflowError,
945             "string is too large to make repr");
946         return NULL;
947     }
948     newsize = 2 + 4*Py_SIZE(op);
949     v = PyString_FromStringAndSize((char *)NULL, newsize);
950     if (v == NULL) {
951         return NULL;
952     }
953     else {
954         register Py_ssize_t i;
955         register char c;
956         register char *p;
957         int quote;
958 
959         /* figure out which quote to use; single is preferred */
960         quote = '\'';
961         if (smartquotes &&
962             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
963             !memchr(op->ob_sval, '"', Py_SIZE(op)))
964             quote = '"';
965 
966         p = PyString_AS_STRING(v);
967         *p++ = quote;
968         for (i = 0; i < Py_SIZE(op); i++) {
969             /* There's at least enough room for a hex escape
970                and a closing quote. */
971             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
972             c = op->ob_sval[i];
973             if (c == quote || c == '\\')
974                 *p++ = '\\', *p++ = c;
975             else if (c == '\t')
976                 *p++ = '\\', *p++ = 't';
977             else if (c == '\n')
978                 *p++ = '\\', *p++ = 'n';
979             else if (c == '\r')
980                 *p++ = '\\', *p++ = 'r';
981             else if (c < ' ' || c >= 0x7f) {
982                 /* For performance, we don't want to call
983                    PyOS_snprintf here (extra layers of
984                    function call). */
985                 sprintf(p, "\\x%02x", c & 0xff);
986                 p += 4;
987             }
988             else
989                 *p++ = c;
990         }
991         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
992         *p++ = quote;
993         *p = '\0';
994         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
995             return NULL;
996         return v;
997     }
998 }
999 
1000 static PyObject *
string_repr(PyObject * op)1001 string_repr(PyObject *op)
1002 {
1003     return PyString_Repr(op, 1);
1004 }
1005 
1006 static PyObject *
string_str(PyObject * s)1007 string_str(PyObject *s)
1008 {
1009     assert(PyString_Check(s));
1010     if (PyString_CheckExact(s)) {
1011         Py_INCREF(s);
1012         return s;
1013     }
1014     else {
1015         /* Subtype -- return genuine string with the same value. */
1016         PyStringObject *t = (PyStringObject *) s;
1017         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1018     }
1019 }
1020 
1021 static Py_ssize_t
string_length(PyStringObject * a)1022 string_length(PyStringObject *a)
1023 {
1024     return Py_SIZE(a);
1025 }
1026 
1027 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1028 string_concat(register PyStringObject *a, register PyObject *bb)
1029 {
1030     register Py_ssize_t size;
1031     register PyStringObject *op;
1032     if (!PyString_Check(bb)) {
1033 #ifdef Py_USING_UNICODE
1034         if (PyUnicode_Check(bb))
1035             return PyUnicode_Concat((PyObject *)a, bb);
1036 #endif
1037         if (PyByteArray_Check(bb))
1038             return PyByteArray_Concat((PyObject *)a, bb);
1039         PyErr_Format(PyExc_TypeError,
1040                      "cannot concatenate 'str' and '%.200s' objects",
1041                      Py_TYPE(bb)->tp_name);
1042         return NULL;
1043     }
1044 #define b ((PyStringObject *)bb)
1045     /* Optimize cases with empty left or right operand */
1046     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1047         PyString_CheckExact(a) && PyString_CheckExact(b)) {
1048         if (Py_SIZE(a) == 0) {
1049             Py_INCREF(bb);
1050             return bb;
1051         }
1052         Py_INCREF(a);
1053         return (PyObject *)a;
1054     }
1055     /* Check that string sizes are not negative, to prevent an
1056        overflow in cases where we are passed incorrectly-created
1057        strings with negative lengths (due to a bug in other code).
1058     */
1059     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1060         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1061         PyErr_SetString(PyExc_OverflowError,
1062                         "strings are too large to concat");
1063         return NULL;
1064     }
1065     size = Py_SIZE(a) + Py_SIZE(b);
1066 
1067     /* Inline PyObject_NewVar */
1068     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1069         PyErr_SetString(PyExc_OverflowError,
1070                         "strings are too large to concat");
1071         return NULL;
1072     }
1073     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1074     if (op == NULL)
1075         return PyErr_NoMemory();
1076     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1077     op->ob_shash = -1;
1078     op->ob_sstate = SSTATE_NOT_INTERNED;
1079     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1080     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1081     op->ob_sval[size] = '\0';
1082     return (PyObject *) op;
1083 #undef b
1084 }
1085 
1086 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1087 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1088 {
1089     register Py_ssize_t i;
1090     register Py_ssize_t j;
1091     register Py_ssize_t size;
1092     register PyStringObject *op;
1093     size_t nbytes;
1094     if (n < 0)
1095         n = 0;
1096     /* watch out for overflows:  the size can overflow Py_ssize_t,
1097      * and the # of bytes needed can overflow size_t
1098      */
1099     if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1100         PyErr_SetString(PyExc_OverflowError,
1101             "repeated string is too long");
1102         return NULL;
1103     }
1104     size = Py_SIZE(a) * n;
1105     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1106         Py_INCREF(a);
1107         return (PyObject *)a;
1108     }
1109     nbytes = (size_t)size;
1110     if (nbytes + PyStringObject_SIZE <= nbytes) {
1111         PyErr_SetString(PyExc_OverflowError,
1112             "repeated string is too long");
1113         return NULL;
1114     }
1115     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1116     if (op == NULL)
1117         return PyErr_NoMemory();
1118     (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1119     op->ob_shash = -1;
1120     op->ob_sstate = SSTATE_NOT_INTERNED;
1121     op->ob_sval[size] = '\0';
1122     if (Py_SIZE(a) == 1 && n > 0) {
1123         memset(op->ob_sval, a->ob_sval[0] , n);
1124         return (PyObject *) op;
1125     }
1126     i = 0;
1127     if (i < size) {
1128         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1129         i = Py_SIZE(a);
1130     }
1131     while (i < size) {
1132         j = (i <= size-i)  ?  i  :  size-i;
1133         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1134         i += j;
1135     }
1136     return (PyObject *) op;
1137 }
1138 
1139 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1140 
1141 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1142 string_slice(register PyStringObject *a, register Py_ssize_t i,
1143              register Py_ssize_t j)
1144      /* j -- may be negative! */
1145 {
1146     if (i < 0)
1147         i = 0;
1148     if (j < 0)
1149         j = 0; /* Avoid signed/unsigned bug in next line */
1150     if (j > Py_SIZE(a))
1151         j = Py_SIZE(a);
1152     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1153         /* It's the same as a */
1154         Py_INCREF(a);
1155         return (PyObject *)a;
1156     }
1157     if (j < i)
1158         j = i;
1159     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1160 }
1161 
1162 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1163 string_contains(PyObject *str_obj, PyObject *sub_obj)
1164 {
1165     if (!PyString_CheckExact(sub_obj)) {
1166 #ifdef Py_USING_UNICODE
1167         if (PyUnicode_Check(sub_obj))
1168             return PyUnicode_Contains(str_obj, sub_obj);
1169 #endif
1170         if (!PyString_Check(sub_obj)) {
1171             PyErr_Format(PyExc_TypeError,
1172                 "'in <string>' requires string as left operand, "
1173                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1174             return -1;
1175         }
1176     }
1177 
1178     return stringlib_contains_obj(str_obj, sub_obj);
1179 }
1180 
1181 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1182 string_item(PyStringObject *a, register Py_ssize_t i)
1183 {
1184     char pchar;
1185     PyObject *v;
1186     if (i < 0 || i >= Py_SIZE(a)) {
1187         PyErr_SetString(PyExc_IndexError, "string index out of range");
1188         return NULL;
1189     }
1190     pchar = a->ob_sval[i];
1191     v = (PyObject *)characters[pchar & UCHAR_MAX];
1192     if (v == NULL)
1193         v = PyString_FromStringAndSize(&pchar, 1);
1194     else {
1195 #ifdef COUNT_ALLOCS
1196         one_strings++;
1197 #endif
1198         Py_INCREF(v);
1199     }
1200     return v;
1201 }
1202 
1203 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1204 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1205 {
1206     int c;
1207     Py_ssize_t len_a, len_b;
1208     Py_ssize_t min_len;
1209     PyObject *result;
1210 
1211     /* Make sure both arguments are strings. */
1212     if (!(PyString_Check(a) && PyString_Check(b))) {
1213         result = Py_NotImplemented;
1214         goto out;
1215     }
1216     if (a == b) {
1217         switch (op) {
1218         case Py_EQ:case Py_LE:case Py_GE:
1219             result = Py_True;
1220             goto out;
1221         case Py_NE:case Py_LT:case Py_GT:
1222             result = Py_False;
1223             goto out;
1224         }
1225     }
1226     if (op == Py_EQ) {
1227         /* Supporting Py_NE here as well does not save
1228            much time, since Py_NE is rarely used.  */
1229         if (Py_SIZE(a) == Py_SIZE(b)
1230             && (a->ob_sval[0] == b->ob_sval[0]
1231             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1232             result = Py_True;
1233         } else {
1234             result = Py_False;
1235         }
1236         goto out;
1237     }
1238     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1239     min_len = (len_a < len_b) ? len_a : len_b;
1240     if (min_len > 0) {
1241         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1242         if (c==0)
1243             c = memcmp(a->ob_sval, b->ob_sval, min_len);
1244     } else
1245         c = 0;
1246     if (c == 0)
1247         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1248     switch (op) {
1249     case Py_LT: c = c <  0; break;
1250     case Py_LE: c = c <= 0; break;
1251     case Py_EQ: assert(0);  break; /* unreachable */
1252     case Py_NE: c = c != 0; break;
1253     case Py_GT: c = c >  0; break;
1254     case Py_GE: c = c >= 0; break;
1255     default:
1256         result = Py_NotImplemented;
1257         goto out;
1258     }
1259     result = c ? Py_True : Py_False;
1260   out:
1261     Py_INCREF(result);
1262     return result;
1263 }
1264 
1265 int
_PyString_Eq(PyObject * o1,PyObject * o2)1266 _PyString_Eq(PyObject *o1, PyObject *o2)
1267 {
1268     PyStringObject *a = (PyStringObject*) o1;
1269     PyStringObject *b = (PyStringObject*) o2;
1270     return Py_SIZE(a) == Py_SIZE(b)
1271       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1272 }
1273 
1274 static long
string_hash(PyStringObject * a)1275 string_hash(PyStringObject *a)
1276 {
1277     register Py_ssize_t len;
1278     register unsigned char *p;
1279     register long x;
1280 
1281 #ifdef Py_DEBUG
1282     assert(_Py_HashSecret_Initialized);
1283 #endif
1284     if (a->ob_shash != -1)
1285         return a->ob_shash;
1286     len = Py_SIZE(a);
1287     /*
1288       We make the hash of the empty string be 0, rather than using
1289       (prefix ^ suffix), since this slightly obfuscates the hash secret
1290     */
1291     if (len == 0) {
1292         a->ob_shash = 0;
1293         return 0;
1294     }
1295     p = (unsigned char *) a->ob_sval;
1296     x = _Py_HashSecret.prefix;
1297     x ^= *p << 7;
1298     while (--len >= 0)
1299         x = (1000003*x) ^ *p++;
1300     x ^= Py_SIZE(a);
1301     x ^= _Py_HashSecret.suffix;
1302     if (x == -1)
1303         x = -2;
1304     a->ob_shash = x;
1305     return x;
1306 }
1307 
1308 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1309 string_subscript(PyStringObject* self, PyObject* item)
1310 {
1311     if (PyIndex_Check(item)) {
1312         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1313         if (i == -1 && PyErr_Occurred())
1314             return NULL;
1315         if (i < 0)
1316             i += PyString_GET_SIZE(self);
1317         return string_item(self, i);
1318     }
1319     else if (PySlice_Check(item)) {
1320         Py_ssize_t start, stop, step, slicelength, cur, i;
1321         char* source_buf;
1322         char* result_buf;
1323         PyObject* result;
1324 
1325         if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1326             return NULL;
1327         }
1328         slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1329                                             &stop, step);
1330 
1331         if (slicelength <= 0) {
1332             return PyString_FromStringAndSize("", 0);
1333         }
1334         else if (start == 0 && step == 1 &&
1335                  slicelength == PyString_GET_SIZE(self) &&
1336                  PyString_CheckExact(self)) {
1337             Py_INCREF(self);
1338             return (PyObject *)self;
1339         }
1340         else if (step == 1) {
1341             return PyString_FromStringAndSize(
1342                 PyString_AS_STRING(self) + start,
1343                 slicelength);
1344         }
1345         else {
1346             source_buf = PyString_AsString((PyObject*)self);
1347             result_buf = (char *)PyMem_Malloc(slicelength);
1348             if (result_buf == NULL)
1349                 return PyErr_NoMemory();
1350 
1351             for (cur = start, i = 0; i < slicelength;
1352                  cur += step, i++) {
1353                 result_buf[i] = source_buf[cur];
1354             }
1355 
1356             result = PyString_FromStringAndSize(result_buf,
1357                                                 slicelength);
1358             PyMem_Free(result_buf);
1359             return result;
1360         }
1361     }
1362     else {
1363         PyErr_Format(PyExc_TypeError,
1364                      "string indices must be integers, not %.200s",
1365                      Py_TYPE(item)->tp_name);
1366         return NULL;
1367     }
1368 }
1369 
1370 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1371 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372 {
1373     if ( index != 0 ) {
1374         PyErr_SetString(PyExc_SystemError,
1375                         "accessing non-existent string segment");
1376         return -1;
1377     }
1378     *ptr = (void *)self->ob_sval;
1379     return Py_SIZE(self);
1380 }
1381 
1382 static Py_ssize_t
string_buffer_getwritebuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1383 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1384 {
1385     PyErr_SetString(PyExc_TypeError,
1386                     "Cannot use string as modifiable buffer");
1387     return -1;
1388 }
1389 
1390 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1391 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1392 {
1393     if ( lenp )
1394         *lenp = Py_SIZE(self);
1395     return 1;
1396 }
1397 
1398 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1399 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1400 {
1401     if ( index != 0 ) {
1402         PyErr_SetString(PyExc_SystemError,
1403                         "accessing non-existent string segment");
1404         return -1;
1405     }
1406     *ptr = self->ob_sval;
1407     return Py_SIZE(self);
1408 }
1409 
1410 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1411 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1412 {
1413     return PyBuffer_FillInfo(view, (PyObject*)self,
1414                              (void *)self->ob_sval, Py_SIZE(self),
1415                              1, flags);
1416 }
1417 
/* Sequence-protocol method table for string objects.  The initializer
   is positional, so the entries must stay in this order; the two item
   and slice assignment slots are left empty (0). */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1428 
/* Mapping-protocol method table for string objects (positional
   initializer): length, subscript lookup, and an empty third slot --
   presumably subscript assignment, per the PyMappingMethods layout. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1434 
/* Buffer-protocol method table for string objects (positional
   initializer): read buffer, write buffer (always fails), segment
   count, char buffer, and the Py_buffer getter.  The final slot is
   left empty. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX presumably the release-buffer slot; confirm against PyBufferProcs */
};
1443 
1444 
1445 
/* Selectors for which end(s) of a string a strip operation trims.
   They double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above.  Each entry looks like an argument-parsing
   format string ("|O:" followed by the method name) -- presumably for
   PyArg_ParseTuple; confirm at the use site. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip type i: the stripformat entry with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1454 
1455 PyDoc_STRVAR(split__doc__,
1456 "S.split([sep [,maxsplit]]) -> list of strings\n\
1457 \n\
1458 Return a list of the words in the string S, using sep as the\n\
1459 delimiter string.  If maxsplit is given, at most maxsplit\n\
1460 splits are done. If sep is not specified or is None, any\n\
1461 whitespace string is a separator and empty strings are removed\n\
1462 from the result.");
1463 
1464 static PyObject *
string_split(PyStringObject * self,PyObject * args)1465 string_split(PyStringObject *self, PyObject *args)
1466 {
1467     Py_ssize_t len = PyString_GET_SIZE(self), n;
1468     Py_ssize_t maxsplit = -1;
1469     const char *s = PyString_AS_STRING(self), *sub;
1470     PyObject *subobj = Py_None;
1471 
1472     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1473         return NULL;
1474     if (maxsplit < 0)
1475         maxsplit = PY_SSIZE_T_MAX;
1476     if (subobj == Py_None)
1477         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1478     if (PyString_Check(subobj)) {
1479         sub = PyString_AS_STRING(subobj);
1480         n = PyString_GET_SIZE(subobj);
1481     }
1482 #ifdef Py_USING_UNICODE
1483     else if (PyUnicode_Check(subobj))
1484         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1485 #endif
1486     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1487         return NULL;
1488 
1489     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1490 }
1491 
1492 PyDoc_STRVAR(partition__doc__,
1493 "S.partition(sep) -> (head, sep, tail)\n\
1494 \n\
1495 Search for the separator sep in S, and return the part before it,\n\
1496 the separator itself, and the part after it.  If the separator is not\n\
1497 found, return S and two empty strings.");
1498 
1499 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1500 string_partition(PyStringObject *self, PyObject *sep_obj)
1501 {
1502     const char *sep;
1503     Py_ssize_t sep_len;
1504 
1505     if (PyString_Check(sep_obj)) {
1506         sep = PyString_AS_STRING(sep_obj);
1507         sep_len = PyString_GET_SIZE(sep_obj);
1508     }
1509 #ifdef Py_USING_UNICODE
1510     else if (PyUnicode_Check(sep_obj))
1511         return PyUnicode_Partition((PyObject *) self, sep_obj);
1512 #endif
1513     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1514         return NULL;
1515 
1516     return stringlib_partition(
1517         (PyObject*) self,
1518         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1519         sep_obj, sep, sep_len
1520         );
1521 }
1522 
1523 PyDoc_STRVAR(rpartition__doc__,
1524 "S.rpartition(sep) -> (head, sep, tail)\n\
1525 \n\
1526 Search for the separator sep in S, starting at the end of S, and return\n\
1527 the part before it, the separator itself, and the part after it.  If the\n\
1528 separator is not found, return two empty strings and S.");
1529 
1530 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1531 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1532 {
1533     const char *sep;
1534     Py_ssize_t sep_len;
1535 
1536     if (PyString_Check(sep_obj)) {
1537         sep = PyString_AS_STRING(sep_obj);
1538         sep_len = PyString_GET_SIZE(sep_obj);
1539     }
1540 #ifdef Py_USING_UNICODE
1541     else if (PyUnicode_Check(sep_obj))
1542         return PyUnicode_RPartition((PyObject *) self, sep_obj);
1543 #endif
1544     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1545         return NULL;
1546 
1547     return stringlib_rpartition(
1548         (PyObject*) self,
1549         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1550         sep_obj, sep, sep_len
1551         );
1552 }
1553 
1554 PyDoc_STRVAR(rsplit__doc__,
1555 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1556 \n\
1557 Return a list of the words in the string S, using sep as the\n\
1558 delimiter string, starting at the end of the string and working\n\
1559 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1560 done. If sep is not specified or is None, any whitespace string\n\
1561 is a separator.");
1562 
1563 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1564 string_rsplit(PyStringObject *self, PyObject *args)
1565 {
1566     Py_ssize_t len = PyString_GET_SIZE(self), n;
1567     Py_ssize_t maxsplit = -1;
1568     const char *s = PyString_AS_STRING(self), *sub;
1569     PyObject *subobj = Py_None;
1570 
1571     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1572         return NULL;
1573     if (maxsplit < 0)
1574         maxsplit = PY_SSIZE_T_MAX;
1575     if (subobj == Py_None)
1576         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1577     if (PyString_Check(subobj)) {
1578         sub = PyString_AS_STRING(subobj);
1579         n = PyString_GET_SIZE(subobj);
1580     }
1581 #ifdef Py_USING_UNICODE
1582     else if (PyUnicode_Check(subobj))
1583         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1584 #endif
1585     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1586         return NULL;
1587 
1588     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1589 }
1590 
1591 
1592 PyDoc_STRVAR(join__doc__,
1593 "S.join(iterable) -> string\n\
1594 \n\
1595 Return a string which is the concatenation of the strings in the\n\
1596 iterable.  The separator between elements is S.");
1597 
1598 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1599 string_join(PyStringObject *self, PyObject *orig)
1600 {
1601     char *sep = PyString_AS_STRING(self);
1602     const Py_ssize_t seplen = PyString_GET_SIZE(self);
1603     PyObject *res = NULL;
1604     char *p;
1605     Py_ssize_t seqlen = 0;
1606     size_t sz = 0;
1607     Py_ssize_t i;
1608     PyObject *seq, *item;
1609 
1610     seq = PySequence_Fast(orig, "can only join an iterable");
1611     if (seq == NULL) {
1612         return NULL;
1613     }
1614 
1615     seqlen = PySequence_Size(seq);
1616     if (seqlen == 0) {
1617         Py_DECREF(seq);
1618         return PyString_FromString("");
1619     }
1620     if (seqlen == 1) {
1621         item = PySequence_Fast_GET_ITEM(seq, 0);
1622         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1623             Py_INCREF(item);
1624             Py_DECREF(seq);
1625             return item;
1626         }
1627     }
1628 
1629     /* There are at least two things to join, or else we have a subclass
1630      * of the builtin types in the sequence.
1631      * Do a pre-pass to figure out the total amount of space we'll
1632      * need (sz), see whether any argument is absurd, and defer to
1633      * the Unicode join if appropriate.
1634      */
1635     for (i = 0; i < seqlen; i++) {
1636         const size_t old_sz = sz;
1637         item = PySequence_Fast_GET_ITEM(seq, i);
1638         if (!PyString_Check(item)){
1639 #ifdef Py_USING_UNICODE
1640             if (PyUnicode_Check(item)) {
1641                 /* Defer to Unicode join.
1642                  * CAUTION:  There's no guarantee that the
1643                  * original sequence can be iterated over
1644                  * again, so we must pass seq here.
1645                  */
1646                 PyObject *result;
1647                 result = PyUnicode_Join((PyObject *)self, seq);
1648                 Py_DECREF(seq);
1649                 return result;
1650             }
1651 #endif
1652             PyErr_Format(PyExc_TypeError,
1653                          "sequence item %zd: expected string,"
1654                          " %.80s found",
1655                          i, Py_TYPE(item)->tp_name);
1656             Py_DECREF(seq);
1657             return NULL;
1658         }
1659         sz += PyString_GET_SIZE(item);
1660         if (i != 0)
1661             sz += seplen;
1662         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1663             PyErr_SetString(PyExc_OverflowError,
1664                 "join() result is too long for a Python string");
1665             Py_DECREF(seq);
1666             return NULL;
1667         }
1668     }
1669 
1670     /* Allocate result space. */
1671     res = PyString_FromStringAndSize((char*)NULL, sz);
1672     if (res == NULL) {
1673         Py_DECREF(seq);
1674         return NULL;
1675     }
1676 
1677     /* Catenate everything. */
1678     p = PyString_AS_STRING(res);
1679     for (i = 0; i < seqlen; ++i) {
1680         size_t n;
1681         item = PySequence_Fast_GET_ITEM(seq, i);
1682         n = PyString_GET_SIZE(item);
1683         Py_MEMCPY(p, PyString_AS_STRING(item), n);
1684         p += n;
1685         if (i < seqlen - 1) {
1686             Py_MEMCPY(p, sep, seplen);
1687             p += seplen;
1688         }
1689     }
1690 
1691     Py_DECREF(seq);
1692     return res;
1693 }
1694 
1695 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1696 _PyString_Join(PyObject *sep, PyObject *x)
1697 {
1698     assert(sep != NULL && PyString_Check(sep));
1699     assert(x != NULL);
1700     return string_join((PyStringObject *)sep, x);
1701 }
1702 
/* Helper macro to fix up start/end slice values in place.

   end is clamped to len when it exceeds it; a negative end is offset
   by len and then clamped to 0.  A negative start is likewise offset
   by len and clamped to 0.  start and end must be assignable lvalues,
   and every argument may be evaluated more than once.  The expansion
   is a bare sequence of if statements (it is not wrapped in
   do { } while (0)), so use it only as a complete statement. */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1717 
1718 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1719 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1720 {
1721     PyObject *subobj;
1722     const char *sub;
1723     Py_ssize_t sub_len;
1724     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1725 
1726     if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1727                                     args, &subobj, &start, &end))
1728         return -2;
1729 
1730     if (PyString_Check(subobj)) {
1731         sub = PyString_AS_STRING(subobj);
1732         sub_len = PyString_GET_SIZE(subobj);
1733     }
1734 #ifdef Py_USING_UNICODE
1735     else if (PyUnicode_Check(subobj))
1736         return PyUnicode_Find(
1737             (PyObject *)self, subobj, start, end, dir);
1738 #endif
1739     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1740         /* XXX - the "expected a character buffer object" is pretty
1741            confusing for a non-expert.  remap to something else ? */
1742         return -2;
1743 
1744     if (dir > 0)
1745         return stringlib_find_slice(
1746             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1747             sub, sub_len, start, end);
1748     else
1749         return stringlib_rfind_slice(
1750             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1751             sub, sub_len, start, end);
1752 }
1753 
1754 
1755 PyDoc_STRVAR(find__doc__,
1756 "S.find(sub [,start [,end]]) -> int\n\
1757 \n\
1758 Return the lowest index in S where substring sub is found,\n\
1759 such that sub is contained within S[start:end].  Optional\n\
1760 arguments start and end are interpreted as in slice notation.\n\
1761 \n\
1762 Return -1 on failure.");
1763 
1764 static PyObject *
string_find(PyStringObject * self,PyObject * args)1765 string_find(PyStringObject *self, PyObject *args)
1766 {
1767     Py_ssize_t result = string_find_internal(self, args, +1);
1768     if (result == -2)
1769         return NULL;
1770     return PyInt_FromSsize_t(result);
1771 }
1772 
1773 
1774 PyDoc_STRVAR(index__doc__,
1775 "S.index(sub [,start [,end]]) -> int\n\
1776 \n\
1777 Like S.find() but raise ValueError when the substring is not found.");
1778 
1779 static PyObject *
string_index(PyStringObject * self,PyObject * args)1780 string_index(PyStringObject *self, PyObject *args)
1781 {
1782     Py_ssize_t result = string_find_internal(self, args, +1);
1783     if (result == -2)
1784         return NULL;
1785     if (result == -1) {
1786         PyErr_SetString(PyExc_ValueError,
1787                         "substring not found");
1788         return NULL;
1789     }
1790     return PyInt_FromSsize_t(result);
1791 }
1792 
1793 
1794 PyDoc_STRVAR(rfind__doc__,
1795 "S.rfind(sub [,start [,end]]) -> int\n\
1796 \n\
1797 Return the highest index in S where substring sub is found,\n\
1798 such that sub is contained within S[start:end].  Optional\n\
1799 arguments start and end are interpreted as in slice notation.\n\
1800 \n\
1801 Return -1 on failure.");
1802 
1803 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1804 string_rfind(PyStringObject *self, PyObject *args)
1805 {
1806     Py_ssize_t result = string_find_internal(self, args, -1);
1807     if (result == -2)
1808         return NULL;
1809     return PyInt_FromSsize_t(result);
1810 }
1811 
1812 
1813 PyDoc_STRVAR(rindex__doc__,
1814 "S.rindex(sub [,start [,end]]) -> int\n\
1815 \n\
1816 Like S.rfind() but raise ValueError when the substring is not found.");
1817 
1818 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1819 string_rindex(PyStringObject *self, PyObject *args)
1820 {
1821     Py_ssize_t result = string_find_internal(self, args, -1);
1822     if (result == -2)
1823         return NULL;
1824     if (result == -1) {
1825         PyErr_SetString(PyExc_ValueError,
1826                         "substring not found");
1827         return NULL;
1828     }
1829     return PyInt_FromSsize_t(result);
1830 }
1831 
1832 
1833 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1834 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1835 {
1836     char *s = PyString_AS_STRING(self);
1837     Py_ssize_t len = PyString_GET_SIZE(self);
1838     char *sep = PyString_AS_STRING(sepobj);
1839     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1840     Py_ssize_t i, j;
1841 
1842     i = 0;
1843     if (striptype != RIGHTSTRIP) {
1844         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1845             i++;
1846         }
1847     }
1848 
1849     j = len;
1850     if (striptype != LEFTSTRIP) {
1851         do {
1852             j--;
1853         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1854         j++;
1855     }
1856 
1857     if (i == 0 && j == len && PyString_CheckExact(self)) {
1858         Py_INCREF(self);
1859         return (PyObject*)self;
1860     }
1861     else
1862         return PyString_FromStringAndSize(s+i, j-i);
1863 }
1864 
1865 
1866 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1867 do_strip(PyStringObject *self, int striptype)
1868 {
1869     char *s = PyString_AS_STRING(self);
1870     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1871 
1872     i = 0;
1873     if (striptype != RIGHTSTRIP) {
1874         while (i < len && isspace(Py_CHARMASK(s[i]))) {
1875             i++;
1876         }
1877     }
1878 
1879     j = len;
1880     if (striptype != LEFTSTRIP) {
1881         do {
1882             j--;
1883         } while (j >= i && isspace(Py_CHARMASK(s[j])));
1884         j++;
1885     }
1886 
1887     if (i == 0 && j == len && PyString_CheckExact(self)) {
1888         Py_INCREF(self);
1889         return (PyObject*)self;
1890     }
1891     else
1892         return PyString_FromStringAndSize(s+i, j-i);
1893 }
1894 
1895 
1896 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1897 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1898 {
1899     PyObject *sep = NULL;
1900 
1901     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1902         return NULL;
1903 
1904     if (sep != NULL && sep != Py_None) {
1905         if (PyString_Check(sep))
1906             return do_xstrip(self, striptype, sep);
1907 #ifdef Py_USING_UNICODE
1908         else if (PyUnicode_Check(sep)) {
1909             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1910             PyObject *res;
1911             if (uniself==NULL)
1912                 return NULL;
1913             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1914                 striptype, sep);
1915             Py_DECREF(uniself);
1916             return res;
1917         }
1918 #endif
1919         PyErr_Format(PyExc_TypeError,
1920 #ifdef Py_USING_UNICODE
1921                      "%s arg must be None, str or unicode",
1922 #else
1923                      "%s arg must be None or str",
1924 #endif
1925                      STRIPNAME(striptype));
1926         return NULL;
1927     }
1928 
1929     return do_strip(self, striptype);
1930 }
1931 
1932 
1933 PyDoc_STRVAR(strip__doc__,
1934 "S.strip([chars]) -> string or unicode\n\
1935 \n\
1936 Return a copy of the string S with leading and trailing\n\
1937 whitespace removed.\n\
1938 If chars is given and not None, remove characters in chars instead.\n\
1939 If chars is unicode, S will be converted to unicode before stripping");
1940 
1941 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1942 string_strip(PyStringObject *self, PyObject *args)
1943 {
1944     if (PyTuple_GET_SIZE(args) == 0)
1945         return do_strip(self, BOTHSTRIP); /* Common case */
1946     else
1947         return do_argstrip(self, BOTHSTRIP, args);
1948 }
1949 
1950 
1951 PyDoc_STRVAR(lstrip__doc__,
1952 "S.lstrip([chars]) -> string or unicode\n\
1953 \n\
1954 Return a copy of the string S with leading whitespace removed.\n\
1955 If chars is given and not None, remove characters in chars instead.\n\
1956 If chars is unicode, S will be converted to unicode before stripping");
1957 
1958 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1959 string_lstrip(PyStringObject *self, PyObject *args)
1960 {
1961     if (PyTuple_GET_SIZE(args) == 0)
1962         return do_strip(self, LEFTSTRIP); /* Common case */
1963     else
1964         return do_argstrip(self, LEFTSTRIP, args);
1965 }
1966 
1967 
1968 PyDoc_STRVAR(rstrip__doc__,
1969 "S.rstrip([chars]) -> string or unicode\n\
1970 \n\
1971 Return a copy of the string S with trailing whitespace removed.\n\
1972 If chars is given and not None, remove characters in chars instead.\n\
1973 If chars is unicode, S will be converted to unicode before stripping");
1974 
1975 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1976 string_rstrip(PyStringObject *self, PyObject *args)
1977 {
1978     if (PyTuple_GET_SIZE(args) == 0)
1979         return do_strip(self, RIGHTSTRIP); /* Common case */
1980     else
1981         return do_argstrip(self, RIGHTSTRIP, args);
1982 }
1983 
1984 
1985 PyDoc_STRVAR(lower__doc__,
1986 "S.lower() -> string\n\
1987 \n\
1988 Return a copy of the string S converted to lowercase.");
1989 
1990 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1991 #ifndef _tolower
1992 #define _tolower tolower
1993 #endif
1994 
1995 static PyObject *
string_lower(PyStringObject * self)1996 string_lower(PyStringObject *self)
1997 {
1998     char *s;
1999     Py_ssize_t i, n = PyString_GET_SIZE(self);
2000     PyObject *newobj;
2001 
2002     newobj = PyString_FromStringAndSize(NULL, n);
2003     if (!newobj)
2004         return NULL;
2005 
2006     s = PyString_AS_STRING(newobj);
2007 
2008     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2009 
2010     for (i = 0; i < n; i++) {
2011         int c = Py_CHARMASK(s[i]);
2012         if (isupper(c))
2013             s[i] = _tolower(c);
2014     }
2015 
2016     return newobj;
2017 }
2018 
2019 PyDoc_STRVAR(upper__doc__,
2020 "S.upper() -> string\n\
2021 \n\
2022 Return a copy of the string S converted to uppercase.");
2023 
2024 #ifndef _toupper
2025 #define _toupper toupper
2026 #endif
2027 
2028 static PyObject *
string_upper(PyStringObject * self)2029 string_upper(PyStringObject *self)
2030 {
2031     char *s;
2032     Py_ssize_t i, n = PyString_GET_SIZE(self);
2033     PyObject *newobj;
2034 
2035     newobj = PyString_FromStringAndSize(NULL, n);
2036     if (!newobj)
2037         return NULL;
2038 
2039     s = PyString_AS_STRING(newobj);
2040 
2041     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2042 
2043     for (i = 0; i < n; i++) {
2044         int c = Py_CHARMASK(s[i]);
2045         if (islower(c))
2046             s[i] = _toupper(c);
2047     }
2048 
2049     return newobj;
2050 }
2051 
2052 PyDoc_STRVAR(title__doc__,
2053 "S.title() -> string\n\
2054 \n\
2055 Return a titlecased version of S, i.e. words start with uppercase\n\
2056 characters, all remaining cased characters have lowercase.");
2057 
2058 static PyObject*
string_title(PyStringObject * self)2059 string_title(PyStringObject *self)
2060 {
2061     char *s = PyString_AS_STRING(self), *s_new;
2062     Py_ssize_t i, n = PyString_GET_SIZE(self);
2063     int previous_is_cased = 0;
2064     PyObject *newobj;
2065 
2066     newobj = PyString_FromStringAndSize(NULL, n);
2067     if (newobj == NULL)
2068         return NULL;
2069     s_new = PyString_AsString(newobj);
2070     for (i = 0; i < n; i++) {
2071         int c = Py_CHARMASK(*s++);
2072         if (islower(c)) {
2073             if (!previous_is_cased)
2074                 c = toupper(c);
2075             previous_is_cased = 1;
2076         } else if (isupper(c)) {
2077             if (previous_is_cased)
2078                 c = tolower(c);
2079             previous_is_cased = 1;
2080         } else
2081             previous_is_cased = 0;
2082         *s_new++ = c;
2083     }
2084     return newobj;
2085 }
2086 
2087 PyDoc_STRVAR(capitalize__doc__,
2088 "S.capitalize() -> string\n\
2089 \n\
2090 Return a copy of the string S with only its first character\n\
2091 capitalized.");
2092 
2093 static PyObject *
string_capitalize(PyStringObject * self)2094 string_capitalize(PyStringObject *self)
2095 {
2096     char *s = PyString_AS_STRING(self), *s_new;
2097     Py_ssize_t i, n = PyString_GET_SIZE(self);
2098     PyObject *newobj;
2099 
2100     newobj = PyString_FromStringAndSize(NULL, n);
2101     if (newobj == NULL)
2102         return NULL;
2103     s_new = PyString_AsString(newobj);
2104     if (0 < n) {
2105         int c = Py_CHARMASK(*s++);
2106         if (islower(c))
2107             *s_new = toupper(c);
2108         else
2109             *s_new = c;
2110         s_new++;
2111     }
2112     for (i = 1; i < n; i++) {
2113         int c = Py_CHARMASK(*s++);
2114         if (isupper(c))
2115             *s_new = tolower(c);
2116         else
2117             *s_new = c;
2118         s_new++;
2119     }
2120     return newobj;
2121 }
2122 
2123 
2124 PyDoc_STRVAR(count__doc__,
2125 "S.count(sub[, start[, end]]) -> int\n\
2126 \n\
2127 Return the number of non-overlapping occurrences of substring sub in\n\
2128 string S[start:end].  Optional arguments start and end are interpreted\n\
2129 as in slice notation.");
2130 
2131 static PyObject *
string_count(PyStringObject * self,PyObject * args)2132 string_count(PyStringObject *self, PyObject *args)
2133 {
2134     PyObject *sub_obj;
2135     const char *str = PyString_AS_STRING(self), *sub;
2136     Py_ssize_t sub_len;
2137     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2138 
2139     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2140         return NULL;
2141 
2142     if (PyString_Check(sub_obj)) {
2143         sub = PyString_AS_STRING(sub_obj);
2144         sub_len = PyString_GET_SIZE(sub_obj);
2145     }
2146 #ifdef Py_USING_UNICODE
2147     else if (PyUnicode_Check(sub_obj)) {
2148         Py_ssize_t count;
2149         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2150         if (count == -1)
2151             return NULL;
2152         else
2153             return PyInt_FromSsize_t(count);
2154     }
2155 #endif
2156     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2157         return NULL;
2158 
2159     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2160 
2161     return PyInt_FromSsize_t(
2162         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2163         );
2164 }
2165 
2166 PyDoc_STRVAR(swapcase__doc__,
2167 "S.swapcase() -> string\n\
2168 \n\
2169 Return a copy of the string S with uppercase characters\n\
2170 converted to lowercase and vice versa.");
2171 
2172 static PyObject *
string_swapcase(PyStringObject * self)2173 string_swapcase(PyStringObject *self)
2174 {
2175     char *s = PyString_AS_STRING(self), *s_new;
2176     Py_ssize_t i, n = PyString_GET_SIZE(self);
2177     PyObject *newobj;
2178 
2179     newobj = PyString_FromStringAndSize(NULL, n);
2180     if (newobj == NULL)
2181         return NULL;
2182     s_new = PyString_AsString(newobj);
2183     for (i = 0; i < n; i++) {
2184         int c = Py_CHARMASK(*s++);
2185         if (islower(c)) {
2186             *s_new = toupper(c);
2187         }
2188         else if (isupper(c)) {
2189             *s_new = tolower(c);
2190         }
2191         else
2192             *s_new = c;
2193         s_new++;
2194     }
2195     return newobj;
2196 }
2197 
2198 
2199 PyDoc_STRVAR(translate__doc__,
2200 "S.translate(table [,deletechars]) -> string\n\
2201 \n\
2202 Return a copy of the string S, where all characters occurring\n\
2203 in the optional argument deletechars are removed, and the\n\
2204 remaining characters have been mapped through the given\n\
2205 translation table, which must be a string of length 256 or None.\n\
2206 If the table argument is None, no translation is applied and\n\
2207 the operation simply removes the characters in deletechars.");
2208 
2209 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2210 string_translate(PyStringObject *self, PyObject *args)
2211 {
2212     register char *input, *output;
2213     const char *table;
2214     register Py_ssize_t i, c, changed = 0;
2215     PyObject *input_obj = (PyObject*)self;
2216     const char *output_start, *del_table=NULL;
2217     Py_ssize_t inlen, tablen, dellen = 0;
2218     PyObject *result;
2219     int trans_table[256];
2220     PyObject *tableobj, *delobj = NULL;
2221 
2222     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2223                           &tableobj, &delobj))
2224         return NULL;
2225 
2226     if (PyString_Check(tableobj)) {
2227         table = PyString_AS_STRING(tableobj);
2228         tablen = PyString_GET_SIZE(tableobj);
2229     }
2230     else if (tableobj == Py_None) {
2231         table = NULL;
2232         tablen = 256;
2233     }
2234 #ifdef Py_USING_UNICODE
2235     else if (PyUnicode_Check(tableobj)) {
2236         /* Unicode .translate() does not support the deletechars
2237            parameter; instead a mapping to None will cause characters
2238            to be deleted. */
2239         if (delobj != NULL) {
2240             PyErr_SetString(PyExc_TypeError,
2241             "deletions are implemented differently for unicode");
2242             return NULL;
2243         }
2244         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2245     }
2246 #endif
2247     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2248         return NULL;
2249 
2250     if (tablen != 256) {
2251         PyErr_SetString(PyExc_ValueError,
2252           "translation table must be 256 characters long");
2253         return NULL;
2254     }
2255 
2256     if (delobj != NULL) {
2257         if (PyString_Check(delobj)) {
2258             del_table = PyString_AS_STRING(delobj);
2259             dellen = PyString_GET_SIZE(delobj);
2260         }
2261 #ifdef Py_USING_UNICODE
2262         else if (PyUnicode_Check(delobj)) {
2263             PyErr_SetString(PyExc_TypeError,
2264             "deletions are implemented differently for unicode");
2265             return NULL;
2266         }
2267 #endif
2268         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2269             return NULL;
2270     }
2271     else {
2272         del_table = NULL;
2273         dellen = 0;
2274     }
2275 
2276     inlen = PyString_GET_SIZE(input_obj);
2277     result = PyString_FromStringAndSize((char *)NULL, inlen);
2278     if (result == NULL)
2279         return NULL;
2280     output_start = output = PyString_AsString(result);
2281     input = PyString_AS_STRING(input_obj);
2282 
2283     if (dellen == 0 && table != NULL) {
2284         /* If no deletions are required, use faster code */
2285         for (i = inlen; --i >= 0; ) {
2286             c = Py_CHARMASK(*input++);
2287             if (Py_CHARMASK((*output++ = table[c])) != c)
2288                 changed = 1;
2289         }
2290         if (changed || !PyString_CheckExact(input_obj))
2291             return result;
2292         Py_DECREF(result);
2293         Py_INCREF(input_obj);
2294         return input_obj;
2295     }
2296 
2297     if (table == NULL) {
2298         for (i = 0; i < 256; i++)
2299             trans_table[i] = Py_CHARMASK(i);
2300     } else {
2301         for (i = 0; i < 256; i++)
2302             trans_table[i] = Py_CHARMASK(table[i]);
2303     }
2304 
2305     for (i = 0; i < dellen; i++)
2306         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2307 
2308     for (i = inlen; --i >= 0; ) {
2309         c = Py_CHARMASK(*input++);
2310         if (trans_table[c] != -1)
2311             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2312                 continue;
2313         changed = 1;
2314     }
2315     if (!changed && PyString_CheckExact(input_obj)) {
2316         Py_DECREF(result);
2317         Py_INCREF(input_obj);
2318         return input_obj;
2319     }
2320     /* Fix the size of the resulting string */
2321     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2322         return NULL;
2323     return result;
2324 }
2325 
2326 
2327 /* find and count characters and substrings */
2328 
/* findchar(target, target_len, c): pointer to the first byte equal to c
   within the first target_len bytes starting at target, or NULL when no
   such byte exists.  Thin wrapper around memchr() that casts the result
   to char *.  Every argument is parenthesized in the expansion and is
   evaluated exactly once. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), (c), (target_len)))
2331 
2332 /* String ops must return a string.  */
2333 /* If the object is subclass of string, create a copy */
2334 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2335 return_self(PyStringObject *self)
2336 {
2337     if (PyString_CheckExact(self)) {
2338         Py_INCREF(self);
2339         return self;
2340     }
2341     return (PyStringObject *)PyString_FromStringAndSize(
2342         PyString_AS_STRING(self),
2343         PyString_GET_SIZE(self));
2344 }
2345 
2346 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2347 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2348 {
2349     Py_ssize_t count=0;
2350     const char *start=target;
2351     const char *end=target+target_len;
2352 
2353     while ( (start=findchar(start, end-start, c)) != NULL ) {
2354         count++;
2355         if (count >= maxcount)
2356             break;
2357         start += 1;
2358     }
2359     return count;
2360 }
2361 
2362 
2363 /* Algorithms for different cases of string replacement */
2364 
2365 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2366 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2367 replace_interleave(PyStringObject *self,
2368                    const char *to_s, Py_ssize_t to_len,
2369                    Py_ssize_t maxcount)
2370 {
2371     char *self_s, *result_s;
2372     Py_ssize_t self_len, result_len;
2373     Py_ssize_t count, i;
2374     PyStringObject *result;
2375 
2376     self_len = PyString_GET_SIZE(self);
2377 
2378     /* 1 at the end plus 1 after every character;
2379        count = min(maxcount, self_len + 1) */
2380     if (maxcount <= self_len) {
2381         count = maxcount;
2382     }
2383     else {
2384         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2385         count = self_len + 1;
2386     }
2387 
2388     /* Check for overflow */
2389     /*   result_len = count * to_len + self_len; */
2390     assert(count > 0);
2391     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
2392         PyErr_SetString(PyExc_OverflowError,
2393                         "replace string is too long");
2394         return NULL;
2395     }
2396     result_len = count * to_len + self_len;
2397     if (! (result = (PyStringObject *)
2398                      PyString_FromStringAndSize(NULL, result_len)) )
2399         return NULL;
2400 
2401     self_s = PyString_AS_STRING(self);
2402     result_s = PyString_AS_STRING(result);
2403 
2404     /* TODO: special case single character, which doesn't need memcpy */
2405 
2406     /* Lay the first one down (guaranteed this will occur) */
2407     Py_MEMCPY(result_s, to_s, to_len);
2408     result_s += to_len;
2409     count -= 1;
2410 
2411     for (i=0; i<count; i++) {
2412         *result_s++ = *self_s++;
2413         Py_MEMCPY(result_s, to_s, to_len);
2414         result_s += to_len;
2415     }
2416 
2417     /* Copy the rest of the original string */
2418     Py_MEMCPY(result_s, self_s, self_len-i);
2419 
2420     return result;
2421 }
2422 
2423 /* Special case for deleting a single character */
2424 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2425 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2426 replace_delete_single_character(PyStringObject *self,
2427                                 char from_c, Py_ssize_t maxcount)
2428 {
2429     char *self_s, *result_s;
2430     char *start, *next, *end;
2431     Py_ssize_t self_len, result_len;
2432     Py_ssize_t count;
2433     PyStringObject *result;
2434 
2435     self_len = PyString_GET_SIZE(self);
2436     self_s = PyString_AS_STRING(self);
2437 
2438     count = countchar(self_s, self_len, from_c, maxcount);
2439     if (count == 0) {
2440         return return_self(self);
2441     }
2442 
2443     result_len = self_len - count;  /* from_len == 1 */
2444     assert(result_len>=0);
2445 
2446     if ( (result = (PyStringObject *)
2447                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
2448         return NULL;
2449     result_s = PyString_AS_STRING(result);
2450 
2451     start = self_s;
2452     end = self_s + self_len;
2453     while (count-- > 0) {
2454         next = findchar(start, end-start, from_c);
2455         if (next == NULL)
2456             break;
2457         Py_MEMCPY(result_s, start, next-start);
2458         result_s += (next-start);
2459         start = next+1;
2460     }
2461     Py_MEMCPY(result_s, start, end-start);
2462 
2463     return result;
2464 }
2465 
2466 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2467 
2468 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2469 replace_delete_substring(PyStringObject *self,
2470                          const char *from_s, Py_ssize_t from_len,
2471                          Py_ssize_t maxcount) {
2472     char *self_s, *result_s;
2473     char *start, *next, *end;
2474     Py_ssize_t self_len, result_len;
2475     Py_ssize_t count, offset;
2476     PyStringObject *result;
2477 
2478     self_len = PyString_GET_SIZE(self);
2479     self_s = PyString_AS_STRING(self);
2480 
2481     count = stringlib_count(self_s, self_len,
2482                             from_s, from_len,
2483                             maxcount);
2484 
2485     if (count == 0) {
2486         /* no matches */
2487         return return_self(self);
2488     }
2489 
2490     result_len = self_len - (count * from_len);
2491     assert (result_len>=0);
2492 
2493     if ( (result = (PyStringObject *)
2494           PyString_FromStringAndSize(NULL, result_len)) == NULL )
2495         return NULL;
2496 
2497     result_s = PyString_AS_STRING(result);
2498 
2499     start = self_s;
2500     end = self_s + self_len;
2501     while (count-- > 0) {
2502         offset = stringlib_find(start, end-start,
2503                                 from_s, from_len,
2504                                 0);
2505         if (offset == -1)
2506             break;
2507         next = start + offset;
2508 
2509         Py_MEMCPY(result_s, start, next-start);
2510 
2511         result_s += (next-start);
2512         start = next+from_len;
2513     }
2514     Py_MEMCPY(result_s, start, end-start);
2515     return result;
2516 }
2517 
2518 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2519 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2520 replace_single_character_in_place(PyStringObject *self,
2521                                   char from_c, char to_c,
2522                                   Py_ssize_t maxcount)
2523 {
2524     char *self_s, *result_s, *start, *end, *next;
2525     Py_ssize_t self_len;
2526     PyStringObject *result;
2527 
2528     /* The result string will be the same size */
2529     self_s = PyString_AS_STRING(self);
2530     self_len = PyString_GET_SIZE(self);
2531 
2532     next = findchar(self_s, self_len, from_c);
2533 
2534     if (next == NULL) {
2535         /* No matches; return the original string */
2536         return return_self(self);
2537     }
2538 
2539     /* Need to make a new string */
2540     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2541     if (result == NULL)
2542         return NULL;
2543     result_s = PyString_AS_STRING(result);
2544     Py_MEMCPY(result_s, self_s, self_len);
2545 
2546     /* change everything in-place, starting with this one */
2547     start =  result_s + (next-self_s);
2548     *start = to_c;
2549     start++;
2550     end = result_s + self_len;
2551 
2552     while (--maxcount > 0) {
2553         next = findchar(start, end-start, from_c);
2554         if (next == NULL)
2555             break;
2556         *next = to_c;
2557         start = next+1;
2558     }
2559 
2560     return result;
2561 }
2562 
2563 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2564 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2565 replace_substring_in_place(PyStringObject *self,
2566                            const char *from_s, Py_ssize_t from_len,
2567                            const char *to_s, Py_ssize_t to_len,
2568                            Py_ssize_t maxcount)
2569 {
2570     char *result_s, *start, *end;
2571     char *self_s;
2572     Py_ssize_t self_len, offset;
2573     PyStringObject *result;
2574 
2575     /* The result string will be the same size */
2576 
2577     self_s = PyString_AS_STRING(self);
2578     self_len = PyString_GET_SIZE(self);
2579 
2580     offset = stringlib_find(self_s, self_len,
2581                             from_s, from_len,
2582                             0);
2583     if (offset == -1) {
2584         /* No matches; return the original string */
2585         return return_self(self);
2586     }
2587 
2588     /* Need to make a new string */
2589     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2590     if (result == NULL)
2591         return NULL;
2592     result_s = PyString_AS_STRING(result);
2593     Py_MEMCPY(result_s, self_s, self_len);
2594 
2595     /* change everything in-place, starting with this one */
2596     start =  result_s + offset;
2597     Py_MEMCPY(start, to_s, from_len);
2598     start += from_len;
2599     end = result_s + self_len;
2600 
2601     while ( --maxcount > 0) {
2602         offset = stringlib_find(start, end-start,
2603                                 from_s, from_len,
2604                                 0);
2605         if (offset==-1)
2606             break;
2607         Py_MEMCPY(start+offset, to_s, from_len);
2608         start += offset+from_len;
2609     }
2610 
2611     return result;
2612 }
2613 
2614 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2615 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2616 replace_single_character(PyStringObject *self,
2617                          char from_c,
2618                          const char *to_s, Py_ssize_t to_len,
2619                          Py_ssize_t maxcount)
2620 {
2621     char *self_s, *result_s;
2622     char *start, *next, *end;
2623     Py_ssize_t self_len, result_len;
2624     Py_ssize_t count;
2625     PyStringObject *result;
2626 
2627     self_s = PyString_AS_STRING(self);
2628     self_len = PyString_GET_SIZE(self);
2629 
2630     count = countchar(self_s, self_len, from_c, maxcount);
2631     if (count == 0) {
2632         /* no matches, return unchanged */
2633         return return_self(self);
2634     }
2635 
2636     /* use the difference between current and new, hence the "-1" */
2637     /*   result_len = self_len + count * (to_len-1)  */
2638     assert(count > 0);
2639     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
2640         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2641         return NULL;
2642     }
2643     result_len = self_len + count * (to_len - 1);
2644 
2645     if ( (result = (PyStringObject *)
2646           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2647         return NULL;
2648     result_s = PyString_AS_STRING(result);
2649 
2650     start = self_s;
2651     end = self_s + self_len;
2652     while (count-- > 0) {
2653         next = findchar(start, end-start, from_c);
2654         if (next == NULL)
2655             break;
2656 
2657         if (next == start) {
2658             /* replace with the 'to' */
2659             Py_MEMCPY(result_s, to_s, to_len);
2660             result_s += to_len;
2661             start += 1;
2662         } else {
2663             /* copy the unchanged old then the 'to' */
2664             Py_MEMCPY(result_s, start, next-start);
2665             result_s += (next-start);
2666             Py_MEMCPY(result_s, to_s, to_len);
2667             result_s += to_len;
2668             start = next+1;
2669         }
2670     }
2671     /* Copy the remainder of the remaining string */
2672     Py_MEMCPY(result_s, start, end-start);
2673 
2674     return result;
2675 }
2676 
2677 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2678 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2679 replace_substring(PyStringObject *self,
2680                   const char *from_s, Py_ssize_t from_len,
2681                   const char *to_s, Py_ssize_t to_len,
2682                   Py_ssize_t maxcount) {
2683     char *self_s, *result_s;
2684     char *start, *next, *end;
2685     Py_ssize_t self_len, result_len;
2686     Py_ssize_t count, offset;
2687     PyStringObject *result;
2688 
2689     self_s = PyString_AS_STRING(self);
2690     self_len = PyString_GET_SIZE(self);
2691 
2692     count = stringlib_count(self_s, self_len,
2693                             from_s, from_len,
2694                             maxcount);
2695 
2696     if (count == 0) {
2697         /* no matches, return unchanged */
2698         return return_self(self);
2699     }
2700 
2701     /* Check for overflow */
2702     /*    result_len = self_len + count * (to_len-from_len) */
2703     assert(count > 0);
2704     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
2705         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2706         return NULL;
2707     }
2708     result_len = self_len + count * (to_len - from_len);
2709 
2710     if ( (result = (PyStringObject *)
2711           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2712         return NULL;
2713     result_s = PyString_AS_STRING(result);
2714 
2715     start = self_s;
2716     end = self_s + self_len;
2717     while (count-- > 0) {
2718         offset = stringlib_find(start, end-start,
2719                                 from_s, from_len,
2720                                 0);
2721         if (offset == -1)
2722             break;
2723         next = start+offset;
2724         if (next == start) {
2725             /* replace with the 'to' */
2726             Py_MEMCPY(result_s, to_s, to_len);
2727             result_s += to_len;
2728             start += from_len;
2729         } else {
2730             /* copy the unchanged old then the 'to' */
2731             Py_MEMCPY(result_s, start, next-start);
2732             result_s += (next-start);
2733             Py_MEMCPY(result_s, to_s, to_len);
2734             result_s += to_len;
2735             start = next+from_len;
2736         }
2737     }
2738     /* Copy the remainder of the remaining string */
2739     Py_MEMCPY(result_s, start, end-start);
2740 
2741     return result;
2742 }
2743 
2744 
2745 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2746 replace(PyStringObject *self,
2747     const char *from_s, Py_ssize_t from_len,
2748     const char *to_s, Py_ssize_t to_len,
2749     Py_ssize_t maxcount)
2750 {
2751     if (maxcount < 0) {
2752         maxcount = PY_SSIZE_T_MAX;
2753     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2754         /* nothing to do; return the original string */
2755         return return_self(self);
2756     }
2757 
2758     if (maxcount == 0 ||
2759         (from_len == 0 && to_len == 0)) {
2760         /* nothing to do; return the original string */
2761         return return_self(self);
2762     }
2763 
2764     /* Handle zero-length special cases */
2765 
2766     if (from_len == 0) {
2767         /* insert the 'to' string everywhere.   */
2768         /*    >>> "Python".replace("", ".")     */
2769         /*    '.P.y.t.h.o.n.'                   */
2770         return replace_interleave(self, to_s, to_len, maxcount);
2771     }
2772 
2773     /* Except for "".replace("", "A") == "A" there is no way beyond this */
2774     /* point for an empty self string to generate a non-empty string */
2775     /* Special case so the remaining code always gets a non-empty string */
2776     if (PyString_GET_SIZE(self) == 0) {
2777         return return_self(self);
2778     }
2779 
2780     if (to_len == 0) {
2781         /* delete all occurrences of 'from' string */
2782         if (from_len == 1) {
2783             return replace_delete_single_character(
2784                 self, from_s[0], maxcount);
2785         } else {
2786             return replace_delete_substring(self, from_s, from_len, maxcount);
2787         }
2788     }
2789 
2790     /* Handle special case where both strings have the same length */
2791 
2792     if (from_len == to_len) {
2793         if (from_len == 1) {
2794             return replace_single_character_in_place(
2795                 self,
2796                 from_s[0],
2797                 to_s[0],
2798                 maxcount);
2799         } else {
2800             return replace_substring_in_place(
2801                 self, from_s, from_len, to_s, to_len, maxcount);
2802         }
2803     }
2804 
2805     /* Otherwise use the more generic algorithms */
2806     if (from_len == 1) {
2807         return replace_single_character(self, from_s[0],
2808                                         to_s, to_len, maxcount);
2809     } else {
2810         /* len('from')>=2, len('to')>=1 */
2811         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2812     }
2813 }
2814 
2815 PyDoc_STRVAR(replace__doc__,
2816 "S.replace(old, new[, count]) -> string\n\
2817 \n\
2818 Return a copy of string S with all occurrences of substring\n\
2819 old replaced by new.  If the optional argument count is\n\
2820 given, only the first count occurrences are replaced.");
2821 
2822 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2823 string_replace(PyStringObject *self, PyObject *args)
2824 {
2825     Py_ssize_t count = -1;
2826     PyObject *from, *to;
2827     const char *from_s, *to_s;
2828     Py_ssize_t from_len, to_len;
2829 
2830     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2831         return NULL;
2832 
2833     if (PyString_Check(from)) {
2834         from_s = PyString_AS_STRING(from);
2835         from_len = PyString_GET_SIZE(from);
2836     }
2837 #ifdef Py_USING_UNICODE
2838     if (PyUnicode_Check(from))
2839         return PyUnicode_Replace((PyObject *)self,
2840                                  from, to, count);
2841 #endif
2842     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2843         return NULL;
2844 
2845     if (PyString_Check(to)) {
2846         to_s = PyString_AS_STRING(to);
2847         to_len = PyString_GET_SIZE(to);
2848     }
2849 #ifdef Py_USING_UNICODE
2850     else if (PyUnicode_Check(to))
2851         return PyUnicode_Replace((PyObject *)self,
2852                                  from, to, count);
2853 #endif
2854     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2855         return NULL;
2856 
2857     return (PyObject *)replace((PyStringObject *) self,
2858                                from_s, from_len,
2859                                to_s, to_len, count);
2860 }
2861 
2862 /** End DALKE **/
2863 
2864 /* Matches the end (direction >= 0) or start (direction < 0) of self
2865  * against substr, using the start and end arguments. Returns
2866  * -1 on error, 0 if not found and 1 if found.
2867  */
2868 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2869 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2870                   Py_ssize_t end, int direction)
2871 {
2872     Py_ssize_t len = PyString_GET_SIZE(self);
2873     Py_ssize_t slen;
2874     const char* sub;
2875     const char* str;
2876 
2877     if (PyString_Check(substr)) {
2878         sub = PyString_AS_STRING(substr);
2879         slen = PyString_GET_SIZE(substr);
2880     }
2881 #ifdef Py_USING_UNICODE
2882     else if (PyUnicode_Check(substr))
2883         return PyUnicode_Tailmatch((PyObject *)self,
2884                                    substr, start, end, direction);
2885 #endif
2886     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2887         return -1;
2888     str = PyString_AS_STRING(self);
2889 
2890     ADJUST_INDICES(start, end, len);
2891 
2892     if (direction < 0) {
2893         /* startswith */
2894         if (start+slen > len)
2895             return 0;
2896     } else {
2897         /* endswith */
2898         if (end-start < slen || start > len)
2899             return 0;
2900 
2901         if (end-slen > start)
2902             start = end - slen;
2903     }
2904     if (end-start >= slen)
2905         return ! memcmp(str+start, sub, slen);
2906     return 0;
2907 }
2908 
2909 
2910 PyDoc_STRVAR(startswith__doc__,
2911 "S.startswith(prefix[, start[, end]]) -> bool\n\
2912 \n\
2913 Return True if S starts with the specified prefix, False otherwise.\n\
2914 With optional start, test S beginning at that position.\n\
2915 With optional end, stop comparing S at that position.\n\
2916 prefix can also be a tuple of strings to try.");
2917 
2918 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2919 string_startswith(PyStringObject *self, PyObject *args)
2920 {
2921     Py_ssize_t start = 0;
2922     Py_ssize_t end = PY_SSIZE_T_MAX;
2923     PyObject *subobj;
2924     int result;
2925 
2926     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2927         return NULL;
2928     if (PyTuple_Check(subobj)) {
2929         Py_ssize_t i;
2930         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2931             result = _string_tailmatch(self,
2932                             PyTuple_GET_ITEM(subobj, i),
2933                             start, end, -1);
2934             if (result == -1)
2935                 return NULL;
2936             else if (result) {
2937                 Py_RETURN_TRUE;
2938             }
2939         }
2940         Py_RETURN_FALSE;
2941     }
2942     result = _string_tailmatch(self, subobj, start, end, -1);
2943     if (result == -1) {
2944         if (PyErr_ExceptionMatches(PyExc_TypeError))
2945             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2946                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2947         return NULL;
2948     }
2949     else
2950         return PyBool_FromLong(result);
2951 }
2952 
2953 
2954 PyDoc_STRVAR(endswith__doc__,
2955 "S.endswith(suffix[, start[, end]]) -> bool\n\
2956 \n\
2957 Return True if S ends with the specified suffix, False otherwise.\n\
2958 With optional start, test S beginning at that position.\n\
2959 With optional end, stop comparing S at that position.\n\
2960 suffix can also be a tuple of strings to try.");
2961 
2962 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2963 string_endswith(PyStringObject *self, PyObject *args)
2964 {
2965     Py_ssize_t start = 0;
2966     Py_ssize_t end = PY_SSIZE_T_MAX;
2967     PyObject *subobj;
2968     int result;
2969 
2970     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2971         return NULL;
2972     if (PyTuple_Check(subobj)) {
2973         Py_ssize_t i;
2974         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2975             result = _string_tailmatch(self,
2976                             PyTuple_GET_ITEM(subobj, i),
2977                             start, end, +1);
2978             if (result == -1)
2979                 return NULL;
2980             else if (result) {
2981                 Py_RETURN_TRUE;
2982             }
2983         }
2984         Py_RETURN_FALSE;
2985     }
2986     result = _string_tailmatch(self, subobj, start, end, +1);
2987     if (result == -1) {
2988         if (PyErr_ExceptionMatches(PyExc_TypeError))
2989             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2990                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2991         return NULL;
2992     }
2993     else
2994         return PyBool_FromLong(result);
2995 }
2996 
2997 
2998 PyDoc_STRVAR(encode__doc__,
2999 "S.encode([encoding[,errors]]) -> object\n\
3000 \n\
3001 Encodes S using the codec registered for encoding. encoding defaults\n\
3002 to the default encoding. errors may be given to set a different error\n\
3003 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3004 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3005 'xmlcharrefreplace' as well as any other name registered with\n\
3006 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3007 
3008 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3009 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3010 {
3011     static char *kwlist[] = {"encoding", "errors", 0};
3012     char *encoding = NULL;
3013     char *errors = NULL;
3014     PyObject *v;
3015 
3016     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3017                                      kwlist, &encoding, &errors))
3018         return NULL;
3019     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3020     if (v == NULL)
3021         goto onError;
3022     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3023         PyErr_Format(PyExc_TypeError,
3024                      "encoder did not return a string/unicode object "
3025                      "(type=%.400s)",
3026                      Py_TYPE(v)->tp_name);
3027         Py_DECREF(v);
3028         return NULL;
3029     }
3030     return v;
3031 
3032  onError:
3033     return NULL;
3034 }
3035 
3036 
3037 PyDoc_STRVAR(decode__doc__,
3038 "S.decode([encoding[,errors]]) -> object\n\
3039 \n\
3040 Decodes S using the codec registered for encoding. encoding defaults\n\
3041 to the default encoding. errors may be given to set a different error\n\
3042 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3043 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3044 as well as any other name registered with codecs.register_error that is\n\
3045 able to handle UnicodeDecodeErrors.");
3046 
3047 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3048 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3049 {
3050     static char *kwlist[] = {"encoding", "errors", 0};
3051     char *encoding = NULL;
3052     char *errors = NULL;
3053     PyObject *v;
3054 
3055     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3056                                      kwlist, &encoding, &errors))
3057         return NULL;
3058     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3059     if (v == NULL)
3060         goto onError;
3061     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3062         PyErr_Format(PyExc_TypeError,
3063                      "decoder did not return a string/unicode object "
3064                      "(type=%.400s)",
3065                      Py_TYPE(v)->tp_name);
3066         Py_DECREF(v);
3067         return NULL;
3068     }
3069     return v;
3070 
3071  onError:
3072     return NULL;
3073 }
3074 
3075 
3076 PyDoc_STRVAR(expandtabs__doc__,
3077 "S.expandtabs([tabsize]) -> string\n\
3078 \n\
3079 Return a copy of S where all tab characters are expanded using spaces.\n\
3080 If tabsize is not given, a tab size of 8 characters is assumed.");
3081 
3082 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3083 string_expandtabs(PyStringObject *self, PyObject *args)
3084 {
3085     const char *e, *p, *qe;
3086     char *q;
3087     Py_ssize_t i, j, incr;
3088     PyObject *u;
3089     int tabsize = 8;
3090 
3091     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3092         return NULL;
3093 
3094     /* First pass: determine size of output string */
3095     i = 0; /* chars up to and including most recent \n or \r */
3096     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3097     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3098     for (p = PyString_AS_STRING(self); p < e; p++) {
3099         if (*p == '\t') {
3100             if (tabsize > 0) {
3101                 incr = tabsize - (j % tabsize);
3102                 if (j > PY_SSIZE_T_MAX - incr)
3103                     goto overflow1;
3104                 j += incr;
3105             }
3106         }
3107         else {
3108             if (j > PY_SSIZE_T_MAX - 1)
3109                 goto overflow1;
3110             j++;
3111             if (*p == '\n' || *p == '\r') {
3112                 if (i > PY_SSIZE_T_MAX - j)
3113                     goto overflow1;
3114                 i += j;
3115                 j = 0;
3116             }
3117         }
3118     }
3119 
3120     if (i > PY_SSIZE_T_MAX - j)
3121         goto overflow1;
3122 
3123     /* Second pass: create output string and fill it */
3124     u = PyString_FromStringAndSize(NULL, i + j);
3125     if (!u)
3126         return NULL;
3127 
3128     j = 0; /* same as in first pass */
3129     q = PyString_AS_STRING(u); /* next output char */
3130     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3131 
3132     for (p = PyString_AS_STRING(self); p < e; p++) {
3133         if (*p == '\t') {
3134             if (tabsize > 0) {
3135                 i = tabsize - (j % tabsize);
3136                 j += i;
3137                 while (i--) {
3138                     if (q >= qe)
3139                         goto overflow2;
3140                     *q++ = ' ';
3141                 }
3142             }
3143         }
3144         else {
3145             if (q >= qe)
3146                 goto overflow2;
3147             *q++ = *p;
3148             j++;
3149             if (*p == '\n' || *p == '\r')
3150                 j = 0;
3151         }
3152     }
3153 
3154     return u;
3155 
3156   overflow2:
3157     Py_DECREF(u);
3158   overflow1:
3159     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3160     return NULL;
3161 }
3162 
3163 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3164 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3165 {
3166     PyObject *u;
3167 
3168     if (left < 0)
3169         left = 0;
3170     if (right < 0)
3171         right = 0;
3172 
3173     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3174         Py_INCREF(self);
3175         return (PyObject *)self;
3176     }
3177 
3178     u = PyString_FromStringAndSize(NULL,
3179                                    left + PyString_GET_SIZE(self) + right);
3180     if (u) {
3181         if (left)
3182             memset(PyString_AS_STRING(u), fill, left);
3183         Py_MEMCPY(PyString_AS_STRING(u) + left,
3184                PyString_AS_STRING(self),
3185                PyString_GET_SIZE(self));
3186         if (right)
3187             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3188                fill, right);
3189     }
3190 
3191     return u;
3192 }
3193 
3194 PyDoc_STRVAR(ljust__doc__,
3195 "S.ljust(width[, fillchar]) -> string\n"
3196 "\n"
3197 "Return S left-justified in a string of length width. Padding is\n"
3198 "done using the specified fill character (default is a space).");
3199 
3200 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3201 string_ljust(PyStringObject *self, PyObject *args)
3202 {
3203     Py_ssize_t width;
3204     char fillchar = ' ';
3205 
3206     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3207         return NULL;
3208 
3209     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3210         Py_INCREF(self);
3211         return (PyObject*) self;
3212     }
3213 
3214     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3215 }
3216 
3217 
3218 PyDoc_STRVAR(rjust__doc__,
3219 "S.rjust(width[, fillchar]) -> string\n"
3220 "\n"
3221 "Return S right-justified in a string of length width. Padding is\n"
3222 "done using the specified fill character (default is a space)");
3223 
3224 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3225 string_rjust(PyStringObject *self, PyObject *args)
3226 {
3227     Py_ssize_t width;
3228     char fillchar = ' ';
3229 
3230     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3231         return NULL;
3232 
3233     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3234         Py_INCREF(self);
3235         return (PyObject*) self;
3236     }
3237 
3238     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3239 }
3240 
3241 
3242 PyDoc_STRVAR(center__doc__,
3243 "S.center(width[, fillchar]) -> string\n"
3244 "\n"
3245 "Return S centered in a string of length width. Padding is\n"
3246 "done using the specified fill character (default is a space)");
3247 
3248 static PyObject *
string_center(PyStringObject * self,PyObject * args)3249 string_center(PyStringObject *self, PyObject *args)
3250 {
3251     Py_ssize_t marg, left;
3252     Py_ssize_t width;
3253     char fillchar = ' ';
3254 
3255     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3256         return NULL;
3257 
3258     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3259         Py_INCREF(self);
3260         return (PyObject*) self;
3261     }
3262 
3263     marg = width - PyString_GET_SIZE(self);
3264     left = marg / 2 + (marg & width & 1);
3265 
3266     return pad(self, left, marg - left, fillchar);
3267 }
3268 
3269 PyDoc_STRVAR(zfill__doc__,
3270 "S.zfill(width) -> string\n"
3271 "\n"
3272 "Pad a numeric string S with zeros on the left, to fill a field\n"
3273 "of the specified width.  The string S is never truncated.");
3274 
3275 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3276 string_zfill(PyStringObject *self, PyObject *args)
3277 {
3278     Py_ssize_t fill;
3279     PyObject *s;
3280     char *p;
3281     Py_ssize_t width;
3282 
3283     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3284         return NULL;
3285 
3286     if (PyString_GET_SIZE(self) >= width) {
3287         if (PyString_CheckExact(self)) {
3288             Py_INCREF(self);
3289             return (PyObject*) self;
3290         }
3291         else
3292             return PyString_FromStringAndSize(
3293                 PyString_AS_STRING(self),
3294                 PyString_GET_SIZE(self)
3295             );
3296     }
3297 
3298     fill = width - PyString_GET_SIZE(self);
3299 
3300     s = pad(self, fill, 0, '0');
3301 
3302     if (s == NULL)
3303         return NULL;
3304 
3305     p = PyString_AS_STRING(s);
3306     if (p[fill] == '+' || p[fill] == '-') {
3307         /* move sign to beginning of string */
3308         p[0] = p[fill];
3309         p[fill] = '0';
3310     }
3311 
3312     return (PyObject*) s;
3313 }
3314 
3315 PyDoc_STRVAR(isspace__doc__,
3316 "S.isspace() -> bool\n\
3317 \n\
3318 Return True if all characters in S are whitespace\n\
3319 and there is at least one character in S, False otherwise.");
3320 
3321 static PyObject*
string_isspace(PyStringObject * self)3322 string_isspace(PyStringObject *self)
3323 {
3324     register const unsigned char *p
3325         = (unsigned char *) PyString_AS_STRING(self);
3326     register const unsigned char *e;
3327 
3328     /* Shortcut for single character strings */
3329     if (PyString_GET_SIZE(self) == 1 &&
3330         isspace(*p))
3331         return PyBool_FromLong(1);
3332 
3333     /* Special case for empty strings */
3334     if (PyString_GET_SIZE(self) == 0)
3335         return PyBool_FromLong(0);
3336 
3337     e = p + PyString_GET_SIZE(self);
3338     for (; p < e; p++) {
3339         if (!isspace(*p))
3340             return PyBool_FromLong(0);
3341     }
3342     return PyBool_FromLong(1);
3343 }
3344 
3345 
3346 PyDoc_STRVAR(isalpha__doc__,
3347 "S.isalpha() -> bool\n\
3348 \n\
3349 Return True if all characters in S are alphabetic\n\
3350 and there is at least one character in S, False otherwise.");
3351 
3352 static PyObject*
string_isalpha(PyStringObject * self)3353 string_isalpha(PyStringObject *self)
3354 {
3355     register const unsigned char *p
3356         = (unsigned char *) PyString_AS_STRING(self);
3357     register const unsigned char *e;
3358 
3359     /* Shortcut for single character strings */
3360     if (PyString_GET_SIZE(self) == 1 &&
3361         isalpha(*p))
3362         return PyBool_FromLong(1);
3363 
3364     /* Special case for empty strings */
3365     if (PyString_GET_SIZE(self) == 0)
3366         return PyBool_FromLong(0);
3367 
3368     e = p + PyString_GET_SIZE(self);
3369     for (; p < e; p++) {
3370         if (!isalpha(*p))
3371             return PyBool_FromLong(0);
3372     }
3373     return PyBool_FromLong(1);
3374 }
3375 
3376 
3377 PyDoc_STRVAR(isalnum__doc__,
3378 "S.isalnum() -> bool\n\
3379 \n\
3380 Return True if all characters in S are alphanumeric\n\
3381 and there is at least one character in S, False otherwise.");
3382 
3383 static PyObject*
string_isalnum(PyStringObject * self)3384 string_isalnum(PyStringObject *self)
3385 {
3386     register const unsigned char *p
3387         = (unsigned char *) PyString_AS_STRING(self);
3388     register const unsigned char *e;
3389 
3390     /* Shortcut for single character strings */
3391     if (PyString_GET_SIZE(self) == 1 &&
3392         isalnum(*p))
3393         return PyBool_FromLong(1);
3394 
3395     /* Special case for empty strings */
3396     if (PyString_GET_SIZE(self) == 0)
3397         return PyBool_FromLong(0);
3398 
3399     e = p + PyString_GET_SIZE(self);
3400     for (; p < e; p++) {
3401         if (!isalnum(*p))
3402             return PyBool_FromLong(0);
3403     }
3404     return PyBool_FromLong(1);
3405 }
3406 
3407 
3408 PyDoc_STRVAR(isdigit__doc__,
3409 "S.isdigit() -> bool\n\
3410 \n\
3411 Return True if all characters in S are digits\n\
3412 and there is at least one character in S, False otherwise.");
3413 
3414 static PyObject*
string_isdigit(PyStringObject * self)3415 string_isdigit(PyStringObject *self)
3416 {
3417     register const unsigned char *p
3418         = (unsigned char *) PyString_AS_STRING(self);
3419     register const unsigned char *e;
3420 
3421     /* Shortcut for single character strings */
3422     if (PyString_GET_SIZE(self) == 1 &&
3423         isdigit(*p))
3424         return PyBool_FromLong(1);
3425 
3426     /* Special case for empty strings */
3427     if (PyString_GET_SIZE(self) == 0)
3428         return PyBool_FromLong(0);
3429 
3430     e = p + PyString_GET_SIZE(self);
3431     for (; p < e; p++) {
3432         if (!isdigit(*p))
3433             return PyBool_FromLong(0);
3434     }
3435     return PyBool_FromLong(1);
3436 }
3437 
3438 
3439 PyDoc_STRVAR(islower__doc__,
3440 "S.islower() -> bool\n\
3441 \n\
3442 Return True if all cased characters in S are lowercase and there is\n\
3443 at least one cased character in S, False otherwise.");
3444 
3445 static PyObject*
string_islower(PyStringObject * self)3446 string_islower(PyStringObject *self)
3447 {
3448     register const unsigned char *p
3449         = (unsigned char *) PyString_AS_STRING(self);
3450     register const unsigned char *e;
3451     int cased;
3452 
3453     /* Shortcut for single character strings */
3454     if (PyString_GET_SIZE(self) == 1)
3455         return PyBool_FromLong(islower(*p) != 0);
3456 
3457     /* Special case for empty strings */
3458     if (PyString_GET_SIZE(self) == 0)
3459         return PyBool_FromLong(0);
3460 
3461     e = p + PyString_GET_SIZE(self);
3462     cased = 0;
3463     for (; p < e; p++) {
3464         if (isupper(*p))
3465             return PyBool_FromLong(0);
3466         else if (!cased && islower(*p))
3467             cased = 1;
3468     }
3469     return PyBool_FromLong(cased);
3470 }
3471 
3472 
3473 PyDoc_STRVAR(isupper__doc__,
3474 "S.isupper() -> bool\n\
3475 \n\
3476 Return True if all cased characters in S are uppercase and there is\n\
3477 at least one cased character in S, False otherwise.");
3478 
3479 static PyObject*
string_isupper(PyStringObject * self)3480 string_isupper(PyStringObject *self)
3481 {
3482     register const unsigned char *p
3483         = (unsigned char *) PyString_AS_STRING(self);
3484     register const unsigned char *e;
3485     int cased;
3486 
3487     /* Shortcut for single character strings */
3488     if (PyString_GET_SIZE(self) == 1)
3489         return PyBool_FromLong(isupper(*p) != 0);
3490 
3491     /* Special case for empty strings */
3492     if (PyString_GET_SIZE(self) == 0)
3493         return PyBool_FromLong(0);
3494 
3495     e = p + PyString_GET_SIZE(self);
3496     cased = 0;
3497     for (; p < e; p++) {
3498         if (islower(*p))
3499             return PyBool_FromLong(0);
3500         else if (!cased && isupper(*p))
3501             cased = 1;
3502     }
3503     return PyBool_FromLong(cased);
3504 }
3505 
3506 
3507 PyDoc_STRVAR(istitle__doc__,
3508 "S.istitle() -> bool\n\
3509 \n\
3510 Return True if S is a titlecased string and there is at least one\n\
3511 character in S, i.e. uppercase characters may only follow uncased\n\
3512 characters and lowercase characters only cased ones. Return False\n\
3513 otherwise.");
3514 
3515 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3516 string_istitle(PyStringObject *self, PyObject *uncased)
3517 {
3518     register const unsigned char *p
3519         = (unsigned char *) PyString_AS_STRING(self);
3520     register const unsigned char *e;
3521     int cased, previous_is_cased;
3522 
3523     /* Shortcut for single character strings */
3524     if (PyString_GET_SIZE(self) == 1)
3525         return PyBool_FromLong(isupper(*p) != 0);
3526 
3527     /* Special case for empty strings */
3528     if (PyString_GET_SIZE(self) == 0)
3529         return PyBool_FromLong(0);
3530 
3531     e = p + PyString_GET_SIZE(self);
3532     cased = 0;
3533     previous_is_cased = 0;
3534     for (; p < e; p++) {
3535         register const unsigned char ch = *p;
3536 
3537         if (isupper(ch)) {
3538             if (previous_is_cased)
3539                 return PyBool_FromLong(0);
3540             previous_is_cased = 1;
3541             cased = 1;
3542         }
3543         else if (islower(ch)) {
3544             if (!previous_is_cased)
3545                 return PyBool_FromLong(0);
3546             previous_is_cased = 1;
3547             cased = 1;
3548         }
3549         else
3550             previous_is_cased = 0;
3551     }
3552     return PyBool_FromLong(cased);
3553 }
3554 
3555 
3556 PyDoc_STRVAR(splitlines__doc__,
3557 "S.splitlines(keepends=False) -> list of strings\n\
3558 \n\
3559 Return a list of the lines in S, breaking at line boundaries.\n\
3560 Line breaks are not included in the resulting list unless keepends\n\
3561 is given and true.");
3562 
3563 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3564 string_splitlines(PyStringObject *self, PyObject *args)
3565 {
3566     int keepends = 0;
3567 
3568     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3569         return NULL;
3570 
3571     return stringlib_splitlines(
3572         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3573         keepends
3574     );
3575 }
3576 
3577 PyDoc_STRVAR(sizeof__doc__,
3578 "S.__sizeof__() -> size of S in memory, in bytes");
3579 
3580 static PyObject *
string_sizeof(PyStringObject * v)3581 string_sizeof(PyStringObject *v)
3582 {
3583     Py_ssize_t res;
3584     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3585     return PyInt_FromSsize_t(res);
3586 }
3587 
3588 static PyObject *
string_getnewargs(PyStringObject * v)3589 string_getnewargs(PyStringObject *v)
3590 {
3591     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3592 }
3593 
3594 
3595 #include "stringlib/string_format.h"
3596 
3597 PyDoc_STRVAR(format__doc__,
3598 "S.format(*args, **kwargs) -> string\n\
3599 \n\
3600 Return a formatted version of S, using substitutions from args and kwargs.\n\
3601 The substitutions are identified by braces ('{' and '}').");
3602 
3603 static PyObject *
string__format__(PyObject * self,PyObject * args)3604 string__format__(PyObject* self, PyObject* args)
3605 {
3606     PyObject *format_spec;
3607     PyObject *result = NULL;
3608     PyObject *tmp = NULL;
3609 
3610     /* If 2.x, convert format_spec to the same type as value */
3611     /* This is to allow things like u''.format('') */
3612     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3613         goto done;
3614     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3615         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3616                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3617         goto done;
3618     }
3619     tmp = PyObject_Str(format_spec);
3620     if (tmp == NULL)
3621         goto done;
3622     format_spec = tmp;
3623 
3624     result = _PyBytes_FormatAdvanced(self,
3625                                      PyString_AS_STRING(format_spec),
3626                                      PyString_GET_SIZE(format_spec));
3627 done:
3628     Py_XDECREF(tmp);
3629     return result;
3630 }
3631 
3632 PyDoc_STRVAR(p_format__doc__,
3633 "S.__format__(format_spec) -> string\n\
3634 \n\
3635 Return a formatted version of S as described by format_spec.");
3636 
3637 
3638 static PyMethodDef
3639 string_methods[] = {
3640     /* Counterparts of the obsolete stropmodule functions; except
3641        string.maketrans(). */
3642     {"join", (PyCFunction)string_join, METH_O, join__doc__},
3643     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3644     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3645     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3646     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3647     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3648     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3649     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3650     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3651     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3652     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3653     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3654     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3655      capitalize__doc__},
3656     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3657     {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3658      endswith__doc__},
3659     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3660     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3661     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3662     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3663     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3664     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3665     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3666     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3667     {"rpartition", (PyCFunction)string_rpartition, METH_O,
3668      rpartition__doc__},
3669     {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3670      startswith__doc__},
3671     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3672     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3673      swapcase__doc__},
3674     {"translate", (PyCFunction)string_translate, METH_VARARGS,
3675      translate__doc__},
3676     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3677     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3678     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3679     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3680     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3681     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3682     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3683     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3684     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3685     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3686     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3687     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3688      expandtabs__doc__},
3689     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3690      splitlines__doc__},
3691     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3692      sizeof__doc__},
3693     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
3694     {NULL,     NULL}                         /* sentinel */
3695 };
3696 
3697 static PyObject *
3698 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3699 
3700 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3701 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3702 {
3703     PyObject *x = NULL;
3704     static char *kwlist[] = {"object", 0};
3705 
3706     if (type != &PyString_Type)
3707         return str_subtype_new(type, args, kwds);
3708     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3709         return NULL;
3710     if (x == NULL)
3711         return PyString_FromString("");
3712     return PyObject_Str(x);
3713 }
3714 
3715 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3716 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3717 {
3718     PyObject *tmp, *pnew;
3719     Py_ssize_t n;
3720 
3721     assert(PyType_IsSubtype(type, &PyString_Type));
3722     tmp = string_new(&PyString_Type, args, kwds);
3723     if (tmp == NULL)
3724         return NULL;
3725     assert(PyString_Check(tmp));
3726     n = PyString_GET_SIZE(tmp);
3727     pnew = type->tp_alloc(type, n);
3728     if (pnew != NULL) {
3729         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3730         ((PyStringObject *)pnew)->ob_shash =
3731             ((PyStringObject *)tmp)->ob_shash;
3732         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3733     }
3734     Py_DECREF(tmp);
3735     return pnew;
3736 }
3737 
3738 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3739 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3740 {
3741     PyErr_SetString(PyExc_TypeError,
3742                     "The basestring type cannot be instantiated");
3743     return NULL;
3744 }
3745 
3746 static PyObject *
string_mod(PyObject * v,PyObject * w)3747 string_mod(PyObject *v, PyObject *w)
3748 {
3749     if (!PyString_Check(v)) {
3750         Py_INCREF(Py_NotImplemented);
3751         return Py_NotImplemented;
3752     }
3753     return PyString_Format(v, w);
3754 }
3755 
3756 PyDoc_STRVAR(basestring_doc,
3757 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3758 
3759 static PyNumberMethods string_as_number = {
3760     0,                          /*nb_add*/
3761     0,                          /*nb_subtract*/
3762     0,                          /*nb_multiply*/
3763     0,                          /*nb_divide*/
3764     string_mod,                 /*nb_remainder*/
3765 };
3766 
3767 
3768 PyTypeObject PyBaseString_Type = {
3769     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3770     "basestring",
3771     0,
3772     0,
3773     0,                                          /* tp_dealloc */
3774     0,                                          /* tp_print */
3775     0,                                          /* tp_getattr */
3776     0,                                          /* tp_setattr */
3777     0,                                          /* tp_compare */
3778     0,                                          /* tp_repr */
3779     0,                                          /* tp_as_number */
3780     0,                                          /* tp_as_sequence */
3781     0,                                          /* tp_as_mapping */
3782     0,                                          /* tp_hash */
3783     0,                                          /* tp_call */
3784     0,                                          /* tp_str */
3785     0,                                          /* tp_getattro */
3786     0,                                          /* tp_setattro */
3787     0,                                          /* tp_as_buffer */
3788     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3789     basestring_doc,                             /* tp_doc */
3790     0,                                          /* tp_traverse */
3791     0,                                          /* tp_clear */
3792     0,                                          /* tp_richcompare */
3793     0,                                          /* tp_weaklistoffset */
3794     0,                                          /* tp_iter */
3795     0,                                          /* tp_iternext */
3796     0,                                          /* tp_methods */
3797     0,                                          /* tp_members */
3798     0,                                          /* tp_getset */
3799     &PyBaseObject_Type,                         /* tp_base */
3800     0,                                          /* tp_dict */
3801     0,                                          /* tp_descr_get */
3802     0,                                          /* tp_descr_set */
3803     0,                                          /* tp_dictoffset */
3804     0,                                          /* tp_init */
3805     0,                                          /* tp_alloc */
3806     basestring_new,                             /* tp_new */
3807     0,                                          /* tp_free */
3808 };
3809 
3810 PyDoc_STRVAR(string_doc,
3811 "str(object='') -> string\n\
3812 \n\
3813 Return a nice string representation of the object.\n\
3814 If the argument is a string, the return value is the same object.");
3815 
3816 PyTypeObject PyString_Type = {
3817     PyVarObject_HEAD_INIT(&PyType_Type, 0)
3818     "str",
3819     PyStringObject_SIZE,
3820     sizeof(char),
3821     string_dealloc,                             /* tp_dealloc */
3822     (printfunc)string_print,                    /* tp_print */
3823     0,                                          /* tp_getattr */
3824     0,                                          /* tp_setattr */
3825     0,                                          /* tp_compare */
3826     string_repr,                                /* tp_repr */
3827     &string_as_number,                          /* tp_as_number */
3828     &string_as_sequence,                        /* tp_as_sequence */
3829     &string_as_mapping,                         /* tp_as_mapping */
3830     (hashfunc)string_hash,                      /* tp_hash */
3831     0,                                          /* tp_call */
3832     string_str,                                 /* tp_str */
3833     PyObject_GenericGetAttr,                    /* tp_getattro */
3834     0,                                          /* tp_setattro */
3835     &string_as_buffer,                          /* tp_as_buffer */
3836     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3837         Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3838         Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
3839     string_doc,                                 /* tp_doc */
3840     0,                                          /* tp_traverse */
3841     0,                                          /* tp_clear */
3842     (richcmpfunc)string_richcompare,            /* tp_richcompare */
3843     0,                                          /* tp_weaklistoffset */
3844     0,                                          /* tp_iter */
3845     0,                                          /* tp_iternext */
3846     string_methods,                             /* tp_methods */
3847     0,                                          /* tp_members */
3848     0,                                          /* tp_getset */
3849     &PyBaseString_Type,                         /* tp_base */
3850     0,                                          /* tp_dict */
3851     0,                                          /* tp_descr_get */
3852     0,                                          /* tp_descr_set */
3853     0,                                          /* tp_dictoffset */
3854     0,                                          /* tp_init */
3855     0,                                          /* tp_alloc */
3856     string_new,                                 /* tp_new */
3857     PyObject_Del,                               /* tp_free */
3858 };
3859 
3860 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3861 PyString_Concat(register PyObject **pv, register PyObject *w)
3862 {
3863     register PyObject *v;
3864     if (*pv == NULL)
3865         return;
3866     if (w == NULL || !PyString_Check(*pv)) {
3867         Py_CLEAR(*pv);
3868         return;
3869     }
3870     v = string_concat((PyStringObject *) *pv, w);
3871     Py_SETREF(*pv, v);
3872 }
3873 
3874 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3875 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3876 {
3877     PyString_Concat(pv, w);
3878     Py_XDECREF(w);
3879 }
3880 
3881 
3882 /* The following function breaks the notion that strings are immutable:
3883    it changes the size of a string.  We get away with this only if there
3884    is only one module referencing the object.  You can also think of it
3885    as creating a new string object and destroying the old one, only
3886    more efficiently.  In any case, don't use this if the string may
3887    already be known to some other part of the code...
3888    Note that if there's not enough memory to resize the string, the original
3889    string object at *pv is deallocated, *pv is set to NULL, an "out of
3890    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3891    returned, and the value in *pv may or may not be the same as on input.
3892    As always, an extra byte is allocated for a trailing \0 byte (newsize
3893    does *not* include that), and a trailing \0 byte is stored.
3894 */
3895 
3896 int
_PyString_Resize(PyObject ** pv,Py_ssize_t newsize)3897 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3898 {
3899     register PyObject *v;
3900     register PyStringObject *sv;
3901     v = *pv;
3902     if (!PyString_Check(v) || newsize < 0) {
3903         *pv = 0;
3904         Py_DECREF(v);
3905         PyErr_BadInternalCall();
3906         return -1;
3907     }
3908     if (Py_SIZE(v) == 0) {
3909         if (newsize == 0) {
3910             return 0;
3911         }
3912         *pv = PyString_FromStringAndSize(NULL, newsize);
3913         Py_DECREF(v);
3914         return (*pv == NULL) ? -1 : 0;
3915     }
3916     if (Py_REFCNT(v) != 1 || PyString_CHECK_INTERNED(v)) {
3917         *pv = 0;
3918         Py_DECREF(v);
3919         PyErr_BadInternalCall();
3920         return -1;
3921     }
3922     if (newsize == 0) {
3923         *pv = PyString_FromStringAndSize(NULL, 0);
3924         Py_DECREF(v);
3925         return (*pv == NULL) ? -1 : 0;
3926     }
3927     /* XXX UNREF/NEWREF interface should be more symmetrical */
3928     _Py_DEC_REFTOTAL;
3929     _Py_ForgetReference(v);
3930     *pv = (PyObject *)
3931         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3932     if (*pv == NULL) {
3933         PyObject_Del(v);
3934         PyErr_NoMemory();
3935         return -1;
3936     }
3937     _Py_NewReference(*pv);
3938     sv = (PyStringObject *) *pv;
3939     Py_SIZE(sv) = newsize;
3940     sv->ob_sval[newsize] = '\0';
3941     sv->ob_shash = -1;          /* invalidate cached hash value */
3942     return 0;
3943 }
3944 
3945 /* Helpers for formatstring */
3946 
3947 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3948 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3949 {
3950     Py_ssize_t argidx = *p_argidx;
3951     if (argidx < arglen) {
3952         (*p_argidx)++;
3953         if (arglen < 0)
3954             return args;
3955         else
3956             return PyTuple_GetItem(args, argidx);
3957     }
3958     PyErr_SetString(PyExc_TypeError,
3959                     "not enough arguments for format string");
3960     return NULL;
3961 }
3962 
/* Bit flags recording which flag characters appeared in a % format
 * specifier:
 *   F_LJUST  '-'
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3975 
3976 /* Returns a new reference to a PyString object, or NULL on failure. */
3977 
3978 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3979 formatfloat(PyObject *v, int flags, int prec, int type)
3980 {
3981     char *p;
3982     PyObject *result;
3983     double x;
3984 
3985     x = PyFloat_AsDouble(v);
3986     if (x == -1.0 && PyErr_Occurred()) {
3987         PyErr_Format(PyExc_TypeError, "float argument required, "
3988                      "not %.200s", Py_TYPE(v)->tp_name);
3989         return NULL;
3990     }
3991 
3992     if (prec < 0)
3993         prec = 6;
3994 
3995     p = PyOS_double_to_string(x, type, prec,
3996                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3997 
3998     if (p == NULL)
3999         return NULL;
4000     result = PyString_FromStringAndSize(p, strlen(p));
4001     PyMem_Free(p);
4002     return result;
4003 }
4004 
4005 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4006  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
4007  * Python's regular ints.
4008  * Return value:  a new PyString*, or NULL if error.
4009  *  .  *pbuf is set to point into it,
4010  *     *plen set to the # of chars following that.
4011  *     Caller must decref it when done using pbuf.
4012  *     The string starting at *pbuf is of the form
4013  *         "-"? ("0x" | "0X")? digit+
4014  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
4015  *         set in flags.  The case of hex digits will be correct,
4016  *     There will be at least prec digits, zero-filled on the left if
4017  *         necessary to get that many.
4018  * val          object to be converted
4019  * flags        bitmask of format flags; only F_ALT is looked at
4020  * prec         minimum number of digits; 0-fill on left if needed
4021  * type         a character in [duoxX]; u acts the same as d
4022  *
4023  * CAUTION:  o, x and X conversions on regular ints can never
4024  * produce a '-' sign, but can for Python's unbounded ints.
4025  */
4026 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)4027 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4028                      char **pbuf, int *plen)
4029 {
4030     PyObject *result = NULL, *r1;
4031     const char *s;
4032     char *buf;
4033     Py_ssize_t i;
4034     int sign;           /* 1 if '-', else 0 */
4035     int len;            /* number of characters */
4036     Py_ssize_t llen;
4037     int numdigits;      /* len == numnondigits + skipped + numdigits */
4038     int numnondigits, skipped, filled;
4039     const char *method;
4040 
4041     switch (type) {
4042     case 'd':
4043     case 'u':
4044         method = "str";
4045         result = Py_TYPE(val)->tp_str(val);
4046         break;
4047     case 'o':
4048         method = "oct";
4049         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4050         break;
4051     case 'x':
4052     case 'X':
4053         method = "hex";
4054         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4055         break;
4056     default:
4057         assert(!"'type' not in [duoxX]");
4058     }
4059     if (!result)
4060         return NULL;
4061 
4062     if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4063         Py_DECREF(result);
4064         return NULL;
4065     }
4066     if (llen > INT_MAX) {
4067         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4068         Py_DECREF(result);
4069         return NULL;
4070     }
4071     len = (int)llen;
4072     if (len > 0 && s[len-1] == 'L') {
4073         --len;
4074         if (len == 0)
4075             goto error;
4076     }
4077     sign = s[0] == '-';
4078     numnondigits = sign;
4079 
4080     /* Need to skip 0x, 0X or 0. */
4081     skipped = 0;
4082     switch (type) {
4083     case 'o':
4084         if (s[sign] != '0')
4085             goto error;
4086         /* If 0 is only digit, leave it alone. */
4087         if ((flags & F_ALT) == 0 && len - sign > 1)
4088             skipped = 1;
4089         break;
4090     case 'x':
4091     case 'X':
4092         if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4093             goto error;
4094         if ((flags & F_ALT) == 0)
4095             skipped = 2;
4096         else
4097             numnondigits += 2;
4098         break;
4099     }
4100     numdigits = len - numnondigits - skipped;
4101     if (numdigits <= 0)
4102         goto error;
4103 
4104     filled = prec - numdigits;
4105     if (filled < 0)
4106         filled = 0;
4107     len = numnondigits + filled + numdigits;
4108 
4109     /* To modify the string in-place, there can only be one reference. */
4110     if (skipped >= filled &&
4111         PyString_CheckExact(result) &&
4112         Py_REFCNT(result) == 1 &&
4113         !PyString_CHECK_INTERNED(result))
4114     {
4115         r1 = NULL;
4116         buf = (char *)s + skipped - filled;
4117     }
4118     else {
4119         r1 = result;
4120         result = PyString_FromStringAndSize(NULL, len);
4121         if (!result) {
4122             Py_DECREF(r1);
4123             return NULL;
4124         }
4125         buf = PyString_AS_STRING(result);
4126     }
4127 
4128     for (i = numnondigits; --i >= 0;)
4129         buf[i] = s[i];
4130     buf += numnondigits;
4131     s += numnondigits + skipped;
4132     for (i = 0; i < filled; i++)
4133         *buf++ = '0';
4134     if (r1 == NULL) {
4135         assert(buf == s);
4136         buf += numdigits;
4137     }
4138     else {
4139         for (i = 0; i < numdigits; i++)
4140             *buf++ = *s++;
4141     }
4142     *buf = '\0';
4143     buf -= len;
4144     Py_XDECREF(r1);
4145 
4146     /* Fix up case for hex conversions. */
4147     if (type == 'X') {
4148         /* Need to convert all lower case letters to upper case.
4149            and need to convert 0x to 0X (and -0x to -0X). */
4150         for (i = 0; i < len; i++) {
4151             if (buf[i] >= 'a' && buf[i] <= 'z')
4152                 buf[i] -= 'a'-'A';
4153         }
4154     }
4155     *pbuf = buf;
4156     *plen = len;
4157     return result;
4158 
4159 error:
4160     PyErr_Format(PyExc_ValueError,
4161                  "%%%c format: invalid result of __%s__ (type=%.200s)",
4162                  type, method, Py_TYPE(val)->tp_name);
4163     Py_DECREF(result);
4164     return NULL;
4165 }
4166 
4167 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4168 formatint(char *buf, size_t buflen, int flags,
4169           int prec, int type, PyObject *v)
4170 {
4171     /* fmt = '%#.' + `prec` + 'l' + `type`
4172        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4173        + 1 + 1 = 24 */
4174     char fmt[64];       /* plenty big enough! */
4175     char *sign;
4176     long x;
4177 
4178     x = PyInt_AsLong(v);
4179     if (x == -1 && PyErr_Occurred()) {
4180         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4181                      Py_TYPE(v)->tp_name);
4182         return -1;
4183     }
4184     if (x < 0 && type == 'u') {
4185         type = 'd';
4186     }
4187     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4188         sign = "-";
4189     else
4190         sign = "";
4191     if (prec < 0)
4192         prec = 1;
4193 
4194     if ((flags & F_ALT) &&
4195         (type == 'x' || type == 'X')) {
4196         /* When converting under %#x or %#X, there are a number
4197          * of issues that cause pain:
4198          * - when 0 is being converted, the C standard leaves off
4199          *   the '0x' or '0X', which is inconsistent with other
4200          *   %#x/%#X conversions and inconsistent with Python's
4201          *   hex() function
4202          * - there are platforms that violate the standard and
4203          *   convert 0 with the '0x' or '0X'
4204          *   (Metrowerks, Compaq Tru64)
4205          * - there are platforms that give '0x' when converting
4206          *   under %#X, but convert 0 in accordance with the
4207          *   standard (OS/2 EMX)
4208          *
4209          * We can achieve the desired consistency by inserting our
4210          * own '0x' or '0X' prefix, and substituting %x/%X in place
4211          * of %#x/%#X.
4212          *
4213          * Note that this is the same approach as used in
4214          * formatint() in unicodeobject.c
4215          */
4216         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4217                       sign, type, prec, type);
4218     }
4219     else {
4220         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4221                       sign, (flags&F_ALT) ? "#" : "",
4222                       prec, type);
4223     }
4224 
4225     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4226      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4227      */
4228     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4229         PyErr_SetString(PyExc_OverflowError,
4230             "formatted integer is too long (precision too large?)");
4231         return -1;
4232     }
4233     if (sign[0])
4234         PyOS_snprintf(buf, buflen, fmt, -x);
4235     else
4236         PyOS_snprintf(buf, buflen, fmt, x);
4237     return (int)strlen(buf);
4238 }
4239 
4240 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4241 formatchar(char *buf, size_t buflen, PyObject *v)
4242 {
4243     /* presume that the buffer is at least 2 characters long */
4244     if (PyString_Check(v)) {
4245         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4246             return -1;
4247     }
4248     else {
4249         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4250             return -1;
4251     }
4252     buf[1] = '\0';
4253     return 1;
4254 }
4255 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (for example after `sizeof`).
*/
#define FORMATBUFLEN ((size_t)120)
4265 
4266 PyObject *
PyString_Format(PyObject * format,PyObject * args)4267 PyString_Format(PyObject *format, PyObject *args)
4268 {
4269     char *fmt, *res;
4270     Py_ssize_t arglen, argidx;
4271     Py_ssize_t reslen, rescnt, fmtcnt;
4272     int args_owned = 0;
4273     PyObject *result, *orig_args;
4274 #ifdef Py_USING_UNICODE
4275     PyObject *v, *w;
4276 #endif
4277     PyObject *dict = NULL;
4278     if (format == NULL || !PyString_Check(format) || args == NULL) {
4279         PyErr_BadInternalCall();
4280         return NULL;
4281     }
4282     orig_args = args;
4283     fmt = PyString_AS_STRING(format);
4284     fmtcnt = PyString_GET_SIZE(format);
4285     reslen = rescnt = fmtcnt + 100;
4286     result = PyString_FromStringAndSize((char *)NULL, reslen);
4287     if (result == NULL)
4288         return NULL;
4289     res = PyString_AsString(result);
4290     if (PyTuple_Check(args)) {
4291         arglen = PyTuple_GET_SIZE(args);
4292         argidx = 0;
4293     }
4294     else {
4295         arglen = -1;
4296         argidx = -2;
4297     }
4298     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4299         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4300         dict = args;
4301     while (--fmtcnt >= 0) {
4302         if (*fmt != '%') {
4303             if (--rescnt < 0) {
4304                 rescnt = fmtcnt + 100;
4305                 reslen += rescnt;
4306                 if (_PyString_Resize(&result, reslen))
4307                     return NULL;
4308                 res = PyString_AS_STRING(result)
4309                     + reslen - rescnt;
4310                 --rescnt;
4311             }
4312             *res++ = *fmt++;
4313         }
4314         else {
4315             /* Got a format specifier */
4316             int flags = 0;
4317             Py_ssize_t width = -1;
4318             int prec = -1;
4319             int c = '\0';
4320             int fill;
4321             int isnumok;
4322             PyObject *v = NULL;
4323             PyObject *temp = NULL;
4324             char *pbuf;
4325             int sign;
4326             Py_ssize_t len;
4327             char formatbuf[FORMATBUFLEN];
4328                  /* For format{int,char}() */
4329 #ifdef Py_USING_UNICODE
4330             char *fmt_start = fmt;
4331             Py_ssize_t argidx_start = argidx;
4332 #endif
4333 
4334             fmt++;
4335             if (*fmt == '(') {
4336                 char *keystart;
4337                 Py_ssize_t keylen;
4338                 PyObject *key;
4339                 int pcount = 1;
4340 
4341                 if (dict == NULL) {
4342                     PyErr_SetString(PyExc_TypeError,
4343                              "format requires a mapping");
4344                     goto error;
4345                 }
4346                 ++fmt;
4347                 --fmtcnt;
4348                 keystart = fmt;
4349                 /* Skip over balanced parentheses */
4350                 while (pcount > 0 && --fmtcnt >= 0) {
4351                     if (*fmt == ')')
4352                         --pcount;
4353                     else if (*fmt == '(')
4354                         ++pcount;
4355                     fmt++;
4356                 }
4357                 keylen = fmt - keystart - 1;
4358                 if (fmtcnt < 0 || pcount > 0) {
4359                     PyErr_SetString(PyExc_ValueError,
4360                                "incomplete format key");
4361                     goto error;
4362                 }
4363                 key = PyString_FromStringAndSize(keystart,
4364                                                  keylen);
4365                 if (key == NULL)
4366                     goto error;
4367                 if (args_owned) {
4368                     Py_DECREF(args);
4369                     args_owned = 0;
4370                 }
4371                 args = PyObject_GetItem(dict, key);
4372                 Py_DECREF(key);
4373                 if (args == NULL) {
4374                     goto error;
4375                 }
4376                 args_owned = 1;
4377                 arglen = -1;
4378                 argidx = -2;
4379             }
4380             while (--fmtcnt >= 0) {
4381                 switch (c = *fmt++) {
4382                 case '-': flags |= F_LJUST; continue;
4383                 case '+': flags |= F_SIGN; continue;
4384                 case ' ': flags |= F_BLANK; continue;
4385                 case '#': flags |= F_ALT; continue;
4386                 case '0': flags |= F_ZERO; continue;
4387                 }
4388                 break;
4389             }
4390             if (c == '*') {
4391                 v = getnextarg(args, arglen, &argidx);
4392                 if (v == NULL)
4393                     goto error;
4394                 if (!PyInt_Check(v)) {
4395                     PyErr_SetString(PyExc_TypeError,
4396                                     "* wants int");
4397                     goto error;
4398                 }
4399                 width = PyInt_AsSsize_t(v);
4400                 if (width == -1 && PyErr_Occurred())
4401                     goto error;
4402                 if (width < 0) {
4403                     flags |= F_LJUST;
4404                     width = -width;
4405                 }
4406                 if (--fmtcnt >= 0)
4407                     c = *fmt++;
4408             }
4409             else if (c >= 0 && isdigit(c)) {
4410                 width = c - '0';
4411                 while (--fmtcnt >= 0) {
4412                     c = Py_CHARMASK(*fmt++);
4413                     if (!isdigit(c))
4414                         break;
4415                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4416                         PyErr_SetString(
4417                             PyExc_ValueError,
4418                             "width too big");
4419                         goto error;
4420                     }
4421                     width = width*10 + (c - '0');
4422                 }
4423             }
4424             if (c == '.') {
4425                 prec = 0;
4426                 if (--fmtcnt >= 0)
4427                     c = *fmt++;
4428                 if (c == '*') {
4429                     v = getnextarg(args, arglen, &argidx);
4430                     if (v == NULL)
4431                         goto error;
4432                     if (!PyInt_Check(v)) {
4433                         PyErr_SetString(
4434                             PyExc_TypeError,
4435                             "* wants int");
4436                         goto error;
4437                     }
4438                     prec = _PyInt_AsInt(v);
4439                     if (prec == -1 && PyErr_Occurred())
4440                         goto error;
4441                     if (prec < 0)
4442                         prec = 0;
4443                     if (--fmtcnt >= 0)
4444                         c = *fmt++;
4445                 }
4446                 else if (c >= 0 && isdigit(c)) {
4447                     prec = c - '0';
4448                     while (--fmtcnt >= 0) {
4449                         c = Py_CHARMASK(*fmt++);
4450                         if (!isdigit(c))
4451                             break;
4452                         if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4453                             PyErr_SetString(
4454                                 PyExc_ValueError,
4455                                 "prec too big");
4456                             goto error;
4457                         }
4458                         prec = prec*10 + (c - '0');
4459                     }
4460                 }
4461             } /* prec */
4462             if (fmtcnt >= 0) {
4463                 if (c == 'h' || c == 'l' || c == 'L') {
4464                     if (--fmtcnt >= 0)
4465                         c = *fmt++;
4466                 }
4467             }
4468             if (fmtcnt < 0) {
4469                 PyErr_SetString(PyExc_ValueError,
4470                                 "incomplete format");
4471                 goto error;
4472             }
4473             if (c != '%') {
4474                 v = getnextarg(args, arglen, &argidx);
4475                 if (v == NULL)
4476                     goto error;
4477             }
4478             sign = 0;
4479             fill = ' ';
4480             switch (c) {
4481             case '%':
4482                 pbuf = "%";
4483                 len = 1;
4484                 break;
4485             case 's':
4486 #ifdef Py_USING_UNICODE
4487                 if (PyUnicode_Check(v)) {
4488                     fmt = fmt_start;
4489                     argidx = argidx_start;
4490                     goto unicode;
4491                 }
4492 #endif
4493                 temp = _PyObject_Str(v);
4494 #ifdef Py_USING_UNICODE
4495                 if (temp != NULL && PyUnicode_Check(temp)) {
4496                     Py_DECREF(temp);
4497                     fmt = fmt_start;
4498                     argidx = argidx_start;
4499                     goto unicode;
4500                 }
4501 #endif
4502                 /* Fall through */
4503             case 'r':
4504                 if (c == 'r')
4505                     temp = PyObject_Repr(v);
4506                 if (temp == NULL)
4507                     goto error;
4508                 if (!PyString_Check(temp)) {
4509                     PyErr_SetString(PyExc_TypeError,
4510                       "%s argument has non-string str()");
4511                     Py_DECREF(temp);
4512                     goto error;
4513                 }
4514                 pbuf = PyString_AS_STRING(temp);
4515                 len = PyString_GET_SIZE(temp);
4516                 if (prec >= 0 && len > prec)
4517                     len = prec;
4518                 break;
4519             case 'i':
4520             case 'd':
4521             case 'u':
4522             case 'o':
4523             case 'x':
4524             case 'X':
4525                 if (c == 'i')
4526                     c = 'd';
4527                 isnumok = 0;
4528                 if (PyNumber_Check(v)) {
4529                     PyObject *iobj=NULL;
4530 
4531                     if (_PyAnyInt_Check(v)) {
4532                         iobj = v;
4533                         Py_INCREF(iobj);
4534                     }
4535                     else {
4536                         iobj = PyNumber_Int(v);
4537                         if (iobj==NULL) {
4538                             PyErr_Clear();
4539                             iobj = PyNumber_Long(v);
4540                         }
4541                     }
4542                     if (iobj!=NULL) {
4543                         if (PyInt_Check(iobj)) {
4544                             isnumok = 1;
4545                             pbuf = formatbuf;
4546                             len = formatint(pbuf,
4547                                             sizeof(formatbuf),
4548                                             flags, prec, c, iobj);
4549                             Py_DECREF(iobj);
4550                             if (len < 0)
4551                                 goto error;
4552                             sign = 1;
4553                         }
4554                         else if (PyLong_Check(iobj)) {
4555                             int ilen;
4556 
4557                             isnumok = 1;
4558                             temp = _PyString_FormatLong(iobj, flags,
4559                                 prec, c, &pbuf, &ilen);
4560                             Py_DECREF(iobj);
4561                             len = ilen;
4562                             if (!temp)
4563                                 goto error;
4564                             sign = 1;
4565                         }
4566                         else {
4567                             Py_DECREF(iobj);
4568                         }
4569                     }
4570                 }
4571                 if (!isnumok) {
4572                     PyErr_Format(PyExc_TypeError,
4573                         "%%%c format: a number is required, "
4574                         "not %.200s", c, Py_TYPE(v)->tp_name);
4575                     goto error;
4576                 }
4577                 if (flags & F_ZERO)
4578                     fill = '0';
4579                 break;
4580             case 'e':
4581             case 'E':
4582             case 'f':
4583             case 'F':
4584             case 'g':
4585             case 'G':
4586                 temp = formatfloat(v, flags, prec, c);
4587                 if (temp == NULL)
4588                     goto error;
4589                 pbuf = PyString_AS_STRING(temp);
4590                 len = PyString_GET_SIZE(temp);
4591                 sign = 1;
4592                 if (flags & F_ZERO)
4593                     fill = '0';
4594                 break;
4595             case 'c':
4596 #ifdef Py_USING_UNICODE
4597                 if (PyUnicode_Check(v)) {
4598                     fmt = fmt_start;
4599                     argidx = argidx_start;
4600                     goto unicode;
4601                 }
4602 #endif
4603                 pbuf = formatbuf;
4604                 len = formatchar(pbuf, sizeof(formatbuf), v);
4605                 if (len < 0)
4606                     goto error;
4607                 break;
4608             default:
4609                 PyErr_Format(PyExc_ValueError,
4610                   "unsupported format character '%c' (0x%x) "
4611                   "at index %zd",
4612                   c, c,
4613                   (Py_ssize_t)(fmt - 1 -
4614                                PyString_AsString(format)));
4615                 goto error;
4616             }
4617             if (sign) {
4618                 if (*pbuf == '-' || *pbuf == '+') {
4619                     sign = *pbuf++;
4620                     len--;
4621                 }
4622                 else if (flags & F_SIGN)
4623                     sign = '+';
4624                 else if (flags & F_BLANK)
4625                     sign = ' ';
4626                 else
4627                     sign = 0;
4628             }
4629             if (width < len)
4630                 width = len;
4631             if (rescnt - (sign != 0) < width) {
4632                 reslen -= rescnt;
4633                 rescnt = width + fmtcnt + 100;
4634                 reslen += rescnt;
4635                 if (reslen < 0) {
4636                     Py_DECREF(result);
4637                     Py_XDECREF(temp);
4638                     return PyErr_NoMemory();
4639                 }
4640                 if (_PyString_Resize(&result, reslen)) {
4641                     Py_XDECREF(temp);
4642                     return NULL;
4643                 }
4644                 res = PyString_AS_STRING(result)
4645                     + reslen - rescnt;
4646             }
4647             if (sign) {
4648                 if (fill != ' ')
4649                     *res++ = sign;
4650                 rescnt--;
4651                 if (width > len)
4652                     width--;
4653             }
4654             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4655                 assert(pbuf[0] == '0');
4656                 assert(pbuf[1] == c);
4657                 if (fill != ' ') {
4658                     *res++ = *pbuf++;
4659                     *res++ = *pbuf++;
4660                 }
4661                 rescnt -= 2;
4662                 width -= 2;
4663                 if (width < 0)
4664                     width = 0;
4665                 len -= 2;
4666             }
4667             if (width > len && !(flags & F_LJUST)) {
4668                 do {
4669                     --rescnt;
4670                     *res++ = fill;
4671                 } while (--width > len);
4672             }
4673             if (fill == ' ') {
4674                 if (sign)
4675                     *res++ = sign;
4676                 if ((flags & F_ALT) &&
4677                     (c == 'x' || c == 'X')) {
4678                     assert(pbuf[0] == '0');
4679                     assert(pbuf[1] == c);
4680                     *res++ = *pbuf++;
4681                     *res++ = *pbuf++;
4682                 }
4683             }
4684             Py_MEMCPY(res, pbuf, len);
4685             res += len;
4686             rescnt -= len;
4687             while (--width >= len) {
4688                 --rescnt;
4689                 *res++ = ' ';
4690             }
4691             if (dict && (argidx < arglen) && c != '%') {
4692                 PyErr_SetString(PyExc_TypeError,
4693                            "not all arguments converted during string formatting");
4694                 Py_XDECREF(temp);
4695                 goto error;
4696             }
4697             Py_XDECREF(temp);
4698         } /* '%' */
4699     } /* until end */
4700     if (argidx < arglen && !dict) {
4701         PyErr_SetString(PyExc_TypeError,
4702                         "not all arguments converted during string formatting");
4703         goto error;
4704     }
4705     if (args_owned) {
4706         Py_DECREF(args);
4707     }
4708     if (_PyString_Resize(&result, reslen - rescnt))
4709         return NULL;
4710     return result;
4711 
4712 #ifdef Py_USING_UNICODE
4713  unicode:
4714     if (args_owned) {
4715         Py_DECREF(args);
4716         args_owned = 0;
4717     }
4718     /* Fiddle args right (remove the first argidx arguments) */
4719     if (PyTuple_Check(orig_args) && argidx > 0) {
4720         PyObject *v;
4721         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4722         v = PyTuple_New(n);
4723         if (v == NULL)
4724             goto error;
4725         while (--n >= 0) {
4726             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4727             Py_INCREF(w);
4728             PyTuple_SET_ITEM(v, n, w);
4729         }
4730         args = v;
4731     } else {
4732         Py_INCREF(orig_args);
4733         args = orig_args;
4734     }
4735     args_owned = 1;
4736     /* Take what we have of the result and let the Unicode formatting
4737        function format the rest of the input. */
4738     rescnt = res - PyString_AS_STRING(result);
4739     if (_PyString_Resize(&result, rescnt))
4740         goto error;
4741     fmtcnt = PyString_GET_SIZE(format) - \
4742              (fmt - PyString_AS_STRING(format));
4743     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4744     if (format == NULL)
4745         goto error;
4746     v = PyUnicode_Format(format, args);
4747     Py_DECREF(format);
4748     if (v == NULL)
4749         goto error;
4750     /* Paste what we have (result) to what the Unicode formatting
4751        function returned (v) and return the result (or error) */
4752     w = PyUnicode_Concat(result, v);
4753     Py_DECREF(result);
4754     Py_DECREF(v);
4755     Py_DECREF(args);
4756     return w;
4757 #endif /* Py_USING_UNICODE */
4758 
4759  error:
4760     Py_DECREF(result);
4761     if (args_owned) {
4762         Py_DECREF(args);
4763     }
4764     return NULL;
4765 }
4766 
4767 void
PyString_InternInPlace(PyObject ** p)4768 PyString_InternInPlace(PyObject **p)
4769 {
4770     register PyStringObject *s = (PyStringObject *)(*p);
4771     PyObject *t;
4772     if (s == NULL || !PyString_Check(s))
4773         Py_FatalError("PyString_InternInPlace: strings only please!");
4774     /* If it's a string subclass, we don't really know what putting
4775        it in the interned dict might do. */
4776     if (!PyString_CheckExact(s))
4777         return;
4778     if (PyString_CHECK_INTERNED(s))
4779         return;
4780     if (interned == NULL) {
4781         interned = PyDict_New();
4782         if (interned == NULL) {
4783             PyErr_Clear(); /* Don't leave an exception */
4784             return;
4785         }
4786     }
4787     t = PyDict_GetItem(interned, (PyObject *)s);
4788     if (t) {
4789         Py_INCREF(t);
4790         Py_SETREF(*p, t);
4791         return;
4792     }
4793 
4794     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4795         PyErr_Clear();
4796         return;
4797     }
4798     /* The two references in interned are not counted by refcnt.
4799        The string deallocator will take care of this */
4800     Py_REFCNT(s) -= 2;
4801     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4802 }
4803 
4804 void
PyString_InternImmortal(PyObject ** p)4805 PyString_InternImmortal(PyObject **p)
4806 {
4807     PyString_InternInPlace(p);
4808     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4809         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4810         Py_INCREF(*p);
4811     }
4812 }
4813 
4814 
4815 PyObject *
PyString_InternFromString(const char * cp)4816 PyString_InternFromString(const char *cp)
4817 {
4818     PyObject *s = PyString_FromString(cp);
4819     if (s == NULL)
4820         return NULL;
4821     PyString_InternInPlace(&s);
4822     return s;
4823 }
4824 
4825 void
PyString_Fini(void)4826 PyString_Fini(void)
4827 {
4828     int i;
4829     for (i = 0; i < UCHAR_MAX + 1; i++)
4830         Py_CLEAR(characters[i]);
4831     Py_CLEAR(nullstring);
4832 }
4833 
_Py_ReleaseInternedStrings(void)4834 void _Py_ReleaseInternedStrings(void)
4835 {
4836     PyObject *keys;
4837     PyStringObject *s;
4838     Py_ssize_t i, n;
4839     Py_ssize_t immortal_size = 0, mortal_size = 0;
4840 
4841     if (interned == NULL || !PyDict_Check(interned))
4842         return;
4843     keys = PyDict_Keys(interned);
4844     if (keys == NULL || !PyList_Check(keys)) {
4845         PyErr_Clear();
4846         return;
4847     }
4848 
4849     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4850        detector, interned strings are not forcibly deallocated; rather, we
4851        give them their stolen references back, and then clear and DECREF
4852        the interned dict. */
4853 
4854     n = PyList_GET_SIZE(keys);
4855     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4856         n);
4857     for (i = 0; i < n; i++) {
4858         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4859         switch (s->ob_sstate) {
4860         case SSTATE_NOT_INTERNED:
4861             /* XXX Shouldn't happen */
4862             break;
4863         case SSTATE_INTERNED_IMMORTAL:
4864             Py_REFCNT(s) += 1;
4865             immortal_size += Py_SIZE(s);
4866             break;
4867         case SSTATE_INTERNED_MORTAL:
4868             Py_REFCNT(s) += 2;
4869             mortal_size += Py_SIZE(s);
4870             break;
4871         default:
4872             Py_FatalError("Inconsistent interned string state.");
4873         }
4874         s->ob_sstate = SSTATE_NOT_INTERNED;
4875     }
4876     fprintf(stderr, "total size of all interned strings: "
4877                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4878                     "mortal/immortal\n", mortal_size, immortal_size);
4879     Py_DECREF(keys);
4880     PyDict_Clear(interned);
4881     Py_CLEAR(interned);
4882 }
4883