1 /* String (str/bytes) object implementation */
2
3 #define PY_SSIZE_T_CLEAN
4
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
8
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the shared empty string
   (null_strings) or a shared one-character string (one_strings) is handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by byte value.  Entries start out
   NULL and are filled in the first time the corresponding string is
   created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;

/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
   for a string of length n should request PyStringObject_SIZE + n bytes.

   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
   3 bytes per string allocation on a typical system.

   The "+ 1" accounts for the terminating null byte stored after the data.
*/
#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34 /*
35 For PyString_FromString(), the parameter `str' points to a null-terminated
36 string containing exactly `size' bytes.
37
38 For PyString_FromStringAndSize(), the parameter `str' is
39 either NULL or else points to a string containing at least `size' bytes.
40 For PyString_FromStringAndSize(), the string in the `str' parameter does
41 not have to be null-terminated. (Therefore it is safe to construct a
42 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
51 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
53 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
55 */
56 PyObject *
PyString_FromStringAndSize(const char * str,Py_ssize_t size)57 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58 {
59 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
66 #ifdef COUNT_ALLOCS
67 null_strings++;
68 #endif
69 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
75 #ifdef COUNT_ALLOCS
76 one_strings++;
77 #endif
78 Py_INCREF(op);
79 return (PyObject *)op;
80 }
81
82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
86
87 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
91 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
92 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
112 }
113
114 PyObject *
PyString_FromString(const char * str)115 PyString_FromString(const char *str)
116 {
117 register size_t size;
118 register PyStringObject *op;
119
120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
128 #ifdef COUNT_ALLOCS
129 null_strings++;
130 #endif
131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135 #ifdef COUNT_ALLOCS
136 one_strings++;
137 #endif
138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
141
142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
146 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
165 }
166
167 PyObject *
PyString_FromFormatV(const char * format,va_list vargs)168 PyString_FromFormatV(const char *format, va_list vargs)
169 {
170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
175
176 #ifdef VA_LIST_IS_ARRAY
177 Py_MEMCPY(count, vargs, sizeof(va_list));
178 #else
179 #ifdef __va_copy
180 __va_copy(count, vargs);
181 #else
182 count = vargs;
183 #endif
184 #endif
185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
188 #ifdef HAVE_LONG_LONG
189 int longlongflag = 0;
190 #endif
191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
194
195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
202 #ifdef HAVE_LONG_LONG
203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
208 #endif
209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
213
214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
223 #ifdef HAVE_LONG_LONG
224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
231 #endif
232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
237
238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
265 expand:
266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
272
273 s = PyString_AsString(string);
274
275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
280 #ifdef HAVE_LONG_LONG
281 int longlongflag = 0;
282 #endif
283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
303 #ifdef HAVE_LONG_LONG
304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
309 #endif
310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
316
317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
324 #ifdef HAVE_LONG_LONG
325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
328 #endif
329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
340 #ifdef HAVE_LONG_LONG
341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
344 #endif
345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 if (n <= 0) {
364 i = strlen(p);
365 }
366 else {
367 i = 0;
368 while (i < n && p[i]) {
369 i++;
370 }
371 }
372 Py_MEMCPY(s, p, i);
373 s += i;
374 break;
375 case 'p':
376 sprintf(s, "%p", va_arg(vargs, void*));
377 /* %p is ill-defined: ensure leading 0x. */
378 if (s[1] == 'X')
379 s[1] = 'x';
380 else if (s[1] != 'x') {
381 memmove(s+2, s, strlen(s)+1);
382 s[0] = '0';
383 s[1] = 'x';
384 }
385 s += strlen(s);
386 break;
387 case '%':
388 *s++ = '%';
389 break;
390 default:
391 strcpy(s, p);
392 s += strlen(s);
393 goto end;
394 }
395 } else
396 *s++ = *f;
397 }
398
399 end:
400 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
401 return NULL;
402 return string;
403 }
404
405 PyObject *
PyString_FromFormat(const char * format,...)406 PyString_FromFormat(const char *format, ...)
407 {
408 PyObject* ret;
409 va_list vargs;
410
411 #ifdef HAVE_STDARG_PROTOTYPES
412 va_start(vargs, format);
413 #else
414 va_start(vargs);
415 #endif
416 ret = PyString_FromFormatV(format, vargs);
417 va_end(vargs);
418 return ret;
419 }
420
421
PyString_Decode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)422 PyObject *PyString_Decode(const char *s,
423 Py_ssize_t size,
424 const char *encoding,
425 const char *errors)
426 {
427 PyObject *v, *str;
428
429 str = PyString_FromStringAndSize(s, size);
430 if (str == NULL)
431 return NULL;
432 v = PyString_AsDecodedString(str, encoding, errors);
433 Py_DECREF(str);
434 return v;
435 }
436
/* Decode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; a NULL (error) result is passed
       straight through to the caller. */
    return _PyCodec_DecodeText(str, encoding, errors);
}
467
/* Like PyString_AsDecodedObject(), but the result must be a string object:
   a Unicode result is encoded with the default encoding first, and any
   other non-string result raises TypeError.  Returns NULL with an
   exception set on failure. */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *decoded = result;
        result = PyUnicode_AsEncodedString(decoded, NULL, NULL);
        Py_DECREF(decoded);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
501
PyString_Encode(const char * s,Py_ssize_t size,const char * encoding,const char * errors)502 PyObject *PyString_Encode(const char *s,
503 Py_ssize_t size,
504 const char *encoding,
505 const char *errors)
506 {
507 PyObject *v, *str;
508
509 str = PyString_FromStringAndSize(s, size);
510 if (str == NULL)
511 return NULL;
512 v = PyString_AsEncodedString(str, encoding, errors);
513 Py_DECREF(str);
514 return v;
515 }
516
/* Encode the string object `str' through the codec registry and return
   whatever object the codec produces.  A NULL `encoding' selects the
   default encoding when Unicode support is compiled in and is an error
   otherwise.  Returns NULL with an exception set on failure. */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; a NULL (error) result is passed
       straight through to the caller. */
    return _PyCodec_EncodeText(str, encoding, errors);
}
547
/* Like PyString_AsEncodedObject(), but the result must be a string object:
   a Unicode result is encoded with the default encoding first, and any
   other non-string result raises TypeError.  Returns NULL with an
   exception set on failure. */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *encoded = result;
        result = PyUnicode_AsEncodedString(encoded, NULL, NULL);
        Py_DECREF(encoded);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
581
582 static void
string_dealloc(PyObject * op)583 string_dealloc(PyObject *op)
584 {
585 switch (PyString_CHECK_INTERNED(op)) {
586 case SSTATE_NOT_INTERNED:
587 break;
588
589 case SSTATE_INTERNED_MORTAL:
590 /* revive dead object temporarily for DelItem */
591 Py_REFCNT(op) = 3;
592 if (PyDict_DelItem(interned, op) != 0)
593 Py_FatalError(
594 "deletion of interned string failed");
595 break;
596
597 case SSTATE_INTERNED_IMMORTAL:
598 Py_FatalError("Immortal interned string died.");
599
600 default:
601 Py_FatalError("Inconsistent interned string state.");
602 }
603 Py_TYPE(op)->tp_free(op);
604 }
605
/* Unescape a backslash-escaped string. If unicode is non-zero,
   the string is a u-literal. If recode_encoding is non-zero,
   the string is UTF-8 encoded and should be re-encoded in the
   specified encoding.

   s, len          -- the input bytes and their count.
   errors          -- error handling for malformed \x escapes: NULL or
                      "strict" raises ValueError, "replace" emits '?',
                      "ignore" emits nothing; any other value raises
                      ValueError.  It is also passed on to the UTF-8
                      decoder and the target encoder when recoding.
   unicode         -- only consulted when Py_USING_UNICODE is not defined,
                      where \u, \U and \N are rejected if it is non-zero.
   recode_encoding -- see above; only acted upon when Py_USING_UNICODE is
                      defined.

   Returns a new string object, or NULL with an exception set. */

PyObject *PyString_DecodeEscape(const char *s,
                                Py_ssize_t len,
                                const char *errors,
                                Py_ssize_t unicode,
                                const char *recode_encoding)
{
    int c;
    char *p, *buf;
    const char *end;
    PyObject *v;
    Py_ssize_t newlen;
    /* Check for integer overflow */
    if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) {
        PyErr_SetString(PyExc_OverflowError, "string is too large");
        return NULL;
    }
    /* Allocate the output buffer up front: len bytes, or 4*len when
       recoding; it is shrunk to the size actually used at the end. */
    newlen = recode_encoding ? 4*len:len;
    v = PyString_FromStringAndSize((char *)NULL, newlen);
    if (v == NULL)
        return NULL;
    /* buf marks the start of the output, p is the write cursor */
    p = buf = PyString_AsString(v);
    end = s + len;
    while (s < end) {
        if (*s != '\\') {
          /* Also entered by goto from the default case of the switch
             below, after an unrecognised escape has emitted its
             backslash. */
          non_esc:
#ifdef Py_USING_UNICODE
            if (recode_encoding && (*s & 0x80)) {
                PyObject *u, *w;
                char *r;
                const char* t;
                Py_ssize_t rn;
                t = s;
                /* Decode non-ASCII bytes as UTF-8. */
                while (t < end && (*t & 0x80)) t++;
                u = PyUnicode_DecodeUTF8(s, t - s, errors);
                if(!u) goto failed;

                /* Recode them in target encoding. */
                w = PyUnicode_AsEncodedString(
                    u, recode_encoding, errors);
                Py_DECREF(u);
                if (!w)                 goto failed;

                /* Append bytes to output buffer. */
                assert(PyString_Check(w));
                r = PyString_AS_STRING(w);
                rn = PyString_GET_SIZE(w);
                Py_MEMCPY(p, r, rn);
                p += rn;
                Py_DECREF(w);
                /* continue after the run of high-bit bytes */
                s = t;
            } else {
                *p++ = *s++;
            }
#else
            *p++ = *s++;
#endif
            continue;
        }
        /* *s is a backslash: step over it and look at the escape code */
        s++;
        if (s==end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }
        switch (*s++) {
        /* XXX This assumes ASCII! */
        /* backslash followed by a newline produces no output */
        case '\n': break;
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        /* octal escape: one to three octal digits */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            *p++ = c;
            break;
        /* hex escape: requires exactly two hex digits */
        case 'x':
            if (s+1 < end &&
                isxdigit(Py_CHARMASK(s[0])) &&
                isxdigit(Py_CHARMASK(s[1])))
            {
                unsigned int x = 0;
                /* first digit becomes the high nibble */
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x = c - '0';
                else if (islower(c))
                    x = 10 + c - 'a';
                else
                    x = 10 + c - 'A';
                x = x << 4;
                /* second digit becomes the low nibble */
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x += c - '0';
                else if (islower(c))
                    x += 10 + c - 'a';
                else
                    x += 10 + c - 'A';
                *p++ = x;
                break;
            }
            /* malformed \x escape: act according to `errors' */
            if (!errors || strcmp(errors, "strict") == 0) {
                PyErr_SetString(PyExc_ValueError,
                                "invalid \\x escape");
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; "
                             "unknown error handling code: %.400s",
                             errors);
                goto failed;
            }
            /* skip \x */
            if (s < end && isxdigit(Py_CHARMASK(s[0])))
                s++; /* and a hexdigit */
            break;
#ifndef Py_USING_UNICODE
        case 'u':
        case 'U':
        case 'N':
            if (unicode) {
                PyErr_SetString(PyExc_ValueError,
                          "Unicode escapes not legal "
                          "when Unicode disabled");
                goto failed;
            }
            /* otherwise falls through to the default case */
#endif
        default:
            /* unrecognised escape: keep the backslash, then back up so
               the character after it is handled as an ordinary one */
            *p++ = '\\';
            s--;
            goto non_esc; /* an arbitrary number of unescaped
                             UTF-8 bytes may follow. */
        }
    }
    /* trim the buffer to the number of bytes actually written */
    if (p-buf < newlen)
        _PyString_Resize(&v, p - buf); /* v is cleared on error */
    return v;
  failed:
    Py_DECREF(v);
    return NULL;
}
770
771 /* -------------------------------------------------------------------- */
772 /* object api */
773
774 static Py_ssize_t
string_getsize(register PyObject * op)775 string_getsize(register PyObject *op)
776 {
777 char *s;
778 Py_ssize_t len;
779 if (PyString_AsStringAndSize(op, &s, &len))
780 return -1;
781 return len;
782 }
783
784 static /*const*/ char *
string_getbuffer(register PyObject * op)785 string_getbuffer(register PyObject *op)
786 {
787 char *s;
788 Py_ssize_t len;
789 if (PyString_AsStringAndSize(op, &s, &len))
790 return NULL;
791 return s;
792 }
793
794 Py_ssize_t
PyString_Size(register PyObject * op)795 PyString_Size(register PyObject *op)
796 {
797 if (!PyString_Check(op))
798 return string_getsize(op);
799 return Py_SIZE(op);
800 }
801
802 /*const*/ char *
PyString_AsString(register PyObject * op)803 PyString_AsString(register PyObject *op)
804 {
805 if (!PyString_Check(op))
806 return string_getbuffer(op);
807 return ((PyStringObject *)op) -> ob_sval;
808 }
809
810 int
PyString_AsStringAndSize(register PyObject * obj,register char ** s,register Py_ssize_t * len)811 PyString_AsStringAndSize(register PyObject *obj,
812 register char **s,
813 register Py_ssize_t *len)
814 {
815 if (s == NULL) {
816 PyErr_BadInternalCall();
817 return -1;
818 }
819
820 if (!PyString_Check(obj)) {
821 #ifdef Py_USING_UNICODE
822 if (PyUnicode_Check(obj)) {
823 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
824 if (obj == NULL)
825 return -1;
826 }
827 else
828 #endif
829 {
830 PyErr_Format(PyExc_TypeError,
831 "expected string or Unicode object, "
832 "%.200s found", Py_TYPE(obj)->tp_name);
833 return -1;
834 }
835 }
836
837 *s = PyString_AS_STRING(obj);
838 if (len != NULL)
839 *len = PyString_GET_SIZE(obj);
840 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
841 PyErr_SetString(PyExc_TypeError,
842 "expected string without null bytes");
843 return -1;
844 }
845 return 0;
846 }
847
848 /* -------------------------------------------------------------------- */
849 /* Methods */
850
851 #include "stringlib/stringdefs.h"
852 #include "stringlib/fastsearch.h"
853
854 #include "stringlib/count.h"
855 #include "stringlib/find.h"
856 #include "stringlib/partition.h"
857 #include "stringlib/split.h"
858
859 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
860 #include "stringlib/localeutil.h"
861
862
863
864 static int
string_print(PyStringObject * op,FILE * fp,int flags)865 string_print(PyStringObject *op, FILE *fp, int flags)
866 {
867 Py_ssize_t i, str_len;
868 char c;
869 int quote;
870
871 /* XXX Ought to check for interrupts when writing long strings */
872 if (! PyString_CheckExact(op)) {
873 int ret;
874 /* A str subclass may have its own __str__ method. */
875 op = (PyStringObject *) PyObject_Str((PyObject *)op);
876 if (op == NULL)
877 return -1;
878 ret = string_print(op, fp, flags);
879 Py_DECREF(op);
880 return ret;
881 }
882 if (flags & Py_PRINT_RAW) {
883 char *data = op->ob_sval;
884 Py_ssize_t size = Py_SIZE(op);
885 Py_BEGIN_ALLOW_THREADS
886 while (size > INT_MAX) {
887 /* Very long strings cannot be written atomically.
888 * But don't write exactly INT_MAX bytes at a time
889 * to avoid memory aligment issues.
890 */
891 const int chunk_size = INT_MAX & ~0x3FFF;
892 fwrite(data, 1, chunk_size, fp);
893 data += chunk_size;
894 size -= chunk_size;
895 }
896 #ifdef __VMS
897 if (size) fwrite(data, (size_t)size, 1, fp);
898 #else
899 fwrite(data, 1, (size_t)size, fp);
900 #endif
901 Py_END_ALLOW_THREADS
902 return 0;
903 }
904
905 /* figure out which quote to use; single is preferred */
906 quote = '\'';
907 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
908 !memchr(op->ob_sval, '"', Py_SIZE(op)))
909 quote = '"';
910
911 str_len = Py_SIZE(op);
912 Py_BEGIN_ALLOW_THREADS
913 fputc(quote, fp);
914 for (i = 0; i < str_len; i++) {
915 /* Since strings are immutable and the caller should have a
916 reference, accessing the internal buffer should not be an issue
917 with the GIL released. */
918 c = op->ob_sval[i];
919 if (c == quote || c == '\\')
920 fprintf(fp, "\\%c", c);
921 else if (c == '\t')
922 fprintf(fp, "\\t");
923 else if (c == '\n')
924 fprintf(fp, "\\n");
925 else if (c == '\r')
926 fprintf(fp, "\\r");
927 else if (c < ' ' || c >= 0x7f)
928 fprintf(fp, "\\x%02x", c & 0xff);
929 else
930 fputc(c, fp);
931 }
932 fputc(quote, fp);
933 Py_END_ALLOW_THREADS
934 return 0;
935 }
936
937 PyObject *
PyString_Repr(PyObject * obj,int smartquotes)938 PyString_Repr(PyObject *obj, int smartquotes)
939 {
940 register PyStringObject* op = (PyStringObject*) obj;
941 size_t newsize;
942 PyObject *v;
943 if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
944 PyErr_SetString(PyExc_OverflowError,
945 "string is too large to make repr");
946 return NULL;
947 }
948 newsize = 2 + 4*Py_SIZE(op);
949 v = PyString_FromStringAndSize((char *)NULL, newsize);
950 if (v == NULL) {
951 return NULL;
952 }
953 else {
954 register Py_ssize_t i;
955 register char c;
956 register char *p;
957 int quote;
958
959 /* figure out which quote to use; single is preferred */
960 quote = '\'';
961 if (smartquotes &&
962 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
963 !memchr(op->ob_sval, '"', Py_SIZE(op)))
964 quote = '"';
965
966 p = PyString_AS_STRING(v);
967 *p++ = quote;
968 for (i = 0; i < Py_SIZE(op); i++) {
969 /* There's at least enough room for a hex escape
970 and a closing quote. */
971 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
972 c = op->ob_sval[i];
973 if (c == quote || c == '\\')
974 *p++ = '\\', *p++ = c;
975 else if (c == '\t')
976 *p++ = '\\', *p++ = 't';
977 else if (c == '\n')
978 *p++ = '\\', *p++ = 'n';
979 else if (c == '\r')
980 *p++ = '\\', *p++ = 'r';
981 else if (c < ' ' || c >= 0x7f) {
982 /* For performance, we don't want to call
983 PyOS_snprintf here (extra layers of
984 function call). */
985 sprintf(p, "\\x%02x", c & 0xff);
986 p += 4;
987 }
988 else
989 *p++ = c;
990 }
991 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
992 *p++ = quote;
993 *p = '\0';
994 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
995 return NULL;
996 return v;
997 }
998 }
999
1000 static PyObject *
string_repr(PyObject * op)1001 string_repr(PyObject *op)
1002 {
1003 return PyString_Repr(op, 1);
1004 }
1005
1006 static PyObject *
string_str(PyObject * s)1007 string_str(PyObject *s)
1008 {
1009 assert(PyString_Check(s));
1010 if (PyString_CheckExact(s)) {
1011 Py_INCREF(s);
1012 return s;
1013 }
1014 else {
1015 /* Subtype -- return genuine string with the same value. */
1016 PyStringObject *t = (PyStringObject *) s;
1017 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1018 }
1019 }
1020
1021 static Py_ssize_t
string_length(PyStringObject * a)1022 string_length(PyStringObject *a)
1023 {
1024 return Py_SIZE(a);
1025 }
1026
1027 static PyObject *
string_concat(register PyStringObject * a,register PyObject * bb)1028 string_concat(register PyStringObject *a, register PyObject *bb)
1029 {
1030 register Py_ssize_t size;
1031 register PyStringObject *op;
1032 if (!PyString_Check(bb)) {
1033 #ifdef Py_USING_UNICODE
1034 if (PyUnicode_Check(bb))
1035 return PyUnicode_Concat((PyObject *)a, bb);
1036 #endif
1037 if (PyByteArray_Check(bb))
1038 return PyByteArray_Concat((PyObject *)a, bb);
1039 PyErr_Format(PyExc_TypeError,
1040 "cannot concatenate 'str' and '%.200s' objects",
1041 Py_TYPE(bb)->tp_name);
1042 return NULL;
1043 }
1044 #define b ((PyStringObject *)bb)
1045 /* Optimize cases with empty left or right operand */
1046 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1047 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1048 if (Py_SIZE(a) == 0) {
1049 Py_INCREF(bb);
1050 return bb;
1051 }
1052 Py_INCREF(a);
1053 return (PyObject *)a;
1054 }
1055 /* Check that string sizes are not negative, to prevent an
1056 overflow in cases where we are passed incorrectly-created
1057 strings with negative lengths (due to a bug in other code).
1058 */
1059 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1060 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1061 PyErr_SetString(PyExc_OverflowError,
1062 "strings are too large to concat");
1063 return NULL;
1064 }
1065 size = Py_SIZE(a) + Py_SIZE(b);
1066
1067 /* Inline PyObject_NewVar */
1068 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1069 PyErr_SetString(PyExc_OverflowError,
1070 "strings are too large to concat");
1071 return NULL;
1072 }
1073 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1074 if (op == NULL)
1075 return PyErr_NoMemory();
1076 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1077 op->ob_shash = -1;
1078 op->ob_sstate = SSTATE_NOT_INTERNED;
1079 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1080 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1081 op->ob_sval[size] = '\0';
1082 return (PyObject *) op;
1083 #undef b
1084 }
1085
1086 static PyObject *
string_repeat(register PyStringObject * a,register Py_ssize_t n)1087 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1088 {
1089 register Py_ssize_t i;
1090 register Py_ssize_t j;
1091 register Py_ssize_t size;
1092 register PyStringObject *op;
1093 size_t nbytes;
1094 if (n < 0)
1095 n = 0;
1096 /* watch out for overflows: the size can overflow Py_ssize_t,
1097 * and the # of bytes needed can overflow size_t
1098 */
1099 if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1100 PyErr_SetString(PyExc_OverflowError,
1101 "repeated string is too long");
1102 return NULL;
1103 }
1104 size = Py_SIZE(a) * n;
1105 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1106 Py_INCREF(a);
1107 return (PyObject *)a;
1108 }
1109 nbytes = (size_t)size;
1110 if (nbytes + PyStringObject_SIZE <= nbytes) {
1111 PyErr_SetString(PyExc_OverflowError,
1112 "repeated string is too long");
1113 return NULL;
1114 }
1115 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1116 if (op == NULL)
1117 return PyErr_NoMemory();
1118 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
1119 op->ob_shash = -1;
1120 op->ob_sstate = SSTATE_NOT_INTERNED;
1121 op->ob_sval[size] = '\0';
1122 if (Py_SIZE(a) == 1 && n > 0) {
1123 memset(op->ob_sval, a->ob_sval[0] , n);
1124 return (PyObject *) op;
1125 }
1126 i = 0;
1127 if (i < size) {
1128 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1129 i = Py_SIZE(a);
1130 }
1131 while (i < size) {
1132 j = (i <= size-i) ? i : size-i;
1133 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1134 i += j;
1135 }
1136 return (PyObject *) op;
1137 }
1138
1139 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1140
1141 static PyObject *
string_slice(register PyStringObject * a,register Py_ssize_t i,register Py_ssize_t j)1142 string_slice(register PyStringObject *a, register Py_ssize_t i,
1143 register Py_ssize_t j)
1144 /* j -- may be negative! */
1145 {
1146 if (i < 0)
1147 i = 0;
1148 if (j < 0)
1149 j = 0; /* Avoid signed/unsigned bug in next line */
1150 if (j > Py_SIZE(a))
1151 j = Py_SIZE(a);
1152 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1153 /* It's the same as a */
1154 Py_INCREF(a);
1155 return (PyObject *)a;
1156 }
1157 if (j < i)
1158 j = i;
1159 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1160 }
1161
1162 static int
string_contains(PyObject * str_obj,PyObject * sub_obj)1163 string_contains(PyObject *str_obj, PyObject *sub_obj)
1164 {
1165 if (!PyString_CheckExact(sub_obj)) {
1166 #ifdef Py_USING_UNICODE
1167 if (PyUnicode_Check(sub_obj))
1168 return PyUnicode_Contains(str_obj, sub_obj);
1169 #endif
1170 if (!PyString_Check(sub_obj)) {
1171 PyErr_Format(PyExc_TypeError,
1172 "'in <string>' requires string as left operand, "
1173 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1174 return -1;
1175 }
1176 }
1177
1178 return stringlib_contains_obj(str_obj, sub_obj);
1179 }
1180
1181 static PyObject *
string_item(PyStringObject * a,register Py_ssize_t i)1182 string_item(PyStringObject *a, register Py_ssize_t i)
1183 {
1184 char pchar;
1185 PyObject *v;
1186 if (i < 0 || i >= Py_SIZE(a)) {
1187 PyErr_SetString(PyExc_IndexError, "string index out of range");
1188 return NULL;
1189 }
1190 pchar = a->ob_sval[i];
1191 v = (PyObject *)characters[pchar & UCHAR_MAX];
1192 if (v == NULL)
1193 v = PyString_FromStringAndSize(&pchar, 1);
1194 else {
1195 #ifdef COUNT_ALLOCS
1196 one_strings++;
1197 #endif
1198 Py_INCREF(v);
1199 }
1200 return v;
1201 }
1202
1203 static PyObject*
string_richcompare(PyStringObject * a,PyStringObject * b,int op)1204 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1205 {
1206 int c;
1207 Py_ssize_t len_a, len_b;
1208 Py_ssize_t min_len;
1209 PyObject *result;
1210
1211 /* Make sure both arguments are strings. */
1212 if (!(PyString_Check(a) && PyString_Check(b))) {
1213 result = Py_NotImplemented;
1214 goto out;
1215 }
1216 if (a == b) {
1217 switch (op) {
1218 case Py_EQ:case Py_LE:case Py_GE:
1219 result = Py_True;
1220 goto out;
1221 case Py_NE:case Py_LT:case Py_GT:
1222 result = Py_False;
1223 goto out;
1224 }
1225 }
1226 if (op == Py_EQ) {
1227 /* Supporting Py_NE here as well does not save
1228 much time, since Py_NE is rarely used. */
1229 if (Py_SIZE(a) == Py_SIZE(b)
1230 && (a->ob_sval[0] == b->ob_sval[0]
1231 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1232 result = Py_True;
1233 } else {
1234 result = Py_False;
1235 }
1236 goto out;
1237 }
1238 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1239 min_len = (len_a < len_b) ? len_a : len_b;
1240 if (min_len > 0) {
1241 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1242 if (c==0)
1243 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1244 } else
1245 c = 0;
1246 if (c == 0)
1247 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1248 switch (op) {
1249 case Py_LT: c = c < 0; break;
1250 case Py_LE: c = c <= 0; break;
1251 case Py_EQ: assert(0); break; /* unreachable */
1252 case Py_NE: c = c != 0; break;
1253 case Py_GT: c = c > 0; break;
1254 case Py_GE: c = c >= 0; break;
1255 default:
1256 result = Py_NotImplemented;
1257 goto out;
1258 }
1259 result = c ? Py_True : Py_False;
1260 out:
1261 Py_INCREF(result);
1262 return result;
1263 }
1264
1265 int
_PyString_Eq(PyObject * o1,PyObject * o2)1266 _PyString_Eq(PyObject *o1, PyObject *o2)
1267 {
1268 PyStringObject *a = (PyStringObject*) o1;
1269 PyStringObject *b = (PyStringObject*) o2;
1270 return Py_SIZE(a) == Py_SIZE(b)
1271 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1272 }
1273
1274 static long
string_hash(PyStringObject * a)1275 string_hash(PyStringObject *a)
1276 {
1277 register Py_ssize_t len;
1278 register unsigned char *p;
1279 register long x;
1280
1281 #ifdef Py_DEBUG
1282 assert(_Py_HashSecret_Initialized);
1283 #endif
1284 if (a->ob_shash != -1)
1285 return a->ob_shash;
1286 len = Py_SIZE(a);
1287 /*
1288 We make the hash of the empty string be 0, rather than using
1289 (prefix ^ suffix), since this slightly obfuscates the hash secret
1290 */
1291 if (len == 0) {
1292 a->ob_shash = 0;
1293 return 0;
1294 }
1295 p = (unsigned char *) a->ob_sval;
1296 x = _Py_HashSecret.prefix;
1297 x ^= *p << 7;
1298 while (--len >= 0)
1299 x = (1000003*x) ^ *p++;
1300 x ^= Py_SIZE(a);
1301 x ^= _Py_HashSecret.suffix;
1302 if (x == -1)
1303 x = -2;
1304 a->ob_shash = x;
1305 return x;
1306 }
1307
1308 static PyObject*
string_subscript(PyStringObject * self,PyObject * item)1309 string_subscript(PyStringObject* self, PyObject* item)
1310 {
1311 if (PyIndex_Check(item)) {
1312 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1313 if (i == -1 && PyErr_Occurred())
1314 return NULL;
1315 if (i < 0)
1316 i += PyString_GET_SIZE(self);
1317 return string_item(self, i);
1318 }
1319 else if (PySlice_Check(item)) {
1320 Py_ssize_t start, stop, step, slicelength, cur, i;
1321 char* source_buf;
1322 char* result_buf;
1323 PyObject* result;
1324
1325 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
1326 return NULL;
1327 }
1328 slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1329 &stop, step);
1330
1331 if (slicelength <= 0) {
1332 return PyString_FromStringAndSize("", 0);
1333 }
1334 else if (start == 0 && step == 1 &&
1335 slicelength == PyString_GET_SIZE(self) &&
1336 PyString_CheckExact(self)) {
1337 Py_INCREF(self);
1338 return (PyObject *)self;
1339 }
1340 else if (step == 1) {
1341 return PyString_FromStringAndSize(
1342 PyString_AS_STRING(self) + start,
1343 slicelength);
1344 }
1345 else {
1346 source_buf = PyString_AsString((PyObject*)self);
1347 result_buf = (char *)PyMem_Malloc(slicelength);
1348 if (result_buf == NULL)
1349 return PyErr_NoMemory();
1350
1351 for (cur = start, i = 0; i < slicelength;
1352 cur += step, i++) {
1353 result_buf[i] = source_buf[cur];
1354 }
1355
1356 result = PyString_FromStringAndSize(result_buf,
1357 slicelength);
1358 PyMem_Free(result_buf);
1359 return result;
1360 }
1361 }
1362 else {
1363 PyErr_Format(PyExc_TypeError,
1364 "string indices must be integers, not %.200s",
1365 Py_TYPE(item)->tp_name);
1366 return NULL;
1367 }
1368 }
1369
1370 static Py_ssize_t
string_buffer_getreadbuf(PyStringObject * self,Py_ssize_t index,const void ** ptr)1371 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372 {
1373 if ( index != 0 ) {
1374 PyErr_SetString(PyExc_SystemError,
1375 "accessing non-existent string segment");
1376 return -1;
1377 }
1378 *ptr = (void *)self->ob_sval;
1379 return Py_SIZE(self);
1380 }
1381
/* Old-style buffer protocol, write access.
   Always fails: sets TypeError and returns -1 without looking at any of
   its arguments. */
static Py_ssize_t
string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
1389
1390 static Py_ssize_t
string_buffer_getsegcount(PyStringObject * self,Py_ssize_t * lenp)1391 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1392 {
1393 if ( lenp )
1394 *lenp = Py_SIZE(self);
1395 return 1;
1396 }
1397
1398 static Py_ssize_t
string_buffer_getcharbuf(PyStringObject * self,Py_ssize_t index,const char ** ptr)1399 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1400 {
1401 if ( index != 0 ) {
1402 PyErr_SetString(PyExc_SystemError,
1403 "accessing non-existent string segment");
1404 return -1;
1405 }
1406 *ptr = self->ob_sval;
1407 return Py_SIZE(self);
1408 }
1409
1410 static int
string_buffer_getbuffer(PyStringObject * self,Py_buffer * view,int flags)1411 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1412 {
1413 return PyBuffer_FillInfo(view, (PyObject*)self,
1414 (void *)self->ob_sval, Py_SIZE(self),
1415 1, flags);
1416 }
1417
/* Sequence protocol slots for str.  The two assignment slots are left
   empty (0). */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1428
/* Mapping protocol slots for str: length and subscription; the
   assignment slot is left empty (0). */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,         /*mp_length*/
    (binaryfunc)string_subscript,   /*mp_subscript*/
    0,                              /*mp_ass_subscript*/
};
1434
/* Buffer protocol slots for str: the four old-style accessors followed by
   the new-style getbuffer; the final slot is left empty (0). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,   /*bf_getreadbuffer*/
    (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
    (segcountproc)string_buffer_getsegcount,    /*bf_getsegcount*/
    (charbufferproc)string_buffer_getcharbuf,   /*bf_getcharbuffer*/
    (getbufferproc)string_buffer_getbuffer,     /*bf_getbuffer*/
    0, /* XXX */                                /*bf_releasebuffer*/
};
1443
1444
1445
/* Strip modes understood by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* PyArg_ParseTuple() format string for each strip mode. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: its format string with the leading three
   characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1454
1455 PyDoc_STRVAR(split__doc__,
1456 "S.split([sep [,maxsplit]]) -> list of strings\n\
1457 \n\
1458 Return a list of the words in the string S, using sep as the\n\
1459 delimiter string. If maxsplit is given, at most maxsplit\n\
1460 splits are done. If sep is not specified or is None, any\n\
1461 whitespace string is a separator and empty strings are removed\n\
1462 from the result.");
1463
1464 static PyObject *
string_split(PyStringObject * self,PyObject * args)1465 string_split(PyStringObject *self, PyObject *args)
1466 {
1467 Py_ssize_t len = PyString_GET_SIZE(self), n;
1468 Py_ssize_t maxsplit = -1;
1469 const char *s = PyString_AS_STRING(self), *sub;
1470 PyObject *subobj = Py_None;
1471
1472 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1473 return NULL;
1474 if (maxsplit < 0)
1475 maxsplit = PY_SSIZE_T_MAX;
1476 if (subobj == Py_None)
1477 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1478 if (PyString_Check(subobj)) {
1479 sub = PyString_AS_STRING(subobj);
1480 n = PyString_GET_SIZE(subobj);
1481 }
1482 #ifdef Py_USING_UNICODE
1483 else if (PyUnicode_Check(subobj))
1484 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1485 #endif
1486 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1487 return NULL;
1488
1489 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1490 }
1491
1492 PyDoc_STRVAR(partition__doc__,
1493 "S.partition(sep) -> (head, sep, tail)\n\
1494 \n\
1495 Search for the separator sep in S, and return the part before it,\n\
1496 the separator itself, and the part after it. If the separator is not\n\
1497 found, return S and two empty strings.");
1498
1499 static PyObject *
string_partition(PyStringObject * self,PyObject * sep_obj)1500 string_partition(PyStringObject *self, PyObject *sep_obj)
1501 {
1502 const char *sep;
1503 Py_ssize_t sep_len;
1504
1505 if (PyString_Check(sep_obj)) {
1506 sep = PyString_AS_STRING(sep_obj);
1507 sep_len = PyString_GET_SIZE(sep_obj);
1508 }
1509 #ifdef Py_USING_UNICODE
1510 else if (PyUnicode_Check(sep_obj))
1511 return PyUnicode_Partition((PyObject *) self, sep_obj);
1512 #endif
1513 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1514 return NULL;
1515
1516 return stringlib_partition(
1517 (PyObject*) self,
1518 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1519 sep_obj, sep, sep_len
1520 );
1521 }
1522
1523 PyDoc_STRVAR(rpartition__doc__,
1524 "S.rpartition(sep) -> (head, sep, tail)\n\
1525 \n\
1526 Search for the separator sep in S, starting at the end of S, and return\n\
1527 the part before it, the separator itself, and the part after it. If the\n\
1528 separator is not found, return two empty strings and S.");
1529
1530 static PyObject *
string_rpartition(PyStringObject * self,PyObject * sep_obj)1531 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1532 {
1533 const char *sep;
1534 Py_ssize_t sep_len;
1535
1536 if (PyString_Check(sep_obj)) {
1537 sep = PyString_AS_STRING(sep_obj);
1538 sep_len = PyString_GET_SIZE(sep_obj);
1539 }
1540 #ifdef Py_USING_UNICODE
1541 else if (PyUnicode_Check(sep_obj))
1542 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1543 #endif
1544 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1545 return NULL;
1546
1547 return stringlib_rpartition(
1548 (PyObject*) self,
1549 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1550 sep_obj, sep, sep_len
1551 );
1552 }
1553
1554 PyDoc_STRVAR(rsplit__doc__,
1555 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1556 \n\
1557 Return a list of the words in the string S, using sep as the\n\
1558 delimiter string, starting at the end of the string and working\n\
1559 to the front. If maxsplit is given, at most maxsplit splits are\n\
1560 done. If sep is not specified or is None, any whitespace string\n\
1561 is a separator.");
1562
1563 static PyObject *
string_rsplit(PyStringObject * self,PyObject * args)1564 string_rsplit(PyStringObject *self, PyObject *args)
1565 {
1566 Py_ssize_t len = PyString_GET_SIZE(self), n;
1567 Py_ssize_t maxsplit = -1;
1568 const char *s = PyString_AS_STRING(self), *sub;
1569 PyObject *subobj = Py_None;
1570
1571 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1572 return NULL;
1573 if (maxsplit < 0)
1574 maxsplit = PY_SSIZE_T_MAX;
1575 if (subobj == Py_None)
1576 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1577 if (PyString_Check(subobj)) {
1578 sub = PyString_AS_STRING(subobj);
1579 n = PyString_GET_SIZE(subobj);
1580 }
1581 #ifdef Py_USING_UNICODE
1582 else if (PyUnicode_Check(subobj))
1583 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1584 #endif
1585 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1586 return NULL;
1587
1588 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1589 }
1590
1591
1592 PyDoc_STRVAR(join__doc__,
1593 "S.join(iterable) -> string\n\
1594 \n\
1595 Return a string which is the concatenation of the strings in the\n\
1596 iterable. The separator between elements is S.");
1597
1598 static PyObject *
string_join(PyStringObject * self,PyObject * orig)1599 string_join(PyStringObject *self, PyObject *orig)
1600 {
1601 char *sep = PyString_AS_STRING(self);
1602 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1603 PyObject *res = NULL;
1604 char *p;
1605 Py_ssize_t seqlen = 0;
1606 size_t sz = 0;
1607 Py_ssize_t i;
1608 PyObject *seq, *item;
1609
1610 seq = PySequence_Fast(orig, "can only join an iterable");
1611 if (seq == NULL) {
1612 return NULL;
1613 }
1614
1615 seqlen = PySequence_Size(seq);
1616 if (seqlen == 0) {
1617 Py_DECREF(seq);
1618 return PyString_FromString("");
1619 }
1620 if (seqlen == 1) {
1621 item = PySequence_Fast_GET_ITEM(seq, 0);
1622 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1623 Py_INCREF(item);
1624 Py_DECREF(seq);
1625 return item;
1626 }
1627 }
1628
1629 /* There are at least two things to join, or else we have a subclass
1630 * of the builtin types in the sequence.
1631 * Do a pre-pass to figure out the total amount of space we'll
1632 * need (sz), see whether any argument is absurd, and defer to
1633 * the Unicode join if appropriate.
1634 */
1635 for (i = 0; i < seqlen; i++) {
1636 const size_t old_sz = sz;
1637 item = PySequence_Fast_GET_ITEM(seq, i);
1638 if (!PyString_Check(item)){
1639 #ifdef Py_USING_UNICODE
1640 if (PyUnicode_Check(item)) {
1641 /* Defer to Unicode join.
1642 * CAUTION: There's no guarantee that the
1643 * original sequence can be iterated over
1644 * again, so we must pass seq here.
1645 */
1646 PyObject *result;
1647 result = PyUnicode_Join((PyObject *)self, seq);
1648 Py_DECREF(seq);
1649 return result;
1650 }
1651 #endif
1652 PyErr_Format(PyExc_TypeError,
1653 "sequence item %zd: expected string,"
1654 " %.80s found",
1655 i, Py_TYPE(item)->tp_name);
1656 Py_DECREF(seq);
1657 return NULL;
1658 }
1659 sz += PyString_GET_SIZE(item);
1660 if (i != 0)
1661 sz += seplen;
1662 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1663 PyErr_SetString(PyExc_OverflowError,
1664 "join() result is too long for a Python string");
1665 Py_DECREF(seq);
1666 return NULL;
1667 }
1668 }
1669
1670 /* Allocate result space. */
1671 res = PyString_FromStringAndSize((char*)NULL, sz);
1672 if (res == NULL) {
1673 Py_DECREF(seq);
1674 return NULL;
1675 }
1676
1677 /* Catenate everything. */
1678 p = PyString_AS_STRING(res);
1679 for (i = 0; i < seqlen; ++i) {
1680 size_t n;
1681 item = PySequence_Fast_GET_ITEM(seq, i);
1682 n = PyString_GET_SIZE(item);
1683 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1684 p += n;
1685 if (i < seqlen - 1) {
1686 Py_MEMCPY(p, sep, seplen);
1687 p += seplen;
1688 }
1689 }
1690
1691 Py_DECREF(seq);
1692 return res;
1693 }
1694
1695 PyObject *
_PyString_Join(PyObject * sep,PyObject * x)1696 _PyString_Join(PyObject *sep, PyObject *x)
1697 {
1698 assert(sep != NULL && PyString_Check(sep));
1699 assert(x != NULL);
1700 return string_join((PyStringObject *)sep, x);
1701 }
1702
/* helper macro to fixup start/end slice values

   Clamps `end` to at most `len`; a negative `start` or `end` is first
   offset by `len` and then clamped at 0.  `start` and `end` must be
   modifiable lvalues.  All three arguments are evaluated more than once,
   so they must be free of side effects.

   The body is wrapped in do { ... } while (0) so that the macro expands to
   exactly one statement and can be used safely in an unbraced if/else. */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
1717
1718 Py_LOCAL_INLINE(Py_ssize_t)
string_find_internal(PyStringObject * self,PyObject * args,int dir)1719 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1720 {
1721 PyObject *subobj;
1722 const char *sub;
1723 Py_ssize_t sub_len;
1724 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1725
1726 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1727 args, &subobj, &start, &end))
1728 return -2;
1729
1730 if (PyString_Check(subobj)) {
1731 sub = PyString_AS_STRING(subobj);
1732 sub_len = PyString_GET_SIZE(subobj);
1733 }
1734 #ifdef Py_USING_UNICODE
1735 else if (PyUnicode_Check(subobj))
1736 return PyUnicode_Find(
1737 (PyObject *)self, subobj, start, end, dir);
1738 #endif
1739 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1740 /* XXX - the "expected a character buffer object" is pretty
1741 confusing for a non-expert. remap to something else ? */
1742 return -2;
1743
1744 if (dir > 0)
1745 return stringlib_find_slice(
1746 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1747 sub, sub_len, start, end);
1748 else
1749 return stringlib_rfind_slice(
1750 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1751 sub, sub_len, start, end);
1752 }
1753
1754
1755 PyDoc_STRVAR(find__doc__,
1756 "S.find(sub [,start [,end]]) -> int\n\
1757 \n\
1758 Return the lowest index in S where substring sub is found,\n\
1759 such that sub is contained within S[start:end]. Optional\n\
1760 arguments start and end are interpreted as in slice notation.\n\
1761 \n\
1762 Return -1 on failure.");
1763
1764 static PyObject *
string_find(PyStringObject * self,PyObject * args)1765 string_find(PyStringObject *self, PyObject *args)
1766 {
1767 Py_ssize_t result = string_find_internal(self, args, +1);
1768 if (result == -2)
1769 return NULL;
1770 return PyInt_FromSsize_t(result);
1771 }
1772
1773
1774 PyDoc_STRVAR(index__doc__,
1775 "S.index(sub [,start [,end]]) -> int\n\
1776 \n\
1777 Like S.find() but raise ValueError when the substring is not found.");
1778
1779 static PyObject *
string_index(PyStringObject * self,PyObject * args)1780 string_index(PyStringObject *self, PyObject *args)
1781 {
1782 Py_ssize_t result = string_find_internal(self, args, +1);
1783 if (result == -2)
1784 return NULL;
1785 if (result == -1) {
1786 PyErr_SetString(PyExc_ValueError,
1787 "substring not found");
1788 return NULL;
1789 }
1790 return PyInt_FromSsize_t(result);
1791 }
1792
1793
1794 PyDoc_STRVAR(rfind__doc__,
1795 "S.rfind(sub [,start [,end]]) -> int\n\
1796 \n\
1797 Return the highest index in S where substring sub is found,\n\
1798 such that sub is contained within S[start:end]. Optional\n\
1799 arguments start and end are interpreted as in slice notation.\n\
1800 \n\
1801 Return -1 on failure.");
1802
1803 static PyObject *
string_rfind(PyStringObject * self,PyObject * args)1804 string_rfind(PyStringObject *self, PyObject *args)
1805 {
1806 Py_ssize_t result = string_find_internal(self, args, -1);
1807 if (result == -2)
1808 return NULL;
1809 return PyInt_FromSsize_t(result);
1810 }
1811
1812
1813 PyDoc_STRVAR(rindex__doc__,
1814 "S.rindex(sub [,start [,end]]) -> int\n\
1815 \n\
1816 Like S.rfind() but raise ValueError when the substring is not found.");
1817
1818 static PyObject *
string_rindex(PyStringObject * self,PyObject * args)1819 string_rindex(PyStringObject *self, PyObject *args)
1820 {
1821 Py_ssize_t result = string_find_internal(self, args, -1);
1822 if (result == -2)
1823 return NULL;
1824 if (result == -1) {
1825 PyErr_SetString(PyExc_ValueError,
1826 "substring not found");
1827 return NULL;
1828 }
1829 return PyInt_FromSsize_t(result);
1830 }
1831
1832
1833 Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject * self,int striptype,PyObject * sepobj)1834 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1835 {
1836 char *s = PyString_AS_STRING(self);
1837 Py_ssize_t len = PyString_GET_SIZE(self);
1838 char *sep = PyString_AS_STRING(sepobj);
1839 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1840 Py_ssize_t i, j;
1841
1842 i = 0;
1843 if (striptype != RIGHTSTRIP) {
1844 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1845 i++;
1846 }
1847 }
1848
1849 j = len;
1850 if (striptype != LEFTSTRIP) {
1851 do {
1852 j--;
1853 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1854 j++;
1855 }
1856
1857 if (i == 0 && j == len && PyString_CheckExact(self)) {
1858 Py_INCREF(self);
1859 return (PyObject*)self;
1860 }
1861 else
1862 return PyString_FromStringAndSize(s+i, j-i);
1863 }
1864
1865
1866 Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject * self,int striptype)1867 do_strip(PyStringObject *self, int striptype)
1868 {
1869 char *s = PyString_AS_STRING(self);
1870 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1871
1872 i = 0;
1873 if (striptype != RIGHTSTRIP) {
1874 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1875 i++;
1876 }
1877 }
1878
1879 j = len;
1880 if (striptype != LEFTSTRIP) {
1881 do {
1882 j--;
1883 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1884 j++;
1885 }
1886
1887 if (i == 0 && j == len && PyString_CheckExact(self)) {
1888 Py_INCREF(self);
1889 return (PyObject*)self;
1890 }
1891 else
1892 return PyString_FromStringAndSize(s+i, j-i);
1893 }
1894
1895
1896 Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject * self,int striptype,PyObject * args)1897 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1898 {
1899 PyObject *sep = NULL;
1900
1901 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1902 return NULL;
1903
1904 if (sep != NULL && sep != Py_None) {
1905 if (PyString_Check(sep))
1906 return do_xstrip(self, striptype, sep);
1907 #ifdef Py_USING_UNICODE
1908 else if (PyUnicode_Check(sep)) {
1909 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1910 PyObject *res;
1911 if (uniself==NULL)
1912 return NULL;
1913 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1914 striptype, sep);
1915 Py_DECREF(uniself);
1916 return res;
1917 }
1918 #endif
1919 PyErr_Format(PyExc_TypeError,
1920 #ifdef Py_USING_UNICODE
1921 "%s arg must be None, str or unicode",
1922 #else
1923 "%s arg must be None or str",
1924 #endif
1925 STRIPNAME(striptype));
1926 return NULL;
1927 }
1928
1929 return do_strip(self, striptype);
1930 }
1931
1932
1933 PyDoc_STRVAR(strip__doc__,
1934 "S.strip([chars]) -> string or unicode\n\
1935 \n\
1936 Return a copy of the string S with leading and trailing\n\
1937 whitespace removed.\n\
1938 If chars is given and not None, remove characters in chars instead.\n\
1939 If chars is unicode, S will be converted to unicode before stripping");
1940
1941 static PyObject *
string_strip(PyStringObject * self,PyObject * args)1942 string_strip(PyStringObject *self, PyObject *args)
1943 {
1944 if (PyTuple_GET_SIZE(args) == 0)
1945 return do_strip(self, BOTHSTRIP); /* Common case */
1946 else
1947 return do_argstrip(self, BOTHSTRIP, args);
1948 }
1949
1950
1951 PyDoc_STRVAR(lstrip__doc__,
1952 "S.lstrip([chars]) -> string or unicode\n\
1953 \n\
1954 Return a copy of the string S with leading whitespace removed.\n\
1955 If chars is given and not None, remove characters in chars instead.\n\
1956 If chars is unicode, S will be converted to unicode before stripping");
1957
1958 static PyObject *
string_lstrip(PyStringObject * self,PyObject * args)1959 string_lstrip(PyStringObject *self, PyObject *args)
1960 {
1961 if (PyTuple_GET_SIZE(args) == 0)
1962 return do_strip(self, LEFTSTRIP); /* Common case */
1963 else
1964 return do_argstrip(self, LEFTSTRIP, args);
1965 }
1966
1967
1968 PyDoc_STRVAR(rstrip__doc__,
1969 "S.rstrip([chars]) -> string or unicode\n\
1970 \n\
1971 Return a copy of the string S with trailing whitespace removed.\n\
1972 If chars is given and not None, remove characters in chars instead.\n\
1973 If chars is unicode, S will be converted to unicode before stripping");
1974
1975 static PyObject *
string_rstrip(PyStringObject * self,PyObject * args)1976 string_rstrip(PyStringObject *self, PyObject *args)
1977 {
1978 if (PyTuple_GET_SIZE(args) == 0)
1979 return do_strip(self, RIGHTSTRIP); /* Common case */
1980 else
1981 return do_argstrip(self, RIGHTSTRIP, args);
1982 }
1983
1984
1985 PyDoc_STRVAR(lower__doc__,
1986 "S.lower() -> string\n\
1987 \n\
1988 Return a copy of the string S converted to lowercase.");
1989
1990 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1991 #ifndef _tolower
1992 #define _tolower tolower
1993 #endif
1994
1995 static PyObject *
string_lower(PyStringObject * self)1996 string_lower(PyStringObject *self)
1997 {
1998 char *s;
1999 Py_ssize_t i, n = PyString_GET_SIZE(self);
2000 PyObject *newobj;
2001
2002 newobj = PyString_FromStringAndSize(NULL, n);
2003 if (!newobj)
2004 return NULL;
2005
2006 s = PyString_AS_STRING(newobj);
2007
2008 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2009
2010 for (i = 0; i < n; i++) {
2011 int c = Py_CHARMASK(s[i]);
2012 if (isupper(c))
2013 s[i] = _tolower(c);
2014 }
2015
2016 return newobj;
2017 }
2018
2019 PyDoc_STRVAR(upper__doc__,
2020 "S.upper() -> string\n\
2021 \n\
2022 Return a copy of the string S converted to uppercase.");
2023
2024 #ifndef _toupper
2025 #define _toupper toupper
2026 #endif
2027
2028 static PyObject *
string_upper(PyStringObject * self)2029 string_upper(PyStringObject *self)
2030 {
2031 char *s;
2032 Py_ssize_t i, n = PyString_GET_SIZE(self);
2033 PyObject *newobj;
2034
2035 newobj = PyString_FromStringAndSize(NULL, n);
2036 if (!newobj)
2037 return NULL;
2038
2039 s = PyString_AS_STRING(newobj);
2040
2041 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2042
2043 for (i = 0; i < n; i++) {
2044 int c = Py_CHARMASK(s[i]);
2045 if (islower(c))
2046 s[i] = _toupper(c);
2047 }
2048
2049 return newobj;
2050 }
2051
2052 PyDoc_STRVAR(title__doc__,
2053 "S.title() -> string\n\
2054 \n\
2055 Return a titlecased version of S, i.e. words start with uppercase\n\
2056 characters, all remaining cased characters have lowercase.");
2057
2058 static PyObject*
string_title(PyStringObject * self)2059 string_title(PyStringObject *self)
2060 {
2061 char *s = PyString_AS_STRING(self), *s_new;
2062 Py_ssize_t i, n = PyString_GET_SIZE(self);
2063 int previous_is_cased = 0;
2064 PyObject *newobj;
2065
2066 newobj = PyString_FromStringAndSize(NULL, n);
2067 if (newobj == NULL)
2068 return NULL;
2069 s_new = PyString_AsString(newobj);
2070 for (i = 0; i < n; i++) {
2071 int c = Py_CHARMASK(*s++);
2072 if (islower(c)) {
2073 if (!previous_is_cased)
2074 c = toupper(c);
2075 previous_is_cased = 1;
2076 } else if (isupper(c)) {
2077 if (previous_is_cased)
2078 c = tolower(c);
2079 previous_is_cased = 1;
2080 } else
2081 previous_is_cased = 0;
2082 *s_new++ = c;
2083 }
2084 return newobj;
2085 }
2086
2087 PyDoc_STRVAR(capitalize__doc__,
2088 "S.capitalize() -> string\n\
2089 \n\
2090 Return a copy of the string S with only its first character\n\
2091 capitalized.");
2092
2093 static PyObject *
string_capitalize(PyStringObject * self)2094 string_capitalize(PyStringObject *self)
2095 {
2096 char *s = PyString_AS_STRING(self), *s_new;
2097 Py_ssize_t i, n = PyString_GET_SIZE(self);
2098 PyObject *newobj;
2099
2100 newobj = PyString_FromStringAndSize(NULL, n);
2101 if (newobj == NULL)
2102 return NULL;
2103 s_new = PyString_AsString(newobj);
2104 if (0 < n) {
2105 int c = Py_CHARMASK(*s++);
2106 if (islower(c))
2107 *s_new = toupper(c);
2108 else
2109 *s_new = c;
2110 s_new++;
2111 }
2112 for (i = 1; i < n; i++) {
2113 int c = Py_CHARMASK(*s++);
2114 if (isupper(c))
2115 *s_new = tolower(c);
2116 else
2117 *s_new = c;
2118 s_new++;
2119 }
2120 return newobj;
2121 }
2122
2123
2124 PyDoc_STRVAR(count__doc__,
2125 "S.count(sub[, start[, end]]) -> int\n\
2126 \n\
2127 Return the number of non-overlapping occurrences of substring sub in\n\
2128 string S[start:end]. Optional arguments start and end are interpreted\n\
2129 as in slice notation.");
2130
2131 static PyObject *
string_count(PyStringObject * self,PyObject * args)2132 string_count(PyStringObject *self, PyObject *args)
2133 {
2134 PyObject *sub_obj;
2135 const char *str = PyString_AS_STRING(self), *sub;
2136 Py_ssize_t sub_len;
2137 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2138
2139 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2140 return NULL;
2141
2142 if (PyString_Check(sub_obj)) {
2143 sub = PyString_AS_STRING(sub_obj);
2144 sub_len = PyString_GET_SIZE(sub_obj);
2145 }
2146 #ifdef Py_USING_UNICODE
2147 else if (PyUnicode_Check(sub_obj)) {
2148 Py_ssize_t count;
2149 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2150 if (count == -1)
2151 return NULL;
2152 else
2153 return PyInt_FromSsize_t(count);
2154 }
2155 #endif
2156 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2157 return NULL;
2158
2159 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2160
2161 return PyInt_FromSsize_t(
2162 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2163 );
2164 }
2165
2166 PyDoc_STRVAR(swapcase__doc__,
2167 "S.swapcase() -> string\n\
2168 \n\
2169 Return a copy of the string S with uppercase characters\n\
2170 converted to lowercase and vice versa.");
2171
2172 static PyObject *
string_swapcase(PyStringObject * self)2173 string_swapcase(PyStringObject *self)
2174 {
2175 char *s = PyString_AS_STRING(self), *s_new;
2176 Py_ssize_t i, n = PyString_GET_SIZE(self);
2177 PyObject *newobj;
2178
2179 newobj = PyString_FromStringAndSize(NULL, n);
2180 if (newobj == NULL)
2181 return NULL;
2182 s_new = PyString_AsString(newobj);
2183 for (i = 0; i < n; i++) {
2184 int c = Py_CHARMASK(*s++);
2185 if (islower(c)) {
2186 *s_new = toupper(c);
2187 }
2188 else if (isupper(c)) {
2189 *s_new = tolower(c);
2190 }
2191 else
2192 *s_new = c;
2193 s_new++;
2194 }
2195 return newobj;
2196 }
2197
2198
2199 PyDoc_STRVAR(translate__doc__,
2200 "S.translate(table [,deletechars]) -> string\n\
2201 \n\
2202 Return a copy of the string S, where all characters occurring\n\
2203 in the optional argument deletechars are removed, and the\n\
2204 remaining characters have been mapped through the given\n\
2205 translation table, which must be a string of length 256 or None.\n\
2206 If the table argument is None, no translation is applied and\n\
2207 the operation simply removes the characters in deletechars.");
2208
2209 static PyObject *
string_translate(PyStringObject * self,PyObject * args)2210 string_translate(PyStringObject *self, PyObject *args)
2211 {
2212 register char *input, *output;
2213 const char *table;
2214 register Py_ssize_t i, c, changed = 0;
2215 PyObject *input_obj = (PyObject*)self;
2216 const char *output_start, *del_table=NULL;
2217 Py_ssize_t inlen, tablen, dellen = 0;
2218 PyObject *result;
2219 int trans_table[256];
2220 PyObject *tableobj, *delobj = NULL;
2221
2222 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2223 &tableobj, &delobj))
2224 return NULL;
2225
2226 if (PyString_Check(tableobj)) {
2227 table = PyString_AS_STRING(tableobj);
2228 tablen = PyString_GET_SIZE(tableobj);
2229 }
2230 else if (tableobj == Py_None) {
2231 table = NULL;
2232 tablen = 256;
2233 }
2234 #ifdef Py_USING_UNICODE
2235 else if (PyUnicode_Check(tableobj)) {
2236 /* Unicode .translate() does not support the deletechars
2237 parameter; instead a mapping to None will cause characters
2238 to be deleted. */
2239 if (delobj != NULL) {
2240 PyErr_SetString(PyExc_TypeError,
2241 "deletions are implemented differently for unicode");
2242 return NULL;
2243 }
2244 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2245 }
2246 #endif
2247 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2248 return NULL;
2249
2250 if (tablen != 256) {
2251 PyErr_SetString(PyExc_ValueError,
2252 "translation table must be 256 characters long");
2253 return NULL;
2254 }
2255
2256 if (delobj != NULL) {
2257 if (PyString_Check(delobj)) {
2258 del_table = PyString_AS_STRING(delobj);
2259 dellen = PyString_GET_SIZE(delobj);
2260 }
2261 #ifdef Py_USING_UNICODE
2262 else if (PyUnicode_Check(delobj)) {
2263 PyErr_SetString(PyExc_TypeError,
2264 "deletions are implemented differently for unicode");
2265 return NULL;
2266 }
2267 #endif
2268 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2269 return NULL;
2270 }
2271 else {
2272 del_table = NULL;
2273 dellen = 0;
2274 }
2275
2276 inlen = PyString_GET_SIZE(input_obj);
2277 result = PyString_FromStringAndSize((char *)NULL, inlen);
2278 if (result == NULL)
2279 return NULL;
2280 output_start = output = PyString_AsString(result);
2281 input = PyString_AS_STRING(input_obj);
2282
2283 if (dellen == 0 && table != NULL) {
2284 /* If no deletions are required, use faster code */
2285 for (i = inlen; --i >= 0; ) {
2286 c = Py_CHARMASK(*input++);
2287 if (Py_CHARMASK((*output++ = table[c])) != c)
2288 changed = 1;
2289 }
2290 if (changed || !PyString_CheckExact(input_obj))
2291 return result;
2292 Py_DECREF(result);
2293 Py_INCREF(input_obj);
2294 return input_obj;
2295 }
2296
2297 if (table == NULL) {
2298 for (i = 0; i < 256; i++)
2299 trans_table[i] = Py_CHARMASK(i);
2300 } else {
2301 for (i = 0; i < 256; i++)
2302 trans_table[i] = Py_CHARMASK(table[i]);
2303 }
2304
2305 for (i = 0; i < dellen; i++)
2306 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2307
2308 for (i = inlen; --i >= 0; ) {
2309 c = Py_CHARMASK(*input++);
2310 if (trans_table[c] != -1)
2311 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2312 continue;
2313 changed = 1;
2314 }
2315 if (!changed && PyString_CheckExact(input_obj)) {
2316 Py_DECREF(result);
2317 Py_INCREF(input_obj);
2318 return input_obj;
2319 }
2320 /* Fix the size of the resulting string */
2321 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2322 return NULL;
2323 return result;
2324 }
2325
2326
2327 /* find and count characters and substrings */
2328
/* Return a char * to the first byte equal to c within the first
   target_len bytes of target, or NULL when there is none (memchr). */
#define findchar(target, target_len, c)                                 \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2331
2332 /* String ops must return a string. */
2333 /* If the object is subclass of string, create a copy */
2334 Py_LOCAL(PyStringObject *)
return_self(PyStringObject * self)2335 return_self(PyStringObject *self)
2336 {
2337 if (PyString_CheckExact(self)) {
2338 Py_INCREF(self);
2339 return self;
2340 }
2341 return (PyStringObject *)PyString_FromStringAndSize(
2342 PyString_AS_STRING(self),
2343 PyString_GET_SIZE(self));
2344 }
2345
2346 Py_LOCAL_INLINE(Py_ssize_t)
countchar(const char * target,Py_ssize_t target_len,char c,Py_ssize_t maxcount)2347 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2348 {
2349 Py_ssize_t count=0;
2350 const char *start=target;
2351 const char *end=target+target_len;
2352
2353 while ( (start=findchar(start, end-start, c)) != NULL ) {
2354 count++;
2355 if (count >= maxcount)
2356 break;
2357 start += 1;
2358 }
2359 return count;
2360 }
2361
2362
2363 /* Algorithms for different cases of string replacement */
2364
2365 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2366 Py_LOCAL(PyStringObject *)
replace_interleave(PyStringObject * self,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2367 replace_interleave(PyStringObject *self,
2368 const char *to_s, Py_ssize_t to_len,
2369 Py_ssize_t maxcount)
2370 {
2371 char *self_s, *result_s;
2372 Py_ssize_t self_len, result_len;
2373 Py_ssize_t count, i;
2374 PyStringObject *result;
2375
2376 self_len = PyString_GET_SIZE(self);
2377
2378 /* 1 at the end plus 1 after every character;
2379 count = min(maxcount, self_len + 1) */
2380 if (maxcount <= self_len) {
2381 count = maxcount;
2382 }
2383 else {
2384 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2385 count = self_len + 1;
2386 }
2387
2388 /* Check for overflow */
2389 /* result_len = count * to_len + self_len; */
2390 assert(count > 0);
2391 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
2392 PyErr_SetString(PyExc_OverflowError,
2393 "replace string is too long");
2394 return NULL;
2395 }
2396 result_len = count * to_len + self_len;
2397 if (! (result = (PyStringObject *)
2398 PyString_FromStringAndSize(NULL, result_len)) )
2399 return NULL;
2400
2401 self_s = PyString_AS_STRING(self);
2402 result_s = PyString_AS_STRING(result);
2403
2404 /* TODO: special case single character, which doesn't need memcpy */
2405
2406 /* Lay the first one down (guaranteed this will occur) */
2407 Py_MEMCPY(result_s, to_s, to_len);
2408 result_s += to_len;
2409 count -= 1;
2410
2411 for (i=0; i<count; i++) {
2412 *result_s++ = *self_s++;
2413 Py_MEMCPY(result_s, to_s, to_len);
2414 result_s += to_len;
2415 }
2416
2417 /* Copy the rest of the original string */
2418 Py_MEMCPY(result_s, self_s, self_len-i);
2419
2420 return result;
2421 }
2422
2423 /* Special case for deleting a single character */
2424 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2425 Py_LOCAL(PyStringObject *)
replace_delete_single_character(PyStringObject * self,char from_c,Py_ssize_t maxcount)2426 replace_delete_single_character(PyStringObject *self,
2427 char from_c, Py_ssize_t maxcount)
2428 {
2429 char *self_s, *result_s;
2430 char *start, *next, *end;
2431 Py_ssize_t self_len, result_len;
2432 Py_ssize_t count;
2433 PyStringObject *result;
2434
2435 self_len = PyString_GET_SIZE(self);
2436 self_s = PyString_AS_STRING(self);
2437
2438 count = countchar(self_s, self_len, from_c, maxcount);
2439 if (count == 0) {
2440 return return_self(self);
2441 }
2442
2443 result_len = self_len - count; /* from_len == 1 */
2444 assert(result_len>=0);
2445
2446 if ( (result = (PyStringObject *)
2447 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2448 return NULL;
2449 result_s = PyString_AS_STRING(result);
2450
2451 start = self_s;
2452 end = self_s + self_len;
2453 while (count-- > 0) {
2454 next = findchar(start, end-start, from_c);
2455 if (next == NULL)
2456 break;
2457 Py_MEMCPY(result_s, start, next-start);
2458 result_s += (next-start);
2459 start = next+1;
2460 }
2461 Py_MEMCPY(result_s, start, end-start);
2462
2463 return result;
2464 }
2465
2466 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2467
2468 Py_LOCAL(PyStringObject *)
replace_delete_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,Py_ssize_t maxcount)2469 replace_delete_substring(PyStringObject *self,
2470 const char *from_s, Py_ssize_t from_len,
2471 Py_ssize_t maxcount) {
2472 char *self_s, *result_s;
2473 char *start, *next, *end;
2474 Py_ssize_t self_len, result_len;
2475 Py_ssize_t count, offset;
2476 PyStringObject *result;
2477
2478 self_len = PyString_GET_SIZE(self);
2479 self_s = PyString_AS_STRING(self);
2480
2481 count = stringlib_count(self_s, self_len,
2482 from_s, from_len,
2483 maxcount);
2484
2485 if (count == 0) {
2486 /* no matches */
2487 return return_self(self);
2488 }
2489
2490 result_len = self_len - (count * from_len);
2491 assert (result_len>=0);
2492
2493 if ( (result = (PyStringObject *)
2494 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2495 return NULL;
2496
2497 result_s = PyString_AS_STRING(result);
2498
2499 start = self_s;
2500 end = self_s + self_len;
2501 while (count-- > 0) {
2502 offset = stringlib_find(start, end-start,
2503 from_s, from_len,
2504 0);
2505 if (offset == -1)
2506 break;
2507 next = start + offset;
2508
2509 Py_MEMCPY(result_s, start, next-start);
2510
2511 result_s += (next-start);
2512 start = next+from_len;
2513 }
2514 Py_MEMCPY(result_s, start, end-start);
2515 return result;
2516 }
2517
2518 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2519 Py_LOCAL(PyStringObject *)
replace_single_character_in_place(PyStringObject * self,char from_c,char to_c,Py_ssize_t maxcount)2520 replace_single_character_in_place(PyStringObject *self,
2521 char from_c, char to_c,
2522 Py_ssize_t maxcount)
2523 {
2524 char *self_s, *result_s, *start, *end, *next;
2525 Py_ssize_t self_len;
2526 PyStringObject *result;
2527
2528 /* The result string will be the same size */
2529 self_s = PyString_AS_STRING(self);
2530 self_len = PyString_GET_SIZE(self);
2531
2532 next = findchar(self_s, self_len, from_c);
2533
2534 if (next == NULL) {
2535 /* No matches; return the original string */
2536 return return_self(self);
2537 }
2538
2539 /* Need to make a new string */
2540 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2541 if (result == NULL)
2542 return NULL;
2543 result_s = PyString_AS_STRING(result);
2544 Py_MEMCPY(result_s, self_s, self_len);
2545
2546 /* change everything in-place, starting with this one */
2547 start = result_s + (next-self_s);
2548 *start = to_c;
2549 start++;
2550 end = result_s + self_len;
2551
2552 while (--maxcount > 0) {
2553 next = findchar(start, end-start, from_c);
2554 if (next == NULL)
2555 break;
2556 *next = to_c;
2557 start = next+1;
2558 }
2559
2560 return result;
2561 }
2562
2563 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2564 Py_LOCAL(PyStringObject *)
replace_substring_in_place(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2565 replace_substring_in_place(PyStringObject *self,
2566 const char *from_s, Py_ssize_t from_len,
2567 const char *to_s, Py_ssize_t to_len,
2568 Py_ssize_t maxcount)
2569 {
2570 char *result_s, *start, *end;
2571 char *self_s;
2572 Py_ssize_t self_len, offset;
2573 PyStringObject *result;
2574
2575 /* The result string will be the same size */
2576
2577 self_s = PyString_AS_STRING(self);
2578 self_len = PyString_GET_SIZE(self);
2579
2580 offset = stringlib_find(self_s, self_len,
2581 from_s, from_len,
2582 0);
2583 if (offset == -1) {
2584 /* No matches; return the original string */
2585 return return_self(self);
2586 }
2587
2588 /* Need to make a new string */
2589 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2590 if (result == NULL)
2591 return NULL;
2592 result_s = PyString_AS_STRING(result);
2593 Py_MEMCPY(result_s, self_s, self_len);
2594
2595 /* change everything in-place, starting with this one */
2596 start = result_s + offset;
2597 Py_MEMCPY(start, to_s, from_len);
2598 start += from_len;
2599 end = result_s + self_len;
2600
2601 while ( --maxcount > 0) {
2602 offset = stringlib_find(start, end-start,
2603 from_s, from_len,
2604 0);
2605 if (offset==-1)
2606 break;
2607 Py_MEMCPY(start+offset, to_s, from_len);
2608 start += offset+from_len;
2609 }
2610
2611 return result;
2612 }
2613
2614 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2615 Py_LOCAL(PyStringObject *)
replace_single_character(PyStringObject * self,char from_c,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2616 replace_single_character(PyStringObject *self,
2617 char from_c,
2618 const char *to_s, Py_ssize_t to_len,
2619 Py_ssize_t maxcount)
2620 {
2621 char *self_s, *result_s;
2622 char *start, *next, *end;
2623 Py_ssize_t self_len, result_len;
2624 Py_ssize_t count;
2625 PyStringObject *result;
2626
2627 self_s = PyString_AS_STRING(self);
2628 self_len = PyString_GET_SIZE(self);
2629
2630 count = countchar(self_s, self_len, from_c, maxcount);
2631 if (count == 0) {
2632 /* no matches, return unchanged */
2633 return return_self(self);
2634 }
2635
2636 /* use the difference between current and new, hence the "-1" */
2637 /* result_len = self_len + count * (to_len-1) */
2638 assert(count > 0);
2639 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
2640 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2641 return NULL;
2642 }
2643 result_len = self_len + count * (to_len - 1);
2644
2645 if ( (result = (PyStringObject *)
2646 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2647 return NULL;
2648 result_s = PyString_AS_STRING(result);
2649
2650 start = self_s;
2651 end = self_s + self_len;
2652 while (count-- > 0) {
2653 next = findchar(start, end-start, from_c);
2654 if (next == NULL)
2655 break;
2656
2657 if (next == start) {
2658 /* replace with the 'to' */
2659 Py_MEMCPY(result_s, to_s, to_len);
2660 result_s += to_len;
2661 start += 1;
2662 } else {
2663 /* copy the unchanged old then the 'to' */
2664 Py_MEMCPY(result_s, start, next-start);
2665 result_s += (next-start);
2666 Py_MEMCPY(result_s, to_s, to_len);
2667 result_s += to_len;
2668 start = next+1;
2669 }
2670 }
2671 /* Copy the remainder of the remaining string */
2672 Py_MEMCPY(result_s, start, end-start);
2673
2674 return result;
2675 }
2676
2677 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2678 Py_LOCAL(PyStringObject *)
replace_substring(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2679 replace_substring(PyStringObject *self,
2680 const char *from_s, Py_ssize_t from_len,
2681 const char *to_s, Py_ssize_t to_len,
2682 Py_ssize_t maxcount) {
2683 char *self_s, *result_s;
2684 char *start, *next, *end;
2685 Py_ssize_t self_len, result_len;
2686 Py_ssize_t count, offset;
2687 PyStringObject *result;
2688
2689 self_s = PyString_AS_STRING(self);
2690 self_len = PyString_GET_SIZE(self);
2691
2692 count = stringlib_count(self_s, self_len,
2693 from_s, from_len,
2694 maxcount);
2695
2696 if (count == 0) {
2697 /* no matches, return unchanged */
2698 return return_self(self);
2699 }
2700
2701 /* Check for overflow */
2702 /* result_len = self_len + count * (to_len-from_len) */
2703 assert(count > 0);
2704 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
2705 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2706 return NULL;
2707 }
2708 result_len = self_len + count * (to_len - from_len);
2709
2710 if ( (result = (PyStringObject *)
2711 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2712 return NULL;
2713 result_s = PyString_AS_STRING(result);
2714
2715 start = self_s;
2716 end = self_s + self_len;
2717 while (count-- > 0) {
2718 offset = stringlib_find(start, end-start,
2719 from_s, from_len,
2720 0);
2721 if (offset == -1)
2722 break;
2723 next = start+offset;
2724 if (next == start) {
2725 /* replace with the 'to' */
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start += from_len;
2729 } else {
2730 /* copy the unchanged old then the 'to' */
2731 Py_MEMCPY(result_s, start, next-start);
2732 result_s += (next-start);
2733 Py_MEMCPY(result_s, to_s, to_len);
2734 result_s += to_len;
2735 start = next+from_len;
2736 }
2737 }
2738 /* Copy the remainder of the remaining string */
2739 Py_MEMCPY(result_s, start, end-start);
2740
2741 return result;
2742 }
2743
2744
2745 Py_LOCAL(PyStringObject *)
replace(PyStringObject * self,const char * from_s,Py_ssize_t from_len,const char * to_s,Py_ssize_t to_len,Py_ssize_t maxcount)2746 replace(PyStringObject *self,
2747 const char *from_s, Py_ssize_t from_len,
2748 const char *to_s, Py_ssize_t to_len,
2749 Py_ssize_t maxcount)
2750 {
2751 if (maxcount < 0) {
2752 maxcount = PY_SSIZE_T_MAX;
2753 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2754 /* nothing to do; return the original string */
2755 return return_self(self);
2756 }
2757
2758 if (maxcount == 0 ||
2759 (from_len == 0 && to_len == 0)) {
2760 /* nothing to do; return the original string */
2761 return return_self(self);
2762 }
2763
2764 /* Handle zero-length special cases */
2765
2766 if (from_len == 0) {
2767 /* insert the 'to' string everywhere. */
2768 /* >>> "Python".replace("", ".") */
2769 /* '.P.y.t.h.o.n.' */
2770 return replace_interleave(self, to_s, to_len, maxcount);
2771 }
2772
2773 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2774 /* point for an empty self string to generate a non-empty string */
2775 /* Special case so the remaining code always gets a non-empty string */
2776 if (PyString_GET_SIZE(self) == 0) {
2777 return return_self(self);
2778 }
2779
2780 if (to_len == 0) {
2781 /* delete all occurrences of 'from' string */
2782 if (from_len == 1) {
2783 return replace_delete_single_character(
2784 self, from_s[0], maxcount);
2785 } else {
2786 return replace_delete_substring(self, from_s, from_len, maxcount);
2787 }
2788 }
2789
2790 /* Handle special case where both strings have the same length */
2791
2792 if (from_len == to_len) {
2793 if (from_len == 1) {
2794 return replace_single_character_in_place(
2795 self,
2796 from_s[0],
2797 to_s[0],
2798 maxcount);
2799 } else {
2800 return replace_substring_in_place(
2801 self, from_s, from_len, to_s, to_len, maxcount);
2802 }
2803 }
2804
2805 /* Otherwise use the more generic algorithms */
2806 if (from_len == 1) {
2807 return replace_single_character(self, from_s[0],
2808 to_s, to_len, maxcount);
2809 } else {
2810 /* len('from')>=2, len('to')>=1 */
2811 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2812 }
2813 }
2814
2815 PyDoc_STRVAR(replace__doc__,
2816 "S.replace(old, new[, count]) -> string\n\
2817 \n\
2818 Return a copy of string S with all occurrences of substring\n\
2819 old replaced by new. If the optional argument count is\n\
2820 given, only the first count occurrences are replaced.");
2821
2822 static PyObject *
string_replace(PyStringObject * self,PyObject * args)2823 string_replace(PyStringObject *self, PyObject *args)
2824 {
2825 Py_ssize_t count = -1;
2826 PyObject *from, *to;
2827 const char *from_s, *to_s;
2828 Py_ssize_t from_len, to_len;
2829
2830 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2831 return NULL;
2832
2833 if (PyString_Check(from)) {
2834 from_s = PyString_AS_STRING(from);
2835 from_len = PyString_GET_SIZE(from);
2836 }
2837 #ifdef Py_USING_UNICODE
2838 if (PyUnicode_Check(from))
2839 return PyUnicode_Replace((PyObject *)self,
2840 from, to, count);
2841 #endif
2842 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2843 return NULL;
2844
2845 if (PyString_Check(to)) {
2846 to_s = PyString_AS_STRING(to);
2847 to_len = PyString_GET_SIZE(to);
2848 }
2849 #ifdef Py_USING_UNICODE
2850 else if (PyUnicode_Check(to))
2851 return PyUnicode_Replace((PyObject *)self,
2852 from, to, count);
2853 #endif
2854 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2855 return NULL;
2856
2857 return (PyObject *)replace((PyStringObject *) self,
2858 from_s, from_len,
2859 to_s, to_len, count);
2860 }
2861
2862 /** End DALKE **/
2863
2864 /* Matches the end (direction >= 0) or start (direction < 0) of self
2865 * against substr, using the start and end arguments. Returns
2866 * -1 on error, 0 if not found and 1 if found.
2867 */
2868 Py_LOCAL(int)
_string_tailmatch(PyStringObject * self,PyObject * substr,Py_ssize_t start,Py_ssize_t end,int direction)2869 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2870 Py_ssize_t end, int direction)
2871 {
2872 Py_ssize_t len = PyString_GET_SIZE(self);
2873 Py_ssize_t slen;
2874 const char* sub;
2875 const char* str;
2876
2877 if (PyString_Check(substr)) {
2878 sub = PyString_AS_STRING(substr);
2879 slen = PyString_GET_SIZE(substr);
2880 }
2881 #ifdef Py_USING_UNICODE
2882 else if (PyUnicode_Check(substr))
2883 return PyUnicode_Tailmatch((PyObject *)self,
2884 substr, start, end, direction);
2885 #endif
2886 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2887 return -1;
2888 str = PyString_AS_STRING(self);
2889
2890 ADJUST_INDICES(start, end, len);
2891
2892 if (direction < 0) {
2893 /* startswith */
2894 if (start+slen > len)
2895 return 0;
2896 } else {
2897 /* endswith */
2898 if (end-start < slen || start > len)
2899 return 0;
2900
2901 if (end-slen > start)
2902 start = end - slen;
2903 }
2904 if (end-start >= slen)
2905 return ! memcmp(str+start, sub, slen);
2906 return 0;
2907 }
2908
2909
2910 PyDoc_STRVAR(startswith__doc__,
2911 "S.startswith(prefix[, start[, end]]) -> bool\n\
2912 \n\
2913 Return True if S starts with the specified prefix, False otherwise.\n\
2914 With optional start, test S beginning at that position.\n\
2915 With optional end, stop comparing S at that position.\n\
2916 prefix can also be a tuple of strings to try.");
2917
2918 static PyObject *
string_startswith(PyStringObject * self,PyObject * args)2919 string_startswith(PyStringObject *self, PyObject *args)
2920 {
2921 Py_ssize_t start = 0;
2922 Py_ssize_t end = PY_SSIZE_T_MAX;
2923 PyObject *subobj;
2924 int result;
2925
2926 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2927 return NULL;
2928 if (PyTuple_Check(subobj)) {
2929 Py_ssize_t i;
2930 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2931 result = _string_tailmatch(self,
2932 PyTuple_GET_ITEM(subobj, i),
2933 start, end, -1);
2934 if (result == -1)
2935 return NULL;
2936 else if (result) {
2937 Py_RETURN_TRUE;
2938 }
2939 }
2940 Py_RETURN_FALSE;
2941 }
2942 result = _string_tailmatch(self, subobj, start, end, -1);
2943 if (result == -1) {
2944 if (PyErr_ExceptionMatches(PyExc_TypeError))
2945 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2946 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2947 return NULL;
2948 }
2949 else
2950 return PyBool_FromLong(result);
2951 }
2952
2953
2954 PyDoc_STRVAR(endswith__doc__,
2955 "S.endswith(suffix[, start[, end]]) -> bool\n\
2956 \n\
2957 Return True if S ends with the specified suffix, False otherwise.\n\
2958 With optional start, test S beginning at that position.\n\
2959 With optional end, stop comparing S at that position.\n\
2960 suffix can also be a tuple of strings to try.");
2961
2962 static PyObject *
string_endswith(PyStringObject * self,PyObject * args)2963 string_endswith(PyStringObject *self, PyObject *args)
2964 {
2965 Py_ssize_t start = 0;
2966 Py_ssize_t end = PY_SSIZE_T_MAX;
2967 PyObject *subobj;
2968 int result;
2969
2970 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2971 return NULL;
2972 if (PyTuple_Check(subobj)) {
2973 Py_ssize_t i;
2974 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2975 result = _string_tailmatch(self,
2976 PyTuple_GET_ITEM(subobj, i),
2977 start, end, +1);
2978 if (result == -1)
2979 return NULL;
2980 else if (result) {
2981 Py_RETURN_TRUE;
2982 }
2983 }
2984 Py_RETURN_FALSE;
2985 }
2986 result = _string_tailmatch(self, subobj, start, end, +1);
2987 if (result == -1) {
2988 if (PyErr_ExceptionMatches(PyExc_TypeError))
2989 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2990 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2991 return NULL;
2992 }
2993 else
2994 return PyBool_FromLong(result);
2995 }
2996
2997
2998 PyDoc_STRVAR(encode__doc__,
2999 "S.encode([encoding[,errors]]) -> object\n\
3000 \n\
3001 Encodes S using the codec registered for encoding. encoding defaults\n\
3002 to the default encoding. errors may be given to set a different error\n\
3003 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3004 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3005 'xmlcharrefreplace' as well as any other name registered with\n\
3006 codecs.register_error that is able to handle UnicodeEncodeErrors.");
3007
3008 static PyObject *
string_encode(PyStringObject * self,PyObject * args,PyObject * kwargs)3009 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3010 {
3011 static char *kwlist[] = {"encoding", "errors", 0};
3012 char *encoding = NULL;
3013 char *errors = NULL;
3014 PyObject *v;
3015
3016 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3017 kwlist, &encoding, &errors))
3018 return NULL;
3019 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3020 if (v == NULL)
3021 goto onError;
3022 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3023 PyErr_Format(PyExc_TypeError,
3024 "encoder did not return a string/unicode object "
3025 "(type=%.400s)",
3026 Py_TYPE(v)->tp_name);
3027 Py_DECREF(v);
3028 return NULL;
3029 }
3030 return v;
3031
3032 onError:
3033 return NULL;
3034 }
3035
3036
3037 PyDoc_STRVAR(decode__doc__,
3038 "S.decode([encoding[,errors]]) -> object\n\
3039 \n\
3040 Decodes S using the codec registered for encoding. encoding defaults\n\
3041 to the default encoding. errors may be given to set a different error\n\
3042 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3043 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3044 as well as any other name registered with codecs.register_error that is\n\
3045 able to handle UnicodeDecodeErrors.");
3046
3047 static PyObject *
string_decode(PyStringObject * self,PyObject * args,PyObject * kwargs)3048 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3049 {
3050 static char *kwlist[] = {"encoding", "errors", 0};
3051 char *encoding = NULL;
3052 char *errors = NULL;
3053 PyObject *v;
3054
3055 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3056 kwlist, &encoding, &errors))
3057 return NULL;
3058 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3059 if (v == NULL)
3060 goto onError;
3061 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3062 PyErr_Format(PyExc_TypeError,
3063 "decoder did not return a string/unicode object "
3064 "(type=%.400s)",
3065 Py_TYPE(v)->tp_name);
3066 Py_DECREF(v);
3067 return NULL;
3068 }
3069 return v;
3070
3071 onError:
3072 return NULL;
3073 }
3074
3075
3076 PyDoc_STRVAR(expandtabs__doc__,
3077 "S.expandtabs([tabsize]) -> string\n\
3078 \n\
3079 Return a copy of S where all tab characters are expanded using spaces.\n\
3080 If tabsize is not given, a tab size of 8 characters is assumed.");
3081
3082 static PyObject*
string_expandtabs(PyStringObject * self,PyObject * args)3083 string_expandtabs(PyStringObject *self, PyObject *args)
3084 {
3085 const char *e, *p, *qe;
3086 char *q;
3087 Py_ssize_t i, j, incr;
3088 PyObject *u;
3089 int tabsize = 8;
3090
3091 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3092 return NULL;
3093
3094 /* First pass: determine size of output string */
3095 i = 0; /* chars up to and including most recent \n or \r */
3096 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3097 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3098 for (p = PyString_AS_STRING(self); p < e; p++) {
3099 if (*p == '\t') {
3100 if (tabsize > 0) {
3101 incr = tabsize - (j % tabsize);
3102 if (j > PY_SSIZE_T_MAX - incr)
3103 goto overflow1;
3104 j += incr;
3105 }
3106 }
3107 else {
3108 if (j > PY_SSIZE_T_MAX - 1)
3109 goto overflow1;
3110 j++;
3111 if (*p == '\n' || *p == '\r') {
3112 if (i > PY_SSIZE_T_MAX - j)
3113 goto overflow1;
3114 i += j;
3115 j = 0;
3116 }
3117 }
3118 }
3119
3120 if (i > PY_SSIZE_T_MAX - j)
3121 goto overflow1;
3122
3123 /* Second pass: create output string and fill it */
3124 u = PyString_FromStringAndSize(NULL, i + j);
3125 if (!u)
3126 return NULL;
3127
3128 j = 0; /* same as in first pass */
3129 q = PyString_AS_STRING(u); /* next output char */
3130 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3131
3132 for (p = PyString_AS_STRING(self); p < e; p++) {
3133 if (*p == '\t') {
3134 if (tabsize > 0) {
3135 i = tabsize - (j % tabsize);
3136 j += i;
3137 while (i--) {
3138 if (q >= qe)
3139 goto overflow2;
3140 *q++ = ' ';
3141 }
3142 }
3143 }
3144 else {
3145 if (q >= qe)
3146 goto overflow2;
3147 *q++ = *p;
3148 j++;
3149 if (*p == '\n' || *p == '\r')
3150 j = 0;
3151 }
3152 }
3153
3154 return u;
3155
3156 overflow2:
3157 Py_DECREF(u);
3158 overflow1:
3159 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3160 return NULL;
3161 }
3162
3163 Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject * self,Py_ssize_t left,Py_ssize_t right,char fill)3164 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3165 {
3166 PyObject *u;
3167
3168 if (left < 0)
3169 left = 0;
3170 if (right < 0)
3171 right = 0;
3172
3173 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3174 Py_INCREF(self);
3175 return (PyObject *)self;
3176 }
3177
3178 u = PyString_FromStringAndSize(NULL,
3179 left + PyString_GET_SIZE(self) + right);
3180 if (u) {
3181 if (left)
3182 memset(PyString_AS_STRING(u), fill, left);
3183 Py_MEMCPY(PyString_AS_STRING(u) + left,
3184 PyString_AS_STRING(self),
3185 PyString_GET_SIZE(self));
3186 if (right)
3187 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3188 fill, right);
3189 }
3190
3191 return u;
3192 }
3193
3194 PyDoc_STRVAR(ljust__doc__,
3195 "S.ljust(width[, fillchar]) -> string\n"
3196 "\n"
3197 "Return S left-justified in a string of length width. Padding is\n"
3198 "done using the specified fill character (default is a space).");
3199
3200 static PyObject *
string_ljust(PyStringObject * self,PyObject * args)3201 string_ljust(PyStringObject *self, PyObject *args)
3202 {
3203 Py_ssize_t width;
3204 char fillchar = ' ';
3205
3206 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3207 return NULL;
3208
3209 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3210 Py_INCREF(self);
3211 return (PyObject*) self;
3212 }
3213
3214 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3215 }
3216
3217
3218 PyDoc_STRVAR(rjust__doc__,
3219 "S.rjust(width[, fillchar]) -> string\n"
3220 "\n"
3221 "Return S right-justified in a string of length width. Padding is\n"
3222 "done using the specified fill character (default is a space)");
3223
3224 static PyObject *
string_rjust(PyStringObject * self,PyObject * args)3225 string_rjust(PyStringObject *self, PyObject *args)
3226 {
3227 Py_ssize_t width;
3228 char fillchar = ' ';
3229
3230 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3231 return NULL;
3232
3233 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3234 Py_INCREF(self);
3235 return (PyObject*) self;
3236 }
3237
3238 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3239 }
3240
3241
3242 PyDoc_STRVAR(center__doc__,
3243 "S.center(width[, fillchar]) -> string\n"
3244 "\n"
3245 "Return S centered in a string of length width. Padding is\n"
3246 "done using the specified fill character (default is a space)");
3247
3248 static PyObject *
string_center(PyStringObject * self,PyObject * args)3249 string_center(PyStringObject *self, PyObject *args)
3250 {
3251 Py_ssize_t marg, left;
3252 Py_ssize_t width;
3253 char fillchar = ' ';
3254
3255 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3256 return NULL;
3257
3258 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3259 Py_INCREF(self);
3260 return (PyObject*) self;
3261 }
3262
3263 marg = width - PyString_GET_SIZE(self);
3264 left = marg / 2 + (marg & width & 1);
3265
3266 return pad(self, left, marg - left, fillchar);
3267 }
3268
3269 PyDoc_STRVAR(zfill__doc__,
3270 "S.zfill(width) -> string\n"
3271 "\n"
3272 "Pad a numeric string S with zeros on the left, to fill a field\n"
3273 "of the specified width. The string S is never truncated.");
3274
3275 static PyObject *
string_zfill(PyStringObject * self,PyObject * args)3276 string_zfill(PyStringObject *self, PyObject *args)
3277 {
3278 Py_ssize_t fill;
3279 PyObject *s;
3280 char *p;
3281 Py_ssize_t width;
3282
3283 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3284 return NULL;
3285
3286 if (PyString_GET_SIZE(self) >= width) {
3287 if (PyString_CheckExact(self)) {
3288 Py_INCREF(self);
3289 return (PyObject*) self;
3290 }
3291 else
3292 return PyString_FromStringAndSize(
3293 PyString_AS_STRING(self),
3294 PyString_GET_SIZE(self)
3295 );
3296 }
3297
3298 fill = width - PyString_GET_SIZE(self);
3299
3300 s = pad(self, fill, 0, '0');
3301
3302 if (s == NULL)
3303 return NULL;
3304
3305 p = PyString_AS_STRING(s);
3306 if (p[fill] == '+' || p[fill] == '-') {
3307 /* move sign to beginning of string */
3308 p[0] = p[fill];
3309 p[fill] = '0';
3310 }
3311
3312 return (PyObject*) s;
3313 }
3314
3315 PyDoc_STRVAR(isspace__doc__,
3316 "S.isspace() -> bool\n\
3317 \n\
3318 Return True if all characters in S are whitespace\n\
3319 and there is at least one character in S, False otherwise.");
3320
3321 static PyObject*
string_isspace(PyStringObject * self)3322 string_isspace(PyStringObject *self)
3323 {
3324 register const unsigned char *p
3325 = (unsigned char *) PyString_AS_STRING(self);
3326 register const unsigned char *e;
3327
3328 /* Shortcut for single character strings */
3329 if (PyString_GET_SIZE(self) == 1 &&
3330 isspace(*p))
3331 return PyBool_FromLong(1);
3332
3333 /* Special case for empty strings */
3334 if (PyString_GET_SIZE(self) == 0)
3335 return PyBool_FromLong(0);
3336
3337 e = p + PyString_GET_SIZE(self);
3338 for (; p < e; p++) {
3339 if (!isspace(*p))
3340 return PyBool_FromLong(0);
3341 }
3342 return PyBool_FromLong(1);
3343 }
3344
3345
3346 PyDoc_STRVAR(isalpha__doc__,
3347 "S.isalpha() -> bool\n\
3348 \n\
3349 Return True if all characters in S are alphabetic\n\
3350 and there is at least one character in S, False otherwise.");
3351
3352 static PyObject*
string_isalpha(PyStringObject * self)3353 string_isalpha(PyStringObject *self)
3354 {
3355 register const unsigned char *p
3356 = (unsigned char *) PyString_AS_STRING(self);
3357 register const unsigned char *e;
3358
3359 /* Shortcut for single character strings */
3360 if (PyString_GET_SIZE(self) == 1 &&
3361 isalpha(*p))
3362 return PyBool_FromLong(1);
3363
3364 /* Special case for empty strings */
3365 if (PyString_GET_SIZE(self) == 0)
3366 return PyBool_FromLong(0);
3367
3368 e = p + PyString_GET_SIZE(self);
3369 for (; p < e; p++) {
3370 if (!isalpha(*p))
3371 return PyBool_FromLong(0);
3372 }
3373 return PyBool_FromLong(1);
3374 }
3375
3376
3377 PyDoc_STRVAR(isalnum__doc__,
3378 "S.isalnum() -> bool\n\
3379 \n\
3380 Return True if all characters in S are alphanumeric\n\
3381 and there is at least one character in S, False otherwise.");
3382
3383 static PyObject*
string_isalnum(PyStringObject * self)3384 string_isalnum(PyStringObject *self)
3385 {
3386 register const unsigned char *p
3387 = (unsigned char *) PyString_AS_STRING(self);
3388 register const unsigned char *e;
3389
3390 /* Shortcut for single character strings */
3391 if (PyString_GET_SIZE(self) == 1 &&
3392 isalnum(*p))
3393 return PyBool_FromLong(1);
3394
3395 /* Special case for empty strings */
3396 if (PyString_GET_SIZE(self) == 0)
3397 return PyBool_FromLong(0);
3398
3399 e = p + PyString_GET_SIZE(self);
3400 for (; p < e; p++) {
3401 if (!isalnum(*p))
3402 return PyBool_FromLong(0);
3403 }
3404 return PyBool_FromLong(1);
3405 }
3406
3407
3408 PyDoc_STRVAR(isdigit__doc__,
3409 "S.isdigit() -> bool\n\
3410 \n\
3411 Return True if all characters in S are digits\n\
3412 and there is at least one character in S, False otherwise.");
3413
3414 static PyObject*
string_isdigit(PyStringObject * self)3415 string_isdigit(PyStringObject *self)
3416 {
3417 register const unsigned char *p
3418 = (unsigned char *) PyString_AS_STRING(self);
3419 register const unsigned char *e;
3420
3421 /* Shortcut for single character strings */
3422 if (PyString_GET_SIZE(self) == 1 &&
3423 isdigit(*p))
3424 return PyBool_FromLong(1);
3425
3426 /* Special case for empty strings */
3427 if (PyString_GET_SIZE(self) == 0)
3428 return PyBool_FromLong(0);
3429
3430 e = p + PyString_GET_SIZE(self);
3431 for (; p < e; p++) {
3432 if (!isdigit(*p))
3433 return PyBool_FromLong(0);
3434 }
3435 return PyBool_FromLong(1);
3436 }
3437
3438
3439 PyDoc_STRVAR(islower__doc__,
3440 "S.islower() -> bool\n\
3441 \n\
3442 Return True if all cased characters in S are lowercase and there is\n\
3443 at least one cased character in S, False otherwise.");
3444
3445 static PyObject*
string_islower(PyStringObject * self)3446 string_islower(PyStringObject *self)
3447 {
3448 register const unsigned char *p
3449 = (unsigned char *) PyString_AS_STRING(self);
3450 register const unsigned char *e;
3451 int cased;
3452
3453 /* Shortcut for single character strings */
3454 if (PyString_GET_SIZE(self) == 1)
3455 return PyBool_FromLong(islower(*p) != 0);
3456
3457 /* Special case for empty strings */
3458 if (PyString_GET_SIZE(self) == 0)
3459 return PyBool_FromLong(0);
3460
3461 e = p + PyString_GET_SIZE(self);
3462 cased = 0;
3463 for (; p < e; p++) {
3464 if (isupper(*p))
3465 return PyBool_FromLong(0);
3466 else if (!cased && islower(*p))
3467 cased = 1;
3468 }
3469 return PyBool_FromLong(cased);
3470 }
3471
3472
3473 PyDoc_STRVAR(isupper__doc__,
3474 "S.isupper() -> bool\n\
3475 \n\
3476 Return True if all cased characters in S are uppercase and there is\n\
3477 at least one cased character in S, False otherwise.");
3478
3479 static PyObject*
string_isupper(PyStringObject * self)3480 string_isupper(PyStringObject *self)
3481 {
3482 register const unsigned char *p
3483 = (unsigned char *) PyString_AS_STRING(self);
3484 register const unsigned char *e;
3485 int cased;
3486
3487 /* Shortcut for single character strings */
3488 if (PyString_GET_SIZE(self) == 1)
3489 return PyBool_FromLong(isupper(*p) != 0);
3490
3491 /* Special case for empty strings */
3492 if (PyString_GET_SIZE(self) == 0)
3493 return PyBool_FromLong(0);
3494
3495 e = p + PyString_GET_SIZE(self);
3496 cased = 0;
3497 for (; p < e; p++) {
3498 if (islower(*p))
3499 return PyBool_FromLong(0);
3500 else if (!cased && isupper(*p))
3501 cased = 1;
3502 }
3503 return PyBool_FromLong(cased);
3504 }
3505
3506
3507 PyDoc_STRVAR(istitle__doc__,
3508 "S.istitle() -> bool\n\
3509 \n\
3510 Return True if S is a titlecased string and there is at least one\n\
3511 character in S, i.e. uppercase characters may only follow uncased\n\
3512 characters and lowercase characters only cased ones. Return False\n\
3513 otherwise.");
3514
3515 static PyObject*
string_istitle(PyStringObject * self,PyObject * uncased)3516 string_istitle(PyStringObject *self, PyObject *uncased)
3517 {
3518 register const unsigned char *p
3519 = (unsigned char *) PyString_AS_STRING(self);
3520 register const unsigned char *e;
3521 int cased, previous_is_cased;
3522
3523 /* Shortcut for single character strings */
3524 if (PyString_GET_SIZE(self) == 1)
3525 return PyBool_FromLong(isupper(*p) != 0);
3526
3527 /* Special case for empty strings */
3528 if (PyString_GET_SIZE(self) == 0)
3529 return PyBool_FromLong(0);
3530
3531 e = p + PyString_GET_SIZE(self);
3532 cased = 0;
3533 previous_is_cased = 0;
3534 for (; p < e; p++) {
3535 register const unsigned char ch = *p;
3536
3537 if (isupper(ch)) {
3538 if (previous_is_cased)
3539 return PyBool_FromLong(0);
3540 previous_is_cased = 1;
3541 cased = 1;
3542 }
3543 else if (islower(ch)) {
3544 if (!previous_is_cased)
3545 return PyBool_FromLong(0);
3546 previous_is_cased = 1;
3547 cased = 1;
3548 }
3549 else
3550 previous_is_cased = 0;
3551 }
3552 return PyBool_FromLong(cased);
3553 }
3554
3555
3556 PyDoc_STRVAR(splitlines__doc__,
3557 "S.splitlines(keepends=False) -> list of strings\n\
3558 \n\
3559 Return a list of the lines in S, breaking at line boundaries.\n\
3560 Line breaks are not included in the resulting list unless keepends\n\
3561 is given and true.");
3562
3563 static PyObject*
string_splitlines(PyStringObject * self,PyObject * args)3564 string_splitlines(PyStringObject *self, PyObject *args)
3565 {
3566 int keepends = 0;
3567
3568 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3569 return NULL;
3570
3571 return stringlib_splitlines(
3572 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3573 keepends
3574 );
3575 }
3576
3577 PyDoc_STRVAR(sizeof__doc__,
3578 "S.__sizeof__() -> size of S in memory, in bytes");
3579
3580 static PyObject *
string_sizeof(PyStringObject * v)3581 string_sizeof(PyStringObject *v)
3582 {
3583 Py_ssize_t res;
3584 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3585 return PyInt_FromSsize_t(res);
3586 }
3587
3588 static PyObject *
string_getnewargs(PyStringObject * v)3589 string_getnewargs(PyStringObject *v)
3590 {
3591 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3592 }
3593
3594
3595 #include "stringlib/string_format.h"
3596
3597 PyDoc_STRVAR(format__doc__,
3598 "S.format(*args, **kwargs) -> string\n\
3599 \n\
3600 Return a formatted version of S, using substitutions from args and kwargs.\n\
3601 The substitutions are identified by braces ('{' and '}').");
3602
3603 static PyObject *
string__format__(PyObject * self,PyObject * args)3604 string__format__(PyObject* self, PyObject* args)
3605 {
3606 PyObject *format_spec;
3607 PyObject *result = NULL;
3608 PyObject *tmp = NULL;
3609
3610 /* If 2.x, convert format_spec to the same type as value */
3611 /* This is to allow things like u''.format('') */
3612 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3613 goto done;
3614 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3615 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3616 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3617 goto done;
3618 }
3619 tmp = PyObject_Str(format_spec);
3620 if (tmp == NULL)
3621 goto done;
3622 format_spec = tmp;
3623
3624 result = _PyBytes_FormatAdvanced(self,
3625 PyString_AS_STRING(format_spec),
3626 PyString_GET_SIZE(format_spec));
3627 done:
3628 Py_XDECREF(tmp);
3629 return result;
3630 }
3631
3632 PyDoc_STRVAR(p_format__doc__,
3633 "S.__format__(format_spec) -> string\n\
3634 \n\
3635 Return a formatted version of S as described by format_spec.");
3636
3637
/* Method table for the str type (installed as PyString_Type.tp_methods).
 * Each entry is {name, C function, calling-convention flags, docstring}.
 * Entries written with only three fields leave the docstring pointer
 * zero-initialized.  The table ends with an all-NULL sentinel entry.
 */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support; the two _formatter_* helpers carry
       no docstring. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
3696
3697 static PyObject *
3698 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3699
3700 static PyObject *
string_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3701 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3702 {
3703 PyObject *x = NULL;
3704 static char *kwlist[] = {"object", 0};
3705
3706 if (type != &PyString_Type)
3707 return str_subtype_new(type, args, kwds);
3708 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3709 return NULL;
3710 if (x == NULL)
3711 return PyString_FromString("");
3712 return PyObject_Str(x);
3713 }
3714
3715 static PyObject *
str_subtype_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3716 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3717 {
3718 PyObject *tmp, *pnew;
3719 Py_ssize_t n;
3720
3721 assert(PyType_IsSubtype(type, &PyString_Type));
3722 tmp = string_new(&PyString_Type, args, kwds);
3723 if (tmp == NULL)
3724 return NULL;
3725 assert(PyString_Check(tmp));
3726 n = PyString_GET_SIZE(tmp);
3727 pnew = type->tp_alloc(type, n);
3728 if (pnew != NULL) {
3729 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3730 ((PyStringObject *)pnew)->ob_shash =
3731 ((PyStringObject *)tmp)->ob_shash;
3732 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3733 }
3734 Py_DECREF(tmp);
3735 return pnew;
3736 }
3737
3738 static PyObject *
basestring_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3739 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3740 {
3741 PyErr_SetString(PyExc_TypeError,
3742 "The basestring type cannot be instantiated");
3743 return NULL;
3744 }
3745
3746 static PyObject *
string_mod(PyObject * v,PyObject * w)3747 string_mod(PyObject *v, PyObject *w)
3748 {
3749 if (!PyString_Check(v)) {
3750 Py_INCREF(Py_NotImplemented);
3751 return Py_NotImplemented;
3752 }
3753 return PyString_Format(v, w);
3754 }
3755
/* Docstring for the basestring type object. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Number protocol table for str.  Only nb_remainder is filled in (with
   string_mod); the slots not listed are left zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3766
3767
/* Type object for basestring.  It defines no behaviour of its own: every
   slot is empty except the flags, the docstring, the base type
   (PyBaseObject_Type) and tp_new, which is basestring_new. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3809
/* Docstring for the str type object. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  Instances are variable-sized: the basic size is
   PyStringObject_SIZE and each item is one char.  The type derives from
   PyBaseString_Type, is created through string_new and freed with
   PyObject_Del. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3859
3860 void
PyString_Concat(register PyObject ** pv,register PyObject * w)3861 PyString_Concat(register PyObject **pv, register PyObject *w)
3862 {
3863 register PyObject *v;
3864 if (*pv == NULL)
3865 return;
3866 if (w == NULL || !PyString_Check(*pv)) {
3867 Py_CLEAR(*pv);
3868 return;
3869 }
3870 v = string_concat((PyStringObject *) *pv, w);
3871 Py_SETREF(*pv, v);
3872 }
3873
3874 void
PyString_ConcatAndDel(register PyObject ** pv,register PyObject * w)3875 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3876 {
3877 PyString_Concat(pv, w);
3878 Py_XDECREF(w);
3879 }
3880
3881
3882 /* The following function breaks the notion that strings are immutable:
3883 it changes the size of a string. We get away with this only if there
3884 is only one module referencing the object. You can also think of it
3885 as creating a new string object and destroying the old one, only
3886 more efficiently. In any case, don't use this if the string may
3887 already be known to some other part of the code...
3888 Note that if there's not enough memory to resize the string, the original
3889 string object at *pv is deallocated, *pv is set to NULL, an "out of
3890 memory" exception is set, and -1 is returned. Else (on success) 0 is
3891 returned, and the value in *pv may or may not be the same as on input.
3892 As always, an extra byte is allocated for a trailing \0 byte (newsize
3893 does *not* include that), and a trailing \0 byte is stored.
3894 */
3895
3896 int
_PyString_Resize(PyObject ** pv,Py_ssize_t newsize)3897 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3898 {
3899 register PyObject *v;
3900 register PyStringObject *sv;
3901 v = *pv;
3902 if (!PyString_Check(v) || newsize < 0) {
3903 *pv = 0;
3904 Py_DECREF(v);
3905 PyErr_BadInternalCall();
3906 return -1;
3907 }
3908 if (Py_SIZE(v) == 0) {
3909 if (newsize == 0) {
3910 return 0;
3911 }
3912 *pv = PyString_FromStringAndSize(NULL, newsize);
3913 Py_DECREF(v);
3914 return (*pv == NULL) ? -1 : 0;
3915 }
3916 if (Py_REFCNT(v) != 1 || PyString_CHECK_INTERNED(v)) {
3917 *pv = 0;
3918 Py_DECREF(v);
3919 PyErr_BadInternalCall();
3920 return -1;
3921 }
3922 if (newsize == 0) {
3923 *pv = PyString_FromStringAndSize(NULL, 0);
3924 Py_DECREF(v);
3925 return (*pv == NULL) ? -1 : 0;
3926 }
3927 /* XXX UNREF/NEWREF interface should be more symmetrical */
3928 _Py_DEC_REFTOTAL;
3929 _Py_ForgetReference(v);
3930 *pv = (PyObject *)
3931 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3932 if (*pv == NULL) {
3933 PyObject_Del(v);
3934 PyErr_NoMemory();
3935 return -1;
3936 }
3937 _Py_NewReference(*pv);
3938 sv = (PyStringObject *) *pv;
3939 Py_SIZE(sv) = newsize;
3940 sv->ob_sval[newsize] = '\0';
3941 sv->ob_shash = -1; /* invalidate cached hash value */
3942 return 0;
3943 }
3944
3945 /* Helpers for formatstring */
3946
3947 Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject * args,Py_ssize_t arglen,Py_ssize_t * p_argidx)3948 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3949 {
3950 Py_ssize_t argidx = *p_argidx;
3951 if (argidx < arglen) {
3952 (*p_argidx)++;
3953 if (arglen < 0)
3954 return args;
3955 else
3956 return PyTuple_GetItem(args, argidx);
3957 }
3958 PyErr_SetString(PyExc_TypeError,
3959 "not enough arguments for format string");
3960 return NULL;
3961 }
3962
/* Conversion flags for %-formatting, one bit per flag character:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3975
3976 /* Returns a new reference to a PyString object, or NULL on failure. */
3977
3978 static PyObject *
formatfloat(PyObject * v,int flags,int prec,int type)3979 formatfloat(PyObject *v, int flags, int prec, int type)
3980 {
3981 char *p;
3982 PyObject *result;
3983 double x;
3984
3985 x = PyFloat_AsDouble(v);
3986 if (x == -1.0 && PyErr_Occurred()) {
3987 PyErr_Format(PyExc_TypeError, "float argument required, "
3988 "not %.200s", Py_TYPE(v)->tp_name);
3989 return NULL;
3990 }
3991
3992 if (prec < 0)
3993 prec = 6;
3994
3995 p = PyOS_double_to_string(x, type, prec,
3996 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3997
3998 if (p == NULL)
3999 return NULL;
4000 result = PyString_FromStringAndSize(p, strlen(p));
4001 PyMem_Free(p);
4002 return result;
4003 }
4004
4005 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4006 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4007 * Python's regular ints.
4008 * Return value: a new PyString*, or NULL if error.
4009 * . *pbuf is set to point into it,
4010 * *plen set to the # of chars following that.
4011 * Caller must decref it when done using pbuf.
4012 * The string starting at *pbuf is of the form
4013 * "-"? ("0x" | "0X")? digit+
4014 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4015 * set in flags. The case of hex digits will be correct,
4016 * There will be at least prec digits, zero-filled on the left if
4017 * necessary to get that many.
4018 * val object to be converted
4019 * flags bitmask of format flags; only F_ALT is looked at
4020 * prec minimum number of digits; 0-fill on left if needed
4021 * type a character in [duoxX]; u acts the same as d
4022 *
4023 * CAUTION: o, x and X conversions on regular ints can never
4024 * produce a '-' sign, but can for Python's unbounded ints.
4025 */
4026 PyObject*
_PyString_FormatLong(PyObject * val,int flags,int prec,int type,char ** pbuf,int * plen)4027 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4028 char **pbuf, int *plen)
4029 {
4030 PyObject *result = NULL, *r1;
4031 const char *s;
4032 char *buf;
4033 Py_ssize_t i;
4034 int sign; /* 1 if '-', else 0 */
4035 int len; /* number of characters */
4036 Py_ssize_t llen;
4037 int numdigits; /* len == numnondigits + skipped + numdigits */
4038 int numnondigits, skipped, filled;
4039 const char *method;
4040
4041 switch (type) {
4042 case 'd':
4043 case 'u':
4044 method = "str";
4045 result = Py_TYPE(val)->tp_str(val);
4046 break;
4047 case 'o':
4048 method = "oct";
4049 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4050 break;
4051 case 'x':
4052 case 'X':
4053 method = "hex";
4054 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4055 break;
4056 default:
4057 assert(!"'type' not in [duoxX]");
4058 }
4059 if (!result)
4060 return NULL;
4061
4062 if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
4063 Py_DECREF(result);
4064 return NULL;
4065 }
4066 if (llen > INT_MAX) {
4067 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4068 Py_DECREF(result);
4069 return NULL;
4070 }
4071 len = (int)llen;
4072 if (len > 0 && s[len-1] == 'L') {
4073 --len;
4074 if (len == 0)
4075 goto error;
4076 }
4077 sign = s[0] == '-';
4078 numnondigits = sign;
4079
4080 /* Need to skip 0x, 0X or 0. */
4081 skipped = 0;
4082 switch (type) {
4083 case 'o':
4084 if (s[sign] != '0')
4085 goto error;
4086 /* If 0 is only digit, leave it alone. */
4087 if ((flags & F_ALT) == 0 && len - sign > 1)
4088 skipped = 1;
4089 break;
4090 case 'x':
4091 case 'X':
4092 if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4093 goto error;
4094 if ((flags & F_ALT) == 0)
4095 skipped = 2;
4096 else
4097 numnondigits += 2;
4098 break;
4099 }
4100 numdigits = len - numnondigits - skipped;
4101 if (numdigits <= 0)
4102 goto error;
4103
4104 filled = prec - numdigits;
4105 if (filled < 0)
4106 filled = 0;
4107 len = numnondigits + filled + numdigits;
4108
4109 /* To modify the string in-place, there can only be one reference. */
4110 if (skipped >= filled &&
4111 PyString_CheckExact(result) &&
4112 Py_REFCNT(result) == 1 &&
4113 !PyString_CHECK_INTERNED(result))
4114 {
4115 r1 = NULL;
4116 buf = (char *)s + skipped - filled;
4117 }
4118 else {
4119 r1 = result;
4120 result = PyString_FromStringAndSize(NULL, len);
4121 if (!result) {
4122 Py_DECREF(r1);
4123 return NULL;
4124 }
4125 buf = PyString_AS_STRING(result);
4126 }
4127
4128 for (i = numnondigits; --i >= 0;)
4129 buf[i] = s[i];
4130 buf += numnondigits;
4131 s += numnondigits + skipped;
4132 for (i = 0; i < filled; i++)
4133 *buf++ = '0';
4134 if (r1 == NULL) {
4135 assert(buf == s);
4136 buf += numdigits;
4137 }
4138 else {
4139 for (i = 0; i < numdigits; i++)
4140 *buf++ = *s++;
4141 }
4142 *buf = '\0';
4143 buf -= len;
4144 Py_XDECREF(r1);
4145
4146 /* Fix up case for hex conversions. */
4147 if (type == 'X') {
4148 /* Need to convert all lower case letters to upper case.
4149 and need to convert 0x to 0X (and -0x to -0X). */
4150 for (i = 0; i < len; i++) {
4151 if (buf[i] >= 'a' && buf[i] <= 'z')
4152 buf[i] -= 'a'-'A';
4153 }
4154 }
4155 *pbuf = buf;
4156 *plen = len;
4157 return result;
4158
4159 error:
4160 PyErr_Format(PyExc_ValueError,
4161 "%%%c format: invalid result of __%s__ (type=%.200s)",
4162 type, method, Py_TYPE(val)->tp_name);
4163 Py_DECREF(result);
4164 return NULL;
4165 }
4166
4167 Py_LOCAL_INLINE(int)
formatint(char * buf,size_t buflen,int flags,int prec,int type,PyObject * v)4168 formatint(char *buf, size_t buflen, int flags,
4169 int prec, int type, PyObject *v)
4170 {
4171 /* fmt = '%#.' + `prec` + 'l' + `type`
4172 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4173 + 1 + 1 = 24 */
4174 char fmt[64]; /* plenty big enough! */
4175 char *sign;
4176 long x;
4177
4178 x = PyInt_AsLong(v);
4179 if (x == -1 && PyErr_Occurred()) {
4180 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4181 Py_TYPE(v)->tp_name);
4182 return -1;
4183 }
4184 if (x < 0 && type == 'u') {
4185 type = 'd';
4186 }
4187 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4188 sign = "-";
4189 else
4190 sign = "";
4191 if (prec < 0)
4192 prec = 1;
4193
4194 if ((flags & F_ALT) &&
4195 (type == 'x' || type == 'X')) {
4196 /* When converting under %#x or %#X, there are a number
4197 * of issues that cause pain:
4198 * - when 0 is being converted, the C standard leaves off
4199 * the '0x' or '0X', which is inconsistent with other
4200 * %#x/%#X conversions and inconsistent with Python's
4201 * hex() function
4202 * - there are platforms that violate the standard and
4203 * convert 0 with the '0x' or '0X'
4204 * (Metrowerks, Compaq Tru64)
4205 * - there are platforms that give '0x' when converting
4206 * under %#X, but convert 0 in accordance with the
4207 * standard (OS/2 EMX)
4208 *
4209 * We can achieve the desired consistency by inserting our
4210 * own '0x' or '0X' prefix, and substituting %x/%X in place
4211 * of %#x/%#X.
4212 *
4213 * Note that this is the same approach as used in
4214 * formatint() in unicodeobject.c
4215 */
4216 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4217 sign, type, prec, type);
4218 }
4219 else {
4220 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4221 sign, (flags&F_ALT) ? "#" : "",
4222 prec, type);
4223 }
4224
4225 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4226 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4227 */
4228 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4229 PyErr_SetString(PyExc_OverflowError,
4230 "formatted integer is too long (precision too large?)");
4231 return -1;
4232 }
4233 if (sign[0])
4234 PyOS_snprintf(buf, buflen, fmt, -x);
4235 else
4236 PyOS_snprintf(buf, buflen, fmt, x);
4237 return (int)strlen(buf);
4238 }
4239
4240 Py_LOCAL_INLINE(int)
formatchar(char * buf,size_t buflen,PyObject * v)4241 formatchar(char *buf, size_t buflen, PyObject *v)
4242 {
4243 /* presume that the buffer is at least 2 characters long */
4244 if (PyString_Check(v)) {
4245 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4246 return -1;
4247 }
4248 else {
4249 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4250 return -1;
4251 }
4252 buf[1] = '\0';
4253 return 1;
4254 }
4255
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
4265
4266 PyObject *
PyString_Format(PyObject * format,PyObject * args)4267 PyString_Format(PyObject *format, PyObject *args)
4268 {
4269 char *fmt, *res;
4270 Py_ssize_t arglen, argidx;
4271 Py_ssize_t reslen, rescnt, fmtcnt;
4272 int args_owned = 0;
4273 PyObject *result, *orig_args;
4274 #ifdef Py_USING_UNICODE
4275 PyObject *v, *w;
4276 #endif
4277 PyObject *dict = NULL;
4278 if (format == NULL || !PyString_Check(format) || args == NULL) {
4279 PyErr_BadInternalCall();
4280 return NULL;
4281 }
4282 orig_args = args;
4283 fmt = PyString_AS_STRING(format);
4284 fmtcnt = PyString_GET_SIZE(format);
4285 reslen = rescnt = fmtcnt + 100;
4286 result = PyString_FromStringAndSize((char *)NULL, reslen);
4287 if (result == NULL)
4288 return NULL;
4289 res = PyString_AsString(result);
4290 if (PyTuple_Check(args)) {
4291 arglen = PyTuple_GET_SIZE(args);
4292 argidx = 0;
4293 }
4294 else {
4295 arglen = -1;
4296 argidx = -2;
4297 }
4298 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4299 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4300 dict = args;
4301 while (--fmtcnt >= 0) {
4302 if (*fmt != '%') {
4303 if (--rescnt < 0) {
4304 rescnt = fmtcnt + 100;
4305 reslen += rescnt;
4306 if (_PyString_Resize(&result, reslen))
4307 return NULL;
4308 res = PyString_AS_STRING(result)
4309 + reslen - rescnt;
4310 --rescnt;
4311 }
4312 *res++ = *fmt++;
4313 }
4314 else {
4315 /* Got a format specifier */
4316 int flags = 0;
4317 Py_ssize_t width = -1;
4318 int prec = -1;
4319 int c = '\0';
4320 int fill;
4321 int isnumok;
4322 PyObject *v = NULL;
4323 PyObject *temp = NULL;
4324 char *pbuf;
4325 int sign;
4326 Py_ssize_t len;
4327 char formatbuf[FORMATBUFLEN];
4328 /* For format{int,char}() */
4329 #ifdef Py_USING_UNICODE
4330 char *fmt_start = fmt;
4331 Py_ssize_t argidx_start = argidx;
4332 #endif
4333
4334 fmt++;
4335 if (*fmt == '(') {
4336 char *keystart;
4337 Py_ssize_t keylen;
4338 PyObject *key;
4339 int pcount = 1;
4340
4341 if (dict == NULL) {
4342 PyErr_SetString(PyExc_TypeError,
4343 "format requires a mapping");
4344 goto error;
4345 }
4346 ++fmt;
4347 --fmtcnt;
4348 keystart = fmt;
4349 /* Skip over balanced parentheses */
4350 while (pcount > 0 && --fmtcnt >= 0) {
4351 if (*fmt == ')')
4352 --pcount;
4353 else if (*fmt == '(')
4354 ++pcount;
4355 fmt++;
4356 }
4357 keylen = fmt - keystart - 1;
4358 if (fmtcnt < 0 || pcount > 0) {
4359 PyErr_SetString(PyExc_ValueError,
4360 "incomplete format key");
4361 goto error;
4362 }
4363 key = PyString_FromStringAndSize(keystart,
4364 keylen);
4365 if (key == NULL)
4366 goto error;
4367 if (args_owned) {
4368 Py_DECREF(args);
4369 args_owned = 0;
4370 }
4371 args = PyObject_GetItem(dict, key);
4372 Py_DECREF(key);
4373 if (args == NULL) {
4374 goto error;
4375 }
4376 args_owned = 1;
4377 arglen = -1;
4378 argidx = -2;
4379 }
4380 while (--fmtcnt >= 0) {
4381 switch (c = *fmt++) {
4382 case '-': flags |= F_LJUST; continue;
4383 case '+': flags |= F_SIGN; continue;
4384 case ' ': flags |= F_BLANK; continue;
4385 case '#': flags |= F_ALT; continue;
4386 case '0': flags |= F_ZERO; continue;
4387 }
4388 break;
4389 }
4390 if (c == '*') {
4391 v = getnextarg(args, arglen, &argidx);
4392 if (v == NULL)
4393 goto error;
4394 if (!PyInt_Check(v)) {
4395 PyErr_SetString(PyExc_TypeError,
4396 "* wants int");
4397 goto error;
4398 }
4399 width = PyInt_AsSsize_t(v);
4400 if (width == -1 && PyErr_Occurred())
4401 goto error;
4402 if (width < 0) {
4403 flags |= F_LJUST;
4404 width = -width;
4405 }
4406 if (--fmtcnt >= 0)
4407 c = *fmt++;
4408 }
4409 else if (c >= 0 && isdigit(c)) {
4410 width = c - '0';
4411 while (--fmtcnt >= 0) {
4412 c = Py_CHARMASK(*fmt++);
4413 if (!isdigit(c))
4414 break;
4415 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4416 PyErr_SetString(
4417 PyExc_ValueError,
4418 "width too big");
4419 goto error;
4420 }
4421 width = width*10 + (c - '0');
4422 }
4423 }
4424 if (c == '.') {
4425 prec = 0;
4426 if (--fmtcnt >= 0)
4427 c = *fmt++;
4428 if (c == '*') {
4429 v = getnextarg(args, arglen, &argidx);
4430 if (v == NULL)
4431 goto error;
4432 if (!PyInt_Check(v)) {
4433 PyErr_SetString(
4434 PyExc_TypeError,
4435 "* wants int");
4436 goto error;
4437 }
4438 prec = _PyInt_AsInt(v);
4439 if (prec == -1 && PyErr_Occurred())
4440 goto error;
4441 if (prec < 0)
4442 prec = 0;
4443 if (--fmtcnt >= 0)
4444 c = *fmt++;
4445 }
4446 else if (c >= 0 && isdigit(c)) {
4447 prec = c - '0';
4448 while (--fmtcnt >= 0) {
4449 c = Py_CHARMASK(*fmt++);
4450 if (!isdigit(c))
4451 break;
4452 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4453 PyErr_SetString(
4454 PyExc_ValueError,
4455 "prec too big");
4456 goto error;
4457 }
4458 prec = prec*10 + (c - '0');
4459 }
4460 }
4461 } /* prec */
4462 if (fmtcnt >= 0) {
4463 if (c == 'h' || c == 'l' || c == 'L') {
4464 if (--fmtcnt >= 0)
4465 c = *fmt++;
4466 }
4467 }
4468 if (fmtcnt < 0) {
4469 PyErr_SetString(PyExc_ValueError,
4470 "incomplete format");
4471 goto error;
4472 }
4473 if (c != '%') {
4474 v = getnextarg(args, arglen, &argidx);
4475 if (v == NULL)
4476 goto error;
4477 }
4478 sign = 0;
4479 fill = ' ';
4480 switch (c) {
4481 case '%':
4482 pbuf = "%";
4483 len = 1;
4484 break;
4485 case 's':
4486 #ifdef Py_USING_UNICODE
4487 if (PyUnicode_Check(v)) {
4488 fmt = fmt_start;
4489 argidx = argidx_start;
4490 goto unicode;
4491 }
4492 #endif
4493 temp = _PyObject_Str(v);
4494 #ifdef Py_USING_UNICODE
4495 if (temp != NULL && PyUnicode_Check(temp)) {
4496 Py_DECREF(temp);
4497 fmt = fmt_start;
4498 argidx = argidx_start;
4499 goto unicode;
4500 }
4501 #endif
4502 /* Fall through */
4503 case 'r':
4504 if (c == 'r')
4505 temp = PyObject_Repr(v);
4506 if (temp == NULL)
4507 goto error;
4508 if (!PyString_Check(temp)) {
4509 PyErr_SetString(PyExc_TypeError,
4510 "%s argument has non-string str()");
4511 Py_DECREF(temp);
4512 goto error;
4513 }
4514 pbuf = PyString_AS_STRING(temp);
4515 len = PyString_GET_SIZE(temp);
4516 if (prec >= 0 && len > prec)
4517 len = prec;
4518 break;
4519 case 'i':
4520 case 'd':
4521 case 'u':
4522 case 'o':
4523 case 'x':
4524 case 'X':
4525 if (c == 'i')
4526 c = 'd';
4527 isnumok = 0;
4528 if (PyNumber_Check(v)) {
4529 PyObject *iobj=NULL;
4530
4531 if (_PyAnyInt_Check(v)) {
4532 iobj = v;
4533 Py_INCREF(iobj);
4534 }
4535 else {
4536 iobj = PyNumber_Int(v);
4537 if (iobj==NULL) {
4538 PyErr_Clear();
4539 iobj = PyNumber_Long(v);
4540 }
4541 }
4542 if (iobj!=NULL) {
4543 if (PyInt_Check(iobj)) {
4544 isnumok = 1;
4545 pbuf = formatbuf;
4546 len = formatint(pbuf,
4547 sizeof(formatbuf),
4548 flags, prec, c, iobj);
4549 Py_DECREF(iobj);
4550 if (len < 0)
4551 goto error;
4552 sign = 1;
4553 }
4554 else if (PyLong_Check(iobj)) {
4555 int ilen;
4556
4557 isnumok = 1;
4558 temp = _PyString_FormatLong(iobj, flags,
4559 prec, c, &pbuf, &ilen);
4560 Py_DECREF(iobj);
4561 len = ilen;
4562 if (!temp)
4563 goto error;
4564 sign = 1;
4565 }
4566 else {
4567 Py_DECREF(iobj);
4568 }
4569 }
4570 }
4571 if (!isnumok) {
4572 PyErr_Format(PyExc_TypeError,
4573 "%%%c format: a number is required, "
4574 "not %.200s", c, Py_TYPE(v)->tp_name);
4575 goto error;
4576 }
4577 if (flags & F_ZERO)
4578 fill = '0';
4579 break;
4580 case 'e':
4581 case 'E':
4582 case 'f':
4583 case 'F':
4584 case 'g':
4585 case 'G':
4586 temp = formatfloat(v, flags, prec, c);
4587 if (temp == NULL)
4588 goto error;
4589 pbuf = PyString_AS_STRING(temp);
4590 len = PyString_GET_SIZE(temp);
4591 sign = 1;
4592 if (flags & F_ZERO)
4593 fill = '0';
4594 break;
4595 case 'c':
4596 #ifdef Py_USING_UNICODE
4597 if (PyUnicode_Check(v)) {
4598 fmt = fmt_start;
4599 argidx = argidx_start;
4600 goto unicode;
4601 }
4602 #endif
4603 pbuf = formatbuf;
4604 len = formatchar(pbuf, sizeof(formatbuf), v);
4605 if (len < 0)
4606 goto error;
4607 break;
4608 default:
4609 PyErr_Format(PyExc_ValueError,
4610 "unsupported format character '%c' (0x%x) "
4611 "at index %zd",
4612 c, c,
4613 (Py_ssize_t)(fmt - 1 -
4614 PyString_AsString(format)));
4615 goto error;
4616 }
4617 if (sign) {
4618 if (*pbuf == '-' || *pbuf == '+') {
4619 sign = *pbuf++;
4620 len--;
4621 }
4622 else if (flags & F_SIGN)
4623 sign = '+';
4624 else if (flags & F_BLANK)
4625 sign = ' ';
4626 else
4627 sign = 0;
4628 }
4629 if (width < len)
4630 width = len;
4631 if (rescnt - (sign != 0) < width) {
4632 reslen -= rescnt;
4633 rescnt = width + fmtcnt + 100;
4634 reslen += rescnt;
4635 if (reslen < 0) {
4636 Py_DECREF(result);
4637 Py_XDECREF(temp);
4638 return PyErr_NoMemory();
4639 }
4640 if (_PyString_Resize(&result, reslen)) {
4641 Py_XDECREF(temp);
4642 return NULL;
4643 }
4644 res = PyString_AS_STRING(result)
4645 + reslen - rescnt;
4646 }
4647 if (sign) {
4648 if (fill != ' ')
4649 *res++ = sign;
4650 rescnt--;
4651 if (width > len)
4652 width--;
4653 }
4654 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4655 assert(pbuf[0] == '0');
4656 assert(pbuf[1] == c);
4657 if (fill != ' ') {
4658 *res++ = *pbuf++;
4659 *res++ = *pbuf++;
4660 }
4661 rescnt -= 2;
4662 width -= 2;
4663 if (width < 0)
4664 width = 0;
4665 len -= 2;
4666 }
4667 if (width > len && !(flags & F_LJUST)) {
4668 do {
4669 --rescnt;
4670 *res++ = fill;
4671 } while (--width > len);
4672 }
4673 if (fill == ' ') {
4674 if (sign)
4675 *res++ = sign;
4676 if ((flags & F_ALT) &&
4677 (c == 'x' || c == 'X')) {
4678 assert(pbuf[0] == '0');
4679 assert(pbuf[1] == c);
4680 *res++ = *pbuf++;
4681 *res++ = *pbuf++;
4682 }
4683 }
4684 Py_MEMCPY(res, pbuf, len);
4685 res += len;
4686 rescnt -= len;
4687 while (--width >= len) {
4688 --rescnt;
4689 *res++ = ' ';
4690 }
4691 if (dict && (argidx < arglen) && c != '%') {
4692 PyErr_SetString(PyExc_TypeError,
4693 "not all arguments converted during string formatting");
4694 Py_XDECREF(temp);
4695 goto error;
4696 }
4697 Py_XDECREF(temp);
4698 } /* '%' */
4699 } /* until end */
4700 if (argidx < arglen && !dict) {
4701 PyErr_SetString(PyExc_TypeError,
4702 "not all arguments converted during string formatting");
4703 goto error;
4704 }
4705 if (args_owned) {
4706 Py_DECREF(args);
4707 }
4708 if (_PyString_Resize(&result, reslen - rescnt))
4709 return NULL;
4710 return result;
4711
4712 #ifdef Py_USING_UNICODE
4713 unicode:
4714 if (args_owned) {
4715 Py_DECREF(args);
4716 args_owned = 0;
4717 }
4718 /* Fiddle args right (remove the first argidx arguments) */
4719 if (PyTuple_Check(orig_args) && argidx > 0) {
4720 PyObject *v;
4721 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4722 v = PyTuple_New(n);
4723 if (v == NULL)
4724 goto error;
4725 while (--n >= 0) {
4726 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4727 Py_INCREF(w);
4728 PyTuple_SET_ITEM(v, n, w);
4729 }
4730 args = v;
4731 } else {
4732 Py_INCREF(orig_args);
4733 args = orig_args;
4734 }
4735 args_owned = 1;
4736 /* Take what we have of the result and let the Unicode formatting
4737 function format the rest of the input. */
4738 rescnt = res - PyString_AS_STRING(result);
4739 if (_PyString_Resize(&result, rescnt))
4740 goto error;
4741 fmtcnt = PyString_GET_SIZE(format) - \
4742 (fmt - PyString_AS_STRING(format));
4743 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4744 if (format == NULL)
4745 goto error;
4746 v = PyUnicode_Format(format, args);
4747 Py_DECREF(format);
4748 if (v == NULL)
4749 goto error;
4750 /* Paste what we have (result) to what the Unicode formatting
4751 function returned (v) and return the result (or error) */
4752 w = PyUnicode_Concat(result, v);
4753 Py_DECREF(result);
4754 Py_DECREF(v);
4755 Py_DECREF(args);
4756 return w;
4757 #endif /* Py_USING_UNICODE */
4758
4759 error:
4760 Py_DECREF(result);
4761 if (args_owned) {
4762 Py_DECREF(args);
4763 }
4764 return NULL;
4765 }
4766
4767 void
PyString_InternInPlace(PyObject ** p)4768 PyString_InternInPlace(PyObject **p)
4769 {
4770 register PyStringObject *s = (PyStringObject *)(*p);
4771 PyObject *t;
4772 if (s == NULL || !PyString_Check(s))
4773 Py_FatalError("PyString_InternInPlace: strings only please!");
4774 /* If it's a string subclass, we don't really know what putting
4775 it in the interned dict might do. */
4776 if (!PyString_CheckExact(s))
4777 return;
4778 if (PyString_CHECK_INTERNED(s))
4779 return;
4780 if (interned == NULL) {
4781 interned = PyDict_New();
4782 if (interned == NULL) {
4783 PyErr_Clear(); /* Don't leave an exception */
4784 return;
4785 }
4786 }
4787 t = PyDict_GetItem(interned, (PyObject *)s);
4788 if (t) {
4789 Py_INCREF(t);
4790 Py_SETREF(*p, t);
4791 return;
4792 }
4793
4794 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4795 PyErr_Clear();
4796 return;
4797 }
4798 /* The two references in interned are not counted by refcnt.
4799 The string deallocator will take care of this */
4800 Py_REFCNT(s) -= 2;
4801 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4802 }
4803
4804 void
PyString_InternImmortal(PyObject ** p)4805 PyString_InternImmortal(PyObject **p)
4806 {
4807 PyString_InternInPlace(p);
4808 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4809 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4810 Py_INCREF(*p);
4811 }
4812 }
4813
4814
4815 PyObject *
PyString_InternFromString(const char * cp)4816 PyString_InternFromString(const char *cp)
4817 {
4818 PyObject *s = PyString_FromString(cp);
4819 if (s == NULL)
4820 return NULL;
4821 PyString_InternInPlace(&s);
4822 return s;
4823 }
4824
4825 void
PyString_Fini(void)4826 PyString_Fini(void)
4827 {
4828 int i;
4829 for (i = 0; i < UCHAR_MAX + 1; i++)
4830 Py_CLEAR(characters[i]);
4831 Py_CLEAR(nullstring);
4832 }
4833
_Py_ReleaseInternedStrings(void)4834 void _Py_ReleaseInternedStrings(void)
4835 {
4836 PyObject *keys;
4837 PyStringObject *s;
4838 Py_ssize_t i, n;
4839 Py_ssize_t immortal_size = 0, mortal_size = 0;
4840
4841 if (interned == NULL || !PyDict_Check(interned))
4842 return;
4843 keys = PyDict_Keys(interned);
4844 if (keys == NULL || !PyList_Check(keys)) {
4845 PyErr_Clear();
4846 return;
4847 }
4848
4849 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4850 detector, interned strings are not forcibly deallocated; rather, we
4851 give them their stolen references back, and then clear and DECREF
4852 the interned dict. */
4853
4854 n = PyList_GET_SIZE(keys);
4855 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4856 n);
4857 for (i = 0; i < n; i++) {
4858 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4859 switch (s->ob_sstate) {
4860 case SSTATE_NOT_INTERNED:
4861 /* XXX Shouldn't happen */
4862 break;
4863 case SSTATE_INTERNED_IMMORTAL:
4864 Py_REFCNT(s) += 1;
4865 immortal_size += Py_SIZE(s);
4866 break;
4867 case SSTATE_INTERNED_MORTAL:
4868 Py_REFCNT(s) += 2;
4869 mortal_size += Py_SIZE(s);
4870 break;
4871 default:
4872 Py_FatalError("Inconsistent interned string state.");
4873 }
4874 s->ob_sstate = SSTATE_NOT_INTERNED;
4875 }
4876 fprintf(stderr, "total size of all interned strings: "
4877 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4878 "mortal/immortal\n", mortal_size, immortal_size);
4879 Py_DECREF(keys);
4880 PyDict_Clear(interned);
4881 Py_CLEAR(interned);
4882 }
4883