1 #define PY_SSIZE_T_CLEAN
2 #include "Python.h"
3 #include "structmember.h"
4 #include "pycore_accu.h"
5 #include "pycore_object.h"
6 #include "_iomodule.h"
7 
8 /* Implementation note: the buffer is always at least one character longer
9    than the enclosed string, for proper functioning of _PyIO_find_line_ending.
10 */
11 
/* Values stored in the `state` field of a stringio object.
   STATE_REALIZED: the contents live in the UCS4 buffer (`buf`).
   STATE_ACCUMULATING: the contents live in the embedded _PyAccu and the
   buffer holds nothing meaningful. */
#define STATE_REALIZED 1
#define STATE_ACCUMULATING 2
14 
15 /*[clinic input]
16 module _io
17 class _io.StringIO "stringio *" "&PyStringIO_Type"
18 [clinic start generated code]*/
19 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
20 
/* Instance layout of _io.StringIO objects. */
typedef struct {
    PyObject_HEAD
    Py_UCS4 *buf;            /* character buffer, grown/shrunk by resize_buffer() */
    Py_ssize_t pos;          /* current stream position, in characters */
    Py_ssize_t string_size;  /* length of the stored text, in characters */
    size_t buf_size;         /* allocated capacity of buf, in characters */

    /* The stringio object can be in two states: accumulating or realized.
       In accumulating state, the internal buffer contains nothing and
       the contents are given by the embedded _PyAccu structure.
       In realized state, the internal buffer is meaningful and the
       _PyAccu is destroyed.
    */
    int state;
    _PyAccu accu;

    char ok; /* initialized? */
    char closed;             /* set to 1 by close(), reset to 0 by __init__ */
    char readuniversal;      /* true when newline is None or "" */
    char readtranslate;      /* true when newline is None */
    PyObject *decoder;       /* newline decoder; only created when readuniversal */
    PyObject *readnl;        /* str copy of the newline argument; NULL for None */
    PyObject *writenl;       /* replacement for "\n" on write; set only when
                                newline starts with '\r' */

    PyObject *dict;          /* instance dictionary (tp_dictoffset) */
    PyObject *weakreflist;   /* weak reference list head (tp_weaklistoffset) */
} stringio;
48 
49 static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
50 
/* Guard macros for the top of functions that return a pointer (NULL on
   error).  Each one expands to a single statement, so it must be followed
   by a semicolon and is safe inside an unbraced if/else.

   CHECK_INITIALIZED: raise ValueError and return NULL unless __init__ has
   completed successfully (self->ok > 0). */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* CHECK_CLOSED: raise ValueError and return NULL if the object is closed. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

/* ENSURE_REALIZED: switch the object to the realized state, returning NULL
   if realize() fails (realize() has then already set the exception). */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
69 
70 
71 /* Internal routine for changing the size, in terms of characters, of the
72    buffer of StringIO objects.  The caller should ensure that the 'size'
73    argument is non-negative.  Returns 0 on success, -1 otherwise. */
74 static int
resize_buffer(stringio * self,size_t size)75 resize_buffer(stringio *self, size_t size)
76 {
77     /* Here, unsigned types are used to avoid dealing with signed integer
78        overflow, which is undefined in C. */
79     size_t alloc = self->buf_size;
80     Py_UCS4 *new_buf = NULL;
81 
82     assert(self->buf != NULL);
83 
84     /* Reserve one more char for line ending detection. */
85     size = size + 1;
86     /* For simplicity, stay in the range of the signed type. Anyway, Python
87        doesn't allow strings to be longer than this. */
88     if (size > PY_SSIZE_T_MAX)
89         goto overflow;
90 
91     if (size < alloc / 2) {
92         /* Major downsize; resize down to exact size. */
93         alloc = size + 1;
94     }
95     else if (size < alloc) {
96         /* Within allocated size; quick exit */
97         return 0;
98     }
99     else if (size <= alloc * 1.125) {
100         /* Moderate upsize; overallocate similar to list_resize() */
101         alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
102     }
103     else {
104         /* Major upsize; resize up to exact size */
105         alloc = size + 1;
106     }
107 
108     if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
109         goto overflow;
110     new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
111     if (new_buf == NULL) {
112         PyErr_NoMemory();
113         return -1;
114     }
115     self->buf_size = alloc;
116     self->buf = new_buf;
117 
118     return 0;
119 
120   overflow:
121     PyErr_SetString(PyExc_OverflowError,
122                     "new buffer size too large");
123     return -1;
124 }
125 
126 static PyObject *
make_intermediate(stringio * self)127 make_intermediate(stringio *self)
128 {
129     PyObject *intermediate = _PyAccu_Finish(&self->accu);
130     self->state = STATE_REALIZED;
131     if (intermediate == NULL)
132         return NULL;
133     if (_PyAccu_Init(&self->accu) ||
134         _PyAccu_Accumulate(&self->accu, intermediate)) {
135         Py_DECREF(intermediate);
136         return NULL;
137     }
138     self->state = STATE_ACCUMULATING;
139     return intermediate;
140 }
141 
142 static int
realize(stringio * self)143 realize(stringio *self)
144 {
145     Py_ssize_t len;
146     PyObject *intermediate;
147 
148     if (self->state == STATE_REALIZED)
149         return 0;
150     assert(self->state == STATE_ACCUMULATING);
151     self->state = STATE_REALIZED;
152 
153     intermediate = _PyAccu_Finish(&self->accu);
154     if (intermediate == NULL)
155         return -1;
156 
157     /* Append the intermediate string to the internal buffer.
158        The length should be equal to the current cursor position.
159      */
160     len = PyUnicode_GET_LENGTH(intermediate);
161     if (resize_buffer(self, len) < 0) {
162         Py_DECREF(intermediate);
163         return -1;
164     }
165     if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
166         Py_DECREF(intermediate);
167         return -1;
168     }
169 
170     Py_DECREF(intermediate);
171     return 0;
172 }
173 
174 /* Internal routine for writing a whole PyUnicode object to the buffer of a
175    StringIO object. Returns 0 on success, or -1 on error. */
176 static Py_ssize_t
write_str(stringio * self,PyObject * obj)177 write_str(stringio *self, PyObject *obj)
178 {
179     Py_ssize_t len;
180     PyObject *decoded = NULL;
181 
182     assert(self->buf != NULL);
183     assert(self->pos >= 0);
184 
185     if (self->decoder != NULL) {
186         decoded = _PyIncrementalNewlineDecoder_decode(
187             self->decoder, obj, 1 /* always final */);
188     }
189     else {
190         decoded = obj;
191         Py_INCREF(decoded);
192     }
193     if (self->writenl) {
194         PyObject *translated = PyUnicode_Replace(
195             decoded, _PyIO_str_nl, self->writenl, -1);
196         Py_DECREF(decoded);
197         decoded = translated;
198     }
199     if (decoded == NULL)
200         return -1;
201 
202     assert(PyUnicode_Check(decoded));
203     if (PyUnicode_READY(decoded)) {
204         Py_DECREF(decoded);
205         return -1;
206     }
207     len = PyUnicode_GET_LENGTH(decoded);
208     assert(len >= 0);
209 
210     /* This overflow check is not strictly necessary. However, it avoids us to
211        deal with funky things like comparing an unsigned and a signed
212        integer. */
213     if (self->pos > PY_SSIZE_T_MAX - len) {
214         PyErr_SetString(PyExc_OverflowError,
215                         "new position too large");
216         goto fail;
217     }
218 
219     if (self->state == STATE_ACCUMULATING) {
220         if (self->string_size == self->pos) {
221             if (_PyAccu_Accumulate(&self->accu, decoded))
222                 goto fail;
223             goto success;
224         }
225         if (realize(self))
226             goto fail;
227     }
228 
229     if (self->pos + len > self->string_size) {
230         if (resize_buffer(self, self->pos + len) < 0)
231             goto fail;
232     }
233 
234     if (self->pos > self->string_size) {
235         /* In case of overseek, pad with null bytes the buffer region between
236            the end of stream and the current position.
237 
238           0   lo      string_size                           hi
239           |   |<---used--->|<----------available----------->|
240           |   |            <--to pad-->|<---to write--->    |
241           0   buf                   position
242 
243         */
244         memset(self->buf + self->string_size, '\0',
245                (self->pos - self->string_size) * sizeof(Py_UCS4));
246     }
247 
248     /* Copy the data to the internal buffer, overwriting some of the
249        existing data if self->pos < self->string_size. */
250     if (!PyUnicode_AsUCS4(decoded,
251                           self->buf + self->pos,
252                           self->buf_size - self->pos,
253                           0))
254         goto fail;
255 
256 success:
257     /* Set the new length of the internal string if it has changed. */
258     self->pos += len;
259     if (self->string_size < self->pos)
260         self->string_size = self->pos;
261 
262     Py_DECREF(decoded);
263     return 0;
264 
265 fail:
266     Py_XDECREF(decoded);
267     return -1;
268 }
269 
270 /*[clinic input]
271 _io.StringIO.getvalue
272 
273 Retrieve the entire contents of the object.
274 [clinic start generated code]*/
275 
276 static PyObject *
_io_StringIO_getvalue_impl(stringio * self)277 _io_StringIO_getvalue_impl(stringio *self)
278 /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
279 {
280     CHECK_INITIALIZED(self);
281     CHECK_CLOSED(self);
282     if (self->state == STATE_ACCUMULATING)
283         return make_intermediate(self);
284     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
285                                      self->string_size);
286 }
287 
288 /*[clinic input]
289 _io.StringIO.tell
290 
291 Tell the current file position.
292 [clinic start generated code]*/
293 
static PyObject *
_io_StringIO_tell_impl(stringio *self)
/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
{
    /* Both guards return NULL with ValueError set when they trip. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    /* The position is a character index, reported as a Python int. */
    return PyLong_FromSsize_t(self->pos);
}
302 
303 /*[clinic input]
304 _io.StringIO.read
305     size: Py_ssize_t(accept={int, NoneType}) = -1
306     /
307 
308 Read at most size characters, returned as a string.
309 
310 If the argument is negative or omitted, read until EOF
311 is reached. Return an empty string at EOF.
312 [clinic start generated code]*/
313 
314 static PyObject *
_io_StringIO_read_impl(stringio * self,Py_ssize_t size)315 _io_StringIO_read_impl(stringio *self, Py_ssize_t size)
316 /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
317 {
318     Py_ssize_t n;
319     Py_UCS4 *output;
320 
321     CHECK_INITIALIZED(self);
322     CHECK_CLOSED(self);
323 
324     /* adjust invalid sizes */
325     n = self->string_size - self->pos;
326     if (size < 0 || size > n) {
327         size = n;
328         if (size < 0)
329             size = 0;
330     }
331 
332     /* Optimization for seek(0); read() */
333     if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
334         PyObject *result = make_intermediate(self);
335         self->pos = self->string_size;
336         return result;
337     }
338 
339     ENSURE_REALIZED(self);
340     output = self->buf + self->pos;
341     self->pos += size;
342     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
343 }
344 
345 /* Internal helper, used by stringio_readline and stringio_iternext */
346 static PyObject *
_stringio_readline(stringio * self,Py_ssize_t limit)347 _stringio_readline(stringio *self, Py_ssize_t limit)
348 {
349     Py_UCS4 *start, *end, old_char;
350     Py_ssize_t len, consumed;
351 
352     /* In case of overseek, return the empty string */
353     if (self->pos >= self->string_size)
354         return PyUnicode_New(0, 0);
355 
356     start = self->buf + self->pos;
357     if (limit < 0 || limit > self->string_size - self->pos)
358         limit = self->string_size - self->pos;
359 
360     end = start + limit;
361     old_char = *end;
362     *end = '\0';
363     len = _PyIO_find_line_ending(
364         self->readtranslate, self->readuniversal, self->readnl,
365         PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
366     *end = old_char;
367     /* If we haven't found any line ending, we just return everything
368        (`consumed` is ignored). */
369     if (len < 0)
370         len = limit;
371     self->pos += len;
372     return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
373 }
374 
375 /*[clinic input]
376 _io.StringIO.readline
377     size: Py_ssize_t(accept={int, NoneType}) = -1
378     /
379 
380 Read until newline or EOF.
381 
382 Returns an empty string if EOF is hit immediately.
383 [clinic start generated code]*/
384 
static PyObject *
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
{
    /* Each guard returns NULL with an exception set when it trips. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    /* The helper scans self->buf directly, so the buffer must be realized. */
    ENSURE_REALIZED(self);

    return _stringio_readline(self, size);
}
395 
396 static PyObject *
stringio_iternext(stringio * self)397 stringio_iternext(stringio *self)
398 {
399     PyObject *line;
400 
401     CHECK_INITIALIZED(self);
402     CHECK_CLOSED(self);
403     ENSURE_REALIZED(self);
404 
405     if (Py_TYPE(self) == &PyStringIO_Type) {
406         /* Skip method call overhead for speed */
407         line = _stringio_readline(self, -1);
408     }
409     else {
410         /* XXX is subclassing StringIO really supported? */
411         line = PyObject_CallMethodObjArgs((PyObject *)self,
412                                            _PyIO_str_readline, NULL);
413         if (line && !PyUnicode_Check(line)) {
414             PyErr_Format(PyExc_OSError,
415                          "readline() should have returned a str object, "
416                          "not '%.200s'", Py_TYPE(line)->tp_name);
417             Py_DECREF(line);
418             return NULL;
419         }
420     }
421 
422     if (line == NULL)
423         return NULL;
424 
425     if (PyUnicode_GET_LENGTH(line) == 0) {
426         /* Reached EOF */
427         Py_DECREF(line);
428         return NULL;
429     }
430 
431     return line;
432 }
433 
434 /*[clinic input]
435 _io.StringIO.truncate
436     pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
437     /
438 
439 Truncate size to pos.
440 
441 The pos argument defaults to the current file position, as
442 returned by tell().  The current file position is unchanged.
443 Returns the new absolute position.
444 [clinic start generated code]*/
445 
446 static PyObject *
_io_StringIO_truncate_impl(stringio * self,Py_ssize_t size)447 _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
448 /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
449 {
450     CHECK_INITIALIZED(self);
451     CHECK_CLOSED(self);
452 
453     if (size < 0) {
454         PyErr_Format(PyExc_ValueError,
455                      "Negative size value %zd", size);
456         return NULL;
457     }
458 
459     if (size < self->string_size) {
460         ENSURE_REALIZED(self);
461         if (resize_buffer(self, size) < 0)
462             return NULL;
463         self->string_size = size;
464     }
465 
466     return PyLong_FromSsize_t(size);
467 }
468 
469 /*[clinic input]
470 _io.StringIO.seek
471     pos: Py_ssize_t
472     whence: int = 0
473     /
474 
475 Change stream position.
476 
477 Seek to character offset pos relative to position indicated by whence:
478     0  Start of stream (the default).  pos should be >= 0;
479     1  Current position - pos must be 0;
480     2  End of stream - pos must be 0.
481 Returns the new absolute position.
482 [clinic start generated code]*/
483 
484 static PyObject *
_io_StringIO_seek_impl(stringio * self,Py_ssize_t pos,int whence)485 _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
486 /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
487 {
488     CHECK_INITIALIZED(self);
489     CHECK_CLOSED(self);
490 
491     if (whence != 0 && whence != 1 && whence != 2) {
492         PyErr_Format(PyExc_ValueError,
493                      "Invalid whence (%i, should be 0, 1 or 2)", whence);
494         return NULL;
495     }
496     else if (pos < 0 && whence == 0) {
497         PyErr_Format(PyExc_ValueError,
498                      "Negative seek position %zd", pos);
499         return NULL;
500     }
501     else if (whence != 0 && pos != 0) {
502         PyErr_SetString(PyExc_OSError,
503                         "Can't do nonzero cur-relative seeks");
504         return NULL;
505     }
506 
507     /* whence = 0: offset relative to beginning of the string.
508        whence = 1: no change to current position.
509        whence = 2: change position to end of file. */
510     if (whence == 1) {
511         pos = self->pos;
512     }
513     else if (whence == 2) {
514         pos = self->string_size;
515     }
516 
517     self->pos = pos;
518 
519     return PyLong_FromSsize_t(self->pos);
520 }
521 
522 /*[clinic input]
523 _io.StringIO.write
524     s as obj: object
525     /
526 
527 Write string to file.
528 
529 Returns the number of characters written, which is always equal to
530 the length of the string.
531 [clinic start generated code]*/
532 
533 static PyObject *
_io_StringIO_write(stringio * self,PyObject * obj)534 _io_StringIO_write(stringio *self, PyObject *obj)
535 /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
536 {
537     Py_ssize_t size;
538 
539     CHECK_INITIALIZED(self);
540     if (!PyUnicode_Check(obj)) {
541         PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
542                      Py_TYPE(obj)->tp_name);
543         return NULL;
544     }
545     if (PyUnicode_READY(obj))
546         return NULL;
547     CHECK_CLOSED(self);
548     size = PyUnicode_GET_LENGTH(obj);
549 
550     if (size > 0 && write_str(self, obj) < 0)
551         return NULL;
552 
553     return PyLong_FromSsize_t(size);
554 }
555 
556 /*[clinic input]
557 _io.StringIO.close
558 
559 Close the IO object.
560 
561 Attempting any further operation after the object is closed
562 will raise a ValueError.
563 
564 This method has no effect if the file is already closed.
565 [clinic start generated code]*/
566 
567 static PyObject *
_io_StringIO_close_impl(stringio * self)568 _io_StringIO_close_impl(stringio *self)
569 /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
570 {
571     self->closed = 1;
572     /* Free up some memory */
573     if (resize_buffer(self, 0) < 0)
574         return NULL;
575     _PyAccu_Destroy(&self->accu);
576     Py_CLEAR(self->readnl);
577     Py_CLEAR(self->writenl);
578     Py_CLEAR(self->decoder);
579     Py_RETURN_NONE;
580 }
581 
/* tp_traverse: report the objects owned by the instance to the garbage
   collector.  Only the instance dictionary is visited. */
static int
stringio_traverse(stringio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dict);
    return 0;
}
588 
/* tp_clear: drop the reference to the instance dictionary, the only member
   visited by stringio_traverse(). */
static int
stringio_clear(stringio *self)
{
    Py_CLEAR(self->dict);
    return 0;
}
595 
596 static void
stringio_dealloc(stringio * self)597 stringio_dealloc(stringio *self)
598 {
599     _PyObject_GC_UNTRACK(self);
600     self->ok = 0;
601     if (self->buf) {
602         PyMem_Free(self->buf);
603         self->buf = NULL;
604     }
605     _PyAccu_Destroy(&self->accu);
606     Py_CLEAR(self->readnl);
607     Py_CLEAR(self->writenl);
608     Py_CLEAR(self->decoder);
609     Py_CLEAR(self->dict);
610     if (self->weakreflist != NULL)
611         PyObject_ClearWeakRefs((PyObject *) self);
612     Py_TYPE(self)->tp_free(self);
613 }
614 
615 static PyObject *
stringio_new(PyTypeObject * type,PyObject * args,PyObject * kwds)616 stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
617 {
618     stringio *self;
619 
620     assert(type != NULL && type->tp_alloc != NULL);
621     self = (stringio *)type->tp_alloc(type, 0);
622     if (self == NULL)
623         return NULL;
624 
625     /* tp_alloc initializes all the fields to zero. So we don't have to
626        initialize them here. */
627 
628     self->buf = (Py_UCS4 *)PyMem_Malloc(0);
629     if (self->buf == NULL) {
630         Py_DECREF(self);
631         return PyErr_NoMemory();
632     }
633 
634     return (PyObject *)self;
635 }
636 
637 /*[clinic input]
638 _io.StringIO.__init__
639     initial_value as value: object(c_default="NULL") = ''
640     newline as newline_obj: object(c_default="NULL") = '\n'
641 
642 Text I/O implementation using an in-memory buffer.
643 
644 The initial_value argument sets the value of object.  The newline
645 argument is like the one of TextIOWrapper's constructor.
646 [clinic start generated code]*/
647 
648 static int
_io_StringIO___init___impl(stringio * self,PyObject * value,PyObject * newline_obj)649 _io_StringIO___init___impl(stringio *self, PyObject *value,
650                            PyObject *newline_obj)
651 /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
652 {
653     const char *newline = "\n";
654     Py_ssize_t value_len;
655 
656     /* Parse the newline argument. We only want to allow unicode objects or
657        None. */
658     if (newline_obj == Py_None) {
659         newline = NULL;
660     }
661     else if (newline_obj) {
662         if (!PyUnicode_Check(newline_obj)) {
663             PyErr_Format(PyExc_TypeError,
664                          "newline must be str or None, not %.200s",
665                          Py_TYPE(newline_obj)->tp_name);
666             return -1;
667         }
668         newline = PyUnicode_AsUTF8(newline_obj);
669         if (newline == NULL)
670             return -1;
671     }
672 
673     if (newline && newline[0] != '\0'
674         && !(newline[0] == '\n' && newline[1] == '\0')
675         && !(newline[0] == '\r' && newline[1] == '\0')
676         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
677         PyErr_Format(PyExc_ValueError,
678                      "illegal newline value: %R", newline_obj);
679         return -1;
680     }
681     if (value && value != Py_None && !PyUnicode_Check(value)) {
682         PyErr_Format(PyExc_TypeError,
683                      "initial_value must be str or None, not %.200s",
684                      Py_TYPE(value)->tp_name);
685         return -1;
686     }
687 
688     self->ok = 0;
689 
690     _PyAccu_Destroy(&self->accu);
691     Py_CLEAR(self->readnl);
692     Py_CLEAR(self->writenl);
693     Py_CLEAR(self->decoder);
694 
695     assert((newline != NULL && newline_obj != Py_None) ||
696            (newline == NULL && newline_obj == Py_None));
697 
698     if (newline) {
699         self->readnl = PyUnicode_FromString(newline);
700         if (self->readnl == NULL)
701             return -1;
702     }
703     self->readuniversal = (newline == NULL || newline[0] == '\0');
704     self->readtranslate = (newline == NULL);
705     /* If newline == "", we don't translate anything.
706        If newline == "\n" or newline == None, we translate to "\n", which is
707        a no-op.
708        (for newline == None, TextIOWrapper translates to os.linesep, but it
709        is pointless for StringIO)
710     */
711     if (newline != NULL && newline[0] == '\r') {
712         self->writenl = self->readnl;
713         Py_INCREF(self->writenl);
714     }
715 
716     if (self->readuniversal) {
717         self->decoder = PyObject_CallFunction(
718             (PyObject *)&PyIncrementalNewlineDecoder_Type,
719             "Oi", Py_None, (int) self->readtranslate);
720         if (self->decoder == NULL)
721             return -1;
722     }
723 
724     /* Now everything is set up, resize buffer to size of initial value,
725        and copy it */
726     self->string_size = 0;
727     if (value && value != Py_None)
728         value_len = PyUnicode_GetLength(value);
729     else
730         value_len = 0;
731     if (value_len > 0) {
732         /* This is a heuristic, for newline translation might change
733            the string length. */
734         if (resize_buffer(self, 0) < 0)
735             return -1;
736         self->state = STATE_REALIZED;
737         self->pos = 0;
738         if (write_str(self, value) < 0)
739             return -1;
740     }
741     else {
742         /* Empty stringio object, we can start by accumulating */
743         if (resize_buffer(self, 0) < 0)
744             return -1;
745         if (_PyAccu_Init(&self->accu))
746             return -1;
747         self->state = STATE_ACCUMULATING;
748     }
749     self->pos = 0;
750 
751     self->closed = 0;
752     self->ok = 1;
753     return 0;
754 }
755 
756 /* Properties and pseudo-properties */
757 
758 /*[clinic input]
759 _io.StringIO.readable
760 
761 Returns True if the IO object can be read.
762 [clinic start generated code]*/
763 
static PyObject *
_io_StringIO_readable_impl(stringio *self)
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
{
    /* Always True for an initialized, open object; otherwise the guards
       return NULL with ValueError set. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
772 
773 /*[clinic input]
774 _io.StringIO.writable
775 
776 Returns True if the IO object can be written.
777 [clinic start generated code]*/
778 
static PyObject *
_io_StringIO_writable_impl(stringio *self)
/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
{
    /* Always True for an initialized, open object; otherwise the guards
       return NULL with ValueError set. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
787 
788 /*[clinic input]
789 _io.StringIO.seekable
790 
791 Returns True if the IO object can be seeked.
792 [clinic start generated code]*/
793 
static PyObject *
_io_StringIO_seekable_impl(stringio *self)
/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
{
    /* Always True for an initialized, open object; otherwise the guards
       return NULL with ValueError set. */
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_TRUE;
}
802 
803 /* Pickling support.
804 
805    The implementation of __getstate__ is similar to the one for BytesIO,
806    except that we also save the newline parameter. For __setstate__ and unlike
807    BytesIO, we call __init__ to restore the object's state. Doing so allows us
808    to avoid decoding the complex newline state while keeping the object
809    representation compact.
810 
   See comment in bytesio.c regarding why only pickle protocols 2 and onward
   are supported.
813 */
814 
815 static PyObject *
stringio_getstate(stringio * self,PyObject * Py_UNUSED (ignored))816 stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
817 {
818     PyObject *initvalue = _io_StringIO_getvalue_impl(self);
819     PyObject *dict;
820     PyObject *state;
821 
822     if (initvalue == NULL)
823         return NULL;
824     if (self->dict == NULL) {
825         Py_INCREF(Py_None);
826         dict = Py_None;
827     }
828     else {
829         dict = PyDict_Copy(self->dict);
830         if (dict == NULL) {
831             Py_DECREF(initvalue);
832             return NULL;
833         }
834     }
835 
836     state = Py_BuildValue("(OOnN)", initvalue,
837                           self->readnl ? self->readnl : Py_None,
838                           self->pos, dict);
839     Py_DECREF(initvalue);
840     return state;
841 }
842 
843 static PyObject *
stringio_setstate(stringio * self,PyObject * state)844 stringio_setstate(stringio *self, PyObject *state)
845 {
846     PyObject *initarg;
847     PyObject *position_obj;
848     PyObject *dict;
849     Py_ssize_t pos;
850 
851     assert(state != NULL);
852     CHECK_CLOSED(self);
853 
854     /* We allow the state tuple to be longer than 4, because we may need
855        someday to extend the object's state without breaking
856        backward-compatibility. */
857     if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
858         PyErr_Format(PyExc_TypeError,
859                      "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
860                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
861         return NULL;
862     }
863 
864     /* Initialize the object's state. */
865     initarg = PyTuple_GetSlice(state, 0, 2);
866     if (initarg == NULL)
867         return NULL;
868     if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
869         Py_DECREF(initarg);
870         return NULL;
871     }
872     Py_DECREF(initarg);
873 
874     /* Restore the buffer state. Even if __init__ did initialize the buffer,
875        we have to initialize it again since __init__ may translate the
876        newlines in the initial_value string. We clearly do not want that
877        because the string value in the state tuple has already been translated
878        once by __init__. So we do not take any chance and replace object's
879        buffer completely. */
880     {
881         PyObject *item;
882         Py_UCS4 *buf;
883         Py_ssize_t bufsize;
884 
885         item = PyTuple_GET_ITEM(state, 0);
886         buf = PyUnicode_AsUCS4Copy(item);
887         if (buf == NULL)
888             return NULL;
889         bufsize = PyUnicode_GET_LENGTH(item);
890 
891         if (resize_buffer(self, bufsize) < 0) {
892             PyMem_Free(buf);
893             return NULL;
894         }
895         memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
896         PyMem_Free(buf);
897         self->string_size = bufsize;
898     }
899 
900     /* Set carefully the position value. Alternatively, we could use the seek
901        method instead of modifying self->pos directly to better protect the
902        object internal state against erroneous (or malicious) inputs. */
903     position_obj = PyTuple_GET_ITEM(state, 2);
904     if (!PyLong_Check(position_obj)) {
905         PyErr_Format(PyExc_TypeError,
906                      "third item of state must be an integer, got %.200s",
907                      Py_TYPE(position_obj)->tp_name);
908         return NULL;
909     }
910     pos = PyLong_AsSsize_t(position_obj);
911     if (pos == -1 && PyErr_Occurred())
912         return NULL;
913     if (pos < 0) {
914         PyErr_SetString(PyExc_ValueError,
915                         "position value cannot be negative");
916         return NULL;
917     }
918     self->pos = pos;
919 
920     /* Set the dictionary of the instance variables. */
921     dict = PyTuple_GET_ITEM(state, 3);
922     if (dict != Py_None) {
923         if (!PyDict_Check(dict)) {
924             PyErr_Format(PyExc_TypeError,
925                          "fourth item of state should be a dict, got a %.200s",
926                          Py_TYPE(dict)->tp_name);
927             return NULL;
928         }
929         if (self->dict) {
930             /* Alternatively, we could replace the internal dictionary
931                completely. However, it seems more practical to just update it. */
932             if (PyDict_Update(self->dict, dict) < 0)
933                 return NULL;
934         }
935         else {
936             Py_INCREF(dict);
937             self->dict = dict;
938         }
939     }
940 
941     Py_RETURN_NONE;
942 }
943 
944 
/* Getter for the `closed` attribute: True once close() has been called.
   Unlike most accessors it works on a closed object; only an uninitialized
   object raises ValueError. */
static PyObject *
stringio_closed(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    return PyBool_FromLong(self->closed);
}
951 
/* Getter for the `line_buffering` pseudo-attribute: always False on an
   initialized, open object; otherwise the guards raise ValueError. */
static PyObject *
stringio_line_buffering(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    Py_RETURN_FALSE;
}
959 
960 static PyObject *
stringio_newlines(stringio * self,void * context)961 stringio_newlines(stringio *self, void *context)
962 {
963     CHECK_INITIALIZED(self);
964     CHECK_CLOSED(self);
965     if (self->decoder == NULL)
966         Py_RETURN_NONE;
967     return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
968 }
969 
970 #include "clinic/stringio.c.h"
971 
/* Method table of _io.StringIO.  The _IO_STRINGIO_*_METHODDEF entries are
   macros, presumably supplied by the Argument Clinic header included just
   above (clinic/stringio.c.h); each one is written without a trailing
   comma. */
static struct PyMethodDef stringio_methods[] = {
    _IO_STRINGIO_CLOSE_METHODDEF
    _IO_STRINGIO_GETVALUE_METHODDEF
    _IO_STRINGIO_READ_METHODDEF
    _IO_STRINGIO_READLINE_METHODDEF
    _IO_STRINGIO_TELL_METHODDEF
    _IO_STRINGIO_TRUNCATE_METHODDEF
    _IO_STRINGIO_SEEK_METHODDEF
    _IO_STRINGIO_WRITE_METHODDEF

    _IO_STRINGIO_SEEKABLE_METHODDEF
    _IO_STRINGIO_READABLE_METHODDEF
    _IO_STRINGIO_WRITABLE_METHODDEF

    /* Pickling support, implemented by hand rather than through clinic. */
    {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
    {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
    {NULL, NULL}        /* sentinel */
};
990 
/* Attributes of _io.StringIO.  Each entry has a getter only (the setter
   slot is NULL). */
static PyGetSetDef stringio_getset[] = {
    {"closed",         (getter)stringio_closed,         NULL, NULL},
    {"newlines",       (getter)stringio_newlines,       NULL, NULL},
    /*  (following comments straight off of the original Python wrapper:)
        XXX Cruft to support the TextIOWrapper API. This would only
        be meaningful if StringIO supported the buffer attribute.
        Hopefully, a better solution, than adding these pseudo-attributes,
        will be found.
    */
    {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
    {NULL}
};
1003 
1004 PyTypeObject PyStringIO_Type = {
1005     PyVarObject_HEAD_INIT(NULL, 0)
1006     "_io.StringIO",                            /*tp_name*/
1007     sizeof(stringio),                    /*tp_basicsize*/
1008     0,                                         /*tp_itemsize*/
1009     (destructor)stringio_dealloc,              /*tp_dealloc*/
1010     0,                                         /*tp_vectorcall_offset*/
1011     0,                                         /*tp_getattr*/
1012     0,                                         /*tp_setattr*/
1013     0,                                         /*tp_as_async*/
1014     0,                                         /*tp_repr*/
1015     0,                                         /*tp_as_number*/
1016     0,                                         /*tp_as_sequence*/
1017     0,                                         /*tp_as_mapping*/
1018     0,                                         /*tp_hash*/
1019     0,                                         /*tp_call*/
1020     0,                                         /*tp_str*/
1021     0,                                         /*tp_getattro*/
1022     0,                                         /*tp_setattro*/
1023     0,                                         /*tp_as_buffer*/
1024     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1025                        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
1026     _io_StringIO___init____doc__,              /*tp_doc*/
1027     (traverseproc)stringio_traverse,           /*tp_traverse*/
1028     (inquiry)stringio_clear,                   /*tp_clear*/
1029     0,                                         /*tp_richcompare*/
1030     offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
1031     0,                                         /*tp_iter*/
1032     (iternextfunc)stringio_iternext,           /*tp_iternext*/
1033     stringio_methods,                          /*tp_methods*/
1034     0,                                         /*tp_members*/
1035     stringio_getset,                           /*tp_getset*/
1036     0,                                         /*tp_base*/
1037     0,                                         /*tp_dict*/
1038     0,                                         /*tp_descr_get*/
1039     0,                                         /*tp_descr_set*/
1040     offsetof(stringio, dict),                  /*tp_dictoffset*/
1041     _io_StringIO___init__,                     /*tp_init*/
1042     0,                                         /*tp_alloc*/
1043     stringio_new,                              /*tp_new*/
1044 };
1045