1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_object.h"
12 #include "structmember.h"
13 #include "_iomodule.h"
14 
15 /*[clinic input]
16 module _io
17 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21 
/* Static identifiers for the attribute and method names this file uses.
   Each _Py_IDENTIFIER(x) line provides the PyId_x object whose address is
   handed to helpers such as _PyUnicode_FromId() and
   _PyObject_CallMethodId() further down. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
42 
43 /* TextIOBase */
44 
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
52 
53 static PyObject *
_unsupported(const char * message)54 _unsupported(const char *message)
55 {
56     _PyIO_State *state = IO_STATE();
57     if (state != NULL)
58         PyErr_SetString(state->unsupported_operation, message);
59     return NULL;
60 }
61 
62 PyDoc_STRVAR(textiobase_detach_doc,
63     "Separate the underlying buffer from the TextIOBase and return it.\n"
64     "\n"
65     "After the underlying buffer has been detached, the TextIO is in an\n"
66     "unusable state.\n"
67     );
68 
69 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))70 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
71 {
72     return _unsupported("detach");
73 }
74 
75 PyDoc_STRVAR(textiobase_read_doc,
76     "Read at most n characters from stream.\n"
77     "\n"
78     "Read from underlying buffer until we have n characters or we hit EOF.\n"
79     "If n is negative or omitted, read until EOF.\n"
80     );
81 
82 static PyObject *
textiobase_read(PyObject * self,PyObject * args)83 textiobase_read(PyObject *self, PyObject *args)
84 {
85     return _unsupported("read");
86 }
87 
88 PyDoc_STRVAR(textiobase_readline_doc,
89     "Read until newline or EOF.\n"
90     "\n"
91     "Returns an empty string if EOF is hit immediately.\n"
92     );
93 
94 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)95 textiobase_readline(PyObject *self, PyObject *args)
96 {
97     return _unsupported("readline");
98 }
99 
100 PyDoc_STRVAR(textiobase_write_doc,
101     "Write string to stream.\n"
102     "Returns the number of characters written (which is always equal to\n"
103     "the length of the string).\n"
104     );
105 
106 static PyObject *
textiobase_write(PyObject * self,PyObject * args)107 textiobase_write(PyObject *self, PyObject *args)
108 {
109     return _unsupported("write");
110 }
111 
112 PyDoc_STRVAR(textiobase_encoding_doc,
113     "Encoding of the text stream.\n"
114     "\n"
115     "Subclasses should override.\n"
116     );
117 
118 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)119 textiobase_encoding_get(PyObject *self, void *context)
120 {
121     Py_RETURN_NONE;
122 }
123 
124 PyDoc_STRVAR(textiobase_newlines_doc,
125     "Line endings translated so far.\n"
126     "\n"
127     "Only line endings translated during reading are considered.\n"
128     "\n"
129     "Subclasses should override.\n"
130     );
131 
132 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)133 textiobase_newlines_get(PyObject *self, void *context)
134 {
135     Py_RETURN_NONE;
136 }
137 
138 PyDoc_STRVAR(textiobase_errors_doc,
139     "The error setting of the decoder or encoder.\n"
140     "\n"
141     "Subclasses should override.\n"
142     );
143 
144 static PyObject *
textiobase_errors_get(PyObject * self,void * context)145 textiobase_errors_get(PyObject *self, void *context)
146 {
147     Py_RETURN_NONE;
148 }
149 
150 
151 static PyMethodDef textiobase_methods[] = {
152     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
153     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
156     {NULL, NULL}
157 };
158 
159 static PyGetSetDef textiobase_getset[] = {
160     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
163     {NULL}
164 };
165 
166 PyTypeObject PyTextIOBase_Type = {
167     PyVarObject_HEAD_INIT(NULL, 0)
168     "_io._TextIOBase",          /*tp_name*/
169     0,                          /*tp_basicsize*/
170     0,                          /*tp_itemsize*/
171     0,                          /*tp_dealloc*/
172     0,                          /*tp_vectorcall_offset*/
173     0,                          /*tp_getattr*/
174     0,                          /*tp_setattr*/
175     0,                          /*tp_as_async*/
176     0,                          /*tp_repr*/
177     0,                          /*tp_as_number*/
178     0,                          /*tp_as_sequence*/
179     0,                          /*tp_as_mapping*/
180     0,                          /*tp_hash */
181     0,                          /*tp_call*/
182     0,                          /*tp_str*/
183     0,                          /*tp_getattro*/
184     0,                          /*tp_setattro*/
185     0,                          /*tp_as_buffer*/
186     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
187     textiobase_doc,             /* tp_doc */
188     0,                          /* tp_traverse */
189     0,                          /* tp_clear */
190     0,                          /* tp_richcompare */
191     0,                          /* tp_weaklistoffset */
192     0,                          /* tp_iter */
193     0,                          /* tp_iternext */
194     textiobase_methods,         /* tp_methods */
195     0,                          /* tp_members */
196     textiobase_getset,          /* tp_getset */
197     &PyIOBase_Type,             /* tp_base */
198     0,                          /* tp_dict */
199     0,                          /* tp_descr_get */
200     0,                          /* tp_descr_set */
201     0,                          /* tp_dictoffset */
202     0,                          /* tp_init */
203     0,                          /* tp_alloc */
204     0,                          /* tp_new */
205     0,                          /* tp_free */
206     0,                          /* tp_is_gc */
207     0,                          /* tp_bases */
208     0,                          /* tp_mro */
209     0,                          /* tp_cache */
210     0,                          /* tp_subclasses */
211     0,                          /* tp_weaklist */
212     0,                          /* tp_del */
213     0,                          /* tp_version_tag */
214     0,                          /* tp_finalize */
215 };
216 
217 
218 /* IncrementalNewlineDecoder */
219 
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or None; NULL until
                                   __init__ has run */
    PyObject *errors;           /* error handler name given to __init__
                                   ("strict" when omitted) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the last
                                   non-final decode() call */
    unsigned int translate: 1;  /* non-zero: decode() rewrites \r\n and \r
                                   as \n */
    unsigned int seennl: 3;     /* bit mask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
228 
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231     decoder: object
232     translate: int
233     errors: object(c_default="NULL") = "strict"
234 
235 Codec used when reading a file in universal newlines mode.
236 
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered.  When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244 
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247                                             PyObject *decoder, int translate,
248                                             PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251     self->decoder = decoder;
252     Py_INCREF(decoder);
253 
254     if (errors == NULL) {
255         self->errors = _PyUnicode_FromId(&PyId_strict);
256         if (self->errors == NULL)
257             return -1;
258     }
259     else {
260         self->errors = errors;
261     }
262     Py_INCREF(self->errors);
263 
264     self->translate = translate ? 1 : 0;
265     self->seennl = 0;
266     self->pendingcr = 0;
267 
268     return 0;
269 }
270 
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274     Py_CLEAR(self->decoder);
275     Py_CLEAR(self->errors);
276     Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278 
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282     if (decoded == NULL)
283         return -1;
284     if (!PyUnicode_Check(decoded)) {
285         PyErr_Format(PyExc_TypeError,
286                      "decoder should return a string result, not '%.200s'",
287                      Py_TYPE(decoded)->tp_name);
288         Py_DECREF(decoded);
289         return -1;
290     }
291     if (PyUnicode_READY(decoded) < 0) {
292         Py_DECREF(decoded);
293         return -1;
294     }
295     return 0;
296 }
297 
/* Bit flags stored in nldecoder_object.seennl, recording which kinds of
   line ending have been encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302 
/* Decode `input` and apply newline bookkeeping/translation.

   myself - an nldecoder_object (cast without a type check).
   input  - passed to self->decoder.decode(); when the wrapped decoder is
            None, `input` itself is used as the decoded text.
   final  - non-zero when no more input will follow.

   Steps: decode; prepend a '\r' held back by an earlier call; when not
   final, hold back a trailing '\r' (so that "\r\n" is never split across
   two results); then record the newline kinds seen in self->seennl and,
   if self->translate is set, rewrite "\r\n" and "\r" as "\n".

   Returns a new reference, or NULL with an exception set.  Raises
   ValueError when __init__ has not been called (self->decoder is NULL). */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (a possible \r from a previous pass is added below) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same max char as `output`, so `modified` has the same kind and
           the raw memcpy below is valid. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For wider kinds a 0x0A byte may be part of another code
                   point, so the hit has to be confirmed per character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no bounds check here; the loop
                           presumably relies on a terminating NUL after the
                           last character - confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            /* These two shadow the outer `kind` / `in_str` with the same
               values. */
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text, so `len` characters
               are enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character sat at index len, i.e.
                   past the end of the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503 
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506     input: object
507     final: bool(accept={int}) = False
508 [clinic start generated code]*/
509 
510 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)511 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512                                           PyObject *input, int final)
513 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
514 {
515     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516 }
517 
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521 
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526     PyObject *buffer;
527     unsigned long long flag;
528 
529     if (self->decoder != Py_None) {
530         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531            _PyIO_str_getstate, NULL);
532         if (state == NULL)
533             return NULL;
534         if (!PyTuple_Check(state)) {
535             PyErr_SetString(PyExc_TypeError,
536                             "illegal decoder state");
537             Py_DECREF(state);
538             return NULL;
539         }
540         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541                               &buffer, &flag))
542         {
543             Py_DECREF(state);
544             return NULL;
545         }
546         Py_INCREF(buffer);
547         Py_DECREF(state);
548     }
549     else {
550         buffer = PyBytes_FromString("");
551         flag = 0;
552     }
553     flag <<= 1;
554     if (self->pendingcr)
555         flag |= 1;
556     return Py_BuildValue("NK", buffer, flag);
557 }
558 
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561     state: object
562     /
563 [clinic start generated code]*/
564 
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567                                        PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570     PyObject *buffer;
571     unsigned long long flag;
572 
573     if (!PyTuple_Check(state)) {
574         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575         return NULL;
576     }
577     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578                           &buffer, &flag))
579     {
580         return NULL;
581     }
582 
583     self->pendingcr = (int) (flag & 1);
584     flag >>= 1;
585 
586     if (self->decoder != Py_None)
587         return _PyObject_CallMethodId(self->decoder,
588                                       &PyId_setstate, "((OK))", buffer, flag);
589     else
590         Py_RETURN_NONE;
591 }
592 
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596 
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601     self->seennl = 0;
602     self->pendingcr = 0;
603     if (self->decoder != Py_None)
604         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605     else
606         Py_RETURN_NONE;
607 }
608 
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612     switch (self->seennl) {
613     case SEEN_CR:
614         return PyUnicode_FromString("\r");
615     case SEEN_LF:
616         return PyUnicode_FromString("\n");
617     case SEEN_CRLF:
618         return PyUnicode_FromString("\r\n");
619     case SEEN_CR | SEEN_LF:
620         return Py_BuildValue("ss", "\r", "\n");
621     case SEEN_CR | SEEN_CRLF:
622         return Py_BuildValue("ss", "\r", "\r\n");
623     case SEEN_LF | SEEN_CRLF:
624         return Py_BuildValue("ss", "\n", "\r\n");
625     case SEEN_CR | SEEN_LF | SEEN_CRLF:
626         return Py_BuildValue("sss", "\r", "\n", "\r\n");
627     default:
628         Py_RETURN_NONE;
629    }
630 
631 }
632 
633 /* TextIOWrapper */
634 
/* Signature of the specialized encoders defined below.  The helpers are
   declared with a `textio *` first parameter and are cast to this type
   when stored in the encodefuncs table. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637 
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    /* Incremental decoder; wrapped in an IncrementalNewlineDecoder when
       readuniversal is set (see _textiowrapper_set_decoder). */
    PyObject *decoder;
    /* str built from the `newline` argument, or NULL when that argument
       was NULL (see set_newline). */
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    /* The next three are derived from `newline` in set_newline():
       readuniversal:  newline is NULL or ""
       readtranslate:  newline is NULL
       writetranslate: newline is NULL or non-empty */
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696 
697 static void
698 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699 
700 /* A couple of specialized cases in order to bypass the slow incremental
701    encoding methods for the most popular encodings. */
702 
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708 
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712     return _PyUnicode_EncodeUTF16(text,
713                                   PyUnicode_AsUTF8(self->errors), 1);
714 }
715 
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719     return _PyUnicode_EncodeUTF16(text,
720                                   PyUnicode_AsUTF8(self->errors), -1);
721 }
722 
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726     if (!self->encoding_start_of_stream) {
727         /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729         return utf16be_encode(self, text);
730 #else
731         return utf16le_encode(self, text);
732 #endif
733     }
734     return _PyUnicode_EncodeUTF16(text,
735                                   PyUnicode_AsUTF8(self->errors), 0);
736 }
737 
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741     return _PyUnicode_EncodeUTF32(text,
742                                   PyUnicode_AsUTF8(self->errors), 1);
743 }
744 
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748     return _PyUnicode_EncodeUTF32(text,
749                                   PyUnicode_AsUTF8(self->errors), -1);
750 }
751 
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755     if (!self->encoding_start_of_stream) {
756         /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758         return utf32be_encode(self, text);
759 #else
760         return utf32le_encode(self, text);
761 #endif
762     }
763     return _PyUnicode_EncodeUTF32(text,
764                                   PyUnicode_AsUTF8(self->errors), 0);
765 }
766 
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772 
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778 
779 // Return true when encoding can be skipped when text is ascii.
780 static inline int
is_asciicompat_encoding(encodefunc_t f)781 is_asciicompat_encoding(encodefunc_t f)
782 {
783     return f == (encodefunc_t) ascii_encode
784         || f == (encodefunc_t) latin1_encode
785         || f == (encodefunc_t) utf8_encode;
786 }
787 
788 /* Map normalized encoding names onto the specialized encoding funcs */
789 
/* One row of the encodefuncs table: a codec name and the specialized
   encoder to use for it. */
typedef struct {
    const char *name;          /* compared against the codec's `name`
                                  attribute */
    encodefunc_t encodefunc;   /* fast-path encoder for that codec */
} encodefuncentry;
794 
795 static const encodefuncentry encodefuncs[] = {
796     {"ascii",       (encodefunc_t) ascii_encode},
797     {"iso8859-1",   (encodefunc_t) latin1_encode},
798     {"utf-8",       (encodefunc_t) utf8_encode},
799     {"utf-16-be",   (encodefunc_t) utf16be_encode},
800     {"utf-16-le",   (encodefunc_t) utf16le_encode},
801     {"utf-16",      (encodefunc_t) utf16_encode},
802     {"utf-32-be",   (encodefunc_t) utf32be_encode},
803     {"utf-32-le",   (encodefunc_t) utf32le_encode},
804     {"utf-32",      (encodefunc_t) utf32_encode},
805     {NULL, NULL}
806 };
807 
808 static int
validate_newline(const char * newline)809 validate_newline(const char *newline)
810 {
811     if (newline && newline[0] != '\0'
812         && !(newline[0] == '\n' && newline[1] == '\0')
813         && !(newline[0] == '\r' && newline[1] == '\0')
814         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815         PyErr_Format(PyExc_ValueError,
816                      "illegal newline value: %s", newline);
817         return -1;
818     }
819     return 0;
820 }
821 
822 static int
set_newline(textio * self,const char * newline)823 set_newline(textio *self, const char *newline)
824 {
825     PyObject *old = self->readnl;
826     if (newline == NULL) {
827         self->readnl = NULL;
828     }
829     else {
830         self->readnl = PyUnicode_FromString(newline);
831         if (self->readnl == NULL) {
832             self->readnl = old;
833             return -1;
834         }
835     }
836     self->readuniversal = (newline == NULL || newline[0] == '\0');
837     self->readtranslate = (newline == NULL);
838     self->writetranslate = (newline == NULL || newline[0] != '\0');
839     if (!self->readuniversal && self->readnl != NULL) {
840         // validate_newline() accepts only ASCII newlines.
841         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843         if (strcmp(self->writenl, "\n") == 0) {
844             self->writenl = NULL;
845         }
846     }
847     else {
848 #ifdef MS_WINDOWS
849         self->writenl = "\r\n";
850 #else
851         self->writenl = NULL;
852 #endif
853     }
854     Py_XDECREF(old);
855     return 0;
856 }
857 
858 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)859 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860                            const char *errors)
861 {
862     PyObject *res;
863     int r;
864 
865     res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
866     if (res == NULL)
867         return -1;
868 
869     r = PyObject_IsTrue(res);
870     Py_DECREF(res);
871     if (r == -1)
872         return -1;
873 
874     if (r != 1)
875         return 0;
876 
877     Py_CLEAR(self->decoder);
878     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879     if (self->decoder == NULL)
880         return -1;
881 
882     if (self->readuniversal) {
883         PyObject *incrementalDecoder = PyObject_CallFunction(
884             (PyObject *)&PyIncrementalNewlineDecoder_Type,
885             "Oi", self->decoder, (int)self->readtranslate);
886         if (incrementalDecoder == NULL)
887             return -1;
888         Py_CLEAR(self->decoder);
889         self->decoder = incrementalDecoder;
890     }
891 
892     return 0;
893 }
894 
895 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)896 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897 {
898     PyObject *chars;
899 
900     if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902     else
903         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904                                            eof ? Py_True : Py_False, NULL);
905 
906     if (check_decoded(chars) < 0)
907         // check_decoded already decreases refcount
908         return NULL;
909 
910     return chars;
911 }
912 
913 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)914 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915                            const char *errors)
916 {
917     PyObject *res;
918     int r;
919 
920     res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
921     if (res == NULL)
922         return -1;
923 
924     r = PyObject_IsTrue(res);
925     Py_DECREF(res);
926     if (r == -1)
927         return -1;
928 
929     if (r != 1)
930         return 0;
931 
932     Py_CLEAR(self->encoder);
933     self->encodefunc = NULL;
934     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935     if (self->encoder == NULL)
936         return -1;
937 
938     /* Get the normalized named of the codec */
939     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940         return -1;
941     }
942     if (res != NULL && PyUnicode_Check(res)) {
943         const encodefuncentry *e = encodefuncs;
944         while (e->name != NULL) {
945             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946                 self->encodefunc = e->encodefunc;
947                 break;
948             }
949             e++;
950         }
951     }
952     Py_XDECREF(res);
953 
954     return 0;
955 }
956 
957 static int
_textiowrapper_fix_encoder_state(textio * self)958 _textiowrapper_fix_encoder_state(textio *self)
959 {
960     if (!self->seekable || !self->encoder) {
961         return 0;
962     }
963 
964     self->encoding_start_of_stream = 1;
965 
966     PyObject *cookieObj = PyObject_CallMethodObjArgs(
967         self->buffer, _PyIO_str_tell, NULL);
968     if (cookieObj == NULL) {
969         return -1;
970     }
971 
972     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973     Py_DECREF(cookieObj);
974     if (cmp < 0) {
975         return -1;
976     }
977 
978     if (cmp == 0) {
979         self->encoding_start_of_stream = 0;
980         PyObject *res = PyObject_CallMethodObjArgs(
981             self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
982         if (res == NULL) {
983             return -1;
984         }
985         Py_DECREF(res);
986     }
987 
988     return 0;
989 }
990 
991 /*[clinic input]
992 _io.TextIOWrapper.__init__
993     buffer: object
994     encoding: str(accept={str, NoneType}) = None
995     errors: object = None
996     newline: str(accept={str, NoneType}) = None
997     line_buffering: bool(accept={int}) = False
998     write_through: bool(accept={int}) = False
999 
1000 Character and line based layer over a BufferedIOBase object, buffer.
1001 
1002 encoding gives the name of the encoding that the stream will be
1003 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1004 
1005 errors determines the strictness of encoding and decoding (see
1006 help(codecs.Codec) or the documentation for codecs.register) and
1007 defaults to "strict".
1008 
1009 newline controls how line endings are handled. It can be None, '',
1010 '\n', '\r', and '\r\n'.  It works as follows:
1011 
1012 * On input, if newline is None, universal newlines mode is
1013   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1014   these are translated into '\n' before being returned to the
1015   caller. If it is '', universal newline mode is enabled, but line
1016   endings are returned to the caller untranslated. If it has any of
1017   the other legal values, input lines are only terminated by the given
1018   string, and the line ending is returned to the caller untranslated.
1019 
1020 * On output, if newline is None, any '\n' characters written are
1021   translated to the system default line separator, os.linesep. If
1022   newline is '' or '\n', no translation takes place. If newline is any
1023   of the other legal values, any '\n' characters written are translated
1024   to the given string.
1025 
1026 If line_buffering is True, a call to flush is implied when a call to
1027 write contains a newline character.
1028 [clinic start generated code]*/
1029 
1030 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1031 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1032                                 const char *encoding, PyObject *errors,
1033                                 const char *newline, int line_buffering,
1034                                 int write_through)
1035 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1036 {
1037     PyObject *raw, *codec_info = NULL;
1038     _PyIO_State *state = NULL;
1039     PyObject *res;
1040     int r;
1041 
1042     self->ok = 0;
1043     self->detached = 0;
1044 
1045     if (errors == Py_None) {
1046         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1047         if (errors == NULL) {
1048             return -1;
1049         }
1050     }
1051     else if (!PyUnicode_Check(errors)) {
1052         // Check 'errors' argument here because Argument Clinic doesn't support
1053         // 'str(accept={str, NoneType})' converter.
1054         PyErr_Format(
1055             PyExc_TypeError,
1056             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1057             errors->ob_type->tp_name);
1058         return -1;
1059     }
1060 
1061     if (validate_newline(newline) < 0) {
1062         return -1;
1063     }
1064 
1065     Py_CLEAR(self->buffer);
1066     Py_CLEAR(self->encoding);
1067     Py_CLEAR(self->encoder);
1068     Py_CLEAR(self->decoder);
1069     Py_CLEAR(self->readnl);
1070     Py_CLEAR(self->decoded_chars);
1071     Py_CLEAR(self->pending_bytes);
1072     Py_CLEAR(self->snapshot);
1073     Py_CLEAR(self->errors);
1074     Py_CLEAR(self->raw);
1075     self->decoded_chars_used = 0;
1076     self->pending_bytes_count = 0;
1077     self->encodefunc = NULL;
1078     self->b2cratio = 0.0;
1079 
1080     if (encoding == NULL) {
1081         /* Try os.device_encoding(fileno) */
1082         PyObject *fileno;
1083         state = IO_STATE();
1084         if (state == NULL)
1085             goto error;
1086         fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1087         /* Ignore only AttributeError and UnsupportedOperation */
1088         if (fileno == NULL) {
1089             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1090                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1091                 PyErr_Clear();
1092             }
1093             else {
1094                 goto error;
1095             }
1096         }
1097         else {
1098             int fd = _PyLong_AsInt(fileno);
1099             Py_DECREF(fileno);
1100             if (fd == -1 && PyErr_Occurred()) {
1101                 goto error;
1102             }
1103 
1104             self->encoding = _Py_device_encoding(fd);
1105             if (self->encoding == NULL)
1106                 goto error;
1107             else if (!PyUnicode_Check(self->encoding))
1108                 Py_CLEAR(self->encoding);
1109         }
1110     }
1111     if (encoding == NULL && self->encoding == NULL) {
1112         PyObject *locale_module = _PyIO_get_locale_module(state);
1113         if (locale_module == NULL)
1114             goto catch_ImportError;
1115         self->encoding = _PyObject_CallMethodIdObjArgs(
1116             locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1117         Py_DECREF(locale_module);
1118         if (self->encoding == NULL) {
1119           catch_ImportError:
1120             /*
1121              Importing locale can raise an ImportError because of
1122              _functools, and locale.getpreferredencoding can raise an
1123              ImportError if _locale is not available.  These will happen
1124              during module building.
1125             */
1126             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1127                 PyErr_Clear();
1128                 self->encoding = PyUnicode_FromString("ascii");
1129             }
1130             else
1131                 goto error;
1132         }
1133         else if (!PyUnicode_Check(self->encoding))
1134             Py_CLEAR(self->encoding);
1135     }
1136     if (self->encoding != NULL) {
1137         encoding = PyUnicode_AsUTF8(self->encoding);
1138         if (encoding == NULL)
1139             goto error;
1140     }
1141     else if (encoding != NULL) {
1142         self->encoding = PyUnicode_FromString(encoding);
1143         if (self->encoding == NULL)
1144             goto error;
1145     }
1146     else {
1147         PyErr_SetString(PyExc_OSError,
1148                         "could not determine default encoding");
1149         goto error;
1150     }
1151 
1152     /* Check we have been asked for a real text encoding */
1153     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1154     if (codec_info == NULL) {
1155         Py_CLEAR(self->encoding);
1156         goto error;
1157     }
1158 
1159     /* XXX: Failures beyond this point have the potential to leak elements
1160      * of the partially constructed object (like self->encoding)
1161      */
1162 
1163     Py_INCREF(errors);
1164     self->errors = errors;
1165     self->chunk_size = 8192;
1166     self->line_buffering = line_buffering;
1167     self->write_through = write_through;
1168     if (set_newline(self, newline) < 0) {
1169         goto error;
1170     }
1171 
1172     self->buffer = buffer;
1173     Py_INCREF(buffer);
1174 
1175     /* Build the decoder object */
1176     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1177         goto error;
1178 
1179     /* Build the encoder object */
1180     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1181         goto error;
1182 
1183     /* Finished sorting out the codec details */
1184     Py_CLEAR(codec_info);
1185 
1186     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1187         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1188         Py_TYPE(buffer) == &PyBufferedRandom_Type)
1189     {
1190         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1191             goto error;
1192         /* Cache the raw FileIO object to speed up 'closed' checks */
1193         if (raw != NULL) {
1194             if (Py_TYPE(raw) == &PyFileIO_Type)
1195                 self->raw = raw;
1196             else
1197                 Py_DECREF(raw);
1198         }
1199     }
1200 
1201     res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1202     if (res == NULL)
1203         goto error;
1204     r = PyObject_IsTrue(res);
1205     Py_DECREF(res);
1206     if (r < 0)
1207         goto error;
1208     self->seekable = self->telling = r;
1209 
1210     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1211     if (r < 0) {
1212         goto error;
1213     }
1214     Py_XDECREF(res);
1215     self->has_read1 = r;
1216 
1217     self->encoding_start_of_stream = 0;
1218     if (_textiowrapper_fix_encoder_state(self) < 0) {
1219         goto error;
1220     }
1221 
1222     self->ok = 1;
1223     return 0;
1224 
1225   error:
1226     Py_XDECREF(codec_info);
1227     return -1;
1228 }
1229 
1230 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1231  * -1 on error.
1232  */
1233 static int
convert_optional_bool(PyObject * obj,int default_value)1234 convert_optional_bool(PyObject *obj, int default_value)
1235 {
1236     long v;
1237     if (obj == Py_None) {
1238         v = default_value;
1239     }
1240     else {
1241         v = PyLong_AsLong(obj);
1242         if (v == -1 && PyErr_Occurred())
1243             return -1;
1244     }
1245     return v != 0;
1246 }
1247 
1248 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1249 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1250                               PyObject *errors, int newline_changed)
1251 {
1252     /* Use existing settings where new settings are not specified */
1253     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1254         return 0;  // no change
1255     }
1256 
1257     if (encoding == Py_None) {
1258         encoding = self->encoding;
1259         if (errors == Py_None) {
1260             errors = self->errors;
1261         }
1262     }
1263     else if (errors == Py_None) {
1264         errors = _PyUnicode_FromId(&PyId_strict);
1265         if (errors == NULL) {
1266             return -1;
1267         }
1268     }
1269 
1270     const char *c_errors = PyUnicode_AsUTF8(errors);
1271     if (c_errors == NULL) {
1272         return -1;
1273     }
1274 
1275     // Create new encoder & decoder
1276     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1277         PyUnicode_AsUTF8(encoding), "codecs.open()");
1278     if (codec_info == NULL) {
1279         return -1;
1280     }
1281     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1282             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1283         Py_DECREF(codec_info);
1284         return -1;
1285     }
1286     Py_DECREF(codec_info);
1287 
1288     Py_INCREF(encoding);
1289     Py_INCREF(errors);
1290     Py_SETREF(self->encoding, encoding);
1291     Py_SETREF(self->errors, errors);
1292 
1293     return _textiowrapper_fix_encoder_state(self);
1294 }
1295 
1296 /*[clinic input]
1297 _io.TextIOWrapper.reconfigure
1298     *
1299     encoding: object = None
1300     errors: object = None
1301     newline as newline_obj: object(c_default="NULL") = None
1302     line_buffering as line_buffering_obj: object = None
1303     write_through as write_through_obj: object = None
1304 
1305 Reconfigure the text stream with new parameters.
1306 
1307 This also does an implicit stream flush.
1308 
1309 [clinic start generated code]*/
1310 
1311 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1312 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1313                                    PyObject *errors, PyObject *newline_obj,
1314                                    PyObject *line_buffering_obj,
1315                                    PyObject *write_through_obj)
1316 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1317 {
1318     int line_buffering;
1319     int write_through;
1320     const char *newline = NULL;
1321 
1322     /* Check if something is in the read buffer */
1323     if (self->decoded_chars != NULL) {
1324         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1325             _unsupported("It is not possible to set the encoding or newline "
1326                          "of stream after the first read");
1327             return NULL;
1328         }
1329     }
1330 
1331     if (newline_obj != NULL && newline_obj != Py_None) {
1332         newline = PyUnicode_AsUTF8(newline_obj);
1333         if (newline == NULL || validate_newline(newline) < 0) {
1334             return NULL;
1335         }
1336     }
1337 
1338     line_buffering = convert_optional_bool(line_buffering_obj,
1339                                            self->line_buffering);
1340     write_through = convert_optional_bool(write_through_obj,
1341                                           self->write_through);
1342     if (line_buffering < 0 || write_through < 0) {
1343         return NULL;
1344     }
1345 
1346     PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1347     if (res == NULL) {
1348         return NULL;
1349     }
1350     Py_DECREF(res);
1351     self->b2cratio = 0;
1352 
1353     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1354         return NULL;
1355     }
1356 
1357     if (textiowrapper_change_encoding(
1358             self, encoding, errors, newline_obj != NULL) < 0) {
1359         return NULL;
1360     }
1361 
1362     self->line_buffering = line_buffering;
1363     self->write_through = write_through;
1364     Py_RETURN_NONE;
1365 }
1366 
/* Release every object reference held by the wrapper.
 *
 * Marks the object as not initialized (self->ok = 0) and clears each
 * reference with Py_CLEAR, which sets the field to NULL before dropping the
 * reference.  Also called from textiowrapper_dealloc().  Always returns 0.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1385 
/* Deallocator for TextIOWrapper objects.
 *
 * Runs the IOBase finalization step first; if that reports failure the
 * object is left alone.  Otherwise the object is untracked from the GC,
 * weak references are cleared, all held references are released and the
 * memory is handed back through tp_free.
 */
static void
textiowrapper_dealloc(textio *self)
{
    /* Flag that finalization is in progress before running it. */
    self->finalizing = 1;
    /* NOTE(review): a negative result presumably means the object was
       resurrected during finalization -- confirm against
       _PyIOBase_finalize(). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1399 
/* GC traversal: visit every object reference held by the wrapper.
 *
 * The visited fields are the same ones textiowrapper_clear() releases.
 * Py_VISIT returns from this function early if `visit` reports a non-zero
 * result; otherwise 0 is returned.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1417 
/* Forward declaration: CHECK_CLOSED below refers to this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* CHECK_CLOSED(self): make the enclosing function return NULL with a
 * ValueError set when the stream is closed.  Only usable in functions that
 * return a pointer.
 *
 * This macro takes some shortcuts to make the common case faster:
 *   - for an exact TextIOWrapper with a cached raw FileIO (self->raw), the
 *     closed state is read directly through _PyFileIO_closed();
 *   - for an exact TextIOWrapper without a cached raw object, the `closed`
 *     getter is called and its result tested for truth;
 *   - for any other type (subclasses), the check is delegated to
 *     _PyIOBase_check_closed().
 * Errors raised while determining the closed state also cause a NULL return.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1447 
/* CHECK_INITIALIZED(self): make the enclosing function return NULL with a
 * ValueError set when the object has not been successfully initialized
 * (self->ok <= 0).  Only usable in functions that return a pointer.
 *
 * Note that this expands to a bare `if` statement rather than a
 * do { } while (0) block, so it must not be used as the unbraced body of
 * another `if` that has an `else`.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1454 
/* CHECK_ATTACHED(self): like CHECK_INITIALIZED, and additionally make the
 * enclosing function return NULL with a ValueError set when the underlying
 * buffer has been detached (self->detached is set).  Only usable in
 * functions that return a pointer.
 *
 * Expands to two consecutive `if` statements, so it must only be used as a
 * statement of its own inside a braced block.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1462 
/* CHECK_ATTACHED_INT(self): same checks and error messages as
 * CHECK_ATTACHED, for functions that return an int: the enclosing function
 * returns -1 instead of NULL.
 *
 * Expands to a bare `if ... else if ...` statement, so it must only be used
 * as a statement of its own inside a braced block.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1473 
1474 
1475 /*[clinic input]
1476 _io.TextIOWrapper.detach
1477 [clinic start generated code]*/
1478 
1479 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1480 _io_TextIOWrapper_detach_impl(textio *self)
1481 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1482 {
1483     PyObject *buffer, *res;
1484     CHECK_ATTACHED(self);
1485     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1486     if (res == NULL)
1487         return NULL;
1488     Py_DECREF(res);
1489     buffer = self->buffer;
1490     self->buffer = NULL;
1491     self->detached = 1;
1492     return buffer;
1493 }
1494 
/* Flush the internal write buffer. This doesn't explicitly flush the
   underlying buffered object, though.

   self->pending_bytes holds the data queued by write() in one of three
   forms: a bytes object, an ASCII-only str, or a list whose items are bytes
   objects or ASCII-only str objects.  Whatever the form, it is turned into a
   single bytes object that is passed to buffer.write().

   Returns 0 on success (including when nothing is pending) and -1 on
   failure. */
static int
_textiowrapper_writeflush(textio *self)
{
    if (self->pending_bytes == NULL)
        return 0;

    PyObject *pending = self->pending_bytes;
    PyObject *b;

    if (PyBytes_Check(pending)) {
        // Already bytes: write it as is, holding our own reference.
        b = pending;
        Py_INCREF(b);
    }
    else if (PyUnicode_Check(pending)) {
        // A single ASCII str: its character data can be copied verbatim.
        assert(PyUnicode_IS_ASCII(pending));
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
        b = PyBytes_FromStringAndSize(
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
        if (b == NULL) {
            return -1;
        }
    }
    else {
        // A list of chunks: allocate the result once and copy each chunk
        // into it back to back.
        assert(PyList_Check(pending));
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
        if (b == NULL) {
            return -1;
        }

        char *buf = PyBytes_AsString(b);
        Py_ssize_t pos = 0;

        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
            PyObject *obj = PyList_GET_ITEM(pending, i);
            char *src;
            Py_ssize_t len;
            if (PyUnicode_Check(obj)) {
                assert(PyUnicode_IS_ASCII(obj));
                src = PyUnicode_DATA(obj);
                len = PyUnicode_GET_LENGTH(obj);
            }
            else {
                assert(PyBytes_Check(obj));
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
                    Py_DECREF(b);
                    return -1;
                }
            }
            memcpy(buf + pos, src, len);
            pos += len;
        }
        assert(pos == self->pending_bytes_count);
    }

    // Reset the pending state before calling buffer.write(), so the data
    // being written is no longer queued while that call runs.
    self->pending_bytes_count = 0;
    self->pending_bytes = NULL;
    Py_DECREF(pending);

    // Repeat the write for as long as it fails and _PyIO_trap_eintr()
    // reports that the failure was trapped.
    PyObject *ret;
    do {
        ret = PyObject_CallMethodObjArgs(self->buffer,
                                         _PyIO_str_write, b, NULL);
    } while (ret == NULL && _PyIO_trap_eintr());
    Py_DECREF(b);
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
    // when an error occurred.
    if (ret == NULL)
        return -1;
    Py_DECREF(ret);
    return 0;
}
1568 
1569 /*[clinic input]
1570 _io.TextIOWrapper.write
1571     text: unicode
1572     /
1573 [clinic start generated code]*/
1574 
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and queue it in the internal write buffer
     * (self->pending_bytes), flushing when the buffer reaches chunk_size,
     * when write_through is set, or when line buffering requires it.
     *
     * Returns the length of `text` as passed in, measured before any
     * newline translation, or NULL on failure.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference: `text` may be replaced by a translated copy below. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer matters: for newline translation
       or for line buffering. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    /* Translate every '\n' into the configured output newline. */
    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush: flush our own pending bytes after this write.
       needflush: additionally flush the underlying buffer (line buffering
       with a '\n' or '\r' in the text). */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        /* Fast path: ASCII text that is not longer than chunk_size, with an
           encodefunc accepted by is_asciicompat_encoding(), is queued as the
           str object itself instead of being encoded here. */
        if (PyUnicode_IS_ASCII(text) &&
                // See bpo-43260
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
                is_asciicompat_encoding(self->encodefunc)) {
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);
    }

    /* From here on `text` is only used for pointer identity comparisons;
       when b == text, the reference held through `b` keeps it alive. */
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    /* For the ASCII fast path the character count equals the byte count. */
    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* Queue `b`.  pending_bytes is NULL, a single chunk, or a list of
       chunks; in every successful branch our reference to `b` ends up owned
       by the queue or is released. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
        // Avoid concatenating more than chunk_size bytes: flush what is
        // already queued and start a new queue with `b`.
        if (_textiowrapper_writeflush(self) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        /* Second chunk: promote the single pending object to a list.
           PyList_SET_ITEM steals both references. */
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        /* PyList_Append takes its own reference, so ours is dropped. */
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* A write discards the read-side state: decoded characters, the
       snapshot and the decoder state. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1712 
1713 /* Steal a reference to chars and store it in the decoded_char buffer;
1714  */
1715 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1716 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1717 {
1718     Py_XSETREF(self->decoded_chars, chars);
1719     self->decoded_chars_used = 0;
1720 }
1721 
1722 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1723 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1724 {
1725     PyObject *chars;
1726     Py_ssize_t avail;
1727 
1728     if (self->decoded_chars == NULL)
1729         return PyUnicode_FromStringAndSize(NULL, 0);
1730 
1731     /* decoded_chars is guaranteed to be "ready". */
1732     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1733              - self->decoded_chars_used);
1734 
1735     assert(avail >= 0);
1736 
1737     if (n < 0 || n > avail)
1738         n = avail;
1739 
1740     if (self->decoded_chars_used > 0 || n < avail) {
1741         chars = PyUnicode_Substring(self->decoded_chars,
1742                                     self->decoded_chars_used,
1743                                     self->decoded_chars_used + n);
1744         if (chars == NULL)
1745             return NULL;
1746     }
1747     else {
1748         chars = self->decoded_chars;
1749         Py_INCREF(chars);
1750     }
1751 
1752     self->decoded_chars_used += n;
1753     return chars;
1754 }
1755 
/* Read and decode the next chunk of data from the underlying buffer object
 * (self->buffer).
 *
 * size_hint: when positive, a character count that is scaled by
 *     max(self->b2cratio, 1.0); the read size is the larger of that value
 *     and self->chunk_size.
 *
 * Returns 1 if the read produced any bytes or any decoded characters,
 * 0 at EOF (empty read and nothing decoded), and -1 with an exception set
 * on error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* The "O" format yields borrowed references; take our own before
         * the state tuple that owns them is released.
         */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        /* Convert the character hint into a byte count using the
         * bytes-per-character ratio recorded by the previous call.
         */
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate the type and obtain the
     * byte length; the decoder receives the object itself.
     */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    /* An empty read is reported to the decoder as end of input. */
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* decoded_chars is not released in this function; presumably
     * textiowrapper_set_decoded_chars() takes over the reference -- confirm
     * against its definition.
     */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    /* Record the observed bytes-per-character ratio for the next size_hint. */
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* Characters produced on an empty read still count as data, not EOF. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" hands both references over to Py_BuildValue. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            /* Avoid a second release of dec_flags in the fail path. */
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1885 
1886 /*[clinic input]
1887 _io.TextIOWrapper.read
1888     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1889     /
1890 [clinic start generated code]*/
1891 
1892 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1893 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1894 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1895 {
1896     PyObject *result = NULL, *chunks = NULL;
1897 
1898     CHECK_ATTACHED(self);
1899     CHECK_CLOSED(self);
1900 
1901     if (self->decoder == NULL)
1902         return _unsupported("not readable");
1903 
1904     if (_textiowrapper_writeflush(self) < 0)
1905         return NULL;
1906 
1907     if (n < 0) {
1908         /* Read everything */
1909         PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1910         PyObject *decoded;
1911         if (bytes == NULL)
1912             goto fail;
1913 
1914         if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1915             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1916                                                           bytes, 1);
1917         else
1918             decoded = PyObject_CallMethodObjArgs(
1919                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1920         Py_DECREF(bytes);
1921         if (check_decoded(decoded) < 0)
1922             goto fail;
1923 
1924         result = textiowrapper_get_decoded_chars(self, -1);
1925 
1926         if (result == NULL) {
1927             Py_DECREF(decoded);
1928             return NULL;
1929         }
1930 
1931         PyUnicode_AppendAndDel(&result, decoded);
1932         if (result == NULL)
1933             goto fail;
1934 
1935         textiowrapper_set_decoded_chars(self, NULL);
1936         Py_CLEAR(self->snapshot);
1937         return result;
1938     }
1939     else {
1940         int res = 1;
1941         Py_ssize_t remaining = n;
1942 
1943         result = textiowrapper_get_decoded_chars(self, n);
1944         if (result == NULL)
1945             goto fail;
1946         if (PyUnicode_READY(result) == -1)
1947             goto fail;
1948         remaining -= PyUnicode_GET_LENGTH(result);
1949 
1950         /* Keep reading chunks until we have n characters to return */
1951         while (remaining > 0) {
1952             res = textiowrapper_read_chunk(self, remaining);
1953             if (res < 0) {
1954                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1955                    when EINTR occurs so we needn't do it ourselves. */
1956                 if (_PyIO_trap_eintr()) {
1957                     continue;
1958                 }
1959                 goto fail;
1960             }
1961             if (res == 0)  /* EOF */
1962                 break;
1963             if (chunks == NULL) {
1964                 chunks = PyList_New(0);
1965                 if (chunks == NULL)
1966                     goto fail;
1967             }
1968             if (PyUnicode_GET_LENGTH(result) > 0 &&
1969                 PyList_Append(chunks, result) < 0)
1970                 goto fail;
1971             Py_DECREF(result);
1972             result = textiowrapper_get_decoded_chars(self, remaining);
1973             if (result == NULL)
1974                 goto fail;
1975             remaining -= PyUnicode_GET_LENGTH(result);
1976         }
1977         if (chunks != NULL) {
1978             if (result != NULL && PyList_Append(chunks, result) < 0)
1979                 goto fail;
1980             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1981             if (result == NULL)
1982                 goto fail;
1983             Py_CLEAR(chunks);
1984         }
1985         return result;
1986     }
1987   fail:
1988     Py_XDECREF(result);
1989     Py_XDECREF(chunks);
1990     return NULL;
1991 }
1992 
1993 
1994 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1995    that is to the NUL character. Otherwise the function will produce
1996    incorrect results. */
1997 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1998 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1999 {
2000     if (kind == PyUnicode_1BYTE_KIND) {
2001         assert(ch < 256);
2002         return (char *) memchr((const void *) s, (char) ch, end - s);
2003     }
2004     for (;;) {
2005         while (PyUnicode_READ(kind, s, 0) > ch)
2006             s += kind;
2007         if (PyUnicode_READ(kind, s, 0) == ch)
2008             return s;
2009         if (s == end)
2010             return NULL;
2011         s += kind;
2012     }
2013 }
2014 
2015 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2016 _PyIO_find_line_ending(
2017     int translated, int universal, PyObject *readnl,
2018     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2019 {
2020     Py_ssize_t len = (end - start)/kind;
2021 
2022     if (translated) {
2023         /* Newlines are already translated, only search for \n */
2024         const char *pos = find_control_char(kind, start, end, '\n');
2025         if (pos != NULL)
2026             return (pos - start)/kind + 1;
2027         else {
2028             *consumed = len;
2029             return -1;
2030         }
2031     }
2032     else if (universal) {
2033         /* Universal newline search. Find any of \r, \r\n, \n
2034          * The decoder ensures that \r\n are not split in two pieces
2035          */
2036         const char *s = start;
2037         for (;;) {
2038             Py_UCS4 ch;
2039             /* Fast path for non-control chars. The loop always ends
2040                since the Unicode string is NUL-terminated. */
2041             while (PyUnicode_READ(kind, s, 0) > '\r')
2042                 s += kind;
2043             if (s >= end) {
2044                 *consumed = len;
2045                 return -1;
2046             }
2047             ch = PyUnicode_READ(kind, s, 0);
2048             s += kind;
2049             if (ch == '\n')
2050                 return (s - start)/kind;
2051             if (ch == '\r') {
2052                 if (PyUnicode_READ(kind, s, 0) == '\n')
2053                     return (s - start)/kind + 1;
2054                 else
2055                     return (s - start)/kind;
2056             }
2057         }
2058     }
2059     else {
2060         /* Non-universal mode. */
2061         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2062         Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2063         /* Assume that readnl is an ASCII character. */
2064         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2065         if (readnl_len == 1) {
2066             const char *pos = find_control_char(kind, start, end, nl[0]);
2067             if (pos != NULL)
2068                 return (pos - start)/kind + 1;
2069             *consumed = len;
2070             return -1;
2071         }
2072         else {
2073             const char *s = start;
2074             const char *e = end - (readnl_len - 1)*kind;
2075             const char *pos;
2076             if (e < s)
2077                 e = s;
2078             while (s < e) {
2079                 Py_ssize_t i;
2080                 const char *pos = find_control_char(kind, s, end, nl[0]);
2081                 if (pos == NULL || pos >= e)
2082                     break;
2083                 for (i = 1; i < readnl_len; i++) {
2084                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2085                         break;
2086                 }
2087                 if (i == readnl_len)
2088                     return (pos - start)/kind + readnl_len;
2089                 s = pos + kind;
2090             }
2091             pos = find_control_char(kind, e, end, nl[0]);
2092             if (pos == NULL)
2093                 *consumed = len;
2094             else
2095                 *consumed = (pos - start)/kind;
2096             return -1;
2097         }
2098     }
2099 }
2100 
/* Read one line from the stream.
 *
 * limit: maximum number of characters to return; a negative value means
 *     no limit.
 *
 * Returns a new str reference (the empty string when EOF is reached with
 * nothing accumulated), or NULL with an exception set.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    /* line:      string currently being searched (owned reference).
     * chunks:    list of pieces already put aside, created lazily.
     * remaining: unconsumed tail of the previous `line`, to be prepended
     *            to the next decoded chunk.
     */
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters stored in `chunks` so far. */
    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, from the first unused
             * character.
             */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Search in remaining + decoded buffer; offset_to_buffer maps
             * an index in `line` back to an index in the decoded buffer.
             */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found: convert to an index in `line` and clip it
             * to the length limit.
             */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        /* Only slice when the result is not the whole of `line`. */
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Left over after EOF: it belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece, if any. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all. */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2263 
2264 /*[clinic input]
2265 _io.TextIOWrapper.readline
2266     size: Py_ssize_t = -1
2267     /
2268 [clinic start generated code]*/
2269 
2270 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2271 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2272 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2273 {
2274     CHECK_ATTACHED(self);
2275     return _textiowrapper_readline(self, size);
2276 }
2277 
2278 /* Seek and Tell */
2279 
/* Unpacked form of the integer position ("cookie") produced by tell() and
 * consumed by seek().
 */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying buffer to seek to */
    int dec_flags;          /* decoder state flags to restore at start_pos */
    int bytes_to_feed;      /* bytes to read at start_pos and feed to the decoder */
    int chars_to_skip;      /* decoded characters to skip after feeding */
    char need_eof;          /* passed as the final flag to decoder.decode() */
} cookie_type;
2287 
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields will be stored.
 */

/* Total size of the intermediary buffer: one Py_off_t, three ints, one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2319 
2320 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2321 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2322 {
2323     unsigned char buffer[COOKIE_BUF_LEN];
2324     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2325     if (cookieLong == NULL)
2326         return -1;
2327 
2328     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2329                             PY_LITTLE_ENDIAN, 0) < 0) {
2330         Py_DECREF(cookieLong);
2331         return -1;
2332     }
2333     Py_DECREF(cookieLong);
2334 
2335     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2336     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2337     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2338     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2339     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2340 
2341     return 0;
2342 }
2343 
2344 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2345 textiowrapper_build_cookie(cookie_type *cookie)
2346 {
2347     unsigned char buffer[COOKIE_BUF_LEN];
2348 
2349     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2350     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2351     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2352     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2353     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2354 
2355     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2356                                  PY_LITTLE_ENDIAN, 0);
2357 }
2358 
2359 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2360 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2361 {
2362     PyObject *res;
2363     /* When seeking to the start of the stream, we call decoder.reset()
2364        rather than decoder.getstate().
2365        This is for a few decoders such as utf-16 for which the state value
2366        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2367        utf-16, that we are expecting a BOM).
2368     */
2369     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2370         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2371     else
2372         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2373                                      "((yi))", "", cookie->dec_flags);
2374     if (res == NULL)
2375         return -1;
2376     Py_DECREF(res);
2377     return 0;
2378 }
2379 
2380 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2381 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2382 {
2383     PyObject *res;
2384     if (start_of_stream) {
2385         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2386         self->encoding_start_of_stream = 1;
2387     }
2388     else {
2389         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2390                                          _PyLong_Zero, NULL);
2391         self->encoding_start_of_stream = 0;
2392     }
2393     if (res == NULL)
2394         return -1;
2395     Py_DECREF(res);
2396     return 0;
2397 }
2398 
2399 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2400 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2401 {
2402     /* Same as _textiowrapper_decoder_setstate() above. */
2403     return _textiowrapper_encoder_reset(
2404         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2405 }
2406 
2407 /*[clinic input]
2408 _io.TextIOWrapper.seek
2409     cookie as cookieObj: object
2410     whence: int = 0
2411     /
2412 [clinic start generated code]*/
2413 
2414 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2415 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2416 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2417 {
2418     PyObject *posobj;
2419     cookie_type cookie;
2420     PyObject *res;
2421     int cmp;
2422     PyObject *snapshot;
2423 
2424     CHECK_ATTACHED(self);
2425     CHECK_CLOSED(self);
2426 
2427     Py_INCREF(cookieObj);
2428 
2429     if (!self->seekable) {
2430         _unsupported("underlying stream is not seekable");
2431         goto fail;
2432     }
2433 
2434     switch (whence) {
2435     case SEEK_CUR:
2436         /* seek relative to current position */
2437         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2438         if (cmp < 0)
2439             goto fail;
2440 
2441         if (cmp == 0) {
2442             _unsupported("can't do nonzero cur-relative seeks");
2443             goto fail;
2444         }
2445 
2446         /* Seeking to the current position should attempt to
2447          * sync the underlying buffer with the current position.
2448          */
2449         Py_DECREF(cookieObj);
2450         cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2451         if (cookieObj == NULL)
2452             goto fail;
2453         break;
2454 
2455     case SEEK_END:
2456         /* seek relative to end of file */
2457         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2458         if (cmp < 0)
2459             goto fail;
2460 
2461         if (cmp == 0) {
2462             _unsupported("can't do nonzero end-relative seeks");
2463             goto fail;
2464         }
2465 
2466         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2467         if (res == NULL)
2468             goto fail;
2469         Py_DECREF(res);
2470 
2471         textiowrapper_set_decoded_chars(self, NULL);
2472         Py_CLEAR(self->snapshot);
2473         if (self->decoder) {
2474             res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2475             if (res == NULL)
2476                 goto fail;
2477             Py_DECREF(res);
2478         }
2479 
2480         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2481         Py_CLEAR(cookieObj);
2482         if (res == NULL)
2483             goto fail;
2484         if (self->encoder) {
2485             /* If seek() == 0, we are at the start of stream, otherwise not */
2486             cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2487             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2488                 Py_DECREF(res);
2489                 goto fail;
2490             }
2491         }
2492         return res;
2493 
2494     case SEEK_SET:
2495         break;
2496 
2497     default:
2498         PyErr_Format(PyExc_ValueError,
2499                      "invalid whence (%d, should be %d, %d or %d)", whence,
2500                      SEEK_SET, SEEK_CUR, SEEK_END);
2501         goto fail;
2502     }
2503 
2504     cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2505     if (cmp < 0)
2506         goto fail;
2507 
2508     if (cmp == 1) {
2509         PyErr_Format(PyExc_ValueError,
2510                      "negative seek position %R", cookieObj);
2511         goto fail;
2512     }
2513 
2514     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2515     if (res == NULL)
2516         goto fail;
2517     Py_DECREF(res);
2518 
2519     /* The strategy of seek() is to go back to the safe start point
2520      * and replay the effect of read(chars_to_skip) from there.
2521      */
2522     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2523         goto fail;
2524 
2525     /* Seek back to the safe start point. */
2526     posobj = PyLong_FromOff_t(cookie.start_pos);
2527     if (posobj == NULL)
2528         goto fail;
2529     res = PyObject_CallMethodObjArgs(self->buffer,
2530                                      _PyIO_str_seek, posobj, NULL);
2531     Py_DECREF(posobj);
2532     if (res == NULL)
2533         goto fail;
2534     Py_DECREF(res);
2535 
2536     textiowrapper_set_decoded_chars(self, NULL);
2537     Py_CLEAR(self->snapshot);
2538 
2539     /* Restore the decoder to its state from the safe start point. */
2540     if (self->decoder) {
2541         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2542             goto fail;
2543     }
2544 
2545     if (cookie.chars_to_skip) {
2546         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2547         PyObject *input_chunk = _PyObject_CallMethodId(
2548             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2549         PyObject *decoded;
2550 
2551         if (input_chunk == NULL)
2552             goto fail;
2553 
2554         if (!PyBytes_Check(input_chunk)) {
2555             PyErr_Format(PyExc_TypeError,
2556                          "underlying read() should have returned a bytes "
2557                          "object, not '%.200s'",
2558                          Py_TYPE(input_chunk)->tp_name);
2559             Py_DECREF(input_chunk);
2560             goto fail;
2561         }
2562 
2563         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2564         if (snapshot == NULL) {
2565             goto fail;
2566         }
2567         Py_XSETREF(self->snapshot, snapshot);
2568 
2569         decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2570             "Oi", input_chunk, (int)cookie.need_eof);
2571 
2572         if (check_decoded(decoded) < 0)
2573             goto fail;
2574 
2575         textiowrapper_set_decoded_chars(self, decoded);
2576 
2577         /* Skip chars_to_skip of the decoded characters. */
2578         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2579             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2580             goto fail;
2581         }
2582         self->decoded_chars_used = cookie.chars_to_skip;
2583     }
2584     else {
2585         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2586         if (snapshot == NULL)
2587             goto fail;
2588         Py_XSETREF(self->snapshot, snapshot);
2589     }
2590 
2591     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2592     if (self->encoder) {
2593         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2594             goto fail;
2595     }
2596     return cookieObj;
2597   fail:
2598     Py_XDECREF(cookieObj);
2599     return NULL;
2600 
2601 }
2602 
2603 /*[clinic input]
2604 _io.TextIOWrapper.tell
2605 [clinic start generated code]*/
2606 
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* Implementation of TextIOWrapper.tell().
     *
     * Flushes pending output, asks the underlying buffer for its byte
     * position and, when both a decoder and a read snapshot exist, replays
     * the snapshot bytes through the decoder to find the closest point at
     * which the decoder has nothing buffered.  The result is built by
     * textiowrapper_build_cookie() from the `cookie` fields filled in here
     * (start_pos, dec_flags, bytes_to_feed, chars_to_skip, need_eof).
     *
     * Returns a new reference, or NULL with an exception set:
     *   - the module's "unsupported operation" error if the stream is not
     *     seekable,
     *   - OSError if telling is currently disabled or the position cannot
     *     be reconstructed,
     *   - whatever flush(), buffer.tell() or the decoder raise.
     *
     * The decoder state is saved before the replay and restored on both the
     * success (`finally`) and the error (`fail`) paths.
     */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* Nothing needs cleaning up yet (saved_state is still NULL), so a plain
       return is equivalent to `goto fail` here. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
    if (posobj == NULL)
        goto fail;

    /* Without a decoder or a snapshot the buffer position is returned
       unchanged; ownership of posobj passes to the caller. */
    if (self->decoder == NULL || self->snapshot == NULL) {
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

    /* Convert the buffer position to a C integer; conversion failure is
       detected below through PyErr_Occurred(). */
#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    Py_DECREF(posobj);
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference owned by the snapshot tuple. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

/* DECODER_GETSTATE(): call decoder.getstate(), validate that it is a
   (bytes, int) tuple, and store the length of the bytes item in
   dec_buffer_len and the int item in dec_flags.  Jumps to `fail` on any
   error.  Note: the macro stays defined after this function (no #undef). */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
            _PyIO_str_getstate, NULL); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* DECODER_DECODE(start, len, res): feed `len` bytes starting at `start` to
   decoder.decode() and store the number of characters produced in `res`.
   Jumps to `fail` when check_decoded() rejects the result.  Also left
   defined after this function. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* First guess: scale the consumed character count by the stored
       bytes-per-character ratio. */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            /* The step doubles on every overshoot. */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* No usable start point was found ahead of the snapshot: start from
           the snapshot itself with the snapshot's decoder state. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    /* Reached only when the loop ran out of input without hitting the
       target (the `break` above leaves input < input_end). */
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state saved above.
       saved_state is released before the result check, so the early return
       below does not leak it. */
    res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    /* Error path: if the decoder state was saved, restore it while keeping
       the original exception; an error raised by setstate() is chained to
       it by _PyErr_ChainExceptions(). */
    if (saved_state) {
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2833 
2834 /*[clinic input]
2835 _io.TextIOWrapper.truncate
2836     pos: object = None
2837     /
2838 [clinic start generated code]*/
2839 
2840 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2841 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2842 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2843 {
2844     PyObject *res;
2845 
2846     CHECK_ATTACHED(self)
2847 
2848     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2849     if (res == NULL)
2850         return NULL;
2851     Py_DECREF(res);
2852 
2853     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2854 }
2855 
2856 static PyObject *
textiowrapper_repr(textio * self)2857 textiowrapper_repr(textio *self)
2858 {
2859     PyObject *nameobj, *modeobj, *res, *s;
2860     int status;
2861 
2862     CHECK_INITIALIZED(self);
2863 
2864     res = PyUnicode_FromString("<_io.TextIOWrapper");
2865     if (res == NULL)
2866         return NULL;
2867 
2868     status = Py_ReprEnter((PyObject *)self);
2869     if (status != 0) {
2870         if (status > 0) {
2871             PyErr_Format(PyExc_RuntimeError,
2872                          "reentrant call inside %s.__repr__",
2873                          Py_TYPE(self)->tp_name);
2874         }
2875         goto error;
2876     }
2877     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2878         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2879             goto error;
2880         }
2881         /* Ignore ValueError raised if the underlying stream was detached */
2882         PyErr_Clear();
2883     }
2884     if (nameobj != NULL) {
2885         s = PyUnicode_FromFormat(" name=%R", nameobj);
2886         Py_DECREF(nameobj);
2887         if (s == NULL)
2888             goto error;
2889         PyUnicode_AppendAndDel(&res, s);
2890         if (res == NULL)
2891             goto error;
2892     }
2893     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2894         goto error;
2895     }
2896     if (modeobj != NULL) {
2897         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2898         Py_DECREF(modeobj);
2899         if (s == NULL)
2900             goto error;
2901         PyUnicode_AppendAndDel(&res, s);
2902         if (res == NULL)
2903             goto error;
2904     }
2905     s = PyUnicode_FromFormat("%U encoding=%R>",
2906                              res, self->encoding);
2907     Py_DECREF(res);
2908     if (status == 0) {
2909         Py_ReprLeave((PyObject *)self);
2910     }
2911     return s;
2912 
2913   error:
2914     Py_XDECREF(res);
2915     if (status == 0) {
2916         Py_ReprLeave((PyObject *)self);
2917     }
2918     return NULL;
2919 }
2920 
2921 
2922 /* Inquiries */
2923 
2924 /*[clinic input]
2925 _io.TextIOWrapper.fileno
2926 [clinic start generated code]*/
2927 
2928 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2929 _io_TextIOWrapper_fileno_impl(textio *self)
2930 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2931 {
2932     CHECK_ATTACHED(self);
2933     return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2934 }
2935 
2936 /*[clinic input]
2937 _io.TextIOWrapper.seekable
2938 [clinic start generated code]*/
2939 
2940 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2941 _io_TextIOWrapper_seekable_impl(textio *self)
2942 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2943 {
2944     CHECK_ATTACHED(self);
2945     return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2946 }
2947 
2948 /*[clinic input]
2949 _io.TextIOWrapper.readable
2950 [clinic start generated code]*/
2951 
2952 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2953 _io_TextIOWrapper_readable_impl(textio *self)
2954 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2955 {
2956     CHECK_ATTACHED(self);
2957     return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2958 }
2959 
2960 /*[clinic input]
2961 _io.TextIOWrapper.writable
2962 [clinic start generated code]*/
2963 
2964 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2965 _io_TextIOWrapper_writable_impl(textio *self)
2966 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2967 {
2968     CHECK_ATTACHED(self);
2969     return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2970 }
2971 
2972 /*[clinic input]
2973 _io.TextIOWrapper.isatty
2974 [clinic start generated code]*/
2975 
2976 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2977 _io_TextIOWrapper_isatty_impl(textio *self)
2978 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2979 {
2980     CHECK_ATTACHED(self);
2981     return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2982 }
2983 
2984 /*[clinic input]
2985 _io.TextIOWrapper.flush
2986 [clinic start generated code]*/
2987 
2988 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2989 _io_TextIOWrapper_flush_impl(textio *self)
2990 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2991 {
2992     CHECK_ATTACHED(self);
2993     CHECK_CLOSED(self);
2994     self->telling = self->seekable;
2995     if (_textiowrapper_writeflush(self) < 0)
2996         return NULL;
2997     return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2998 }
2999 
3000 /*[clinic input]
3001 _io.TextIOWrapper.close
3002 [clinic start generated code]*/
3003 
3004 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3005 _io_TextIOWrapper_close_impl(textio *self)
3006 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3007 {
3008     PyObject *res;
3009     int r;
3010     CHECK_ATTACHED(self);
3011 
3012     res = textiowrapper_closed_get(self, NULL);
3013     if (res == NULL)
3014         return NULL;
3015     r = PyObject_IsTrue(res);
3016     Py_DECREF(res);
3017     if (r < 0)
3018         return NULL;
3019 
3020     if (r > 0) {
3021         Py_RETURN_NONE; /* stream already closed */
3022     }
3023     else {
3024         PyObject *exc = NULL, *val, *tb;
3025         if (self->finalizing) {
3026             res = _PyObject_CallMethodIdObjArgs(self->buffer,
3027                                                 &PyId__dealloc_warn,
3028                                                 self, NULL);
3029             if (res)
3030                 Py_DECREF(res);
3031             else
3032                 PyErr_Clear();
3033         }
3034         res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
3035         if (res == NULL)
3036             PyErr_Fetch(&exc, &val, &tb);
3037         else
3038             Py_DECREF(res);
3039 
3040         res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3041         if (exc != NULL) {
3042             _PyErr_ChainExceptions(exc, val, tb);
3043             Py_CLEAR(res);
3044         }
3045         return res;
3046     }
3047 }
3048 
3049 static PyObject *
textiowrapper_iternext(textio * self)3050 textiowrapper_iternext(textio *self)
3051 {
3052     PyObject *line;
3053 
3054     CHECK_ATTACHED(self);
3055 
3056     self->telling = 0;
3057     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3058         /* Skip method call overhead for speed */
3059         line = _textiowrapper_readline(self, -1);
3060     }
3061     else {
3062         line = PyObject_CallMethodObjArgs((PyObject *)self,
3063                                            _PyIO_str_readline, NULL);
3064         if (line && !PyUnicode_Check(line)) {
3065             PyErr_Format(PyExc_OSError,
3066                          "readline() should have returned a str object, "
3067                          "not '%.200s'", Py_TYPE(line)->tp_name);
3068             Py_DECREF(line);
3069             return NULL;
3070         }
3071     }
3072 
3073     if (line == NULL || PyUnicode_READY(line) == -1)
3074         return NULL;
3075 
3076     if (PyUnicode_GET_LENGTH(line) == 0) {
3077         /* Reached EOF or would have blocked */
3078         Py_DECREF(line);
3079         Py_CLEAR(self->snapshot);
3080         self->telling = self->seekable;
3081         return NULL;
3082     }
3083 
3084     return line;
3085 }
3086 
3087 static PyObject *
textiowrapper_name_get(textio * self,void * context)3088 textiowrapper_name_get(textio *self, void *context)
3089 {
3090     CHECK_ATTACHED(self);
3091     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3092 }
3093 
3094 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3095 textiowrapper_closed_get(textio *self, void *context)
3096 {
3097     CHECK_ATTACHED(self);
3098     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3099 }
3100 
3101 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3102 textiowrapper_newlines_get(textio *self, void *context)
3103 {
3104     PyObject *res;
3105     CHECK_ATTACHED(self);
3106     if (self->decoder == NULL ||
3107         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3108     {
3109         Py_RETURN_NONE;
3110     }
3111     return res;
3112 }
3113 
3114 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3115 textiowrapper_errors_get(textio *self, void *context)
3116 {
3117     CHECK_INITIALIZED(self);
3118     Py_INCREF(self->errors);
3119     return self->errors;
3120 }
3121 
3122 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3123 textiowrapper_chunk_size_get(textio *self, void *context)
3124 {
3125     CHECK_ATTACHED(self);
3126     return PyLong_FromSsize_t(self->chunk_size);
3127 }
3128 
3129 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3130 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3131 {
3132     Py_ssize_t n;
3133     CHECK_ATTACHED_INT(self);
3134     if (arg == NULL) {
3135         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3136         return -1;
3137     }
3138     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3139     if (n == -1 && PyErr_Occurred())
3140         return -1;
3141     if (n <= 0) {
3142         PyErr_SetString(PyExc_ValueError,
3143                         "a strictly positive integer is required");
3144         return -1;
3145     }
3146     self->chunk_size = n;
3147     return 0;
3148 }
3149 
3150 #include "clinic/textio.c.h"
3151 
/* Method table for _io.IncrementalNewlineDecoder (used as tp_methods of
   PyIncrementalNewlineDecoder_Type): decode, getstate, setstate, reset.
   NOTE(review): the *_METHODDEF macros are not defined in this part of the
   file; presumably they come from the "clinic/textio.c.h" header included
   just above -- confirm.  The {NULL} entry terminates the table. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3159 
/* Computed attributes of _io.IncrementalNewlineDecoder: "newlines" has a
   getter only (setter is NULL).  The {NULL} entry terminates the table. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
3164 
/* Static type object for _io.IncrementalNewlineDecoder.
   Instances are nldecoder_object structs.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and does not set Py_TPFLAGS_HAVE_GC, so tp_traverse
   and tp_clear are left empty.  Slots are filled positionally; each
   trailing comment names the slot. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3205 
3206 
/* Method table for _io.TextIOWrapper (used as tp_methods of
   PyTextIOWrapper_Type), grouped as: core I/O methods, inquiries, and
   positioning methods.
   NOTE(review): the *_METHODDEF macros are not defined in this part of the
   file; presumably they come from the "clinic/textio.c.h" header included
   above -- confirm.  The {NULL, NULL} entry terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3227 
/* Struct members of `textio` exposed directly as Python attributes.
   encoding, buffer, line_buffering and write_through are READONLY;
   "_finalizing" has flags 0 and is therefore writable from Python.
   The {NULL} entry terminates the table. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
3236 
/* Computed attributes of _io.TextIOWrapper.  name, closed, newlines and
   errors have a getter only; "_CHUNK_SIZE" also has a setter.  The "mode"
   entry is commented out, so no "mode" getter is registered here.
   The {NULL} entry terminates the table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
3248 
/* Static type object for _io.TextIOWrapper.
   Instances are `textio` structs.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE), takes part in cyclic GC (Py_TPFLAGS_HAVE_GC with
   tp_traverse/tp_clear), supports weak references and an instance dict via
   the weakreflist/dict offsets, and provides tp_iternext but leaves
   tp_iter empty.  Slots are filled positionally; each trailing comment
   names the slot. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
3300