1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
12 #include "pycore_object.h"
13 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
14 #include "structmember.h"         // PyMemberDef
15 #include "_iomodule.h"
16 
17 /*[clinic input]
18 module _io
19 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
20 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
21 [clinic start generated code]*/
22 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
23 
24 _Py_IDENTIFIER(close);
25 _Py_IDENTIFIER(_dealloc_warn);
26 _Py_IDENTIFIER(decode);
27 _Py_IDENTIFIER(fileno);
28 _Py_IDENTIFIER(flush);
29 _Py_IDENTIFIER(getpreferredencoding);
30 _Py_IDENTIFIER(isatty);
31 _Py_IDENTIFIER(mode);
32 _Py_IDENTIFIER(name);
33 _Py_IDENTIFIER(raw);
34 _Py_IDENTIFIER(read);
35 _Py_IDENTIFIER(readable);
36 _Py_IDENTIFIER(replace);
37 _Py_IDENTIFIER(reset);
38 _Py_IDENTIFIER(seek);
39 _Py_IDENTIFIER(seekable);
40 _Py_IDENTIFIER(setstate);
41 _Py_IDENTIFIER(strict);
42 _Py_IDENTIFIER(tell);
43 _Py_IDENTIFIER(writable);
44 
45 /* TextIOBase */
46 
/* Class docstring for _io._TextIOBase; installed as the tp_doc slot of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
54 
55 static PyObject *
_unsupported(const char * message)56 _unsupported(const char *message)
57 {
58     _PyIO_State *state = IO_STATE();
59     if (state != NULL)
60         PyErr_SetString(state->unsupported_operation, message);
61     return NULL;
62 }
63 
64 PyDoc_STRVAR(textiobase_detach_doc,
65     "Separate the underlying buffer from the TextIOBase and return it.\n"
66     "\n"
67     "After the underlying buffer has been detached, the TextIO is in an\n"
68     "unusable state.\n"
69     );
70 
71 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))72 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
73 {
74     return _unsupported("detach");
75 }
76 
77 PyDoc_STRVAR(textiobase_read_doc,
78     "Read at most n characters from stream.\n"
79     "\n"
80     "Read from underlying buffer until we have n characters or we hit EOF.\n"
81     "If n is negative or omitted, read until EOF.\n"
82     );
83 
84 static PyObject *
textiobase_read(PyObject * self,PyObject * args)85 textiobase_read(PyObject *self, PyObject *args)
86 {
87     return _unsupported("read");
88 }
89 
90 PyDoc_STRVAR(textiobase_readline_doc,
91     "Read until newline or EOF.\n"
92     "\n"
93     "Returns an empty string if EOF is hit immediately.\n"
94     );
95 
96 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)97 textiobase_readline(PyObject *self, PyObject *args)
98 {
99     return _unsupported("readline");
100 }
101 
102 PyDoc_STRVAR(textiobase_write_doc,
103     "Write string to stream.\n"
104     "Returns the number of characters written (which is always equal to\n"
105     "the length of the string).\n"
106     );
107 
108 static PyObject *
textiobase_write(PyObject * self,PyObject * args)109 textiobase_write(PyObject *self, PyObject *args)
110 {
111     return _unsupported("write");
112 }
113 
114 PyDoc_STRVAR(textiobase_encoding_doc,
115     "Encoding of the text stream.\n"
116     "\n"
117     "Subclasses should override.\n"
118     );
119 
120 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)121 textiobase_encoding_get(PyObject *self, void *context)
122 {
123     Py_RETURN_NONE;
124 }
125 
126 PyDoc_STRVAR(textiobase_newlines_doc,
127     "Line endings translated so far.\n"
128     "\n"
129     "Only line endings translated during reading are considered.\n"
130     "\n"
131     "Subclasses should override.\n"
132     );
133 
134 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)135 textiobase_newlines_get(PyObject *self, void *context)
136 {
137     Py_RETURN_NONE;
138 }
139 
140 PyDoc_STRVAR(textiobase_errors_doc,
141     "The error setting of the decoder or encoder.\n"
142     "\n"
143     "Subclasses should override.\n"
144     );
145 
146 static PyObject *
textiobase_errors_get(PyObject * self,void * context)147 textiobase_errors_get(PyObject *self, void *context)
148 {
149     Py_RETURN_NONE;
150 }
151 
152 
153 static PyMethodDef textiobase_methods[] = {
154     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
155     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
156     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
157     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
158     {NULL, NULL}
159 };
160 
161 static PyGetSetDef textiobase_getset[] = {
162     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
163     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
164     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
165     {NULL}
166 };
167 
168 PyTypeObject PyTextIOBase_Type = {
169     PyVarObject_HEAD_INIT(NULL, 0)
170     "_io._TextIOBase",          /*tp_name*/
171     0,                          /*tp_basicsize*/
172     0,                          /*tp_itemsize*/
173     0,                          /*tp_dealloc*/
174     0,                          /*tp_vectorcall_offset*/
175     0,                          /*tp_getattr*/
176     0,                          /*tp_setattr*/
177     0,                          /*tp_as_async*/
178     0,                          /*tp_repr*/
179     0,                          /*tp_as_number*/
180     0,                          /*tp_as_sequence*/
181     0,                          /*tp_as_mapping*/
182     0,                          /*tp_hash */
183     0,                          /*tp_call*/
184     0,                          /*tp_str*/
185     0,                          /*tp_getattro*/
186     0,                          /*tp_setattro*/
187     0,                          /*tp_as_buffer*/
188     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
189     textiobase_doc,             /* tp_doc */
190     0,                          /* tp_traverse */
191     0,                          /* tp_clear */
192     0,                          /* tp_richcompare */
193     0,                          /* tp_weaklistoffset */
194     0,                          /* tp_iter */
195     0,                          /* tp_iternext */
196     textiobase_methods,         /* tp_methods */
197     0,                          /* tp_members */
198     textiobase_getset,          /* tp_getset */
199     &PyIOBase_Type,             /* tp_base */
200     0,                          /* tp_dict */
201     0,                          /* tp_descr_get */
202     0,                          /* tp_descr_set */
203     0,                          /* tp_dictoffset */
204     0,                          /* tp_init */
205     0,                          /* tp_alloc */
206     0,                          /* tp_new */
207     0,                          /* tp_free */
208     0,                          /* tp_is_gc */
209     0,                          /* tp_bases */
210     0,                          /* tp_mro */
211     0,                          /* tp_cache */
212     0,                          /* tp_subclasses */
213     0,                          /* tp_weaklist */
214     0,                          /* tp_del */
215     0,                          /* tp_version_tag */
216     0,                          /* tp_finalize */
217 };
218 
219 
220 /* IncrementalNewlineDecoder */
221 
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped incremental decoder, or Py_None when decode() input is used
       as-is.  NULL until __init__ has run. */
    PyObject *decoder;
    /* Error handler name; __init__ stores the "strict" identifier string
       when no value is passed. */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from the previous decode()
       call and must be prefixed to the next output. */
    unsigned int pendingcr: 1;
    /* Nonzero if '\r' and '\r\n' are rewritten to '\n' while decoding. */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF recorded so far. */
    unsigned int seennl: 3;
} nldecoder_object;
230 
231 /*[clinic input]
232 _io.IncrementalNewlineDecoder.__init__
233     decoder: object
234     translate: int
235     errors: object(c_default="NULL") = "strict"
236 
237 Codec used when reading a file in universal newlines mode.
238 
239 It wraps another incremental decoder, translating \r\n and \r into \n.
240 It also records the types of newlines encountered.  When used with
241 translate=False, it ensures that the newline sequence is returned in
242 one piece. When used with decoder=None, it expects unicode strings as
243 decode input and translates newlines without first invoking an external
244 decoder.
245 [clinic start generated code]*/
246 
247 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)248 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249                                             PyObject *decoder, int translate,
250                                             PyObject *errors)
251 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
252 {
253     self->decoder = decoder;
254     Py_INCREF(decoder);
255 
256     if (errors == NULL) {
257         self->errors = _PyUnicode_FromId(&PyId_strict);
258         if (self->errors == NULL)
259             return -1;
260     }
261     else {
262         self->errors = errors;
263     }
264     Py_INCREF(self->errors);
265 
266     self->translate = translate ? 1 : 0;
267     self->seennl = 0;
268     self->pendingcr = 0;
269 
270     return 0;
271 }
272 
273 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)274 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
275 {
276     Py_CLEAR(self->decoder);
277     Py_CLEAR(self->errors);
278     Py_TYPE(self)->tp_free((PyObject *)self);
279 }
280 
281 static int
check_decoded(PyObject * decoded)282 check_decoded(PyObject *decoded)
283 {
284     if (decoded == NULL)
285         return -1;
286     if (!PyUnicode_Check(decoded)) {
287         PyErr_Format(PyExc_TypeError,
288                      "decoder should return a string result, not '%.200s'",
289                      Py_TYPE(decoded)->tp_name);
290         Py_DECREF(decoded);
291         return -1;
292     }
293     if (PyUnicode_READY(decoded) < 0) {
294         Py_DECREF(decoded);
295         return -1;
296     }
297     return 0;
298 }
299 
/* Bit flags stored in nldecoder_object.seennl, one per newline convention
   encountered while decoding.  SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
304 
/* Decode `input` and apply newline bookkeeping.

   myself: an nldecoder_object (cast without a type check).
   input:  passed to the wrapped decoder's decode(); when the wrapped decoder
           is Py_None it is used directly and must already be a str.
   final:  nonzero if this is the last chunk; a trailing '\r' is then no
           longer held back.

   Steps, in order:
     1. obtain the decoded text and validate it with check_decoded();
     2. prefix a '\r' held back by the previous call (self->pendingcr);
     3. unless `final`, strip a trailing '\r' and remember it in pendingcr;
     4. scan the text, recording newline kinds into self->seennl and, when
        self->translate is set, rewriting '\r' and '\r\n' to '\n'.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ has stored something in it. */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` doubles as the per-character byte width here: skip one
           character in the destination and copy output_len characters.
           NOTE(review): this assumes `modified` has the same kind as
           `output`, which follows from sharing its max char value. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        /* The search is over raw bytes, so for kinds wider than one byte a
           hit may be part of a larger code unit; a miss, however, proves
           there is no '\r' character at all. */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte data the byte hit is conclusive; wider kinds
                   need a per-character confirmation. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* No bound check in this inner loop: it stops at any
                           character <= '\n'.  NOTE(review): this relies on a
                           terminating NUL after the character data -
                           confirm against the unicode object layout. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Peek at the next character to tell "\r\n" from a
                       lone "\r"; a "\r\n" pair is consumed as one unit. */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            /* These two shadow the outer `kind` and `in_str` and are
               initialized from the same `output` object. */
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text ("\r\n" shrinks to "\n",
               "\r" maps to "\n"), so `len` characters is enough room. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past `c`, so in > len means the
                   character just read lies beyond the last real one. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* `output` is NULL here, so the `error` label (which would
               DECREF it) must not be used. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
505 
506 /*[clinic input]
507 _io.IncrementalNewlineDecoder.decode
508     input: object
509     final: bool(accept={int}) = False
510 [clinic start generated code]*/
511 
512 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)513 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
514                                           PyObject *input, int final)
515 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
516 {
517     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
518 }
519 
520 /*[clinic input]
521 _io.IncrementalNewlineDecoder.getstate
522 [clinic start generated code]*/
523 
524 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)525 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
526 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
527 {
528     PyObject *buffer;
529     unsigned long long flag;
530 
531     if (self->decoder != Py_None) {
532         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
533            _PyIO_str_getstate);
534         if (state == NULL)
535             return NULL;
536         if (!PyTuple_Check(state)) {
537             PyErr_SetString(PyExc_TypeError,
538                             "illegal decoder state");
539             Py_DECREF(state);
540             return NULL;
541         }
542         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
543                               &buffer, &flag))
544         {
545             Py_DECREF(state);
546             return NULL;
547         }
548         Py_INCREF(buffer);
549         Py_DECREF(state);
550     }
551     else {
552         buffer = PyBytes_FromString("");
553         flag = 0;
554     }
555     flag <<= 1;
556     if (self->pendingcr)
557         flag |= 1;
558     return Py_BuildValue("NK", buffer, flag);
559 }
560 
561 /*[clinic input]
562 _io.IncrementalNewlineDecoder.setstate
563     state: object
564     /
565 [clinic start generated code]*/
566 
567 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)568 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
569                                        PyObject *state)
570 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
571 {
572     PyObject *buffer;
573     unsigned long long flag;
574 
575     if (!PyTuple_Check(state)) {
576         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
577         return NULL;
578     }
579     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
580                           &buffer, &flag))
581     {
582         return NULL;
583     }
584 
585     self->pendingcr = (int) (flag & 1);
586     flag >>= 1;
587 
588     if (self->decoder != Py_None)
589         return _PyObject_CallMethodId(self->decoder,
590                                       &PyId_setstate, "((OK))", buffer, flag);
591     else
592         Py_RETURN_NONE;
593 }
594 
595 /*[clinic input]
596 _io.IncrementalNewlineDecoder.reset
597 [clinic start generated code]*/
598 
599 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)600 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
601 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
602 {
603     self->seennl = 0;
604     self->pendingcr = 0;
605     if (self->decoder != Py_None)
606         return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
607     else
608         Py_RETURN_NONE;
609 }
610 
611 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)612 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
613 {
614     switch (self->seennl) {
615     case SEEN_CR:
616         return PyUnicode_FromString("\r");
617     case SEEN_LF:
618         return PyUnicode_FromString("\n");
619     case SEEN_CRLF:
620         return PyUnicode_FromString("\r\n");
621     case SEEN_CR | SEEN_LF:
622         return Py_BuildValue("ss", "\r", "\n");
623     case SEEN_CR | SEEN_CRLF:
624         return Py_BuildValue("ss", "\r", "\r\n");
625     case SEEN_LF | SEEN_CRLF:
626         return Py_BuildValue("ss", "\n", "\r\n");
627     case SEEN_CR | SEEN_LF | SEEN_CRLF:
628         return Py_BuildValue("sss", "\r", "\n", "\r\n");
629     default:
630         Py_RETURN_NONE;
631    }
632 
633 }
634 
635 /* TextIOWrapper */
636 
637 typedef PyObject *
638         (*encodefunc_t)(PyObject *, PyObject *);
639 
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    /* The next three flags are derived from the newline argument in
       set_newline(). */
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
698 
699 static void
700 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
701 
702 /* A couple of specialized cases in order to bypass the slow incremental
703    encoding methods for the most popular encodings. */
704 
705 static PyObject *
ascii_encode(textio * self,PyObject * text)706 ascii_encode(textio *self, PyObject *text)
707 {
708     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
709 }
710 
711 static PyObject *
utf16be_encode(textio * self,PyObject * text)712 utf16be_encode(textio *self, PyObject *text)
713 {
714     return _PyUnicode_EncodeUTF16(text,
715                                   PyUnicode_AsUTF8(self->errors), 1);
716 }
717 
718 static PyObject *
utf16le_encode(textio * self,PyObject * text)719 utf16le_encode(textio *self, PyObject *text)
720 {
721     return _PyUnicode_EncodeUTF16(text,
722                                   PyUnicode_AsUTF8(self->errors), -1);
723 }
724 
725 static PyObject *
utf16_encode(textio * self,PyObject * text)726 utf16_encode(textio *self, PyObject *text)
727 {
728     if (!self->encoding_start_of_stream) {
729         /* Skip the BOM and use native byte ordering */
730 #if PY_BIG_ENDIAN
731         return utf16be_encode(self, text);
732 #else
733         return utf16le_encode(self, text);
734 #endif
735     }
736     return _PyUnicode_EncodeUTF16(text,
737                                   PyUnicode_AsUTF8(self->errors), 0);
738 }
739 
740 static PyObject *
utf32be_encode(textio * self,PyObject * text)741 utf32be_encode(textio *self, PyObject *text)
742 {
743     return _PyUnicode_EncodeUTF32(text,
744                                   PyUnicode_AsUTF8(self->errors), 1);
745 }
746 
747 static PyObject *
utf32le_encode(textio * self,PyObject * text)748 utf32le_encode(textio *self, PyObject *text)
749 {
750     return _PyUnicode_EncodeUTF32(text,
751                                   PyUnicode_AsUTF8(self->errors), -1);
752 }
753 
754 static PyObject *
utf32_encode(textio * self,PyObject * text)755 utf32_encode(textio *self, PyObject *text)
756 {
757     if (!self->encoding_start_of_stream) {
758         /* Skip the BOM and use native byte ordering */
759 #if PY_BIG_ENDIAN
760         return utf32be_encode(self, text);
761 #else
762         return utf32le_encode(self, text);
763 #endif
764     }
765     return _PyUnicode_EncodeUTF32(text,
766                                   PyUnicode_AsUTF8(self->errors), 0);
767 }
768 
769 static PyObject *
utf8_encode(textio * self,PyObject * text)770 utf8_encode(textio *self, PyObject *text)
771 {
772     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
773 }
774 
775 static PyObject *
latin1_encode(textio * self,PyObject * text)776 latin1_encode(textio *self, PyObject *text)
777 {
778     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
779 }
780 
781 // Return true when encoding can be skipped when text is ascii.
782 static inline int
is_asciicompat_encoding(encodefunc_t f)783 is_asciicompat_encoding(encodefunc_t f)
784 {
785     return f == (encodefunc_t) ascii_encode
786         || f == (encodefunc_t) latin1_encode
787         || f == (encodefunc_t) utf8_encode;
788 }
789 
790 /* Map normalized encoding names onto the specialized encoding funcs */
791 
/* One row of the encodefuncs lookup table. */
typedef struct {
    const char *name;           /* normalized codec name; NULL ends the table */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
796 
797 static const encodefuncentry encodefuncs[] = {
798     {"ascii",       (encodefunc_t) ascii_encode},
799     {"iso8859-1",   (encodefunc_t) latin1_encode},
800     {"utf-8",       (encodefunc_t) utf8_encode},
801     {"utf-16-be",   (encodefunc_t) utf16be_encode},
802     {"utf-16-le",   (encodefunc_t) utf16le_encode},
803     {"utf-16",      (encodefunc_t) utf16_encode},
804     {"utf-32-be",   (encodefunc_t) utf32be_encode},
805     {"utf-32-le",   (encodefunc_t) utf32le_encode},
806     {"utf-32",      (encodefunc_t) utf32_encode},
807     {NULL, NULL}
808 };
809 
810 static int
validate_newline(const char * newline)811 validate_newline(const char *newline)
812 {
813     if (newline && newline[0] != '\0'
814         && !(newline[0] == '\n' && newline[1] == '\0')
815         && !(newline[0] == '\r' && newline[1] == '\0')
816         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
817         PyErr_Format(PyExc_ValueError,
818                      "illegal newline value: %s", newline);
819         return -1;
820     }
821     return 0;
822 }
823 
824 static int
set_newline(textio * self,const char * newline)825 set_newline(textio *self, const char *newline)
826 {
827     PyObject *old = self->readnl;
828     if (newline == NULL) {
829         self->readnl = NULL;
830     }
831     else {
832         self->readnl = PyUnicode_FromString(newline);
833         if (self->readnl == NULL) {
834             self->readnl = old;
835             return -1;
836         }
837     }
838     self->readuniversal = (newline == NULL || newline[0] == '\0');
839     self->readtranslate = (newline == NULL);
840     self->writetranslate = (newline == NULL || newline[0] != '\0');
841     if (!self->readuniversal && self->readnl != NULL) {
842         // validate_newline() accepts only ASCII newlines.
843         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
844         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
845         if (strcmp(self->writenl, "\n") == 0) {
846             self->writenl = NULL;
847         }
848     }
849     else {
850 #ifdef MS_WINDOWS
851         self->writenl = "\r\n";
852 #else
853         self->writenl = NULL;
854 #endif
855     }
856     Py_XDECREF(old);
857     return 0;
858 }
859 
860 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)861 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
862                            const char *errors)
863 {
864     PyObject *res;
865     int r;
866 
867     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
868     if (res == NULL)
869         return -1;
870 
871     r = PyObject_IsTrue(res);
872     Py_DECREF(res);
873     if (r == -1)
874         return -1;
875 
876     if (r != 1)
877         return 0;
878 
879     Py_CLEAR(self->decoder);
880     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
881     if (self->decoder == NULL)
882         return -1;
883 
884     if (self->readuniversal) {
885         PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
886             (PyObject *)&PyIncrementalNewlineDecoder_Type,
887             self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
888         if (incrementalDecoder == NULL)
889             return -1;
890         Py_CLEAR(self->decoder);
891         self->decoder = incrementalDecoder;
892     }
893 
894     return 0;
895 }
896 
897 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)898 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
899 {
900     PyObject *chars;
901 
902     if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
903         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
904     else
905         chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
906                                            eof ? Py_True : Py_False, NULL);
907 
908     if (check_decoded(chars) < 0)
909         // check_decoded already decreases refcount
910         return NULL;
911 
912     return chars;
913 }
914 
915 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)916 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
917                            const char *errors)
918 {
919     PyObject *res;
920     int r;
921 
922     res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
923     if (res == NULL)
924         return -1;
925 
926     r = PyObject_IsTrue(res);
927     Py_DECREF(res);
928     if (r == -1)
929         return -1;
930 
931     if (r != 1)
932         return 0;
933 
934     Py_CLEAR(self->encoder);
935     self->encodefunc = NULL;
936     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
937     if (self->encoder == NULL)
938         return -1;
939 
940     /* Get the normalized named of the codec */
941     if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
942         return -1;
943     }
944     if (res != NULL && PyUnicode_Check(res)) {
945         const encodefuncentry *e = encodefuncs;
946         while (e->name != NULL) {
947             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
948                 self->encodefunc = e->encodefunc;
949                 break;
950             }
951             e++;
952         }
953     }
954     Py_XDECREF(res);
955 
956     return 0;
957 }
958 
959 static int
_textiowrapper_fix_encoder_state(textio * self)960 _textiowrapper_fix_encoder_state(textio *self)
961 {
962     if (!self->seekable || !self->encoder) {
963         return 0;
964     }
965 
966     self->encoding_start_of_stream = 1;
967 
968     PyObject *cookieObj = PyObject_CallMethodNoArgs(
969         self->buffer, _PyIO_str_tell);
970     if (cookieObj == NULL) {
971         return -1;
972     }
973 
974     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
975     Py_DECREF(cookieObj);
976     if (cmp < 0) {
977         return -1;
978     }
979 
980     if (cmp == 0) {
981         self->encoding_start_of_stream = 0;
982         PyObject *res = PyObject_CallMethodOneArg(
983             self->encoder, _PyIO_str_setstate, _PyLong_Zero);
984         if (res == NULL) {
985             return -1;
986         }
987         Py_DECREF(res);
988     }
989 
990     return 0;
991 }
992 
993 static int
io_check_errors(PyObject * errors)994 io_check_errors(PyObject *errors)
995 {
996     assert(errors != NULL && errors != Py_None);
997 
998     PyInterpreterState *interp = _PyInterpreterState_GET();
999 #ifndef Py_DEBUG
1000     /* In release mode, only check in development mode (-X dev) */
1001     if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1002         return 0;
1003     }
1004 #else
1005     /* Always check in debug mode */
1006 #endif
1007 
1008     /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1009        before_PyUnicode_InitEncodings() is called. */
1010     if (!interp->unicode.fs_codec.encoding) {
1011         return 0;
1012     }
1013 
1014     Py_ssize_t name_length;
1015     const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1016     if (name == NULL) {
1017         return -1;
1018     }
1019     if (strlen(name) != (size_t)name_length) {
1020         PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1021         return -1;
1022     }
1023     PyObject *handler = PyCodec_LookupError(name);
1024     if (handler != NULL) {
1025         Py_DECREF(handler);
1026         return 0;
1027     }
1028     return -1;
1029 }
1030 
1031 
1032 
1033 /*[clinic input]
1034 _io.TextIOWrapper.__init__
1035     buffer: object
1036     encoding: str(accept={str, NoneType}) = None
1037     errors: object = None
1038     newline: str(accept={str, NoneType}) = None
1039     line_buffering: bool(accept={int}) = False
1040     write_through: bool(accept={int}) = False
1041 
1042 Character and line based layer over a BufferedIOBase object, buffer.
1043 
1044 encoding gives the name of the encoding that the stream will be
1045 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1046 
1047 errors determines the strictness of encoding and decoding (see
1048 help(codecs.Codec) or the documentation for codecs.register) and
1049 defaults to "strict".
1050 
1051 newline controls how line endings are handled. It can be None, '',
1052 '\n', '\r', and '\r\n'.  It works as follows:
1053 
1054 * On input, if newline is None, universal newlines mode is
1055   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1056   these are translated into '\n' before being returned to the
1057   caller. If it is '', universal newline mode is enabled, but line
1058   endings are returned to the caller untranslated. If it has any of
1059   the other legal values, input lines are only terminated by the given
1060   string, and the line ending is returned to the caller untranslated.
1061 
1062 * On output, if newline is None, any '\n' characters written are
1063   translated to the system default line separator, os.linesep. If
1064   newline is '' or '\n', no translation takes place. If newline is any
1065   of the other legal values, any '\n' characters written are translated
1066   to the given string.
1067 
1068 If line_buffering is True, a call to flush is implied when a call to
1069 write contains a newline character.
1070 [clinic start generated code]*/
1071 
1072 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1073 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1074                                 const char *encoding, PyObject *errors,
1075                                 const char *newline, int line_buffering,
1076                                 int write_through)
1077 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1078 {
1079     PyObject *raw, *codec_info = NULL;
1080     _PyIO_State *state = NULL;
1081     PyObject *res;
1082     int r;
1083 
1084     self->ok = 0;
1085     self->detached = 0;
1086 
1087     if (errors == Py_None) {
1088         errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1089         if (errors == NULL) {
1090             return -1;
1091         }
1092     }
1093     else if (!PyUnicode_Check(errors)) {
1094         // Check 'errors' argument here because Argument Clinic doesn't support
1095         // 'str(accept={str, NoneType})' converter.
1096         PyErr_Format(
1097             PyExc_TypeError,
1098             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1099             Py_TYPE(errors)->tp_name);
1100         return -1;
1101     }
1102     else if (io_check_errors(errors)) {
1103         return -1;
1104     }
1105 
1106     if (validate_newline(newline) < 0) {
1107         return -1;
1108     }
1109 
1110     Py_CLEAR(self->buffer);
1111     Py_CLEAR(self->encoding);
1112     Py_CLEAR(self->encoder);
1113     Py_CLEAR(self->decoder);
1114     Py_CLEAR(self->readnl);
1115     Py_CLEAR(self->decoded_chars);
1116     Py_CLEAR(self->pending_bytes);
1117     Py_CLEAR(self->snapshot);
1118     Py_CLEAR(self->errors);
1119     Py_CLEAR(self->raw);
1120     self->decoded_chars_used = 0;
1121     self->pending_bytes_count = 0;
1122     self->encodefunc = NULL;
1123     self->b2cratio = 0.0;
1124 
1125     if (encoding == NULL) {
1126         /* Try os.device_encoding(fileno) */
1127         PyObject *fileno;
1128         state = IO_STATE();
1129         if (state == NULL)
1130             goto error;
1131         fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
1132         /* Ignore only AttributeError and UnsupportedOperation */
1133         if (fileno == NULL) {
1134             if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1135                 PyErr_ExceptionMatches(state->unsupported_operation)) {
1136                 PyErr_Clear();
1137             }
1138             else {
1139                 goto error;
1140             }
1141         }
1142         else {
1143             int fd = _PyLong_AsInt(fileno);
1144             Py_DECREF(fileno);
1145             if (fd == -1 && PyErr_Occurred()) {
1146                 goto error;
1147             }
1148 
1149             self->encoding = _Py_device_encoding(fd);
1150             if (self->encoding == NULL)
1151                 goto error;
1152             else if (!PyUnicode_Check(self->encoding))
1153                 Py_CLEAR(self->encoding);
1154         }
1155     }
1156     if (encoding == NULL && self->encoding == NULL) {
1157         PyObject *locale_module = _PyIO_get_locale_module(state);
1158         if (locale_module == NULL)
1159             goto catch_ImportError;
1160         self->encoding = _PyObject_CallMethodIdOneArg(
1161             locale_module, &PyId_getpreferredencoding, Py_False);
1162         Py_DECREF(locale_module);
1163         if (self->encoding == NULL) {
1164           catch_ImportError:
1165             /*
1166              Importing locale can raise an ImportError because of
1167              _functools, and locale.getpreferredencoding can raise an
1168              ImportError if _locale is not available.  These will happen
1169              during module building.
1170             */
1171             if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1172                 PyErr_Clear();
1173                 self->encoding = PyUnicode_FromString("ascii");
1174             }
1175             else
1176                 goto error;
1177         }
1178         else if (!PyUnicode_Check(self->encoding))
1179             Py_CLEAR(self->encoding);
1180     }
1181     if (self->encoding != NULL) {
1182         encoding = PyUnicode_AsUTF8(self->encoding);
1183         if (encoding == NULL)
1184             goto error;
1185     }
1186     else if (encoding != NULL) {
1187         self->encoding = PyUnicode_FromString(encoding);
1188         if (self->encoding == NULL)
1189             goto error;
1190     }
1191     else {
1192         PyErr_SetString(PyExc_OSError,
1193                         "could not determine default encoding");
1194         goto error;
1195     }
1196 
1197     /* Check we have been asked for a real text encoding */
1198     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1199     if (codec_info == NULL) {
1200         Py_CLEAR(self->encoding);
1201         goto error;
1202     }
1203 
1204     /* XXX: Failures beyond this point have the potential to leak elements
1205      * of the partially constructed object (like self->encoding)
1206      */
1207 
1208     Py_INCREF(errors);
1209     self->errors = errors;
1210     self->chunk_size = 8192;
1211     self->line_buffering = line_buffering;
1212     self->write_through = write_through;
1213     if (set_newline(self, newline) < 0) {
1214         goto error;
1215     }
1216 
1217     self->buffer = buffer;
1218     Py_INCREF(buffer);
1219 
1220     /* Build the decoder object */
1221     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222         goto error;
1223 
1224     /* Build the encoder object */
1225     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1226         goto error;
1227 
1228     /* Finished sorting out the codec details */
1229     Py_CLEAR(codec_info);
1230 
1231     if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1232         Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1233         Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1234     {
1235         if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1236             goto error;
1237         /* Cache the raw FileIO object to speed up 'closed' checks */
1238         if (raw != NULL) {
1239             if (Py_IS_TYPE(raw, &PyFileIO_Type))
1240                 self->raw = raw;
1241             else
1242                 Py_DECREF(raw);
1243         }
1244     }
1245 
1246     res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
1247     if (res == NULL)
1248         goto error;
1249     r = PyObject_IsTrue(res);
1250     Py_DECREF(res);
1251     if (r < 0)
1252         goto error;
1253     self->seekable = self->telling = r;
1254 
1255     r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1256     if (r < 0) {
1257         goto error;
1258     }
1259     Py_XDECREF(res);
1260     self->has_read1 = r;
1261 
1262     self->encoding_start_of_stream = 0;
1263     if (_textiowrapper_fix_encoder_state(self) < 0) {
1264         goto error;
1265     }
1266 
1267     self->ok = 1;
1268     return 0;
1269 
1270   error:
1271     Py_XDECREF(codec_info);
1272     return -1;
1273 }
1274 
1275 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1276  * -1 on error.
1277  */
1278 static int
convert_optional_bool(PyObject * obj,int default_value)1279 convert_optional_bool(PyObject *obj, int default_value)
1280 {
1281     long v;
1282     if (obj == Py_None) {
1283         v = default_value;
1284     }
1285     else {
1286         v = PyLong_AsLong(obj);
1287         if (v == -1 && PyErr_Occurred())
1288             return -1;
1289     }
1290     return v != 0;
1291 }
1292 
1293 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1294 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1295                               PyObject *errors, int newline_changed)
1296 {
1297     /* Use existing settings where new settings are not specified */
1298     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1299         return 0;  // no change
1300     }
1301 
1302     if (encoding == Py_None) {
1303         encoding = self->encoding;
1304         if (errors == Py_None) {
1305             errors = self->errors;
1306         }
1307     }
1308     else if (errors == Py_None) {
1309         errors = _PyUnicode_FromId(&PyId_strict);
1310         if (errors == NULL) {
1311             return -1;
1312         }
1313     }
1314 
1315     const char *c_errors = PyUnicode_AsUTF8(errors);
1316     if (c_errors == NULL) {
1317         return -1;
1318     }
1319 
1320     // Create new encoder & decoder
1321     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1322         PyUnicode_AsUTF8(encoding), "codecs.open()");
1323     if (codec_info == NULL) {
1324         return -1;
1325     }
1326     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328         Py_DECREF(codec_info);
1329         return -1;
1330     }
1331     Py_DECREF(codec_info);
1332 
1333     Py_INCREF(encoding);
1334     Py_INCREF(errors);
1335     Py_SETREF(self->encoding, encoding);
1336     Py_SETREF(self->errors, errors);
1337 
1338     return _textiowrapper_fix_encoder_state(self);
1339 }
1340 
1341 /*[clinic input]
1342 _io.TextIOWrapper.reconfigure
1343     *
1344     encoding: object = None
1345     errors: object = None
1346     newline as newline_obj: object(c_default="NULL") = None
1347     line_buffering as line_buffering_obj: object = None
1348     write_through as write_through_obj: object = None
1349 
1350 Reconfigure the text stream with new parameters.
1351 
1352 This also does an implicit stream flush.
1353 
1354 [clinic start generated code]*/
1355 
1356 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1357 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1358                                    PyObject *errors, PyObject *newline_obj,
1359                                    PyObject *line_buffering_obj,
1360                                    PyObject *write_through_obj)
1361 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1362 {
1363     int line_buffering;
1364     int write_through;
1365     const char *newline = NULL;
1366 
1367     /* Check if something is in the read buffer */
1368     if (self->decoded_chars != NULL) {
1369         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1370             _unsupported("It is not possible to set the encoding or newline "
1371                          "of stream after the first read");
1372             return NULL;
1373         }
1374     }
1375 
1376     if (newline_obj != NULL && newline_obj != Py_None) {
1377         newline = PyUnicode_AsUTF8(newline_obj);
1378         if (newline == NULL || validate_newline(newline) < 0) {
1379             return NULL;
1380         }
1381     }
1382 
1383     line_buffering = convert_optional_bool(line_buffering_obj,
1384                                            self->line_buffering);
1385     write_through = convert_optional_bool(write_through_obj,
1386                                           self->write_through);
1387     if (line_buffering < 0 || write_through < 0) {
1388         return NULL;
1389     }
1390 
1391     PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1392     if (res == NULL) {
1393         return NULL;
1394     }
1395     Py_DECREF(res);
1396     self->b2cratio = 0;
1397 
1398     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1399         return NULL;
1400     }
1401 
1402     if (textiowrapper_change_encoding(
1403             self, encoding, errors, newline_obj != NULL) < 0) {
1404         return NULL;
1405     }
1406 
1407     self->line_buffering = line_buffering;
1408     self->write_through = write_through;
1409     Py_RETURN_NONE;
1410 }
1411 
/* Drop every object reference held by self and mark it as not initialised
 * (self->ok = 0).  Called from textiowrapper_dealloc(); always returns 0. */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    /* The instance dictionary is released last. */
    Py_CLEAR(self->dict);
    return 0;
}
1430 
/* Deallocator: finalize the object through _PyIOBase_finalize(), then
 * untrack it from the GC, clear weak references, release all owned
 * references and free the memory. */
static void
textiowrapper_dealloc(textio *self)
{
    self->finalizing = 1;
    /* A negative result aborts the deallocation and leaves the object
     * intact (presumably because it was resurrected -- confirm against
     * _PyIOBase_finalize). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1444 
/* GC traversal: report every object reference held by self to `visit`.
 * Visits the same fields that textiowrapper_clear() releases.  Py_VISIT
 * returns early with a non-zero value if `visit` fails; otherwise 0 is
 * returned. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1462 
1463 static PyObject *
1464 textiowrapper_closed_get(textio *self, void *context);
1465 
/* Raise ValueError("I/O operation on closed file.") and return NULL from
 * the enclosing function if the stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper it asks the cached raw object through
 * _PyFileIO_closed() when there is one, and falls back to
 * textiowrapper_closed_get() otherwise.  Any other type goes through
 * _PyIOBase_check_closed().
 *
 * Since it expands to `return NULL`, it can only be used in functions that
 * return a pointer, and `self` must be a `textio *`. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1492 
/* Raise ValueError and return NULL from the enclosing function unless
 * self->ok is positive, i.e. unless __init__ completed successfully.
 * Expands to a bare `if` statement (not wrapped in do/while), so it must be
 * used as a statement of its own. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1499 
/* CHECK_INITIALIZED(), then additionally raise ValueError and return NULL
 * from the enclosing function if the underlying buffer has been detached.
 * Expands to two consecutive statements, so it must not be used as the
 * unbraced body of an if/else or a loop. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1507 
/* Same checks and error messages as CHECK_ATTACHED(), for functions that
 * return an int: returns -1 instead of NULL on failure. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1518 
1519 
1520 /*[clinic input]
1521 _io.TextIOWrapper.detach
1522 [clinic start generated code]*/
1523 
1524 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1525 _io_TextIOWrapper_detach_impl(textio *self)
1526 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1527 {
1528     PyObject *buffer, *res;
1529     CHECK_ATTACHED(self);
1530     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1531     if (res == NULL)
1532         return NULL;
1533     Py_DECREF(res);
1534     buffer = self->buffer;
1535     self->buffer = NULL;
1536     self->detached = 1;
1537     return buffer;
1538 }
1539 
1540 /* Flush the internal write buffer. This doesn't explicitly flush the
1541    underlying buffered object, though. */
1542 static int
_textiowrapper_writeflush(textio * self)1543 _textiowrapper_writeflush(textio *self)
1544 {
1545     if (self->pending_bytes == NULL)
1546         return 0;
1547 
1548     PyObject *pending = self->pending_bytes;
1549     PyObject *b;
1550 
1551     if (PyBytes_Check(pending)) {
1552         b = pending;
1553         Py_INCREF(b);
1554     }
1555     else if (PyUnicode_Check(pending)) {
1556         assert(PyUnicode_IS_ASCII(pending));
1557         assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1558         b = PyBytes_FromStringAndSize(
1559                 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1560         if (b == NULL) {
1561             return -1;
1562         }
1563     }
1564     else {
1565         assert(PyList_Check(pending));
1566         b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1567         if (b == NULL) {
1568             return -1;
1569         }
1570 
1571         char *buf = PyBytes_AsString(b);
1572         Py_ssize_t pos = 0;
1573 
1574         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1575             PyObject *obj = PyList_GET_ITEM(pending, i);
1576             char *src;
1577             Py_ssize_t len;
1578             if (PyUnicode_Check(obj)) {
1579                 assert(PyUnicode_IS_ASCII(obj));
1580                 src = PyUnicode_DATA(obj);
1581                 len = PyUnicode_GET_LENGTH(obj);
1582             }
1583             else {
1584                 assert(PyBytes_Check(obj));
1585                 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1586                     Py_DECREF(b);
1587                     return -1;
1588                 }
1589             }
1590             memcpy(buf + pos, src, len);
1591             pos += len;
1592         }
1593         assert(pos == self->pending_bytes_count);
1594     }
1595 
1596     self->pending_bytes_count = 0;
1597     self->pending_bytes = NULL;
1598     Py_DECREF(pending);
1599 
1600     PyObject *ret;
1601     do {
1602         ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
1603     } while (ret == NULL && _PyIO_trap_eintr());
1604     Py_DECREF(b);
1605     // NOTE: We cleared buffer but we don't know how many bytes are actually written
1606     // when an error occurred.
1607     if (ret == NULL)
1608         return -1;
1609     Py_DECREF(ret);
1610     return 0;
1611 }
1612 
1613 /*[clinic input]
1614 _io.TextIOWrapper.write
1615     text: unicode
1616     /
1617 [clinic start generated code]*/
1618 
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and queue it in self->pending_bytes, flushing the queue
     * to the underlying buffer when it grows to chunk_size or when
     * write_through / line buffering requires it.
     *
     * Returns the length of `text` as passed in (before newline
     * translation) as an int object, or NULL with an exception set. */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* From here on this function owns one reference to `text`, which may be
     * replaced by the newline-translated copy below. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the result is actually needed. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        /* Translate "\n" to the configured output newline. */
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush only flushes the pending queue; needflush also
     * flushes the underlying buffer afterwards. */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) &&
                // See bpo-43260
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
                is_asciicompat_encoding(self->encodefunc)) {
            /* Queue the ASCII str itself instead of encoding it now;
             * _textiowrapper_writeflush() converts it to bytes. */
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
    }

    /* `text` is only used for pointer comparison after this point. */
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    /* For the queued ASCII str, the character count equals the byte count. */
    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* pending_bytes is a single object while one chunk is queued and is
     * turned into a list once a second chunk is added. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
        // Prevent to concatenate more than chunk_size data.
        if (_textiowrapper_writeflush(self) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        /* NOTE(review): _textiowrapper_writeflush() calls buffer.write();
         * if that call could re-enter write() on this object,
         * pending_bytes might be non-NULL again here and would be
         * overwritten -- confirm whether that can happen. */
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_SET_ITEM steals both references. */
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_Append took its own reference. */
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* Writing discards the read-side state: buffered decoded text, the
     * snapshot, and the decoder state. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1755 
/* Steal a reference to chars and store it in the decoded_chars buffer,
 * releasing the previous content and resetting the read offset
 * (decoded_chars_used) to 0.  chars may be NULL to empty the buffer.
 */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    Py_XSETREF(self->decoded_chars, chars);
    self->decoded_chars_used = 0;
}
1764 
1765 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1766 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1767 {
1768     PyObject *chars;
1769     Py_ssize_t avail;
1770 
1771     if (self->decoded_chars == NULL)
1772         return PyUnicode_FromStringAndSize(NULL, 0);
1773 
1774     /* decoded_chars is guaranteed to be "ready". */
1775     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1776              - self->decoded_chars_used);
1777 
1778     assert(avail >= 0);
1779 
1780     if (n < 0 || n > avail)
1781         n = avail;
1782 
1783     if (self->decoded_chars_used > 0 || n < avail) {
1784         chars = PyUnicode_Substring(self->decoded_chars,
1785                                     self->decoded_chars_used,
1786                                     self->decoded_chars_used + n);
1787         if (chars == NULL)
1788             return NULL;
1789     }
1790     else {
1791         chars = self->decoded_chars;
1792         Py_INCREF(chars);
1793     }
1794 
1795     self->decoded_chars_used += n;
1796     return chars;
1797 }
1798 
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * `size_hint` is a number of characters the caller would like to obtain; when
 * positive it is scaled by the last observed bytes-per-character ratio
 * (self->b2cratio, but never less than 1.0) and the larger of that value and
 * self->chunk_size is requested from the underlying buffer.
 *
 * Returns 1 if the stream is not at EOF, 0 if the underlying read returned no
 * bytes and the decoder produced no characters, and -1 on failure.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    /* dec_buffer and dec_flags are owned references once the telling block
       below has completed; they are released at `fail` unless ownership has
       been passed on. */
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
                                                     _PyIO_str_getstate);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        /* The state must be a 2-tuple (dec_buffer, dec_flags) whose first
           item is a bytes object; anything else is rejected. */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* The parsed items are borrowed from `state`; take our own
           references before releasing the tuple. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* Use read1() on the underlying buffer when has_read1 is set, read()
       otherwise. */
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate that the result is
       bytes-like and to learn its length. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    /* An empty read is treated as end of file and is passed to the decoder
       as its third argument. */
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* Ownership of decoded_chars moves to self here; the pointer is still
       valid below because self keeps it alive. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    /* Remember the bytes-per-character ratio for sizing the next read. */
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* Characters were produced, so do not report EOF even if the read
       itself returned no bytes. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" hands both references over to Py_BuildValue, which is why
           dec_flags is cleared before jumping to `fail` below. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    /* Release whatever is still owned locally. */
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1927 
1928 /*[clinic input]
1929 _io.TextIOWrapper.read
1930     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1931     /
1932 [clinic start generated code]*/
1933 
1934 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1935 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1936 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1937 {
1938     PyObject *result = NULL, *chunks = NULL;
1939 
1940     CHECK_ATTACHED(self);
1941     CHECK_CLOSED(self);
1942 
1943     if (self->decoder == NULL)
1944         return _unsupported("not readable");
1945 
1946     if (_textiowrapper_writeflush(self) < 0)
1947         return NULL;
1948 
1949     if (n < 0) {
1950         /* Read everything */
1951         PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
1952         PyObject *decoded;
1953         if (bytes == NULL)
1954             goto fail;
1955 
1956         if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1957             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1958                                                           bytes, 1);
1959         else
1960             decoded = PyObject_CallMethodObjArgs(
1961                 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1962         Py_DECREF(bytes);
1963         if (check_decoded(decoded) < 0)
1964             goto fail;
1965 
1966         result = textiowrapper_get_decoded_chars(self, -1);
1967 
1968         if (result == NULL) {
1969             Py_DECREF(decoded);
1970             return NULL;
1971         }
1972 
1973         PyUnicode_AppendAndDel(&result, decoded);
1974         if (result == NULL)
1975             goto fail;
1976 
1977         textiowrapper_set_decoded_chars(self, NULL);
1978         Py_CLEAR(self->snapshot);
1979         return result;
1980     }
1981     else {
1982         int res = 1;
1983         Py_ssize_t remaining = n;
1984 
1985         result = textiowrapper_get_decoded_chars(self, n);
1986         if (result == NULL)
1987             goto fail;
1988         if (PyUnicode_READY(result) == -1)
1989             goto fail;
1990         remaining -= PyUnicode_GET_LENGTH(result);
1991 
1992         /* Keep reading chunks until we have n characters to return */
1993         while (remaining > 0) {
1994             res = textiowrapper_read_chunk(self, remaining);
1995             if (res < 0) {
1996                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1997                    when EINTR occurs so we needn't do it ourselves. */
1998                 if (_PyIO_trap_eintr()) {
1999                     continue;
2000                 }
2001                 goto fail;
2002             }
2003             if (res == 0)  /* EOF */
2004                 break;
2005             if (chunks == NULL) {
2006                 chunks = PyList_New(0);
2007                 if (chunks == NULL)
2008                     goto fail;
2009             }
2010             if (PyUnicode_GET_LENGTH(result) > 0 &&
2011                 PyList_Append(chunks, result) < 0)
2012                 goto fail;
2013             Py_DECREF(result);
2014             result = textiowrapper_get_decoded_chars(self, remaining);
2015             if (result == NULL)
2016                 goto fail;
2017             remaining -= PyUnicode_GET_LENGTH(result);
2018         }
2019         if (chunks != NULL) {
2020             if (result != NULL && PyList_Append(chunks, result) < 0)
2021                 goto fail;
2022             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
2023             if (result == NULL)
2024                 goto fail;
2025             Py_CLEAR(chunks);
2026         }
2027         return result;
2028     }
2029   fail:
2030     Py_XDECREF(result);
2031     Py_XDECREF(chunks);
2032     return NULL;
2033 }
2034 
2035 
2036 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2037    that is to the NUL character. Otherwise the function will produce
2038    incorrect results. */
2039 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2040 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2041 {
2042     if (kind == PyUnicode_1BYTE_KIND) {
2043         assert(ch < 256);
2044         return (char *) memchr((const void *) s, (char) ch, end - s);
2045     }
2046     for (;;) {
2047         while (PyUnicode_READ(kind, s, 0) > ch)
2048             s += kind;
2049         if (PyUnicode_READ(kind, s, 0) == ch)
2050             return s;
2051         if (s == end)
2052             return NULL;
2053         s += kind;
2054     }
2055 }
2056 
2057 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2058 _PyIO_find_line_ending(
2059     int translated, int universal, PyObject *readnl,
2060     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2061 {
2062     Py_ssize_t len = (end - start)/kind;
2063 
2064     if (translated) {
2065         /* Newlines are already translated, only search for \n */
2066         const char *pos = find_control_char(kind, start, end, '\n');
2067         if (pos != NULL)
2068             return (pos - start)/kind + 1;
2069         else {
2070             *consumed = len;
2071             return -1;
2072         }
2073     }
2074     else if (universal) {
2075         /* Universal newline search. Find any of \r, \r\n, \n
2076          * The decoder ensures that \r\n are not split in two pieces
2077          */
2078         const char *s = start;
2079         for (;;) {
2080             Py_UCS4 ch;
2081             /* Fast path for non-control chars. The loop always ends
2082                since the Unicode string is NUL-terminated. */
2083             while (PyUnicode_READ(kind, s, 0) > '\r')
2084                 s += kind;
2085             if (s >= end) {
2086                 *consumed = len;
2087                 return -1;
2088             }
2089             ch = PyUnicode_READ(kind, s, 0);
2090             s += kind;
2091             if (ch == '\n')
2092                 return (s - start)/kind;
2093             if (ch == '\r') {
2094                 if (PyUnicode_READ(kind, s, 0) == '\n')
2095                     return (s - start)/kind + 1;
2096                 else
2097                     return (s - start)/kind;
2098             }
2099         }
2100     }
2101     else {
2102         /* Non-universal mode. */
2103         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2104         const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2105         /* Assume that readnl is an ASCII character. */
2106         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2107         if (readnl_len == 1) {
2108             const char *pos = find_control_char(kind, start, end, nl[0]);
2109             if (pos != NULL)
2110                 return (pos - start)/kind + 1;
2111             *consumed = len;
2112             return -1;
2113         }
2114         else {
2115             const char *s = start;
2116             const char *e = end - (readnl_len - 1)*kind;
2117             const char *pos;
2118             if (e < s)
2119                 e = s;
2120             while (s < e) {
2121                 Py_ssize_t i;
2122                 const char *pos = find_control_char(kind, s, end, nl[0]);
2123                 if (pos == NULL || pos >= e)
2124                     break;
2125                 for (i = 1; i < readnl_len; i++) {
2126                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2127                         break;
2128                 }
2129                 if (i == readnl_len)
2130                     return (pos - start)/kind + readnl_len;
2131                 s = pos + kind;
2132             }
2133             pos = find_control_char(kind, e, end, nl[0]);
2134             if (pos == NULL)
2135                 *consumed = len;
2136             else
2137                 *consumed = (pos - start)/kind;
2138             return -1;
2139         }
2140     }
2141 }
2142 
/* Read one line from the stream and return it as a new str reference.
 *
 * `limit` is the maximum number of characters to return; a negative value
 * means no limit.  An empty string is returned at end of file.  Returns NULL
 * on failure.
 *
 * Data that contains no line ending yet is set aside in the `chunks` list;
 * a tail that might be the beginning of a line ending is kept in `remaining`
 * and prepended to the next decoded chunk.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    /* start/endpos index into `line`; chunked counts the characters already
       stored in `chunks`; offset_to_buffer is the number of characters of
       `line` that come from `remaining` rather than from decoded_chars. */
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        const char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, starting after the
               characters that were already consumed. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous chunk to the fresh
               buffer and search from the beginning of the result. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found: convert to an index into `line` and clip
               it to the length limit if there is one. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Translate the end index from `line` coordinates back to
           decoded_chars coordinates. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Leftover data that was never followed by another chunk belongs
           to the returned line as well. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Join the pieces that were set aside with the final piece. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all: return the shared empty string. */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2305 
2306 /*[clinic input]
2307 _io.TextIOWrapper.readline
2308     size: Py_ssize_t = -1
2309     /
2310 [clinic start generated code]*/
2311 
2312 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2313 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2314 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2315 {
2316     CHECK_ATTACHED(self);
2317     return _textiowrapper_readline(self, size);
2318 }
2319 
2320 /* Seek and Tell */
2321 
/* Unpacked form of the integer position cookie accepted by seek().
   textiowrapper_parse_cookie() fills it from an integer and
   textiowrapper_build_cookie() packs it back into one. */
typedef struct {
    /* Position in the underlying buffer of the safe start point; seek()
       passes it to the buffer's seek(). */
    Py_off_t start_pos;
    /* Decoder flags handed to decoder.setstate() at the start point. */
    int dec_flags;
    /* Number of bytes seek() reads from the buffer and feeds to the
       decoder when chars_to_skip is non-zero. */
    int bytes_to_feed;
    /* Number of decoded characters seek() marks as already consumed. */
    int chars_to_skip;
    /* Non-zero if seek() must pass True as the second argument of the
       decoder's decode() call. */
    char need_eof;
} cookie_type;
2329 
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
 */

/* Total size of the packed representation: the five fields laid out back to
   back, without any padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2361 
2362 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2363 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2364 {
2365     unsigned char buffer[COOKIE_BUF_LEN];
2366     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2367     if (cookieLong == NULL)
2368         return -1;
2369 
2370     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2371                             PY_LITTLE_ENDIAN, 0) < 0) {
2372         Py_DECREF(cookieLong);
2373         return -1;
2374     }
2375     Py_DECREF(cookieLong);
2376 
2377     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2378     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2379     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2380     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2381     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2382 
2383     return 0;
2384 }
2385 
2386 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2387 textiowrapper_build_cookie(cookie_type *cookie)
2388 {
2389     unsigned char buffer[COOKIE_BUF_LEN];
2390 
2391     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2392     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2393     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2394     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2395     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2396 
2397     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2398                                  PY_LITTLE_ENDIAN, 0);
2399 }
2400 
2401 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2402 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2403 {
2404     PyObject *res;
2405     /* When seeking to the start of the stream, we call decoder.reset()
2406        rather than decoder.getstate().
2407        This is for a few decoders such as utf-16 for which the state value
2408        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2409        utf-16, that we are expecting a BOM).
2410     */
2411     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2412         res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
2413     else
2414         res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2415                                      "((yi))", "", cookie->dec_flags);
2416     if (res == NULL)
2417         return -1;
2418     Py_DECREF(res);
2419     return 0;
2420 }
2421 
2422 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2423 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2424 {
2425     PyObject *res;
2426     if (start_of_stream) {
2427         res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
2428         self->encoding_start_of_stream = 1;
2429     }
2430     else {
2431         res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
2432                                         _PyLong_Zero);
2433         self->encoding_start_of_stream = 0;
2434     }
2435     if (res == NULL)
2436         return -1;
2437     Py_DECREF(res);
2438     return 0;
2439 }
2440 
2441 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2442 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2443 {
2444     /* Same as _textiowrapper_decoder_setstate() above. */
2445     return _textiowrapper_encoder_reset(
2446         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2447 }
2448 
2449 /*[clinic input]
2450 _io.TextIOWrapper.seek
2451     cookie as cookieObj: object
2452     whence: int = 0
2453     /
2454 [clinic start generated code]*/
2455 
2456 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2457 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2458 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2459 {
2460     PyObject *posobj;
2461     cookie_type cookie;
2462     PyObject *res;
2463     int cmp;
2464     PyObject *snapshot;
2465 
2466     CHECK_ATTACHED(self);
2467     CHECK_CLOSED(self);
2468 
2469     Py_INCREF(cookieObj);
2470 
2471     if (!self->seekable) {
2472         _unsupported("underlying stream is not seekable");
2473         goto fail;
2474     }
2475 
2476     switch (whence) {
2477     case SEEK_CUR:
2478         /* seek relative to current position */
2479         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2480         if (cmp < 0)
2481             goto fail;
2482 
2483         if (cmp == 0) {
2484             _unsupported("can't do nonzero cur-relative seeks");
2485             goto fail;
2486         }
2487 
2488         /* Seeking to the current position should attempt to
2489          * sync the underlying buffer with the current position.
2490          */
2491         Py_DECREF(cookieObj);
2492         cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
2493         if (cookieObj == NULL)
2494             goto fail;
2495         break;
2496 
2497     case SEEK_END:
2498         /* seek relative to end of file */
2499         cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2500         if (cmp < 0)
2501             goto fail;
2502 
2503         if (cmp == 0) {
2504             _unsupported("can't do nonzero end-relative seeks");
2505             goto fail;
2506         }
2507 
2508         res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
2509         if (res == NULL)
2510             goto fail;
2511         Py_DECREF(res);
2512 
2513         textiowrapper_set_decoded_chars(self, NULL);
2514         Py_CLEAR(self->snapshot);
2515         if (self->decoder) {
2516             res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
2517             if (res == NULL)
2518                 goto fail;
2519             Py_DECREF(res);
2520         }
2521 
2522         res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2523         Py_CLEAR(cookieObj);
2524         if (res == NULL)
2525             goto fail;
2526         if (self->encoder) {
2527             /* If seek() == 0, we are at the start of stream, otherwise not */
2528             cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2529             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2530                 Py_DECREF(res);
2531                 goto fail;
2532             }
2533         }
2534         return res;
2535 
2536     case SEEK_SET:
2537         break;
2538 
2539     default:
2540         PyErr_Format(PyExc_ValueError,
2541                      "invalid whence (%d, should be %d, %d or %d)", whence,
2542                      SEEK_SET, SEEK_CUR, SEEK_END);
2543         goto fail;
2544     }
2545 
2546     cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2547     if (cmp < 0)
2548         goto fail;
2549 
2550     if (cmp == 1) {
2551         PyErr_Format(PyExc_ValueError,
2552                      "negative seek position %R", cookieObj);
2553         goto fail;
2554     }
2555 
2556     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2557     if (res == NULL)
2558         goto fail;
2559     Py_DECREF(res);
2560 
2561     /* The strategy of seek() is to go back to the safe start point
2562      * and replay the effect of read(chars_to_skip) from there.
2563      */
2564     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2565         goto fail;
2566 
2567     /* Seek back to the safe start point. */
2568     posobj = PyLong_FromOff_t(cookie.start_pos);
2569     if (posobj == NULL)
2570         goto fail;
2571     res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
2572     Py_DECREF(posobj);
2573     if (res == NULL)
2574         goto fail;
2575     Py_DECREF(res);
2576 
2577     textiowrapper_set_decoded_chars(self, NULL);
2578     Py_CLEAR(self->snapshot);
2579 
2580     /* Restore the decoder to its state from the safe start point. */
2581     if (self->decoder) {
2582         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2583             goto fail;
2584     }
2585 
2586     if (cookie.chars_to_skip) {
2587         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2588         PyObject *input_chunk = _PyObject_CallMethodId(
2589             self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2590         PyObject *decoded;
2591 
2592         if (input_chunk == NULL)
2593             goto fail;
2594 
2595         if (!PyBytes_Check(input_chunk)) {
2596             PyErr_Format(PyExc_TypeError,
2597                          "underlying read() should have returned a bytes "
2598                          "object, not '%.200s'",
2599                          Py_TYPE(input_chunk)->tp_name);
2600             Py_DECREF(input_chunk);
2601             goto fail;
2602         }
2603 
2604         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2605         if (snapshot == NULL) {
2606             goto fail;
2607         }
2608         Py_XSETREF(self->snapshot, snapshot);
2609 
2610         decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2611             input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2612 
2613         if (check_decoded(decoded) < 0)
2614             goto fail;
2615 
2616         textiowrapper_set_decoded_chars(self, decoded);
2617 
2618         /* Skip chars_to_skip of the decoded characters. */
2619         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2620             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2621             goto fail;
2622         }
2623         self->decoded_chars_used = cookie.chars_to_skip;
2624     }
2625     else {
2626         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2627         if (snapshot == NULL)
2628             goto fail;
2629         Py_XSETREF(self->snapshot, snapshot);
2630     }
2631 
2632     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2633     if (self->encoder) {
2634         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2635             goto fail;
2636     }
2637     return cookieObj;
2638   fail:
2639     Py_XDECREF(cookieObj);
2640     return NULL;
2641 
2642 }
2643 
2644 /*[clinic input]
2645 _io.TextIOWrapper.tell
2646 [clinic start generated code]*/
2647 
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* Return the current stream position.
     *
     * When there is no decoder or no snapshot, the result is simply what the
     * underlying buffer's tell() returned.  Otherwise the result is built by
     * textiowrapper_build_cookie() from a cookie_type holding: start_pos,
     * dec_flags, bytes_to_feed, chars_to_skip and need_eof.
     *
     * To fill the cookie, the decoder is replayed over the bytes stored in
     * the snapshot; its original state is saved first and restored before
     * returning, on both the success and the failure path.
     *
     * Errors (NULL is returned with an exception set):
     *   - the unsupported-operation exception (via _unsupported()) if the
     *     stream is not seekable;
     *   - OSError if telling was disabled (self->telling is false) or if the
     *     logical position cannot be reconstructed;
     *   - whatever flush(), buffer.tell() or the decoder methods raise.
     */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    /* Non-NULL only once the decoder state has been captured; the `fail`
       path uses this to decide whether the decoder must be restored. */
    PyObject *saved_state = NULL;
    const char *input, *input_end;
    /* Both are written by the DECODER_GETSTATE() macro defined below. */
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* Push out pending writes, then call flush() on the wrapper itself,
       before asking the buffer for its position. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        /* Nothing to reconstruct: the buffer position is the answer. */
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    /* posobj is released here and is not used again below. */
    Py_DECREF(posobj);
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* The "O" format stores a borrowed reference, so next_input is never
       decref'ed in this function. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
                                             _PyIO_str_getstate);
    if (saved_state == NULL)
        goto fail;

/* DECODER_GETSTATE(): call decoder.getstate(), validate that the result is a
   (bytes, int) tuple, and store the length of the bytes item in
   dec_buffer_len and the int item in dec_flags.  Jumps to `fail` on any
   error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
            _PyIO_str_getstate); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* DECODER_DECODE(start, len, res): feed `len` bytes starting at `start` to
   decoder.decode() and store the number of characters produced in `res`.
   Jumps to `fail` if check_decoded() rejects the result. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* Initial guess: scale the character count by the bytes-per-char ratio
       stored in self->b2cratio. */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            /* The step doubles on each consecutive overshoot. */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* No usable start point was found by the fast search: start again
           from the snapshot point with the decoder reset to the cookie. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state it had on entry. */
    res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    if (saved_state) {
        /* Failure path: restore the decoder while keeping the pending
           exception.  The exception is stashed around the setstate() call
           and chained back afterwards. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2874 
2875 /*[clinic input]
2876 _io.TextIOWrapper.truncate
2877     pos: object = None
2878     /
2879 [clinic start generated code]*/
2880 
2881 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2882 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2883 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2884 {
2885     PyObject *res;
2886 
2887     CHECK_ATTACHED(self)
2888 
2889     res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2890     if (res == NULL)
2891         return NULL;
2892     Py_DECREF(res);
2893 
2894     return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
2895 }
2896 
2897 static PyObject *
textiowrapper_repr(textio * self)2898 textiowrapper_repr(textio *self)
2899 {
2900     PyObject *nameobj, *modeobj, *res, *s;
2901     int status;
2902 
2903     CHECK_INITIALIZED(self);
2904 
2905     res = PyUnicode_FromString("<_io.TextIOWrapper");
2906     if (res == NULL)
2907         return NULL;
2908 
2909     status = Py_ReprEnter((PyObject *)self);
2910     if (status != 0) {
2911         if (status > 0) {
2912             PyErr_Format(PyExc_RuntimeError,
2913                          "reentrant call inside %s.__repr__",
2914                          Py_TYPE(self)->tp_name);
2915         }
2916         goto error;
2917     }
2918     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2919         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2920             goto error;
2921         }
2922         /* Ignore ValueError raised if the underlying stream was detached */
2923         PyErr_Clear();
2924     }
2925     if (nameobj != NULL) {
2926         s = PyUnicode_FromFormat(" name=%R", nameobj);
2927         Py_DECREF(nameobj);
2928         if (s == NULL)
2929             goto error;
2930         PyUnicode_AppendAndDel(&res, s);
2931         if (res == NULL)
2932             goto error;
2933     }
2934     if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2935         goto error;
2936     }
2937     if (modeobj != NULL) {
2938         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2939         Py_DECREF(modeobj);
2940         if (s == NULL)
2941             goto error;
2942         PyUnicode_AppendAndDel(&res, s);
2943         if (res == NULL)
2944             goto error;
2945     }
2946     s = PyUnicode_FromFormat("%U encoding=%R>",
2947                              res, self->encoding);
2948     Py_DECREF(res);
2949     if (status == 0) {
2950         Py_ReprLeave((PyObject *)self);
2951     }
2952     return s;
2953 
2954   error:
2955     Py_XDECREF(res);
2956     if (status == 0) {
2957         Py_ReprLeave((PyObject *)self);
2958     }
2959     return NULL;
2960 }
2961 
2962 
2963 /* Inquiries */
2964 
2965 /*[clinic input]
2966 _io.TextIOWrapper.fileno
2967 [clinic start generated code]*/
2968 
2969 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2970 _io_TextIOWrapper_fileno_impl(textio *self)
2971 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2972 {
2973     CHECK_ATTACHED(self);
2974     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
2975 }
2976 
2977 /*[clinic input]
2978 _io.TextIOWrapper.seekable
2979 [clinic start generated code]*/
2980 
2981 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2982 _io_TextIOWrapper_seekable_impl(textio *self)
2983 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2984 {
2985     CHECK_ATTACHED(self);
2986     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
2987 }
2988 
2989 /*[clinic input]
2990 _io.TextIOWrapper.readable
2991 [clinic start generated code]*/
2992 
2993 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2994 _io_TextIOWrapper_readable_impl(textio *self)
2995 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2996 {
2997     CHECK_ATTACHED(self);
2998     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
2999 }
3000 
3001 /*[clinic input]
3002 _io.TextIOWrapper.writable
3003 [clinic start generated code]*/
3004 
3005 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)3006 _io_TextIOWrapper_writable_impl(textio *self)
3007 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
3008 {
3009     CHECK_ATTACHED(self);
3010     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
3011 }
3012 
3013 /*[clinic input]
3014 _io.TextIOWrapper.isatty
3015 [clinic start generated code]*/
3016 
3017 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3018 _io_TextIOWrapper_isatty_impl(textio *self)
3019 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
3020 {
3021     CHECK_ATTACHED(self);
3022     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
3023 }
3024 
3025 /*[clinic input]
3026 _io.TextIOWrapper.flush
3027 [clinic start generated code]*/
3028 
3029 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3030 _io_TextIOWrapper_flush_impl(textio *self)
3031 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3032 {
3033     CHECK_ATTACHED(self);
3034     CHECK_CLOSED(self);
3035     self->telling = self->seekable;
3036     if (_textiowrapper_writeflush(self) < 0)
3037         return NULL;
3038     return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
3039 }
3040 
3041 /*[clinic input]
3042 _io.TextIOWrapper.close
3043 [clinic start generated code]*/
3044 
3045 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3046 _io_TextIOWrapper_close_impl(textio *self)
3047 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3048 {
3049     PyObject *res;
3050     int r;
3051     CHECK_ATTACHED(self);
3052 
3053     res = textiowrapper_closed_get(self, NULL);
3054     if (res == NULL)
3055         return NULL;
3056     r = PyObject_IsTrue(res);
3057     Py_DECREF(res);
3058     if (r < 0)
3059         return NULL;
3060 
3061     if (r > 0) {
3062         Py_RETURN_NONE; /* stream already closed */
3063     }
3064     else {
3065         PyObject *exc = NULL, *val, *tb;
3066         if (self->finalizing) {
3067             res = _PyObject_CallMethodIdOneArg(self->buffer,
3068                                               &PyId__dealloc_warn,
3069                                               (PyObject *)self);
3070             if (res)
3071                 Py_DECREF(res);
3072             else
3073                 PyErr_Clear();
3074         }
3075         res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
3076         if (res == NULL)
3077             PyErr_Fetch(&exc, &val, &tb);
3078         else
3079             Py_DECREF(res);
3080 
3081         res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
3082         if (exc != NULL) {
3083             _PyErr_ChainExceptions(exc, val, tb);
3084             Py_CLEAR(res);
3085         }
3086         return res;
3087     }
3088 }
3089 
3090 static PyObject *
textiowrapper_iternext(textio * self)3091 textiowrapper_iternext(textio *self)
3092 {
3093     PyObject *line;
3094 
3095     CHECK_ATTACHED(self);
3096 
3097     self->telling = 0;
3098     if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3099         /* Skip method call overhead for speed */
3100         line = _textiowrapper_readline(self, -1);
3101     }
3102     else {
3103         line = PyObject_CallMethodNoArgs((PyObject *)self,
3104                                           _PyIO_str_readline);
3105         if (line && !PyUnicode_Check(line)) {
3106             PyErr_Format(PyExc_OSError,
3107                          "readline() should have returned a str object, "
3108                          "not '%.200s'", Py_TYPE(line)->tp_name);
3109             Py_DECREF(line);
3110             return NULL;
3111         }
3112     }
3113 
3114     if (line == NULL || PyUnicode_READY(line) == -1)
3115         return NULL;
3116 
3117     if (PyUnicode_GET_LENGTH(line) == 0) {
3118         /* Reached EOF or would have blocked */
3119         Py_DECREF(line);
3120         Py_CLEAR(self->snapshot);
3121         self->telling = self->seekable;
3122         return NULL;
3123     }
3124 
3125     return line;
3126 }
3127 
3128 static PyObject *
textiowrapper_name_get(textio * self,void * context)3129 textiowrapper_name_get(textio *self, void *context)
3130 {
3131     CHECK_ATTACHED(self);
3132     return _PyObject_GetAttrId(self->buffer, &PyId_name);
3133 }
3134 
3135 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3136 textiowrapper_closed_get(textio *self, void *context)
3137 {
3138     CHECK_ATTACHED(self);
3139     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3140 }
3141 
3142 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3143 textiowrapper_newlines_get(textio *self, void *context)
3144 {
3145     PyObject *res;
3146     CHECK_ATTACHED(self);
3147     if (self->decoder == NULL ||
3148         _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3149     {
3150         Py_RETURN_NONE;
3151     }
3152     return res;
3153 }
3154 
3155 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3156 textiowrapper_errors_get(textio *self, void *context)
3157 {
3158     CHECK_INITIALIZED(self);
3159     Py_INCREF(self->errors);
3160     return self->errors;
3161 }
3162 
3163 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3164 textiowrapper_chunk_size_get(textio *self, void *context)
3165 {
3166     CHECK_ATTACHED(self);
3167     return PyLong_FromSsize_t(self->chunk_size);
3168 }
3169 
3170 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3171 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3172 {
3173     Py_ssize_t n;
3174     CHECK_ATTACHED_INT(self);
3175     if (arg == NULL) {
3176         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3177         return -1;
3178     }
3179     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3180     if (n == -1 && PyErr_Occurred())
3181         return -1;
3182     if (n <= 0) {
3183         PyErr_SetString(PyExc_ValueError,
3184                         "a strictly positive integer is required");
3185         return -1;
3186     }
3187     self->chunk_size = n;
3188     return 0;
3189 }
3190 
3191 #include "clinic/textio.c.h"
3192 
/* Method table for _io.IncrementalNewlineDecoder (referenced from the
   tp_methods slot of PyIncrementalNewlineDecoder_Type).  Each entry is a
   *_METHODDEF macro; presumably these come from the Argument Clinic header
   "clinic/textio.c.h" included just above.  The table ends with a NULL
   sentinel entry. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3200 
3201 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3202     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3203     {NULL}
3204 };
3205 
3206 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3207     PyVarObject_HEAD_INIT(NULL, 0)
3208     "_io.IncrementalNewlineDecoder", /*tp_name*/
3209     sizeof(nldecoder_object), /*tp_basicsize*/
3210     0,                          /*tp_itemsize*/
3211     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3212     0,                          /*tp_vectorcall_offset*/
3213     0,                          /*tp_getattr*/
3214     0,                          /*tp_setattr*/
3215     0,                          /*tp_as_async*/
3216     0,                          /*tp_repr*/
3217     0,                          /*tp_as_number*/
3218     0,                          /*tp_as_sequence*/
3219     0,                          /*tp_as_mapping*/
3220     0,                          /*tp_hash */
3221     0,                          /*tp_call*/
3222     0,                          /*tp_str*/
3223     0,                          /*tp_getattro*/
3224     0,                          /*tp_setattro*/
3225     0,                          /*tp_as_buffer*/
3226     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3227     _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3228     0,                          /* tp_traverse */
3229     0,                          /* tp_clear */
3230     0,                          /* tp_richcompare */
3231     0,                          /*tp_weaklistoffset*/
3232     0,                          /* tp_iter */
3233     0,                          /* tp_iternext */
3234     incrementalnewlinedecoder_methods, /* tp_methods */
3235     0,                          /* tp_members */
3236     incrementalnewlinedecoder_getset, /* tp_getset */
3237     0,                          /* tp_base */
3238     0,                          /* tp_dict */
3239     0,                          /* tp_descr_get */
3240     0,                          /* tp_descr_set */
3241     0,                          /* tp_dictoffset */
3242     _io_IncrementalNewlineDecoder___init__, /* tp_init */
3243     0,                          /* tp_alloc */
3244     PyType_GenericNew,          /* tp_new */
3245 };
3246 
3247 
/* Method table for _io.TextIOWrapper (referenced from the tp_methods slot of
   PyTextIOWrapper_Type).  Each entry is a *_METHODDEF macro; presumably these
   come from the Argument Clinic header "clinic/textio.c.h" included above.
   The table ends with a NULL sentinel entry. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3268 
3269 static PyMemberDef textiowrapper_members[] = {
3270     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3271     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3272     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3273     {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3274     {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3275     {NULL}
3276 };
3277 
3278 static PyGetSetDef textiowrapper_getset[] = {
3279     {"name", (getter)textiowrapper_name_get, NULL, NULL},
3280     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3281 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3282 */
3283     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3284     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3285     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3286                     (setter)textiowrapper_chunk_size_set, NULL},
3287     {NULL}
3288 };
3289 
3290 PyTypeObject PyTextIOWrapper_Type = {
3291     PyVarObject_HEAD_INIT(NULL, 0)
3292     "_io.TextIOWrapper",        /*tp_name*/
3293     sizeof(textio), /*tp_basicsize*/
3294     0,                          /*tp_itemsize*/
3295     (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3296     0,                          /*tp_vectorcall_offset*/
3297     0,                          /*tp_getattr*/
3298     0,                          /*tps_etattr*/
3299     0,                          /*tp_as_async*/
3300     (reprfunc)textiowrapper_repr,/*tp_repr*/
3301     0,                          /*tp_as_number*/
3302     0,                          /*tp_as_sequence*/
3303     0,                          /*tp_as_mapping*/
3304     0,                          /*tp_hash */
3305     0,                          /*tp_call*/
3306     0,                          /*tp_str*/
3307     0,                          /*tp_getattro*/
3308     0,                          /*tp_setattro*/
3309     0,                          /*tp_as_buffer*/
3310     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3311         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3312     _io_TextIOWrapper___init____doc__, /* tp_doc */
3313     (traverseproc)textiowrapper_traverse, /* tp_traverse */
3314     (inquiry)textiowrapper_clear, /* tp_clear */
3315     0,                          /* tp_richcompare */
3316     offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3317     0,                          /* tp_iter */
3318     (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3319     textiowrapper_methods,      /* tp_methods */
3320     textiowrapper_members,      /* tp_members */
3321     textiowrapper_getset,       /* tp_getset */
3322     0,                          /* tp_base */
3323     0,                          /* tp_dict */
3324     0,                          /* tp_descr_get */
3325     0,                          /* tp_descr_set */
3326     offsetof(textio, dict), /*tp_dictoffset*/
3327     _io_TextIOWrapper___init__, /* tp_init */
3328     0,                          /* tp_alloc */
3329     PyType_GenericNew,          /* tp_new */
3330     0,                          /* tp_free */
3331     0,                          /* tp_is_gc */
3332     0,                          /* tp_bases */
3333     0,                          /* tp_mro */
3334     0,                          /* tp_cache */
3335     0,                          /* tp_subclasses */
3336     0,                          /* tp_weaklist */
3337     0,                          /* tp_del */
3338     0,                          /* tp_version_tag */
3339     0,                          /* tp_finalize */
3340 };
3341