1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13 
14 /* TextIOBase */
15 
/* Class docstring for _io._TextIOBase; installed in the tp_doc slot of
   PyTextIOBase_Type below. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23 
24 static PyObject *
_unsupported(const char * message)25 _unsupported(const char *message)
26 {
27     PyErr_SetString(_PyIO_unsupported_operation, message);
28     return NULL;
29 }
30 
31 PyDoc_STRVAR(textiobase_detach_doc,
32     "Separate the underlying buffer from the TextIOBase and return it.\n"
33     "\n"
34     "After the underlying buffer has been detached, the TextIO is in an\n"
35     "unusable state.\n"
36     );
37 
38 static PyObject *
textiobase_detach(PyObject * self)39 textiobase_detach(PyObject *self)
40 {
41     return _unsupported("detach");
42 }
43 
44 PyDoc_STRVAR(textiobase_read_doc,
45     "Read at most n characters from stream.\n"
46     "\n"
47     "Read from underlying buffer until we have n characters or we hit EOF.\n"
48     "If n is negative or omitted, read until EOF.\n"
49     );
50 
51 static PyObject *
textiobase_read(PyObject * self,PyObject * args)52 textiobase_read(PyObject *self, PyObject *args)
53 {
54     return _unsupported("read");
55 }
56 
57 PyDoc_STRVAR(textiobase_readline_doc,
58     "Read until newline or EOF.\n"
59     "\n"
60     "Returns an empty string if EOF is hit immediately.\n"
61     );
62 
63 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)64 textiobase_readline(PyObject *self, PyObject *args)
65 {
66     return _unsupported("readline");
67 }
68 
69 PyDoc_STRVAR(textiobase_write_doc,
70     "Write string to stream.\n"
71     "Returns the number of characters written (which is always equal to\n"
72     "the length of the string).\n"
73     );
74 
75 static PyObject *
textiobase_write(PyObject * self,PyObject * args)76 textiobase_write(PyObject *self, PyObject *args)
77 {
78     return _unsupported("write");
79 }
80 
81 PyDoc_STRVAR(textiobase_encoding_doc,
82     "Encoding of the text stream.\n"
83     "\n"
84     "Subclasses should override.\n"
85     );
86 
87 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)88 textiobase_encoding_get(PyObject *self, void *context)
89 {
90     Py_RETURN_NONE;
91 }
92 
93 PyDoc_STRVAR(textiobase_newlines_doc,
94     "Line endings translated so far.\n"
95     "\n"
96     "Only line endings translated during reading are considered.\n"
97     "\n"
98     "Subclasses should override.\n"
99     );
100 
101 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)102 textiobase_newlines_get(PyObject *self, void *context)
103 {
104     Py_RETURN_NONE;
105 }
106 
107 PyDoc_STRVAR(textiobase_errors_doc,
108     "The error setting of the decoder or encoder.\n"
109     "\n"
110     "Subclasses should override.\n"
111     );
112 
113 static PyObject *
textiobase_errors_get(PyObject * self,void * context)114 textiobase_errors_get(PyObject *self, void *context)
115 {
116     Py_RETURN_NONE;
117 }
118 
119 
120 static PyMethodDef textiobase_methods[] = {
121     {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125     {NULL, NULL}
126 };
127 
128 static PyGetSetDef textiobase_getset[] = {
129     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132     {NULL}
133 };
134 
/* Type object for _io._TextIOBase.

   The initializer is positional, so the order of the entries must follow
   the PyTypeObject slot order.  Only the flags, docstring, method table,
   getset table and base type (PyIOBase_Type) are filled in; every other
   slot is left as 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /* tp_name */
    0,                          /* tp_basicsize */
    0,                          /* tp_itemsize */
    0,                          /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175 
176 
177 /* IncrementalNewlineDecoder */
178 
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180     "Codec used when reading a file in universal newlines mode.  It wraps\n"
181     "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
182     "records the types of newlines encountered.  When used with\n"
183     "translate=False, it ensures that the newline sequence is returned in\n"
184     "one piece. When used with decoder=None, it expects unicode strings as\n"
185     "decode input and translates newlines without first invoking an external\n"
186     "decoder.\n"
187     );
188 
189 typedef struct {
190     PyObject_HEAD
191     PyObject *decoder;
192     PyObject *errors;
193     signed int pendingcr: 1;
194     signed int translate: 1;
195     unsigned int seennl: 3;
196 } nldecoder_object;
197 
198 static int
incrementalnewlinedecoder_init(nldecoder_object * self,PyObject * args,PyObject * kwds)199 incrementalnewlinedecoder_init(nldecoder_object *self,
200                                PyObject *args, PyObject *kwds)
201 {
202     PyObject *decoder;
203     int translate;
204     PyObject *errors = NULL;
205     char *kwlist[] = {"decoder", "translate", "errors", NULL};
206 
207     if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208                                      kwlist, &decoder, &translate, &errors))
209         return -1;
210 
211     self->decoder = decoder;
212     Py_INCREF(decoder);
213 
214     if (errors == NULL) {
215         self->errors = PyUnicode_FromString("strict");
216         if (self->errors == NULL)
217             return -1;
218     }
219     else {
220         Py_INCREF(errors);
221         self->errors = errors;
222     }
223 
224     self->translate = translate ? 1 : 0;
225     self->seennl = 0;
226     self->pendingcr = 0;
227 
228     return 0;
229 }
230 
231 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233 {
234     Py_CLEAR(self->decoder);
235     Py_CLEAR(self->errors);
236     Py_TYPE(self)->tp_free((PyObject *)self);
237 }
238 
239 static int
check_decoded(PyObject * decoded)240 check_decoded(PyObject *decoded)
241 {
242     if (decoded == NULL)
243         return -1;
244     if (!PyUnicode_Check(decoded)) {
245         PyErr_Format(PyExc_TypeError,
246                      "decoder should return a string result, not '%.200s'",
247                      Py_TYPE(decoded)->tp_name);
248         Py_DECREF(decoded);
249         return -1;
250     }
251     return 0;
252 }
253 
/* Bit flags stored in nldecoder_object.seennl, one per newline kind that
   has been encountered; SEEN_ALL is the union of the three. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
258 
/* Core of IncrementalNewlineDecoder.decode().  Declared without `static`,
   so it is visible to other translation units.

   _self  an nldecoder_object (cast without a type check).
   input  the object to decode; when self->decoder is Py_None it is used
          directly and must already be a unicode object.
   final  non-zero when this is the last chunk: a trailing '\r' is then
          not held back.

   Steps:
     1. obtain the decoded text (from the wrapped decoder, or `input`);
     2. prepend a '\r' that was held back by the previous call;
     3. unless `final`, strip a trailing '\r' and remember it in
        self->pendingcr;
     4. scan the text, recording the newline kinds in self->seennl and,
        when self->translate is set, rewriting "\r\n" and "\r" to "\n".

   Returns a new reference to a unicode object, or NULL with an exception
   set (ValueError when __init__ has not run, TypeError when the decoder
   result is not unicode, or whatever the decoder / allocation raised). */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the possible \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No external decoder: the input itself is the decoded text.
           Take our own reference so both paths own `output`. */
        output = input;
        Py_INCREF(output);
    }

    /* Handles output == NULL as well; on failure it has already released
       the reference, so return directly instead of going to `error`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Build a new string consisting of the held-back '\r' followed by
           the freshly decoded text. */
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise copy everything but the last character. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        /* Nothing to scan (output_len is not reused here because the
           string may have been shortened above). */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           A hit on a byte that is only part of a wider character merely
           sends us down one of the slower, exact paths below.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        /* NOTE(review): all three scanning loops below read the element at
           in_str[len] and only stop after consuming it (the `> end` tests
           run after the post-increment).  This presumably relies on the
           unicode buffer being NUL-terminated -- confirm against the
           unicode object implementation. */
        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                /* The byte-level memchr hit is only a hint; walk the
                   characters to see whether a real '\n' is present. */
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record newline kinds only; the text is returned unchanged. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look at the character after the '\r' to tell a lone
                       CR from a CRLF pair. */
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translate while scanning.  `in` reads from the original
               buffer and `out` writes to the destination buffer; `out`
               never gets ahead of `in`, which is what makes the in-place
               case (translated == output) work. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    /* "\r\n" collapses to a single '\n'; a lone '\r'
                       becomes '\n' as well. */
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* Any other character <= '\r': stop once the element at
                   index len has been consumed, otherwise copy it. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* CRLF pairs shrink the text; trim to what was written. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    /* Every jump here happens while we own a reference to `output`. */
    Py_DECREF(output);
    return NULL;
}
470 
471 static PyObject *
incrementalnewlinedecoder_decode(nldecoder_object * self,PyObject * args,PyObject * kwds)472 incrementalnewlinedecoder_decode(nldecoder_object *self,
473                                  PyObject *args, PyObject *kwds)
474 {
475     char *kwlist[] = {"input", "final", NULL};
476     PyObject *input;
477     int final = 0;
478 
479     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480                                      kwlist, &input, &final))
481         return NULL;
482     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483 }
484 
485 static PyObject *
incrementalnewlinedecoder_getstate(nldecoder_object * self,PyObject * args)486 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
487 {
488     PyObject *buffer;
489     unsigned PY_LONG_LONG flag;
490 
491     if (self->decoder != Py_None) {
492         PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493            _PyIO_str_getstate, NULL);
494         if (state == NULL)
495             return NULL;
496         if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497             Py_DECREF(state);
498             return NULL;
499         }
500         Py_INCREF(buffer);
501         Py_DECREF(state);
502     }
503     else {
504         buffer = PyBytes_FromString("");
505         flag = 0;
506     }
507     flag <<= 1;
508     if (self->pendingcr)
509         flag |= 1;
510     return Py_BuildValue("NK", buffer, flag);
511 }
512 
513 static PyObject *
incrementalnewlinedecoder_setstate(nldecoder_object * self,PyObject * state)514 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
515 {
516     PyObject *buffer;
517     unsigned PY_LONG_LONG flag;
518 
519     if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520         return NULL;
521 
522     self->pendingcr = (int) flag & 1;
523     flag >>= 1;
524 
525     if (self->decoder != Py_None)
526         return PyObject_CallMethod(self->decoder,
527                                    "setstate", "((OK))", buffer, flag);
528     else
529         Py_RETURN_NONE;
530 }
531 
532 static PyObject *
incrementalnewlinedecoder_reset(nldecoder_object * self,PyObject * args)533 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
534 {
535     self->seennl = 0;
536     self->pendingcr = 0;
537     if (self->decoder != Py_None)
538         return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539     else
540         Py_RETURN_NONE;
541 }
542 
543 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)544 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
545 {
546     switch (self->seennl) {
547     case SEEN_CR:
548         return PyUnicode_FromString("\r");
549     case SEEN_LF:
550         return PyUnicode_FromString("\n");
551     case SEEN_CRLF:
552         return PyUnicode_FromString("\r\n");
553     case SEEN_CR | SEEN_LF:
554         return Py_BuildValue("ss", "\r", "\n");
555     case SEEN_CR | SEEN_CRLF:
556         return Py_BuildValue("ss", "\r", "\r\n");
557     case SEEN_LF | SEEN_CRLF:
558         return Py_BuildValue("ss", "\n", "\r\n");
559     case SEEN_CR | SEEN_LF | SEEN_CRLF:
560         return Py_BuildValue("sss", "\r", "\n", "\r\n");
561     default:
562         Py_RETURN_NONE;
563    }
564 
565 }
566 
567 
568 static PyMethodDef incrementalnewlinedecoder_methods[] = {
569     {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570     {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571     {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572     {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
573     {NULL}
574 };
575 
576 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
578     {NULL}
579 };
580 
/* Type object for _io.IncrementalNewlineDecoder.

   The initializer is positional, so the order of the entries must follow
   the PyTypeObject slot order.  Instances are nldecoder_object structs
   created by PyType_GenericNew, initialised by
   incrementalnewlinedecoder_init and destroyed by
   incrementalnewlinedecoder_dealloc.  No tp_traverse/tp_clear is
   installed. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /* tp_name */
    sizeof(nldecoder_object), /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor)incrementalnewlinedecoder_dealloc, /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
621 
622 
623 /* TextIOWrapper */
624 
/* Docstring for the TextIOWrapper class (presumably installed as tp_doc of
   the type object defined later in this file -- not visible from here). */
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see the\n"
    "codecs.register) and defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'.  It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    "  enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    "  these are translated into '\\n' before being returned to the\n"
    "  caller. If it is '', universal newline mode is enabled, but line\n"
    "  endings are returned to the caller untranslated. If it has any of\n"
    "  the other legal values, input lines are only terminated by the given\n"
    "  string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    "  translated to the system default line separator, os.linesep. If\n"
    "  newline is '', no translation takes place. If newline is any of the\n"
    "  other legal values, any '\\n' characters written are translated to\n"
    "  the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );
654 
/* Signature shared by the specialized encoders below: takes the wrapper
   (passed as a PyObject *) and the text to encode, returns the encoded
   result or NULL on error. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;   /* read through PyBytes_AS_STRING by the encoders */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;

/* Forward declaration; the definition is not part of this section of the
   file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
712 
713 /* A couple of specialized cases in order to bypass the slow incremental
714    encoding methods for the most popular encodings. */
715 
716 static PyObject *
ascii_encode(textio * self,PyObject * text)717 ascii_encode(textio *self, PyObject *text)
718 {
719     return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
720                                  PyUnicode_GET_SIZE(text),
721                                  PyBytes_AS_STRING(self->errors));
722 }
723 
724 static PyObject *
utf16be_encode(textio * self,PyObject * text)725 utf16be_encode(textio *self, PyObject *text)
726 {
727     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
728                                  PyUnicode_GET_SIZE(text),
729                                  PyBytes_AS_STRING(self->errors), 1);
730 }
731 
732 static PyObject *
utf16le_encode(textio * self,PyObject * text)733 utf16le_encode(textio *self, PyObject *text)
734 {
735     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
736                                  PyUnicode_GET_SIZE(text),
737                                  PyBytes_AS_STRING(self->errors), -1);
738 }
739 
740 static PyObject *
utf16_encode(textio * self,PyObject * text)741 utf16_encode(textio *self, PyObject *text)
742 {
743     if (!self->encoding_start_of_stream) {
744         /* Skip the BOM and use native byte ordering */
745 #if defined(WORDS_BIGENDIAN)
746         return utf16be_encode(self, text);
747 #else
748         return utf16le_encode(self, text);
749 #endif
750     }
751     return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
752                                  PyUnicode_GET_SIZE(text),
753                                  PyBytes_AS_STRING(self->errors), 0);
754 }
755 
756 static PyObject *
utf32be_encode(textio * self,PyObject * text)757 utf32be_encode(textio *self, PyObject *text)
758 {
759     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
760                                  PyUnicode_GET_SIZE(text),
761                                  PyBytes_AS_STRING(self->errors), 1);
762 }
763 
764 static PyObject *
utf32le_encode(textio * self,PyObject * text)765 utf32le_encode(textio *self, PyObject *text)
766 {
767     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
768                                  PyUnicode_GET_SIZE(text),
769                                  PyBytes_AS_STRING(self->errors), -1);
770 }
771 
772 static PyObject *
utf32_encode(textio * self,PyObject * text)773 utf32_encode(textio *self, PyObject *text)
774 {
775     if (!self->encoding_start_of_stream) {
776         /* Skip the BOM and use native byte ordering */
777 #if defined(WORDS_BIGENDIAN)
778         return utf32be_encode(self, text);
779 #else
780         return utf32le_encode(self, text);
781 #endif
782     }
783     return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
784                                  PyUnicode_GET_SIZE(text),
785                                  PyBytes_AS_STRING(self->errors), 0);
786 }
787 
788 static PyObject *
utf8_encode(textio * self,PyObject * text)789 utf8_encode(textio *self, PyObject *text)
790 {
791     return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
792                                 PyUnicode_GET_SIZE(text),
793                                 PyBytes_AS_STRING(self->errors));
794 }
795 
796 static PyObject *
latin1_encode(textio * self,PyObject * text)797 latin1_encode(textio *self, PyObject *text)
798 {
799     return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
800                                   PyUnicode_GET_SIZE(text),
801                                   PyBytes_AS_STRING(self->errors));
802 }
803 
/* Map normalized encoding names onto the specialized encoding funcs */

/* One table entry: an encoding name and the fast-path encoder for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Lookup table terminated by a {NULL, NULL} entry.  The encoders take a
   `textio *` first argument, hence the casts to the generic encodefunc_t
   signature. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
823 
824 
825 static int
textiowrapper_init(textio * self,PyObject * args,PyObject * kwds)826 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
827 {
828     char *kwlist[] = {"buffer", "encoding", "errors",
829                       "newline", "line_buffering",
830                       NULL};
831     PyObject *buffer, *raw, *codec_info = NULL;
832     char *encoding = NULL;
833     char *errors = NULL;
834     char *newline = NULL;
835     int line_buffering = 0;
836 
837     PyObject *res;
838     int r;
839 
840     self->ok = 0;
841     self->detached = 0;
842     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
843                                      kwlist, &buffer, &encoding, &errors,
844                                      &newline, &line_buffering))
845         return -1;
846 
847     if (newline && newline[0] != '\0'
848         && !(newline[0] == '\n' && newline[1] == '\0')
849         && !(newline[0] == '\r' && newline[1] == '\0')
850         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
851         PyErr_Format(PyExc_ValueError,
852                      "illegal newline value: %s", newline);
853         return -1;
854     }
855 
856     Py_CLEAR(self->buffer);
857     Py_CLEAR(self->encoding);
858     Py_CLEAR(self->encoder);
859     Py_CLEAR(self->decoder);
860     Py_CLEAR(self->readnl);
861     Py_CLEAR(self->decoded_chars);
862     Py_CLEAR(self->pending_bytes);
863     Py_CLEAR(self->snapshot);
864     Py_CLEAR(self->errors);
865     Py_CLEAR(self->raw);
866     self->decoded_chars_used = 0;
867     self->pending_bytes_count = 0;
868     self->encodefunc = NULL;
869     self->writenl = NULL;
870 
871     if (encoding == NULL && self->encoding == NULL) {
872         if (_PyIO_locale_module == NULL) {
873             _PyIO_locale_module = PyImport_ImportModule("locale");
874             if (_PyIO_locale_module == NULL)
875                 goto catch_ImportError;
876             else
877                 goto use_locale;
878         }
879         else {
880           use_locale:
881             self->encoding = PyObject_CallMethod(
882                 _PyIO_locale_module, "getpreferredencoding", NULL);
883             if (self->encoding == NULL) {
884               catch_ImportError:
885                 /*
886                  Importing locale can raise an ImportError because of
887                  _functools, and locale.getpreferredencoding can raise an
888                  ImportError if _locale is not available.  These will happen
889                  during module building.
890                 */
891                 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
892                     PyErr_Clear();
893                     self->encoding = PyString_FromString("ascii");
894                 }
895                 else
896                     goto error;
897             }
898             else if (!PyString_Check(self->encoding))
899                 Py_CLEAR(self->encoding);
900         }
901     }
902     if (self->encoding != NULL)
903         encoding = PyString_AsString(self->encoding);
904     else if (encoding != NULL) {
905         self->encoding = PyString_FromString(encoding);
906         if (self->encoding == NULL)
907             goto error;
908     }
909     else {
910         PyErr_SetString(PyExc_IOError,
911                         "could not determine default encoding");
912         goto error;
913     }
914 
915     /* Check we have been asked for a real text encoding */
916     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
917     if (codec_info == NULL) {
918         Py_CLEAR(self->encoding);
919         goto error;
920     }
921 
922     /* XXX: Failures beyond this point have the potential to leak elements
923      * of the partially constructed object (like self->encoding)
924      */
925 
926     if (errors == NULL)
927         errors = "strict";
928     self->errors = PyBytes_FromString(errors);
929     if (self->errors == NULL)
930         goto error;
931 
932     self->chunk_size = 8192;
933     self->readuniversal = (newline == NULL || newline[0] == '\0');
934     self->line_buffering = line_buffering;
935     self->readtranslate = (newline == NULL);
936     if (newline) {
937         self->readnl = PyString_FromString(newline);
938         if (self->readnl == NULL)
939             goto error;
940     }
941     self->writetranslate = (newline == NULL || newline[0] != '\0');
942     if (!self->readuniversal && self->writetranslate) {
943         self->writenl = PyString_AsString(self->readnl);
944         if (!strcmp(self->writenl, "\n"))
945             self->writenl = NULL;
946     }
947 #ifdef MS_WINDOWS
948     else
949         self->writenl = "\r\n";
950 #endif
951 
952     /* Build the decoder object */
953     res = PyObject_CallMethod(buffer, "readable", NULL);
954     if (res == NULL)
955         goto error;
956     r = PyObject_IsTrue(res);
957     Py_DECREF(res);
958     if (r == -1)
959         goto error;
960     if (r == 1) {
961         self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
962                                                            errors);
963         if (self->decoder == NULL)
964             goto error;
965 
966         if (self->readuniversal) {
967             PyObject *incrementalDecoder = PyObject_CallFunction(
968                 (PyObject *)&PyIncrementalNewlineDecoder_Type,
969                 "Oi", self->decoder, (int)self->readtranslate);
970             if (incrementalDecoder == NULL)
971                 goto error;
972             Py_XSETREF(self->decoder, incrementalDecoder);
973         }
974     }
975 
976     /* Build the encoder object */
977     res = PyObject_CallMethod(buffer, "writable", NULL);
978     if (res == NULL)
979         goto error;
980     r = PyObject_IsTrue(res);
981     Py_DECREF(res);
982     if (r == -1)
983         goto error;
984     if (r == 1) {
985         self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
986                                                            errors);
987         if (self->encoder == NULL)
988             goto error;
989         /* Get the normalized name of the codec */
990         res = PyObject_GetAttrString(codec_info, "name");
991         if (res == NULL) {
992             if (PyErr_ExceptionMatches(PyExc_AttributeError))
993                 PyErr_Clear();
994             else
995                 goto error;
996         }
997         else if (PyString_Check(res)) {
998             encodefuncentry *e = encodefuncs;
999             while (e->name != NULL) {
1000                 if (!strcmp(PyString_AS_STRING(res), e->name)) {
1001                     self->encodefunc = e->encodefunc;
1002                     break;
1003                 }
1004                 e++;
1005             }
1006         }
1007         Py_XDECREF(res);
1008     }
1009 
1010     /* Finished sorting out the codec details */
1011     Py_DECREF(codec_info);
1012 
1013     self->buffer = buffer;
1014     Py_INCREF(buffer);
1015 
1016     if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1017         Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1018         Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1019         raw = PyObject_GetAttrString(buffer, "raw");
1020         /* Cache the raw FileIO object to speed up 'closed' checks */
1021         if (raw == NULL) {
1022             if (PyErr_ExceptionMatches(PyExc_AttributeError))
1023                 PyErr_Clear();
1024             else
1025                 goto error;
1026         }
1027         else if (Py_TYPE(raw) == &PyFileIO_Type)
1028             self->raw = raw;
1029         else
1030             Py_DECREF(raw);
1031     }
1032 
1033     res = PyObject_CallMethod(buffer, "seekable", NULL);
1034     if (res == NULL)
1035         goto error;
1036     r = PyObject_IsTrue(res);
1037     Py_DECREF(res);
1038     if (r < 0)
1039         goto error;
1040     self->seekable = self->telling = r;
1041 
1042     self->encoding_start_of_stream = 0;
1043     if (self->seekable && self->encoder) {
1044         PyObject *cookieObj;
1045         int cmp;
1046 
1047         self->encoding_start_of_stream = 1;
1048 
1049         cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1050         if (cookieObj == NULL)
1051             goto error;
1052 
1053         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1054         Py_DECREF(cookieObj);
1055         if (cmp < 0) {
1056             goto error;
1057         }
1058 
1059         if (cmp == 0) {
1060             self->encoding_start_of_stream = 0;
1061             res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1062                                              _PyIO_zero, NULL);
1063             if (res == NULL)
1064                 goto error;
1065             Py_DECREF(res);
1066         }
1067     }
1068 
1069     self->ok = 1;
1070     return 0;
1071 
1072   error:
1073     Py_XDECREF(codec_info);
1074     return -1;
1075 }
1076 
/* Release every object reference held by the wrapper and mark it as not
   initialized (ok = 0).  Shared by the dealloc and tp_clear paths.  Each
   field is released with Py_CLEAR, which sets the field to NULL before
   dropping the reference, so calling this more than once is harmless. */
static void
_textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    /* The instance dictionary goes last. */
    Py_CLEAR(self->dict);
}
1094 
1095 static void
textiowrapper_dealloc(textio * self)1096 textiowrapper_dealloc(textio *self)
1097 {
1098     if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1099         return;
1100     _PyObject_GC_UNTRACK(self);
1101     if (self->weakreflist != NULL)
1102         PyObject_ClearWeakRefs((PyObject *)self);
1103     _textiowrapper_clear(self);
1104     Py_TYPE(self)->tp_free((PyObject *)self);
1105 }
1106 
/* GC traversal function: reports every object reference held by the
   wrapper to `visit`.  Py_VISIT uses the `visit` and `arg` parameters by
   name and returns from this function early if the callback returns a
   non-zero value.  The set of fields visited here matches the set
   released in _textiowrapper_clear(). */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1124 
1125 static int
textiowrapper_clear(textio * self)1126 textiowrapper_clear(textio *self)
1127 {
1128     if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1129         return -1;
1130     _textiowrapper_clear(self);
1131     return 0;
1132 }
1133 
1134 static PyObject *
1135 textiowrapper_closed_get(textio *self, void *context);
1136 
/* Raise ValueError and `return NULL` from the enclosing function if the
   stream is closed.  Because it expands to `return NULL`, it can only be
   used in functions that return a pointer.

   This macro takes some shortcuts to make the common case faster:
   - for exact TextIOWrapper instances with a cached raw FileIO object
     (self->raw), the closed state is read via _PyFileIO_closed();
   - for exact TextIOWrapper instances without a cached raw object, the
     `closed` getter is called directly;
   - for any other type (i.e. subclasses), the generic
     _PyIOBase_check_closed() is used. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1163 
/* Raise ValueError and `return NULL` from the enclosing function when the
   object has not been successfully initialized (self->ok <= 0).
   NOTE: this expands to a bare `if` statement (no do/while(0) wrapper),
   so do not use it as the unbraced body of an if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* Like CHECK_INITIALIZED, and additionally raise ValueError and
   `return NULL` when the underlying buffer has been detached.
   NOTE: this expands to more than one statement; only use it as a
   stand-alone statement inside a braced block. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1189 
1190 
1191 static PyObject *
textiowrapper_detach(textio * self)1192 textiowrapper_detach(textio *self)
1193 {
1194     PyObject *buffer, *res;
1195     CHECK_ATTACHED(self);
1196     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1197     if (res == NULL)
1198         return NULL;
1199     Py_DECREF(res);
1200     buffer = self->buffer;
1201     self->buffer = NULL;
1202     self->detached = 1;
1203     return buffer;
1204 }
1205 
1206 Py_LOCAL_INLINE(const Py_UNICODE *)
findchar(const Py_UNICODE * s,Py_ssize_t size,Py_UNICODE ch)1207 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1208 {
1209     /* like wcschr, but doesn't stop at NULL characters */
1210     while (size-- > 0) {
1211         if (*s == ch)
1212             return s;
1213         s++;
1214     }
1215     return NULL;
1216 }
1217 
1218 /* Flush the internal write buffer. This doesn't explicitly flush the
1219    underlying buffered object, though. */
1220 static int
_textiowrapper_writeflush(textio * self)1221 _textiowrapper_writeflush(textio *self)
1222 {
1223     PyObject *pending, *b, *ret;
1224 
1225     if (self->pending_bytes == NULL)
1226         return 0;
1227 
1228     pending = self->pending_bytes;
1229     Py_INCREF(pending);
1230     self->pending_bytes_count = 0;
1231     Py_CLEAR(self->pending_bytes);
1232 
1233     b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1234     Py_DECREF(pending);
1235     if (b == NULL)
1236         return -1;
1237     ret = NULL;
1238     do {
1239         ret = PyObject_CallMethodObjArgs(self->buffer,
1240                                          _PyIO_str_write, b, NULL);
1241     } while (ret == NULL && _PyIO_trap_eintr());
1242     Py_DECREF(b);
1243     if (ret == NULL)
1244         return -1;
1245     Py_DECREF(ret);
1246     return 0;
1247 }
1248 
1249 static PyObject *
textiowrapper_write(textio * self,PyObject * args)1250 textiowrapper_write(textio *self, PyObject *args)
1251 {
1252     PyObject *ret;
1253     PyObject *text; /* owned reference */
1254     PyObject *b;
1255     Py_ssize_t textlen;
1256     int haslf = 0;
1257     int needflush = 0;
1258 
1259     CHECK_ATTACHED(self);
1260 
1261     if (!PyArg_ParseTuple(args, "U:write", &text)) {
1262         return NULL;
1263     }
1264 
1265     CHECK_CLOSED(self);
1266 
1267     if (self->encoder == NULL) {
1268         PyErr_SetString(PyExc_IOError, "not writable");
1269         return NULL;
1270     }
1271 
1272     Py_INCREF(text);
1273 
1274     textlen = PyUnicode_GetSize(text);
1275 
1276     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1277         if (findchar(PyUnicode_AS_UNICODE(text),
1278                      PyUnicode_GET_SIZE(text), '\n'))
1279             haslf = 1;
1280 
1281     if (haslf && self->writetranslate && self->writenl != NULL) {
1282         PyObject *newtext = PyObject_CallMethod(
1283             text, "replace", "ss", "\n", self->writenl);
1284         Py_DECREF(text);
1285         if (newtext == NULL)
1286             return NULL;
1287         text = newtext;
1288     }
1289 
1290     if (self->line_buffering &&
1291         (haslf ||
1292          findchar(PyUnicode_AS_UNICODE(text),
1293                   PyUnicode_GET_SIZE(text), '\r')))
1294         needflush = 1;
1295 
1296     /* XXX What if we were just reading? */
1297     if (self->encodefunc != NULL) {
1298         b = (*self->encodefunc)((PyObject *) self, text);
1299         self->encoding_start_of_stream = 0;
1300     }
1301     else
1302         b = PyObject_CallMethodObjArgs(self->encoder,
1303                                        _PyIO_str_encode, text, NULL);
1304     Py_DECREF(text);
1305     if (b == NULL)
1306         return NULL;
1307 
1308     if (self->pending_bytes == NULL) {
1309         self->pending_bytes = PyList_New(0);
1310         if (self->pending_bytes == NULL) {
1311             Py_DECREF(b);
1312             return NULL;
1313         }
1314         self->pending_bytes_count = 0;
1315     }
1316     if (PyList_Append(self->pending_bytes, b) < 0) {
1317         Py_DECREF(b);
1318         return NULL;
1319     }
1320     self->pending_bytes_count += PyBytes_GET_SIZE(b);
1321     Py_DECREF(b);
1322     if (self->pending_bytes_count > self->chunk_size || needflush) {
1323         if (_textiowrapper_writeflush(self) < 0)
1324             return NULL;
1325     }
1326 
1327     if (needflush) {
1328         ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1329         if (ret == NULL)
1330             return NULL;
1331         Py_DECREF(ret);
1332     }
1333 
1334     textiowrapper_set_decoded_chars(self, NULL);
1335     Py_CLEAR(self->snapshot);
1336 
1337     if (self->decoder) {
1338         ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1339         if (ret == NULL)
1340             return NULL;
1341         Py_DECREF(ret);
1342     }
1343 
1344     return PyLong_FromSsize_t(textlen);
1345 }
1346 
1347 /* Steal a reference to chars and store it in the decoded_char buffer;
1348  */
1349 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1350 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1351 {
1352     Py_XSETREF(self->decoded_chars, chars);
1353     self->decoded_chars_used = 0;
1354 }
1355 
1356 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1357 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1358 {
1359     PyObject *chars;
1360     Py_ssize_t avail;
1361 
1362     if (self->decoded_chars == NULL)
1363         return PyUnicode_FromStringAndSize(NULL, 0);
1364 
1365     avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1366              - self->decoded_chars_used);
1367 
1368     assert(avail >= 0);
1369 
1370     if (n < 0 || n > avail)
1371         n = avail;
1372 
1373     if (self->decoded_chars_used > 0 || n < avail) {
1374         chars = PyUnicode_FromUnicode(
1375             PyUnicode_AS_UNICODE(self->decoded_chars)
1376             + self->decoded_chars_used, n);
1377         if (chars == NULL)
1378             return NULL;
1379     }
1380     else {
1381         chars = self->decoded_chars;
1382         Py_INCREF(chars);
1383     }
1384 
1385     self->decoded_chars_used += n;
1386     return chars;
1387 }
1388 
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * Calls buffer.read1(chunk_size), feeds the bytes to the decoder and
 * stores the decoded text with textiowrapper_set_decoded_chars().  When
 * self->telling is set, a (dec_flags, next_input) snapshot is stored in
 * self->snapshot as well.
 *
 * Returns 1 if the chunk produced data or the input was not exhausted,
 * 0 when read1() returned no bytes and the decoder produced no text
 * (EOF), and -1 with an exception set on error.
 */
static int
textiowrapper_read_chunk(textio *self)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    PyObject *decoded_chars, *chunk_size;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_IOError, "not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags)) {
            Py_DECREF(state);
            return -1;
        }
        /* "O" yields borrowed references; take our own before releasing
           the state tuple that owns them. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    chunk_size = PyLong_FromSsize_t(self->chunk_size);
    if (chunk_size == NULL)
        goto fail;
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        _PyIO_str_read1, chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;
    if (!PyBytes_Check(input_chunk)) {
        PyErr_Format(PyExc_TypeError,
                     "underlying read1() should have returned a bytes object, "
                     "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    /* An empty read means the underlying stream is exhausted; the decoder
       is then called with final=True. */
    eof = (PyBytes_Size(input_chunk) == 0);

    if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
        /* Exact-type match: call the C implementation directly. */
        decoded_chars = _PyIncrementalNewlineDecoder_decode(
            self->decoder, input_chunk, eof);
    }
    else {
        decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
    }

    /* NOTE(review): check_decoded() is defined elsewhere; it is assumed to
       cope with a NULL argument and to release decoded_chars itself when
       it reports failure -- confirm. */
    if (check_decoded(decoded_chars) < 0)
        goto fail;
    /* Ownership of decoded_chars passes to the wrapper here. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    /* The decoder may still emit text on the final call; that is not EOF
       for the caller yet. */
    if (PyUnicode_GET_SIZE(decoded_chars) > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *snapshot;
        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
        if (next_input == NULL)
            goto fail;
        if (!PyBytes_Check(next_input)) {
            PyErr_Format(PyExc_TypeError,
                         "decoder getstate() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(next_input)->tp_name);
            Py_DECREF(next_input);
            goto fail;
        }
        /* "NN" hands both references over to Py_BuildValue. */
        snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            /* dec_flags is no longer ours; keep `fail` from releasing it. */
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
        Py_DECREF(dec_buffer);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1499 
1500 static PyObject *
textiowrapper_read(textio * self,PyObject * args)1501 textiowrapper_read(textio *self, PyObject *args)
1502 {
1503     Py_ssize_t n = -1;
1504     PyObject *result = NULL, *chunks = NULL;
1505 
1506     CHECK_ATTACHED(self);
1507 
1508     if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1509         return NULL;
1510 
1511     CHECK_CLOSED(self);
1512 
1513     if (self->decoder == NULL) {
1514         PyErr_SetString(PyExc_IOError, "not readable");
1515         return NULL;
1516     }
1517 
1518     if (_textiowrapper_writeflush(self) < 0)
1519         return NULL;
1520 
1521     if (n < 0) {
1522         /* Read everything */
1523         PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1524         PyObject *decoded, *final;
1525         if (bytes == NULL)
1526             goto fail;
1527         decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1528                                              bytes, Py_True, NULL);
1529         Py_DECREF(bytes);
1530         if (check_decoded(decoded) < 0)
1531             goto fail;
1532 
1533         result = textiowrapper_get_decoded_chars(self, -1);
1534 
1535         if (result == NULL) {
1536             Py_DECREF(decoded);
1537             return NULL;
1538         }
1539 
1540         final = PyUnicode_Concat(result, decoded);
1541         Py_DECREF(result);
1542         Py_DECREF(decoded);
1543         if (final == NULL)
1544             goto fail;
1545 
1546         textiowrapper_set_decoded_chars(self, NULL);
1547         Py_CLEAR(self->snapshot);
1548         return final;
1549     }
1550     else {
1551         int res = 1;
1552         Py_ssize_t remaining = n;
1553 
1554         result = textiowrapper_get_decoded_chars(self, n);
1555         if (result == NULL)
1556             goto fail;
1557         remaining -= PyUnicode_GET_SIZE(result);
1558 
1559         /* Keep reading chunks until we have n characters to return */
1560         while (remaining > 0) {
1561             res = textiowrapper_read_chunk(self);
1562             if (res < 0) {
1563                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1564                    when EINTR occurs so we needn't do it ourselves. */
1565                 if (_PyIO_trap_eintr()) {
1566                     continue;
1567                 }
1568                 goto fail;
1569             }
1570             if (res == 0)  /* EOF */
1571                 break;
1572             if (chunks == NULL) {
1573                 chunks = PyList_New(0);
1574                 if (chunks == NULL)
1575                     goto fail;
1576             }
1577             if (PyList_Append(chunks, result) < 0)
1578                 goto fail;
1579             Py_DECREF(result);
1580             result = textiowrapper_get_decoded_chars(self, remaining);
1581             if (result == NULL)
1582                 goto fail;
1583             remaining -= PyUnicode_GET_SIZE(result);
1584         }
1585         if (chunks != NULL) {
1586             if (result != NULL && PyList_Append(chunks, result) < 0)
1587                 goto fail;
1588             Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1589             if (result == NULL)
1590                 goto fail;
1591             Py_CLEAR(chunks);
1592         }
1593         return result;
1594     }
1595   fail:
1596     Py_XDECREF(result);
1597     Py_XDECREF(chunks);
1598     return NULL;
1599 }
1600 
1601 
1602 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1603    that is to the NUL character. Otherwise the function will produce
1604    incorrect results. */
1605 static Py_UNICODE *
find_control_char(Py_UNICODE * start,Py_UNICODE * end,Py_UNICODE ch)1606 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1607 {
1608     Py_UNICODE *s = start;
1609     for (;;) {
1610         while (*s > ch)
1611             s++;
1612         if (*s == ch)
1613             return s;
1614         if (s == end)
1615             return NULL;
1616         s++;
1617     }
1618 }
1619 
/* Search the characters in [start, end) for a line ending.
 *
 *   translated: newlines are already translated; only '\n' is searched.
 *   universal:  (used when `translated` is false) any of "\r", "\r\n" or
 *               "\n" ends a line.
 *   readnl:     (used when both flags are false) the exact line ending to
 *               look for; read with the PyString_* macros and may be
 *               longer than one character.
 *
 * `end` must point at the NUL terminator of the Py_UNICODE storage: the
 * scanning loops depend on it to stop.
 *
 * Returns the number of characters from `start` up to and including the
 * line ending when one is found.  Otherwise returns -1 and stores in
 * *consumed how many leading characters cannot be part of a line ending
 * (for a multi-character readnl this stops before a trailing character
 * that could begin one).  *consumed is not written when a line ending is
 * found.
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* s may equal `end` here; reading the NUL terminator is
                   still within the storage. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
        if (readnl_len == 1) {
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position at which a complete
               readnl could still start. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this `pos` shadows the one declared above; the
                   outer variable is only used after the loop. */
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No complete match.  If the first readnl character occurs in
               the tail, only the characters before it are reported as
               consumed, since it might begin a line ending that is
               completed by later data. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1702 
1703 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)1704 _textiowrapper_readline(textio *self, Py_ssize_t limit)
1705 {
1706     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1707     Py_ssize_t start, endpos, chunked, offset_to_buffer;
1708     int res;
1709 
1710     CHECK_CLOSED(self);
1711 
1712     if (_textiowrapper_writeflush(self) < 0)
1713         return NULL;
1714 
1715     chunked = 0;
1716 
1717     while (1) {
1718         Py_UNICODE *ptr;
1719         Py_ssize_t line_len;
1720         Py_ssize_t consumed = 0;
1721 
1722         /* First, get some data if necessary */
1723         res = 1;
1724         while (!self->decoded_chars ||
1725                !PyUnicode_GET_SIZE(self->decoded_chars)) {
1726             res = textiowrapper_read_chunk(self);
1727             if (res < 0) {
1728                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1729                    when EINTR occurs so we needn't do it ourselves. */
1730                 if (_PyIO_trap_eintr()) {
1731                     continue;
1732                 }
1733                 goto error;
1734             }
1735             if (res == 0)
1736                 break;
1737         }
1738         if (res == 0) {
1739             /* end of file */
1740             textiowrapper_set_decoded_chars(self, NULL);
1741             Py_CLEAR(self->snapshot);
1742             start = endpos = offset_to_buffer = 0;
1743             break;
1744         }
1745 
1746         if (remaining == NULL) {
1747             line = self->decoded_chars;
1748             start = self->decoded_chars_used;
1749             offset_to_buffer = 0;
1750             Py_INCREF(line);
1751         }
1752         else {
1753             assert(self->decoded_chars_used == 0);
1754             line = PyUnicode_Concat(remaining, self->decoded_chars);
1755             start = 0;
1756             offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1757             Py_CLEAR(remaining);
1758             if (line == NULL)
1759                 goto error;
1760         }
1761 
1762         ptr = PyUnicode_AS_UNICODE(line);
1763         line_len = PyUnicode_GET_SIZE(line);
1764 
1765         endpos = _PyIO_find_line_ending(
1766             self->readtranslate, self->readuniversal, self->readnl,
1767             ptr + start, ptr + line_len, &consumed);
1768         if (endpos >= 0) {
1769             endpos += start;
1770             if (limit >= 0 && (endpos - start) + chunked >= limit)
1771                 endpos = start + limit - chunked;
1772             break;
1773         }
1774 
1775         /* We can put aside up to `endpos` */
1776         endpos = consumed + start;
1777         if (limit >= 0 && (endpos - start) + chunked >= limit) {
1778             /* Didn't find line ending, but reached length limit */
1779             endpos = start + limit - chunked;
1780             break;
1781         }
1782 
1783         if (endpos > start) {
1784             /* No line ending seen yet - put aside current data */
1785             PyObject *s;
1786             if (chunks == NULL) {
1787                 chunks = PyList_New(0);
1788                 if (chunks == NULL)
1789                     goto error;
1790             }
1791             s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1792             if (s == NULL)
1793                 goto error;
1794             if (PyList_Append(chunks, s) < 0) {
1795                 Py_DECREF(s);
1796                 goto error;
1797             }
1798             chunked += PyUnicode_GET_SIZE(s);
1799             Py_DECREF(s);
1800         }
1801         /* There may be some remaining bytes we'll have to prepend to the
1802            next chunk of data */
1803         if (endpos < line_len) {
1804             remaining = PyUnicode_FromUnicode(
1805                     ptr + endpos, line_len - endpos);
1806             if (remaining == NULL)
1807                 goto error;
1808         }
1809         Py_CLEAR(line);
1810         /* We have consumed the buffer */
1811         textiowrapper_set_decoded_chars(self, NULL);
1812     }
1813 
1814     if (line != NULL) {
1815         /* Our line ends in the current buffer */
1816         self->decoded_chars_used = endpos - offset_to_buffer;
1817         if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1818             if (start == 0 && Py_REFCNT(line) == 1) {
1819                 if (PyUnicode_Resize(&line, endpos) < 0)
1820                     goto error;
1821             }
1822             else {
1823                 PyObject *s = PyUnicode_FromUnicode(
1824                         PyUnicode_AS_UNICODE(line) + start, endpos - start);
1825                 Py_CLEAR(line);
1826                 if (s == NULL)
1827                     goto error;
1828                 line = s;
1829             }
1830         }
1831     }
1832     if (remaining != NULL) {
1833         if (chunks == NULL) {
1834             chunks = PyList_New(0);
1835             if (chunks == NULL)
1836                 goto error;
1837         }
1838         if (PyList_Append(chunks, remaining) < 0)
1839             goto error;
1840         Py_CLEAR(remaining);
1841     }
1842     if (chunks != NULL) {
1843         if (line != NULL && PyList_Append(chunks, line) < 0)
1844             goto error;
1845         Py_XSETREF(line, PyUnicode_Join(_PyIO_empty_str, chunks));
1846         if (line == NULL)
1847             goto error;
1848         Py_DECREF(chunks);
1849     }
1850     if (line == NULL)
1851         line = PyUnicode_FromStringAndSize(NULL, 0);
1852 
1853     return line;
1854 
1855   error:
1856     Py_XDECREF(chunks);
1857     Py_XDECREF(remaining);
1858     Py_XDECREF(line);
1859     return NULL;
1860 }
1861 
1862 static PyObject *
textiowrapper_readline(textio * self,PyObject * args)1863 textiowrapper_readline(textio *self, PyObject *args)
1864 {
1865     PyObject *limitobj = NULL;
1866     Py_ssize_t limit = -1;
1867 
1868     CHECK_ATTACHED(self);
1869     if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1870         return NULL;
1871     }
1872     if (limitobj) {
1873         if (!PyNumber_Check(limitobj)) {
1874             PyErr_Format(PyExc_TypeError,
1875                          "integer argument expected, got '%.200s'",
1876                          Py_TYPE(limitobj)->tp_name);
1877             return NULL;
1878         }
1879         limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1880         if (limit == -1 && PyErr_Occurred())
1881             return NULL;
1882     }
1883     return _textiowrapper_readline(self, limit);
1884 }
1885 
1886 /* Seek and Tell */
1887 
/* Unpacked form of the integer "cookie" that tell() builds and seek()
   consumes.  It is converted to and from a Python long by
   textiowrapper_build_cookie() and textiowrapper_parse_cookie(). */
typedef struct {
    Py_off_t start_pos;  /* position seek() moves the underlying buffer to first */
    int dec_flags;       /* decoder flags handed to decoder.setstate() */
    int bytes_to_feed;   /* bytes seek() reads from start_pos and decodes */
    int chars_to_skip;   /* decoded characters to mark as already used */
    char need_eof;       /* passed as the `final` argument of decode() in seek() */
} cookie_type;
1895 
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
 */

/* Size of the intermediary byte string: the five cookie_type fields laid
   out back to back, without any padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Passed as the `little_endian` argument of the _PyLong byte-array
   conversion functions. */
# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

/* Passed as the `little_endian` argument of the _PyLong byte-array
   conversion functions. */
# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1933 
1934 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)1935 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1936 {
1937     unsigned char buffer[COOKIE_BUF_LEN];
1938     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1939     if (cookieLong == NULL)
1940         return -1;
1941 
1942     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1943                             IS_LITTLE_ENDIAN, 0) < 0) {
1944         Py_DECREF(cookieLong);
1945         return -1;
1946     }
1947     Py_DECREF(cookieLong);
1948 
1949     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1950     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1951     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1952     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1953     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1954 
1955     return 0;
1956 }
1957 
1958 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)1959 textiowrapper_build_cookie(cookie_type *cookie)
1960 {
1961     unsigned char buffer[COOKIE_BUF_LEN];
1962 
1963     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1964     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1965     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1966     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1967     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1968 
1969     return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1970 }
1971 #undef IS_LITTLE_ENDIAN
1972 
1973 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)1974 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1975 {
1976     PyObject *res;
1977     /* When seeking to the start of the stream, we call decoder.reset()
1978        rather than decoder.getstate().
1979        This is for a few decoders such as utf-16 for which the state value
1980        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1981        utf-16, that we are expecting a BOM).
1982     */
1983     if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1984         res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1985     else
1986         res = PyObject_CallMethod(self->decoder, "setstate",
1987                                   "((si))", "", cookie->dec_flags);
1988     if (res == NULL)
1989         return -1;
1990     Py_DECREF(res);
1991     return 0;
1992 }
1993 
1994 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)1995 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1996 {
1997     PyObject *res;
1998     /* Same as _textiowrapper_decoder_setstate() above. */
1999     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2000         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2001         self->encoding_start_of_stream = 1;
2002     }
2003     else {
2004         res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2005                                          _PyIO_zero, NULL);
2006         self->encoding_start_of_stream = 0;
2007     }
2008     if (res == NULL)
2009         return -1;
2010     Py_DECREF(res);
2011     return 0;
2012 }
2013 
2014 static PyObject *
textiowrapper_seek(textio * self,PyObject * args)2015 textiowrapper_seek(textio *self, PyObject *args)
2016 {
2017     PyObject *cookieObj, *posobj;
2018     cookie_type cookie;
2019     int whence = 0;
2020     PyObject *res;
2021     int cmp;
2022     PyObject *snapshot;
2023 
2024     CHECK_ATTACHED(self);
2025 
2026     if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2027         return NULL;
2028     CHECK_CLOSED(self);
2029 
2030     Py_INCREF(cookieObj);
2031 
2032     if (!self->seekable) {
2033         PyErr_SetString(PyExc_IOError,
2034                         "underlying stream is not seekable");
2035         goto fail;
2036     }
2037 
2038     if (whence == 1) {
2039         /* seek relative to current position */
2040         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2041         if (cmp < 0)
2042             goto fail;
2043 
2044         if (cmp == 0) {
2045             PyErr_SetString(PyExc_IOError,
2046                             "can't do nonzero cur-relative seeks");
2047             goto fail;
2048         }
2049 
2050         /* Seeking to the current position should attempt to
2051          * sync the underlying buffer with the current position.
2052          */
2053         Py_DECREF(cookieObj);
2054         cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2055         if (cookieObj == NULL)
2056             goto fail;
2057     }
2058     else if (whence == 2) {
2059         /* seek relative to end of file */
2060 
2061         cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2062         if (cmp < 0)
2063             goto fail;
2064 
2065         if (cmp == 0) {
2066             PyErr_SetString(PyExc_IOError,
2067                             "can't do nonzero end-relative seeks");
2068             goto fail;
2069         }
2070 
2071         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2072         if (res == NULL)
2073             goto fail;
2074         Py_DECREF(res);
2075 
2076         textiowrapper_set_decoded_chars(self, NULL);
2077         Py_CLEAR(self->snapshot);
2078         if (self->decoder) {
2079             res = PyObject_CallMethod(self->decoder, "reset", NULL);
2080             if (res == NULL)
2081                 goto fail;
2082             Py_DECREF(res);
2083         }
2084 
2085         res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2086         Py_XDECREF(cookieObj);
2087         return res;
2088     }
2089     else if (whence != 0) {
2090         PyErr_Format(PyExc_ValueError,
2091                      "invalid whence (%d, should be 0, 1 or 2)", whence);
2092         goto fail;
2093     }
2094 
2095     cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2096     if (cmp < 0)
2097         goto fail;
2098 
2099     if (cmp == 1) {
2100         PyObject *repr = PyObject_Repr(cookieObj);
2101         if (repr != NULL) {
2102             PyErr_Format(PyExc_ValueError,
2103                          "negative seek position %s",
2104                          PyString_AS_STRING(repr));
2105             Py_DECREF(repr);
2106         }
2107         goto fail;
2108     }
2109 
2110     res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2111     if (res == NULL)
2112         goto fail;
2113     Py_DECREF(res);
2114 
2115     /* The strategy of seek() is to go back to the safe start point
2116      * and replay the effect of read(chars_to_skip) from there.
2117      */
2118     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2119         goto fail;
2120 
2121     /* Seek back to the safe start point. */
2122     posobj = PyLong_FromOff_t(cookie.start_pos);
2123     if (posobj == NULL)
2124         goto fail;
2125     res = PyObject_CallMethodObjArgs(self->buffer,
2126                                      _PyIO_str_seek, posobj, NULL);
2127     Py_DECREF(posobj);
2128     if (res == NULL)
2129         goto fail;
2130     Py_DECREF(res);
2131 
2132     textiowrapper_set_decoded_chars(self, NULL);
2133     Py_CLEAR(self->snapshot);
2134 
2135     /* Restore the decoder to its state from the safe start point. */
2136     if (self->decoder) {
2137         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2138             goto fail;
2139     }
2140 
2141     if (cookie.chars_to_skip) {
2142         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2143         PyObject *input_chunk = PyObject_CallMethod(
2144             self->buffer, "read", "i", cookie.bytes_to_feed);
2145         PyObject *decoded;
2146 
2147         if (input_chunk == NULL)
2148             goto fail;
2149 
2150         if (!PyBytes_Check(input_chunk)) {
2151             PyErr_Format(PyExc_TypeError,
2152                          "underlying read() should have returned a bytes "
2153                          "object, not '%.200s'",
2154                          Py_TYPE(input_chunk)->tp_name);
2155             Py_DECREF(input_chunk);
2156             goto fail;
2157         }
2158 
2159         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2160         if (snapshot == NULL) {
2161             goto fail;
2162         }
2163         Py_XSETREF(self->snapshot, snapshot);
2164 
2165         decoded = PyObject_CallMethod(self->decoder, "decode",
2166                                       "Oi", input_chunk, (int)cookie.need_eof);
2167 
2168         if (check_decoded(decoded) < 0)
2169             goto fail;
2170 
2171         textiowrapper_set_decoded_chars(self, decoded);
2172 
2173         /* Skip chars_to_skip of the decoded characters. */
2174         if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2175             PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2176             goto fail;
2177         }
2178         self->decoded_chars_used = cookie.chars_to_skip;
2179     }
2180     else {
2181         snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2182         if (snapshot == NULL)
2183             goto fail;
2184         Py_XSETREF(self->snapshot, snapshot);
2185     }
2186 
2187     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2188     if (self->encoder) {
2189         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2190             goto fail;
2191     }
2192     return cookieObj;
2193   fail:
2194     Py_XDECREF(cookieObj);
2195     return NULL;
2196 
2197 }
2198 
/* tell() method.
 *
 * Returns an integer describing the current logical position.  When there
 * is no decoder or no snapshot, this is simply the result of
 * self->buffer.tell().  Otherwise a cookie_type is filled in and packed
 * with textiowrapper_build_cookie(): the decoder is replayed over the
 * snapshot's input bytes to find the last point where it had nothing
 * buffered, and the cookie records that point plus how many bytes must be
 * fed and how many decoded characters must be skipped to get back here.
 *
 * The decoder's state is saved before the replay and restored afterwards,
 * on both the success and the failure path.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
textiowrapper_tell(textio *self, PyObject *args)
{
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    PyObject *saved_state = NULL;
    char *input, *input_end;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        PyErr_SetString(PyExc_IOError,
                        "underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_IOError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* Nothing is owned yet at this point, so a plain return is safe. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
    if (posobj == NULL)
        goto fail;

    /* Without a decoder or a snapshot there is nothing to replay: the
       buffer's own position is the answer. */
    if (self->decoder == NULL || self->snapshot == NULL) {
        assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    /* next_input is a borrowed reference owned by the snapshot tuple. */
    if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        Py_DECREF(posobj);
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Starting from the snapshot position, we will walk the decoder
     * forward until it gives us enough decoded characters.
     */
    /* From here on saved_state is non-NULL, so the `fail` path restores the
       decoder before returning. */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

    /* Note our initial start point. */
    if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
        goto fail;

    /* Feed the decoder one byte at a time.  As we go, note the
     * nearest "safe start point" before the current location
     * (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    while (input < input_end) {
        PyObject *state;
        char *dec_buffer;
        Py_ssize_t dec_buffer_len;
        int dec_flags;

        PyObject *decoded = PyObject_CallMethod(
            self->decoder, "decode", "s#", input, (Py_ssize_t)1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_SIZE(decoded);
        Py_DECREF(decoded);

        cookie.bytes_to_feed += 1;

        state = PyObject_CallMethodObjArgs(self->decoder,
                                           _PyIO_str_getstate, NULL);
        if (state == NULL)
            goto fail;
        /* Only dec_buffer_len and dec_flags are used below; dec_buffer
           points into `state` and is not touched after the DECREF. */
        if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
            Py_DECREF(state);
            goto fail;
        }
        Py_DECREF(state);

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    /* The loop only reaches input_end when it ran out of bytes without
       hitting the `break` above. */
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = PyObject_CallMethod(
            self->decoder, "decode", "si", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_SIZE(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_IOError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

    /* finally */
    /* Success path: put the decoder back into the state it had before the
       replay. */
    Py_XDECREF(posobj);
    res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

  fail:
    Py_XDECREF(posobj);
    if (saved_state) {
        /* Restore the decoder even on failure.  The pending exception is
           fetched before the setstate() call and handed to
           _PyErr_ReplaceException() afterwards. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);

        res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
        _PyErr_ReplaceException(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2364 
2365 static PyObject *
textiowrapper_truncate(textio * self,PyObject * args)2366 textiowrapper_truncate(textio *self, PyObject *args)
2367 {
2368     PyObject *pos = Py_None;
2369     PyObject *res;
2370 
2371     CHECK_ATTACHED(self)
2372     if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2373         return NULL;
2374     }
2375 
2376     res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2377     if (res == NULL)
2378         return NULL;
2379     Py_DECREF(res);
2380 
2381     return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2382 }
2383 
2384 static PyObject *
textiowrapper_repr(textio * self)2385 textiowrapper_repr(textio *self)
2386 {
2387     PyObject *nameobj, *res;
2388     PyObject *namerepr = NULL, *encrepr = NULL;
2389 
2390     CHECK_INITIALIZED(self);
2391 
2392     nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2393     if (nameobj == NULL) {
2394         if (PyErr_ExceptionMatches(PyExc_Exception))
2395             PyErr_Clear();
2396         else
2397             goto error;
2398         encrepr = PyObject_Repr(self->encoding);
2399         res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2400                                    PyString_AS_STRING(encrepr));
2401     }
2402     else {
2403         encrepr = PyObject_Repr(self->encoding);
2404         namerepr = PyObject_Repr(nameobj);
2405         res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2406                                    PyString_AS_STRING(namerepr),
2407                                    PyString_AS_STRING(encrepr));
2408         Py_DECREF(nameobj);
2409     }
2410     Py_XDECREF(namerepr);
2411     Py_XDECREF(encrepr);
2412     return res;
2413 
2414 error:
2415     Py_XDECREF(namerepr);
2416     Py_XDECREF(encrepr);
2417     return NULL;
2418 }
2419 
2420 
2421 /* Inquiries */
2422 
2423 static PyObject *
textiowrapper_fileno(textio * self,PyObject * args)2424 textiowrapper_fileno(textio *self, PyObject *args)
2425 {
2426     CHECK_ATTACHED(self);
2427     return PyObject_CallMethod(self->buffer, "fileno", NULL);
2428 }
2429 
2430 static PyObject *
textiowrapper_seekable(textio * self,PyObject * args)2431 textiowrapper_seekable(textio *self, PyObject *args)
2432 {
2433     CHECK_ATTACHED(self);
2434     return PyObject_CallMethod(self->buffer, "seekable", NULL);
2435 }
2436 
2437 static PyObject *
textiowrapper_readable(textio * self,PyObject * args)2438 textiowrapper_readable(textio *self, PyObject *args)
2439 {
2440     CHECK_ATTACHED(self);
2441     return PyObject_CallMethod(self->buffer, "readable", NULL);
2442 }
2443 
2444 static PyObject *
textiowrapper_writable(textio * self,PyObject * args)2445 textiowrapper_writable(textio *self, PyObject *args)
2446 {
2447     CHECK_ATTACHED(self);
2448     return PyObject_CallMethod(self->buffer, "writable", NULL);
2449 }
2450 
2451 static PyObject *
textiowrapper_isatty(textio * self,PyObject * args)2452 textiowrapper_isatty(textio *self, PyObject *args)
2453 {
2454     CHECK_ATTACHED(self);
2455     return PyObject_CallMethod(self->buffer, "isatty", NULL);
2456 }
2457 
2458 static PyObject *
textiowrapper_flush(textio * self,PyObject * args)2459 textiowrapper_flush(textio *self, PyObject *args)
2460 {
2461     CHECK_ATTACHED(self);
2462     CHECK_CLOSED(self);
2463     self->telling = self->seekable;
2464     if (_textiowrapper_writeflush(self) < 0)
2465         return NULL;
2466     return PyObject_CallMethod(self->buffer, "flush", NULL);
2467 }
2468 
2469 static PyObject *
textiowrapper_close(textio * self,PyObject * args)2470 textiowrapper_close(textio *self, PyObject *args)
2471 {
2472     PyObject *res;
2473     int r;
2474     CHECK_ATTACHED(self);
2475 
2476     res = textiowrapper_closed_get(self, NULL);
2477     if (res == NULL)
2478         return NULL;
2479     r = PyObject_IsTrue(res);
2480     Py_DECREF(res);
2481     if (r < 0)
2482         return NULL;
2483 
2484     if (r > 0) {
2485         Py_RETURN_NONE; /* stream already closed */
2486     }
2487     else {
2488         PyObject *exc = NULL, *val, *tb;
2489         res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2490         if (res == NULL)
2491             PyErr_Fetch(&exc, &val, &tb);
2492         else
2493             Py_DECREF(res);
2494 
2495         res = PyObject_CallMethod(self->buffer, "close", NULL);
2496         if (exc != NULL) {
2497             _PyErr_ReplaceException(exc, val, tb);
2498             Py_CLEAR(res);
2499         }
2500         return res;
2501     }
2502 }
2503 
2504 static PyObject *
textiowrapper_iternext(textio * self)2505 textiowrapper_iternext(textio *self)
2506 {
2507     PyObject *line;
2508 
2509     CHECK_ATTACHED(self);
2510 
2511     self->telling = 0;
2512     if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2513         /* Skip method call overhead for speed */
2514         line = _textiowrapper_readline(self, -1);
2515     }
2516     else {
2517         line = PyObject_CallMethodObjArgs((PyObject *)self,
2518                                            _PyIO_str_readline, NULL);
2519         if (line && !PyUnicode_Check(line)) {
2520             PyErr_Format(PyExc_IOError,
2521                          "readline() should have returned an str object, "
2522                          "not '%.200s'", Py_TYPE(line)->tp_name);
2523             Py_DECREF(line);
2524             return NULL;
2525         }
2526     }
2527 
2528     if (line == NULL)
2529         return NULL;
2530 
2531     if (PyUnicode_GET_SIZE(line) == 0) {
2532         /* Reached EOF or would have blocked */
2533         Py_DECREF(line);
2534         Py_CLEAR(self->snapshot);
2535         self->telling = self->seekable;
2536         return NULL;
2537     }
2538 
2539     return line;
2540 }
2541 
2542 static PyObject *
textiowrapper_name_get(textio * self,void * context)2543 textiowrapper_name_get(textio *self, void *context)
2544 {
2545     CHECK_ATTACHED(self);
2546     return PyObject_GetAttrString(self->buffer, "name");
2547 }
2548 
2549 static PyObject *
textiowrapper_closed_get(textio * self,void * context)2550 textiowrapper_closed_get(textio *self, void *context)
2551 {
2552     CHECK_ATTACHED(self);
2553     return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2554 }
2555 
2556 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)2557 textiowrapper_newlines_get(textio *self, void *context)
2558 {
2559     PyObject *res;
2560     CHECK_ATTACHED(self);
2561     if (self->decoder == NULL)
2562         Py_RETURN_NONE;
2563     res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2564     if (res == NULL) {
2565         if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2566             PyErr_Clear();
2567             Py_RETURN_NONE;
2568         }
2569         else {
2570             return NULL;
2571         }
2572     }
2573     return res;
2574 }
2575 
2576 static PyObject *
textiowrapper_errors_get(textio * self,void * context)2577 textiowrapper_errors_get(textio *self, void *context)
2578 {
2579     CHECK_INITIALIZED(self);
2580     Py_INCREF(self->errors);
2581     return self->errors;
2582 }
2583 
2584 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)2585 textiowrapper_chunk_size_get(textio *self, void *context)
2586 {
2587     CHECK_ATTACHED(self);
2588     return PyLong_FromSsize_t(self->chunk_size);
2589 }
2590 
2591 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)2592 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2593 {
2594     Py_ssize_t n;
2595     CHECK_ATTACHED_INT(self);
2596     if (arg == NULL) {
2597         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
2598         return -1;
2599     }
2600     n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2601     if (n == -1 && PyErr_Occurred())
2602         return -1;
2603     if (n <= 0) {
2604         PyErr_SetString(PyExc_ValueError,
2605                         "a strictly positive integer is required");
2606         return -1;
2607     }
2608     self->chunk_size = n;
2609     return 0;
2610 }
2611 
/* Method table for TextIOWrapper.  Entries flagged METH_VARARGS parse
   their own argument tuple; METH_NOARGS entries take no arguments. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries, each forwarded to the underlying buffer */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}  /* sentinel */
};
2631 
/* Read-only attributes exposed directly from fields of the textio struct. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}  /* sentinel */
};
2638 
2639 static PyGetSetDef textiowrapper_getset[] = {
2640     {"name", (getter)textiowrapper_name_get, NULL, NULL},
2641     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2642 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2643 */
2644     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2645     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2646     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2647                     (setter)textiowrapper_chunk_size_set, NULL},
2648     {NULL}
2649 };
2650 
/* Type object for _io.TextIOWrapper.

   The initializer is positional: each value lands in the PyTypeObject
   slot named by the comment beside it, so entries must not be reordered.
   Instances take part in cyclic GC (Py_TPFLAGS_HAVE_GC together with the
   traverse/clear slots), may be subclassed (Py_TPFLAGS_BASETYPE), and get
   weak-reference and instance-dict support through the offsets of the
   `weakreflist` and `dict` fields of the textio struct. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    /* tp_iter is left empty while tp_iternext is set; presumably the
       iterator slot is inherited from the base type -- TODO confirm. */
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    /* NOTE(review): tp_base is 0 here; presumably it is assigned during
       module initialisation -- confirm against the module init code. */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2692