1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "structmember.h"
12 #include "_iomodule.h"
13
14 /* TextIOBase */
15
16 PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24 static PyObject *
_unsupported(const char * message)25 _unsupported(const char *message)
26 {
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29 }
30
31 PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38 static PyObject *
textiobase_detach(PyObject * self)39 textiobase_detach(PyObject *self)
40 {
41 return _unsupported("detach");
42 }
43
44 PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51 static PyObject *
textiobase_read(PyObject * self,PyObject * args)52 textiobase_read(PyObject *self, PyObject *args)
53 {
54 return _unsupported("read");
55 }
56
57 PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)64 textiobase_readline(PyObject *self, PyObject *args)
65 {
66 return _unsupported("readline");
67 }
68
69 PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75 static PyObject *
textiobase_write(PyObject * self,PyObject * args)76 textiobase_write(PyObject *self, PyObject *args)
77 {
78 return _unsupported("write");
79 }
80
81 PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)88 textiobase_encoding_get(PyObject *self, void *context)
89 {
90 Py_RETURN_NONE;
91 }
92
93 PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)102 textiobase_newlines_get(PyObject *self, void *context)
103 {
104 Py_RETURN_NONE;
105 }
106
107 PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113 static PyObject *
textiobase_errors_get(PyObject * self,void * context)114 textiobase_errors_get(PyObject *self, void *context)
115 {
116 Py_RETURN_NONE;
117 }
118
119
120 static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126 };
127
128 static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133 };
134
135 PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174 };
175
176
177 /* IncrementalNewlineDecoder */
178
179 PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189 typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
196 } nldecoder_object;
197
198 static int
incrementalnewlinedecoder_init(nldecoder_object * self,PyObject * args,PyObject * kwds)199 incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201 {
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate ? 1 : 0;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229 }
230
231 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)232 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233 {
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237 }
238
239 static int
check_decoded(PyObject * decoded)240 check_decoded(PyObject *decoded)
241 {
242 if (decoded == NULL)
243 return -1;
244 if (!PyUnicode_Check(decoded)) {
245 PyErr_Format(PyExc_TypeError,
246 "decoder should return a string result, not '%.200s'",
247 Py_TYPE(decoded)->tp_name);
248 Py_DECREF(decoded);
249 return -1;
250 }
251 return 0;
252 }
253
/* Bit flags stored in nldecoder_object.seennl: which newline styles have
   been encountered so far. */
#define SEEN_CR   (1 << 0)      /* lone \r */
#define SEEN_LF   (1 << 1)      /* lone \n */
#define SEEN_CRLF (1 << 2)      /* \r\n pair */
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
258
259 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * _self,PyObject * input,int final)260 _PyIncrementalNewlineDecoder_decode(PyObject *_self,
261 PyObject *input, int final)
262 {
263 PyObject *output;
264 Py_ssize_t output_len;
265 nldecoder_object *self = (nldecoder_object *) _self;
266
267 if (self->decoder == NULL) {
268 PyErr_SetString(PyExc_ValueError,
269 "IncrementalNewlineDecoder.__init__ not called");
270 return NULL;
271 }
272
273 /* decode input (with the eventual \r from a previous pass) */
274 if (self->decoder != Py_None) {
275 output = PyObject_CallMethodObjArgs(self->decoder,
276 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
277 }
278 else {
279 output = input;
280 Py_INCREF(output);
281 }
282
283 if (check_decoded(output) < 0)
284 return NULL;
285
286 output_len = PyUnicode_GET_SIZE(output);
287 if (self->pendingcr && (final || output_len > 0)) {
288 Py_UNICODE *out;
289 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
290 if (modified == NULL)
291 goto error;
292 out = PyUnicode_AS_UNICODE(modified);
293 out[0] = '\r';
294 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
295 output_len * sizeof(Py_UNICODE));
296 Py_DECREF(output);
297 output = modified;
298 self->pendingcr = 0;
299 output_len++;
300 }
301
302 /* retain last \r even when not translating data:
303 * then readline() is sure to get \r\n in one pass
304 */
305 if (!final) {
306 if (output_len > 0
307 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
308
309 if (Py_REFCNT(output) == 1) {
310 if (PyUnicode_Resize(&output, output_len - 1) < 0)
311 goto error;
312 }
313 else {
314 PyObject *modified = PyUnicode_FromUnicode(
315 PyUnicode_AS_UNICODE(output),
316 output_len - 1);
317 if (modified == NULL)
318 goto error;
319 Py_DECREF(output);
320 output = modified;
321 }
322 self->pendingcr = 1;
323 }
324 }
325
326 /* Record which newlines are read and do newline translation if desired,
327 all in one pass. */
328 {
329 Py_UNICODE *in_str;
330 Py_ssize_t len;
331 int seennl = self->seennl;
332 int only_lf = 0;
333
334 in_str = PyUnicode_AS_UNICODE(output);
335 len = PyUnicode_GET_SIZE(output);
336
337 if (len == 0)
338 return output;
339
340 /* If, up to now, newlines are consistently \n, do a quick check
341 for the \r *byte* with the libc's optimized memchr.
342 */
343 if (seennl == SEEN_LF || seennl == 0) {
344 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
345 }
346
347 if (only_lf) {
348 /* If not already seen, quick scan for a possible "\n" character.
349 (there's nothing else to be done, even when in translation mode)
350 */
351 if (seennl == 0 &&
352 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
353 Py_UNICODE *s, *end;
354 s = in_str;
355 end = in_str + len;
356 for (;;) {
357 Py_UNICODE c;
358 /* Fast loop for non-control characters */
359 while (*s > '\n')
360 s++;
361 c = *s++;
362 if (c == '\n') {
363 seennl |= SEEN_LF;
364 break;
365 }
366 if (s > end)
367 break;
368 }
369 }
370 /* Finished: we have scanned for newlines, and none of them
371 need translating */
372 }
373 else if (!self->translate) {
374 Py_UNICODE *s, *end;
375 /* We have already seen all newline types, no need to scan again */
376 if (seennl == SEEN_ALL)
377 goto endscan;
378 s = in_str;
379 end = in_str + len;
380 for (;;) {
381 Py_UNICODE c;
382 /* Fast loop for non-control characters */
383 while (*s > '\r')
384 s++;
385 c = *s++;
386 if (c == '\n')
387 seennl |= SEEN_LF;
388 else if (c == '\r') {
389 if (*s == '\n') {
390 seennl |= SEEN_CRLF;
391 s++;
392 }
393 else
394 seennl |= SEEN_CR;
395 }
396 if (s > end)
397 break;
398 if (seennl == SEEN_ALL)
399 break;
400 }
401 endscan:
402 ;
403 }
404 else {
405 PyObject *translated = NULL;
406 Py_UNICODE *out_str;
407 Py_UNICODE *in, *out, *end;
408 if (Py_REFCNT(output) != 1) {
409 /* We could try to optimize this so that we only do a copy
410 when there is something to translate. On the other hand,
411 most decoders should only output non-shared strings, i.e.
412 translation is done in place. */
413 translated = PyUnicode_FromUnicode(NULL, len);
414 if (translated == NULL)
415 goto error;
416 assert(Py_REFCNT(translated) == 1);
417 memcpy(PyUnicode_AS_UNICODE(translated),
418 PyUnicode_AS_UNICODE(output),
419 len * sizeof(Py_UNICODE));
420 }
421 else {
422 translated = output;
423 }
424 out_str = PyUnicode_AS_UNICODE(translated);
425 in = in_str;
426 out = out_str;
427 end = in_str + len;
428 for (;;) {
429 Py_UNICODE c;
430 /* Fast loop for non-control characters */
431 while ((c = *in++) > '\r')
432 *out++ = c;
433 if (c == '\n') {
434 *out++ = c;
435 seennl |= SEEN_LF;
436 continue;
437 }
438 if (c == '\r') {
439 if (*in == '\n') {
440 in++;
441 seennl |= SEEN_CRLF;
442 }
443 else
444 seennl |= SEEN_CR;
445 *out++ = '\n';
446 continue;
447 }
448 if (in > end)
449 break;
450 *out++ = c;
451 }
452 if (translated != output) {
453 Py_DECREF(output);
454 output = translated;
455 }
456 if (out - out_str != len) {
457 if (PyUnicode_Resize(&output, out - out_str) < 0)
458 goto error;
459 }
460 }
461 self->seennl |= seennl;
462 }
463
464 return output;
465
466 error:
467 Py_DECREF(output);
468 return NULL;
469 }
470
471 static PyObject *
incrementalnewlinedecoder_decode(nldecoder_object * self,PyObject * args,PyObject * kwds)472 incrementalnewlinedecoder_decode(nldecoder_object *self,
473 PyObject *args, PyObject *kwds)
474 {
475 char *kwlist[] = {"input", "final", NULL};
476 PyObject *input;
477 int final = 0;
478
479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 kwlist, &input, &final))
481 return NULL;
482 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483 }
484
485 static PyObject *
incrementalnewlinedecoder_getstate(nldecoder_object * self,PyObject * args)486 incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
487 {
488 PyObject *buffer;
489 unsigned PY_LONG_LONG flag;
490
491 if (self->decoder != Py_None) {
492 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 _PyIO_str_getstate, NULL);
494 if (state == NULL)
495 return NULL;
496 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 Py_DECREF(state);
498 return NULL;
499 }
500 Py_INCREF(buffer);
501 Py_DECREF(state);
502 }
503 else {
504 buffer = PyBytes_FromString("");
505 flag = 0;
506 }
507 flag <<= 1;
508 if (self->pendingcr)
509 flag |= 1;
510 return Py_BuildValue("NK", buffer, flag);
511 }
512
513 static PyObject *
incrementalnewlinedecoder_setstate(nldecoder_object * self,PyObject * state)514 incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
515 {
516 PyObject *buffer;
517 unsigned PY_LONG_LONG flag;
518
519 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 return NULL;
521
522 self->pendingcr = (int) flag & 1;
523 flag >>= 1;
524
525 if (self->decoder != Py_None)
526 return PyObject_CallMethod(self->decoder,
527 "setstate", "((OK))", buffer, flag);
528 else
529 Py_RETURN_NONE;
530 }
531
532 static PyObject *
incrementalnewlinedecoder_reset(nldecoder_object * self,PyObject * args)533 incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
534 {
535 self->seennl = 0;
536 self->pendingcr = 0;
537 if (self->decoder != Py_None)
538 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 else
540 Py_RETURN_NONE;
541 }
542
543 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)544 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
545 {
546 switch (self->seennl) {
547 case SEEN_CR:
548 return PyUnicode_FromString("\r");
549 case SEEN_LF:
550 return PyUnicode_FromString("\n");
551 case SEEN_CRLF:
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR | SEEN_LF:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR | SEEN_CRLF:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF | SEEN_CRLF:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 default:
562 Py_RETURN_NONE;
563 }
564
565 }
566
567
568 static PyMethodDef incrementalnewlinedecoder_methods[] = {
569 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
573 {NULL}
574 };
575
576 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
578 {NULL}
579 };
580
581 PyTypeObject PyIncrementalNewlineDecoder_Type = {
582 PyVarObject_HEAD_INIT(NULL, 0)
583 "_io.IncrementalNewlineDecoder", /*tp_name*/
584 sizeof(nldecoder_object), /*tp_basicsize*/
585 0, /*tp_itemsize*/
586 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
587 0, /*tp_print*/
588 0, /*tp_getattr*/
589 0, /*tp_setattr*/
590 0, /*tp_compare */
591 0, /*tp_repr*/
592 0, /*tp_as_number*/
593 0, /*tp_as_sequence*/
594 0, /*tp_as_mapping*/
595 0, /*tp_hash */
596 0, /*tp_call*/
597 0, /*tp_str*/
598 0, /*tp_getattro*/
599 0, /*tp_setattro*/
600 0, /*tp_as_buffer*/
601 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
602 incrementalnewlinedecoder_doc, /* tp_doc */
603 0, /* tp_traverse */
604 0, /* tp_clear */
605 0, /* tp_richcompare */
606 0, /*tp_weaklistoffset*/
607 0, /* tp_iter */
608 0, /* tp_iternext */
609 incrementalnewlinedecoder_methods, /* tp_methods */
610 0, /* tp_members */
611 incrementalnewlinedecoder_getset, /* tp_getset */
612 0, /* tp_base */
613 0, /* tp_dict */
614 0, /* tp_descr_get */
615 0, /* tp_descr_set */
616 0, /* tp_dictoffset */
617 (initproc)incrementalnewlinedecoder_init, /* tp_init */
618 0, /* tp_alloc */
619 PyType_GenericNew, /* tp_new */
620 };
621
622
623 /* TextIOWrapper */
624
625 PyDoc_STRVAR(textiowrapper_doc,
626 "Character and line based layer over a BufferedIOBase object, buffer.\n"
627 "\n"
628 "encoding gives the name of the encoding that the stream will be\n"
629 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
630 "\n"
631 "errors determines the strictness of encoding and decoding (see the\n"
632 "codecs.register) and defaults to \"strict\".\n"
633 "\n"
634 "newline controls how line endings are handled. It can be None, '',\n"
635 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
636 "\n"
637 "* On input, if newline is None, universal newlines mode is\n"
638 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
639 " these are translated into '\\n' before being returned to the\n"
640 " caller. If it is '', universal newline mode is enabled, but line\n"
641 " endings are returned to the caller untranslated. If it has any of\n"
642 " the other legal values, input lines are only terminated by the given\n"
643 " string, and the line ending is returned to the caller untranslated.\n"
644 "\n"
645 "* On output, if newline is None, any '\\n' characters written are\n"
646 " translated to the system default line separator, os.linesep. If\n"
647 " newline is '', no translation takes place. If newline is any of the\n"
648 " other legal values, any '\\n' characters written are translated to\n"
649 " the given string.\n"
650 "\n"
651 "If line_buffering is True, a call to flush is implied when a call to\n"
652 "write contains a newline character."
653 );
654
655 typedef PyObject *
656 (*encodefunc_t)(PyObject *, PyObject *);
657
658 typedef struct
659 {
660 PyObject_HEAD
661 int ok; /* initialized? */
662 int detached;
663 Py_ssize_t chunk_size;
664 PyObject *buffer;
665 PyObject *encoding;
666 PyObject *encoder;
667 PyObject *decoder;
668 PyObject *readnl;
669 PyObject *errors;
670 const char *writenl; /* utf-8 encoded, NULL stands for \n */
671 char line_buffering;
672 char readuniversal;
673 char readtranslate;
674 char writetranslate;
675 char seekable;
676 char telling;
677 /* Specialized encoding func (see below) */
678 encodefunc_t encodefunc;
679 /* Whether or not it's the start of the stream */
680 char encoding_start_of_stream;
681
682 /* Reads and writes are internally buffered in order to speed things up.
683 However, any read will first flush the write buffer if itsn't empty.
684
685 Please also note that text to be written is first encoded before being
686 buffered. This is necessary so that encoding errors are immediately
687 reported to the caller, but it unfortunately means that the
688 IncrementalEncoder (whose encode() method is always written in Python)
689 becomes a bottleneck for small writes.
690 */
691 PyObject *decoded_chars; /* buffer for text returned from decoder */
692 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
693 PyObject *pending_bytes; /* list of bytes objects waiting to be
694 written, or NULL */
695 Py_ssize_t pending_bytes_count;
696 PyObject *snapshot;
697 /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 * dec_flags is the second (integer) item of the decoder state and
699 * next_input is the chunk of input bytes that comes next after the
700 * snapshot point. We use this to reconstruct decoder states in tell().
701 */
702
703 /* Cache raw object if it's a FileIO object */
704 PyObject *raw;
705
706 PyObject *weakreflist;
707 PyObject *dict;
708 } textio;
709
710 static void
711 textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
712
713 /* A couple of specialized cases in order to bypass the slow incremental
714 encoding methods for the most popular encodings. */
715
716 static PyObject *
ascii_encode(textio * self,PyObject * text)717 ascii_encode(textio *self, PyObject *text)
718 {
719 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
720 PyUnicode_GET_SIZE(text),
721 PyBytes_AS_STRING(self->errors));
722 }
723
724 static PyObject *
utf16be_encode(textio * self,PyObject * text)725 utf16be_encode(textio *self, PyObject *text)
726 {
727 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
728 PyUnicode_GET_SIZE(text),
729 PyBytes_AS_STRING(self->errors), 1);
730 }
731
732 static PyObject *
utf16le_encode(textio * self,PyObject * text)733 utf16le_encode(textio *self, PyObject *text)
734 {
735 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
736 PyUnicode_GET_SIZE(text),
737 PyBytes_AS_STRING(self->errors), -1);
738 }
739
740 static PyObject *
utf16_encode(textio * self,PyObject * text)741 utf16_encode(textio *self, PyObject *text)
742 {
743 if (!self->encoding_start_of_stream) {
744 /* Skip the BOM and use native byte ordering */
745 #if defined(WORDS_BIGENDIAN)
746 return utf16be_encode(self, text);
747 #else
748 return utf16le_encode(self, text);
749 #endif
750 }
751 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
752 PyUnicode_GET_SIZE(text),
753 PyBytes_AS_STRING(self->errors), 0);
754 }
755
756 static PyObject *
utf32be_encode(textio * self,PyObject * text)757 utf32be_encode(textio *self, PyObject *text)
758 {
759 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
760 PyUnicode_GET_SIZE(text),
761 PyBytes_AS_STRING(self->errors), 1);
762 }
763
764 static PyObject *
utf32le_encode(textio * self,PyObject * text)765 utf32le_encode(textio *self, PyObject *text)
766 {
767 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
768 PyUnicode_GET_SIZE(text),
769 PyBytes_AS_STRING(self->errors), -1);
770 }
771
772 static PyObject *
utf32_encode(textio * self,PyObject * text)773 utf32_encode(textio *self, PyObject *text)
774 {
775 if (!self->encoding_start_of_stream) {
776 /* Skip the BOM and use native byte ordering */
777 #if defined(WORDS_BIGENDIAN)
778 return utf32be_encode(self, text);
779 #else
780 return utf32le_encode(self, text);
781 #endif
782 }
783 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
784 PyUnicode_GET_SIZE(text),
785 PyBytes_AS_STRING(self->errors), 0);
786 }
787
788 static PyObject *
utf8_encode(textio * self,PyObject * text)789 utf8_encode(textio *self, PyObject *text)
790 {
791 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
792 PyUnicode_GET_SIZE(text),
793 PyBytes_AS_STRING(self->errors));
794 }
795
796 static PyObject *
latin1_encode(textio * self,PyObject * text)797 latin1_encode(textio *self, PyObject *text)
798 {
799 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
800 PyUnicode_GET_SIZE(text),
801 PyBytes_AS_STRING(self->errors));
802 }
803
804 /* Map normalized encoding names onto the specialized encoding funcs */
805
806 typedef struct {
807 const char *name;
808 encodefunc_t encodefunc;
809 } encodefuncentry;
810
811 static encodefuncentry encodefuncs[] = {
812 {"ascii", (encodefunc_t) ascii_encode},
813 {"iso8859-1", (encodefunc_t) latin1_encode},
814 {"utf-8", (encodefunc_t) utf8_encode},
815 {"utf-16-be", (encodefunc_t) utf16be_encode},
816 {"utf-16-le", (encodefunc_t) utf16le_encode},
817 {"utf-16", (encodefunc_t) utf16_encode},
818 {"utf-32-be", (encodefunc_t) utf32be_encode},
819 {"utf-32-le", (encodefunc_t) utf32le_encode},
820 {"utf-32", (encodefunc_t) utf32_encode},
821 {NULL, NULL}
822 };
823
824
825 static int
textiowrapper_init(textio * self,PyObject * args,PyObject * kwds)826 textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
827 {
828 char *kwlist[] = {"buffer", "encoding", "errors",
829 "newline", "line_buffering",
830 NULL};
831 PyObject *buffer, *raw, *codec_info = NULL;
832 char *encoding = NULL;
833 char *errors = NULL;
834 char *newline = NULL;
835 int line_buffering = 0;
836
837 PyObject *res;
838 int r;
839
840 self->ok = 0;
841 self->detached = 0;
842 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
843 kwlist, &buffer, &encoding, &errors,
844 &newline, &line_buffering))
845 return -1;
846
847 if (newline && newline[0] != '\0'
848 && !(newline[0] == '\n' && newline[1] == '\0')
849 && !(newline[0] == '\r' && newline[1] == '\0')
850 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
851 PyErr_Format(PyExc_ValueError,
852 "illegal newline value: %s", newline);
853 return -1;
854 }
855
856 Py_CLEAR(self->buffer);
857 Py_CLEAR(self->encoding);
858 Py_CLEAR(self->encoder);
859 Py_CLEAR(self->decoder);
860 Py_CLEAR(self->readnl);
861 Py_CLEAR(self->decoded_chars);
862 Py_CLEAR(self->pending_bytes);
863 Py_CLEAR(self->snapshot);
864 Py_CLEAR(self->errors);
865 Py_CLEAR(self->raw);
866 self->decoded_chars_used = 0;
867 self->pending_bytes_count = 0;
868 self->encodefunc = NULL;
869 self->writenl = NULL;
870
871 if (encoding == NULL && self->encoding == NULL) {
872 if (_PyIO_locale_module == NULL) {
873 _PyIO_locale_module = PyImport_ImportModule("locale");
874 if (_PyIO_locale_module == NULL)
875 goto catch_ImportError;
876 else
877 goto use_locale;
878 }
879 else {
880 use_locale:
881 self->encoding = PyObject_CallMethod(
882 _PyIO_locale_module, "getpreferredencoding", NULL);
883 if (self->encoding == NULL) {
884 catch_ImportError:
885 /*
886 Importing locale can raise an ImportError because of
887 _functools, and locale.getpreferredencoding can raise an
888 ImportError if _locale is not available. These will happen
889 during module building.
890 */
891 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
892 PyErr_Clear();
893 self->encoding = PyString_FromString("ascii");
894 }
895 else
896 goto error;
897 }
898 else if (!PyString_Check(self->encoding))
899 Py_CLEAR(self->encoding);
900 }
901 }
902 if (self->encoding != NULL)
903 encoding = PyString_AsString(self->encoding);
904 else if (encoding != NULL) {
905 self->encoding = PyString_FromString(encoding);
906 if (self->encoding == NULL)
907 goto error;
908 }
909 else {
910 PyErr_SetString(PyExc_IOError,
911 "could not determine default encoding");
912 goto error;
913 }
914
915 /* Check we have been asked for a real text encoding */
916 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
917 if (codec_info == NULL) {
918 Py_CLEAR(self->encoding);
919 goto error;
920 }
921
922 /* XXX: Failures beyond this point have the potential to leak elements
923 * of the partially constructed object (like self->encoding)
924 */
925
926 if (errors == NULL)
927 errors = "strict";
928 self->errors = PyBytes_FromString(errors);
929 if (self->errors == NULL)
930 goto error;
931
932 self->chunk_size = 8192;
933 self->readuniversal = (newline == NULL || newline[0] == '\0');
934 self->line_buffering = line_buffering;
935 self->readtranslate = (newline == NULL);
936 if (newline) {
937 self->readnl = PyString_FromString(newline);
938 if (self->readnl == NULL)
939 goto error;
940 }
941 self->writetranslate = (newline == NULL || newline[0] != '\0');
942 if (!self->readuniversal && self->writetranslate) {
943 self->writenl = PyString_AsString(self->readnl);
944 if (!strcmp(self->writenl, "\n"))
945 self->writenl = NULL;
946 }
947 #ifdef MS_WINDOWS
948 else
949 self->writenl = "\r\n";
950 #endif
951
952 /* Build the decoder object */
953 res = PyObject_CallMethod(buffer, "readable", NULL);
954 if (res == NULL)
955 goto error;
956 r = PyObject_IsTrue(res);
957 Py_DECREF(res);
958 if (r == -1)
959 goto error;
960 if (r == 1) {
961 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
962 errors);
963 if (self->decoder == NULL)
964 goto error;
965
966 if (self->readuniversal) {
967 PyObject *incrementalDecoder = PyObject_CallFunction(
968 (PyObject *)&PyIncrementalNewlineDecoder_Type,
969 "Oi", self->decoder, (int)self->readtranslate);
970 if (incrementalDecoder == NULL)
971 goto error;
972 Py_XSETREF(self->decoder, incrementalDecoder);
973 }
974 }
975
976 /* Build the encoder object */
977 res = PyObject_CallMethod(buffer, "writable", NULL);
978 if (res == NULL)
979 goto error;
980 r = PyObject_IsTrue(res);
981 Py_DECREF(res);
982 if (r == -1)
983 goto error;
984 if (r == 1) {
985 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
986 errors);
987 if (self->encoder == NULL)
988 goto error;
989 /* Get the normalized name of the codec */
990 res = PyObject_GetAttrString(codec_info, "name");
991 if (res == NULL) {
992 if (PyErr_ExceptionMatches(PyExc_AttributeError))
993 PyErr_Clear();
994 else
995 goto error;
996 }
997 else if (PyString_Check(res)) {
998 encodefuncentry *e = encodefuncs;
999 while (e->name != NULL) {
1000 if (!strcmp(PyString_AS_STRING(res), e->name)) {
1001 self->encodefunc = e->encodefunc;
1002 break;
1003 }
1004 e++;
1005 }
1006 }
1007 Py_XDECREF(res);
1008 }
1009
1010 /* Finished sorting out the codec details */
1011 Py_DECREF(codec_info);
1012
1013 self->buffer = buffer;
1014 Py_INCREF(buffer);
1015
1016 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1017 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1018 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1019 raw = PyObject_GetAttrString(buffer, "raw");
1020 /* Cache the raw FileIO object to speed up 'closed' checks */
1021 if (raw == NULL) {
1022 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1023 PyErr_Clear();
1024 else
1025 goto error;
1026 }
1027 else if (Py_TYPE(raw) == &PyFileIO_Type)
1028 self->raw = raw;
1029 else
1030 Py_DECREF(raw);
1031 }
1032
1033 res = PyObject_CallMethod(buffer, "seekable", NULL);
1034 if (res == NULL)
1035 goto error;
1036 r = PyObject_IsTrue(res);
1037 Py_DECREF(res);
1038 if (r < 0)
1039 goto error;
1040 self->seekable = self->telling = r;
1041
1042 self->encoding_start_of_stream = 0;
1043 if (self->seekable && self->encoder) {
1044 PyObject *cookieObj;
1045 int cmp;
1046
1047 self->encoding_start_of_stream = 1;
1048
1049 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1050 if (cookieObj == NULL)
1051 goto error;
1052
1053 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1054 Py_DECREF(cookieObj);
1055 if (cmp < 0) {
1056 goto error;
1057 }
1058
1059 if (cmp == 0) {
1060 self->encoding_start_of_stream = 0;
1061 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1062 _PyIO_zero, NULL);
1063 if (res == NULL)
1064 goto error;
1065 Py_DECREF(res);
1066 }
1067 }
1068
1069 self->ok = 1;
1070 return 0;
1071
1072 error:
1073 Py_XDECREF(codec_info);
1074 return -1;
1075 }
1076
1077 static void
_textiowrapper_clear(textio * self)1078 _textiowrapper_clear(textio *self)
1079 {
1080 self->ok = 0;
1081 Py_CLEAR(self->buffer);
1082 Py_CLEAR(self->encoding);
1083 Py_CLEAR(self->encoder);
1084 Py_CLEAR(self->decoder);
1085 Py_CLEAR(self->readnl);
1086 Py_CLEAR(self->decoded_chars);
1087 Py_CLEAR(self->pending_bytes);
1088 Py_CLEAR(self->snapshot);
1089 Py_CLEAR(self->errors);
1090 Py_CLEAR(self->raw);
1091
1092 Py_CLEAR(self->dict);
1093 }
1094
1095 static void
textiowrapper_dealloc(textio * self)1096 textiowrapper_dealloc(textio *self)
1097 {
1098 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1099 return;
1100 _PyObject_GC_UNTRACK(self);
1101 if (self->weakreflist != NULL)
1102 PyObject_ClearWeakRefs((PyObject *)self);
1103 _textiowrapper_clear(self);
1104 Py_TYPE(self)->tp_free((PyObject *)self);
1105 }
1106
/* GC traversal: report every object reference held by the wrapper to
   `visit`.  The set of fields mirrors the one released by
   _textiowrapper_clear().  Returns 0 unless a Py_VISIT call returns
   early with the visitor's non-zero result. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1124
1125 static int
textiowrapper_clear(textio * self)1126 textiowrapper_clear(textio *self)
1127 {
1128 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1129 return -1;
1130 _textiowrapper_clear(self);
1131 return 0;
1132 }
1133
/* Forward declaration: CHECK_CLOSED() below calls this getter before its
   definition appears in the file. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* CHECK_CLOSED(self): make the *enclosing function* return NULL if the
   stream is closed.

   This macro takes some shortcuts to make the common case faster:
   - For an exact TextIOWrapper with a cached raw object (self->raw), it
     asks _PyFileIO_closed() directly.
   - For an exact TextIOWrapper without a cached raw object, it goes
     through textiowrapper_closed_get() and tests the result's truth
     value; errors from either step are propagated.
   - For any other type (i.e. a subclass) it defers to
     _PyIOBase_check_closed().

   When the stream is found closed on the fast paths, ValueError
   ("I/O operation on closed file.") is set.  The macro declares the
   block-local names `r` and `_res`, and may only be used in functions
   returning a pointer, since it executes `return NULL`. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1163
/* CHECK_INITIALIZED(self): set ValueError and make the enclosing function
   return NULL when self->ok is not positive (the object was never
   successfully initialized, or has been cleared).
   NOTE: this expands to a bare `if` statement rather than a
   do { ... } while (0) block, so do not use it as the unbraced body of
   another if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }
1170
/* CHECK_ATTACHED(self): first perform CHECK_INITIALIZED(self), then set
   ValueError and make the enclosing function return NULL if the
   underlying buffer has been detached (self->detached is set).
   NOTE: this expands to two consecutive statements, so it must not be
   used as the unbraced body of an if/else or loop. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1178
/* CHECK_ATTACHED_INT(self): same checks and error messages as
   CHECK_ATTACHED(self), but for functions that report errors with an
   int: the enclosing function returns -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1189
1190
1191 static PyObject *
textiowrapper_detach(textio * self)1192 textiowrapper_detach(textio *self)
1193 {
1194 PyObject *buffer, *res;
1195 CHECK_ATTACHED(self);
1196 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1197 if (res == NULL)
1198 return NULL;
1199 Py_DECREF(res);
1200 buffer = self->buffer;
1201 self->buffer = NULL;
1202 self->detached = 1;
1203 return buffer;
1204 }
1205
1206 Py_LOCAL_INLINE(const Py_UNICODE *)
findchar(const Py_UNICODE * s,Py_ssize_t size,Py_UNICODE ch)1207 findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1208 {
1209 /* like wcschr, but doesn't stop at NULL characters */
1210 while (size-- > 0) {
1211 if (*s == ch)
1212 return s;
1213 s++;
1214 }
1215 return NULL;
1216 }
1217
1218 /* Flush the internal write buffer. This doesn't explicitly flush the
1219 underlying buffered object, though. */
1220 static int
_textiowrapper_writeflush(textio * self)1221 _textiowrapper_writeflush(textio *self)
1222 {
1223 PyObject *pending, *b, *ret;
1224
1225 if (self->pending_bytes == NULL)
1226 return 0;
1227
1228 pending = self->pending_bytes;
1229 Py_INCREF(pending);
1230 self->pending_bytes_count = 0;
1231 Py_CLEAR(self->pending_bytes);
1232
1233 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1234 Py_DECREF(pending);
1235 if (b == NULL)
1236 return -1;
1237 ret = NULL;
1238 do {
1239 ret = PyObject_CallMethodObjArgs(self->buffer,
1240 _PyIO_str_write, b, NULL);
1241 } while (ret == NULL && _PyIO_trap_eintr());
1242 Py_DECREF(b);
1243 if (ret == NULL)
1244 return -1;
1245 Py_DECREF(ret);
1246 return 0;
1247 }
1248
1249 static PyObject *
textiowrapper_write(textio * self,PyObject * args)1250 textiowrapper_write(textio *self, PyObject *args)
1251 {
1252 PyObject *ret;
1253 PyObject *text; /* owned reference */
1254 PyObject *b;
1255 Py_ssize_t textlen;
1256 int haslf = 0;
1257 int needflush = 0;
1258
1259 CHECK_ATTACHED(self);
1260
1261 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1262 return NULL;
1263 }
1264
1265 CHECK_CLOSED(self);
1266
1267 if (self->encoder == NULL) {
1268 PyErr_SetString(PyExc_IOError, "not writable");
1269 return NULL;
1270 }
1271
1272 Py_INCREF(text);
1273
1274 textlen = PyUnicode_GetSize(text);
1275
1276 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1277 if (findchar(PyUnicode_AS_UNICODE(text),
1278 PyUnicode_GET_SIZE(text), '\n'))
1279 haslf = 1;
1280
1281 if (haslf && self->writetranslate && self->writenl != NULL) {
1282 PyObject *newtext = PyObject_CallMethod(
1283 text, "replace", "ss", "\n", self->writenl);
1284 Py_DECREF(text);
1285 if (newtext == NULL)
1286 return NULL;
1287 text = newtext;
1288 }
1289
1290 if (self->line_buffering &&
1291 (haslf ||
1292 findchar(PyUnicode_AS_UNICODE(text),
1293 PyUnicode_GET_SIZE(text), '\r')))
1294 needflush = 1;
1295
1296 /* XXX What if we were just reading? */
1297 if (self->encodefunc != NULL) {
1298 b = (*self->encodefunc)((PyObject *) self, text);
1299 self->encoding_start_of_stream = 0;
1300 }
1301 else
1302 b = PyObject_CallMethodObjArgs(self->encoder,
1303 _PyIO_str_encode, text, NULL);
1304 Py_DECREF(text);
1305 if (b == NULL)
1306 return NULL;
1307
1308 if (self->pending_bytes == NULL) {
1309 self->pending_bytes = PyList_New(0);
1310 if (self->pending_bytes == NULL) {
1311 Py_DECREF(b);
1312 return NULL;
1313 }
1314 self->pending_bytes_count = 0;
1315 }
1316 if (PyList_Append(self->pending_bytes, b) < 0) {
1317 Py_DECREF(b);
1318 return NULL;
1319 }
1320 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1321 Py_DECREF(b);
1322 if (self->pending_bytes_count > self->chunk_size || needflush) {
1323 if (_textiowrapper_writeflush(self) < 0)
1324 return NULL;
1325 }
1326
1327 if (needflush) {
1328 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1329 if (ret == NULL)
1330 return NULL;
1331 Py_DECREF(ret);
1332 }
1333
1334 textiowrapper_set_decoded_chars(self, NULL);
1335 Py_CLEAR(self->snapshot);
1336
1337 if (self->decoder) {
1338 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1339 if (ret == NULL)
1340 return NULL;
1341 Py_DECREF(ret);
1342 }
1343
1344 return PyLong_FromSsize_t(textlen);
1345 }
1346
1347 /* Steal a reference to chars and store it in the decoded_char buffer;
1348 */
1349 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1350 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1351 {
1352 Py_XSETREF(self->decoded_chars, chars);
1353 self->decoded_chars_used = 0;
1354 }
1355
1356 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1357 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1358 {
1359 PyObject *chars;
1360 Py_ssize_t avail;
1361
1362 if (self->decoded_chars == NULL)
1363 return PyUnicode_FromStringAndSize(NULL, 0);
1364
1365 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1366 - self->decoded_chars_used);
1367
1368 assert(avail >= 0);
1369
1370 if (n < 0 || n > avail)
1371 n = avail;
1372
1373 if (self->decoded_chars_used > 0 || n < avail) {
1374 chars = PyUnicode_FromUnicode(
1375 PyUnicode_AS_UNICODE(self->decoded_chars)
1376 + self->decoded_chars_used, n);
1377 if (chars == NULL)
1378 return NULL;
1379 }
1380 else {
1381 chars = self->decoded_chars;
1382 Py_INCREF(chars);
1383 }
1384
1385 self->decoded_chars_used += n;
1386 return chars;
1387 }
1388
/* Read and decode the next chunk of data from the underlying buffer.
 *
 * Returns 1 if some data is available, 0 if EOF was reached (no bytes read
 * and no characters produced by the decoder), and -1 with an exception set
 * on error.
 */
static int
textiowrapper_read_chunk(textio *self)
{
    /* dec_buffer / dec_flags are owned references taken from the decoder
       state (only when self->telling); input_chunk is the owned result of
       read1().  All three are released at `fail`. */
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    PyObject *decoded_chars, *chunk_size;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_IOError, "not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags)) {
            Py_DECREF(state);
            return -1;
        }
        /* "O" yields borrowed references; own them before the state tuple
           is released. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    chunk_size = PyLong_FromSsize_t(self->chunk_size);
    if (chunk_size == NULL)
        goto fail;
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        _PyIO_str_read1, chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;
    if (!PyBytes_Check(input_chunk)) {
        PyErr_Format(PyExc_TypeError,
                     "underlying read1() should have returned a bytes object, "
                     "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    /* An empty read is treated as EOF and passed to the decoder as its
       `final` argument. */
    eof = (PyBytes_Size(input_chunk) == 0);

    if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
        /* Exact built-in newline decoder: call its C entry point directly. */
        decoded_chars = _PyIncrementalNewlineDecoder_decode(
            self->decoder, input_chunk, eof);
    }
    else {
        decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
    }

    /* NOTE(review): check_decoded() is defined outside this chunk; this code
       relies on it handling a NULL/invalid result and leaving nothing for
       us to release on failure -- confirm. */
    if (check_decoded(decoded_chars) < 0)
        goto fail;
    /* Ownership of decoded_chars moves to self->decoded_chars here. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    /* The final decode call may still have produced characters; that is
       not EOF yet from the caller's point of view. */
    if (PyUnicode_GET_SIZE(decoded_chars) > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *snapshot;
        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
        if (next_input == NULL)
            goto fail;
        if (!PyBytes_Check(next_input)) {
            PyErr_Format(PyExc_TypeError,
                         "decoder getstate() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(next_input)->tp_name);
            Py_DECREF(next_input);
            goto fail;
        }
        /* "N" hands both references over to Py_BuildValue, even when it
           fails -- hence dec_flags is NULLed before jumping to `fail` so
           that it is not released a second time. */
        snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
        Py_DECREF(dec_buffer);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1499
1500 static PyObject *
textiowrapper_read(textio * self,PyObject * args)1501 textiowrapper_read(textio *self, PyObject *args)
1502 {
1503 Py_ssize_t n = -1;
1504 PyObject *result = NULL, *chunks = NULL;
1505
1506 CHECK_ATTACHED(self);
1507
1508 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
1509 return NULL;
1510
1511 CHECK_CLOSED(self);
1512
1513 if (self->decoder == NULL) {
1514 PyErr_SetString(PyExc_IOError, "not readable");
1515 return NULL;
1516 }
1517
1518 if (_textiowrapper_writeflush(self) < 0)
1519 return NULL;
1520
1521 if (n < 0) {
1522 /* Read everything */
1523 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1524 PyObject *decoded, *final;
1525 if (bytes == NULL)
1526 goto fail;
1527 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1528 bytes, Py_True, NULL);
1529 Py_DECREF(bytes);
1530 if (check_decoded(decoded) < 0)
1531 goto fail;
1532
1533 result = textiowrapper_get_decoded_chars(self, -1);
1534
1535 if (result == NULL) {
1536 Py_DECREF(decoded);
1537 return NULL;
1538 }
1539
1540 final = PyUnicode_Concat(result, decoded);
1541 Py_DECREF(result);
1542 Py_DECREF(decoded);
1543 if (final == NULL)
1544 goto fail;
1545
1546 textiowrapper_set_decoded_chars(self, NULL);
1547 Py_CLEAR(self->snapshot);
1548 return final;
1549 }
1550 else {
1551 int res = 1;
1552 Py_ssize_t remaining = n;
1553
1554 result = textiowrapper_get_decoded_chars(self, n);
1555 if (result == NULL)
1556 goto fail;
1557 remaining -= PyUnicode_GET_SIZE(result);
1558
1559 /* Keep reading chunks until we have n characters to return */
1560 while (remaining > 0) {
1561 res = textiowrapper_read_chunk(self);
1562 if (res < 0) {
1563 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1564 when EINTR occurs so we needn't do it ourselves. */
1565 if (_PyIO_trap_eintr()) {
1566 continue;
1567 }
1568 goto fail;
1569 }
1570 if (res == 0) /* EOF */
1571 break;
1572 if (chunks == NULL) {
1573 chunks = PyList_New(0);
1574 if (chunks == NULL)
1575 goto fail;
1576 }
1577 if (PyList_Append(chunks, result) < 0)
1578 goto fail;
1579 Py_DECREF(result);
1580 result = textiowrapper_get_decoded_chars(self, remaining);
1581 if (result == NULL)
1582 goto fail;
1583 remaining -= PyUnicode_GET_SIZE(result);
1584 }
1585 if (chunks != NULL) {
1586 if (result != NULL && PyList_Append(chunks, result) < 0)
1587 goto fail;
1588 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1589 if (result == NULL)
1590 goto fail;
1591 Py_CLEAR(chunks);
1592 }
1593 return result;
1594 }
1595 fail:
1596 Py_XDECREF(result);
1597 Py_XDECREF(chunks);
1598 return NULL;
1599 }
1600
1601
1602 /* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1603 that is to the NUL character. Otherwise the function will produce
1604 incorrect results. */
1605 static Py_UNICODE *
find_control_char(Py_UNICODE * start,Py_UNICODE * end,Py_UNICODE ch)1606 find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1607 {
1608 Py_UNICODE *s = start;
1609 for (;;) {
1610 while (*s > ch)
1611 s++;
1612 if (*s == ch)
1613 return s;
1614 if (s == end)
1615 return NULL;
1616 s++;
1617 }
1618 }
1619
1620 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,Py_UNICODE * start,Py_UNICODE * end,Py_ssize_t * consumed)1621 _PyIO_find_line_ending(
1622 int translated, int universal, PyObject *readnl,
1623 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1624 {
1625 Py_ssize_t len = end - start;
1626
1627 if (translated) {
1628 /* Newlines are already translated, only search for \n */
1629 Py_UNICODE *pos = find_control_char(start, end, '\n');
1630 if (pos != NULL)
1631 return pos - start + 1;
1632 else {
1633 *consumed = len;
1634 return -1;
1635 }
1636 }
1637 else if (universal) {
1638 /* Universal newline search. Find any of \r, \r\n, \n
1639 * The decoder ensures that \r\n are not split in two pieces
1640 */
1641 Py_UNICODE *s = start;
1642 for (;;) {
1643 Py_UNICODE ch;
1644 /* Fast path for non-control chars. The loop always ends
1645 since the Py_UNICODE storage is NUL-terminated. */
1646 while (*s > '\r')
1647 s++;
1648 if (s >= end) {
1649 *consumed = len;
1650 return -1;
1651 }
1652 ch = *s++;
1653 if (ch == '\n')
1654 return s - start;
1655 if (ch == '\r') {
1656 if (*s == '\n')
1657 return s - start + 1;
1658 else
1659 return s - start;
1660 }
1661 }
1662 }
1663 else {
1664 /* Non-universal mode. */
1665 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1666 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1667 if (readnl_len == 1) {
1668 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1669 if (pos != NULL)
1670 return pos - start + 1;
1671 *consumed = len;
1672 return -1;
1673 }
1674 else {
1675 Py_UNICODE *s = start;
1676 Py_UNICODE *e = end - readnl_len + 1;
1677 Py_UNICODE *pos;
1678 if (e < s)
1679 e = s;
1680 while (s < e) {
1681 Py_ssize_t i;
1682 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1683 if (pos == NULL || pos >= e)
1684 break;
1685 for (i = 1; i < readnl_len; i++) {
1686 if (pos[i] != nl[i])
1687 break;
1688 }
1689 if (i == readnl_len)
1690 return pos - start + readnl_len;
1691 s = pos + 1;
1692 }
1693 pos = find_control_char(e, end, nl[0]);
1694 if (pos == NULL)
1695 *consumed = len;
1696 else
1697 *consumed = pos - start;
1698 return -1;
1699 }
1700 }
1701 }
1702
/* Read one line and return it as a new unicode reference.
 *
 * If `limit` is non-negative, at most `limit` characters are returned even
 * if no line ending was found.  At EOF with nothing left, an empty unicode
 * string is returned.  Returns NULL with an exception set on error.
 *
 * Pending writes are flushed first.  Decoded data is examined chunk by
 * chunk; text known not to contain the line ending is put aside in
 * `chunks`, while a tail that might be the beginning of a line ending is
 * kept in `remaining` and prepended to the next decoded chunk.
 *
 * Locals:
 *   line             - string currently being searched (owned)
 *   start / endpos   - search start / end of the result inside `line`
 *   chunked          - number of characters already stored in `chunks`
 *   offset_to_buffer - length of the `remaining` prefix of `line`, i.e. the
 *                      shift between positions in `line` and positions in
 *                      self->decoded_chars
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, from the first
               character not yet consumed. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the tail kept from the previous chunk to the fresh
               chunk and search from the beginning. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found; make endpos absolute within `line` and
               clamp it to the length limit if there is one. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Translate the end position back into self->decoded_chars
           coordinates. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink the string in place
                   rather than copying it. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* A tail left over when EOF was reached still belongs to the
           result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece, if any. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_XSETREF(line, PyUnicode_Join(_PyIO_empty_str, chunks));
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1861
1862 static PyObject *
textiowrapper_readline(textio * self,PyObject * args)1863 textiowrapper_readline(textio *self, PyObject *args)
1864 {
1865 PyObject *limitobj = NULL;
1866 Py_ssize_t limit = -1;
1867
1868 CHECK_ATTACHED(self);
1869 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1870 return NULL;
1871 }
1872 if (limitobj) {
1873 if (!PyNumber_Check(limitobj)) {
1874 PyErr_Format(PyExc_TypeError,
1875 "integer argument expected, got '%.200s'",
1876 Py_TYPE(limitobj)->tp_name);
1877 return NULL;
1878 }
1879 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1880 if (limit == -1 && PyErr_Occurred())
1881 return NULL;
1882 }
1883 return _textiowrapper_readline(self, limit);
1884 }
1885
1886 /* Seek and Tell */
1887
/* Unpacked form of the opaque integer "cookie" exchanged by tell() and
   seek().  The field comments describe how textiowrapper_seek() uses
   each value. */
typedef struct {
    Py_off_t start_pos;     /* position to seek the underlying buffer to */
    int dec_flags;          /* decoder flags restored via decoder.setstate() */
    int bytes_to_feed;      /* number of bytes to read() and feed to the decoder */
    int chars_to_skip;      /* decoded characters to skip after feeding */
    char need_eof;          /* passed as the `final` argument of decoder.decode() */
} cookie_type;
1895
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields will be stored.
*/

/* Total size of the packed representation: one Py_off_t, three ints and
   one char, laid out back to back without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1933
1934 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)1935 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1936 {
1937 unsigned char buffer[COOKIE_BUF_LEN];
1938 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1939 if (cookieLong == NULL)
1940 return -1;
1941
1942 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1943 IS_LITTLE_ENDIAN, 0) < 0) {
1944 Py_DECREF(cookieLong);
1945 return -1;
1946 }
1947 Py_DECREF(cookieLong);
1948
1949 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1950 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1951 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1952 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1953 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1954
1955 return 0;
1956 }
1957
1958 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)1959 textiowrapper_build_cookie(cookie_type *cookie)
1960 {
1961 unsigned char buffer[COOKIE_BUF_LEN];
1962
1963 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1964 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1965 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1966 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1967 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1968
1969 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1970 }
1971 #undef IS_LITTLE_ENDIAN
1972
1973 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)1974 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1975 {
1976 PyObject *res;
1977 /* When seeking to the start of the stream, we call decoder.reset()
1978 rather than decoder.getstate().
1979 This is for a few decoders such as utf-16 for which the state value
1980 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1981 utf-16, that we are expecting a BOM).
1982 */
1983 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1984 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1985 else
1986 res = PyObject_CallMethod(self->decoder, "setstate",
1987 "((si))", "", cookie->dec_flags);
1988 if (res == NULL)
1989 return -1;
1990 Py_DECREF(res);
1991 return 0;
1992 }
1993
1994 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)1995 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1996 {
1997 PyObject *res;
1998 /* Same as _textiowrapper_decoder_setstate() above. */
1999 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2000 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2001 self->encoding_start_of_stream = 1;
2002 }
2003 else {
2004 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2005 _PyIO_zero, NULL);
2006 self->encoding_start_of_stream = 0;
2007 }
2008 if (res == NULL)
2009 return -1;
2010 Py_DECREF(res);
2011 return 0;
2012 }
2013
/* seek(cookie[, whence]) -> position
 *
 * whence == 0: `cookie` must be a non-negative value previously returned
 *              by tell(); the stream is repositioned there and the same
 *              cookie object is returned.
 * whence == 1: only an offset of 0 is accepted; the current position is
 *              obtained from tell() and then processed like whence == 0.
 * whence == 2: only an offset of 0 is accepted; the wrapper is flushed, all
 *              read-ahead state is dropped and the result of
 *              buffer.seek(0, 2) is returned.
 *
 * Returns a new reference, or NULL with an exception set: IOError for a
 * non-seekable stream, a nonzero relative seek or an unrestorable
 * position; ValueError for an invalid whence or a negative position.
 */
static PyObject *
textiowrapper_seek(textio *self, PyObject *args)
{
    PyObject *cookieObj, *posobj;
    cookie_type cookie;
    int whence = 0;
    PyObject *res;
    int cmp;
    PyObject *snapshot;

    CHECK_ATTACHED(self);

    if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
        return NULL;
    CHECK_CLOSED(self);

    /* From here on cookieObj is an owned reference: it is either returned
       to the caller or released at `fail`. */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        PyErr_SetString(PyExc_IOError,
                        "underlying stream is not seekable");
        goto fail;
    }

    if (whence == 1) {
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
        if (cookieObj == NULL)
            goto fail;
        /* Fall through to the absolute-seek code below with the cookie
           returned by tell(). */
    }
    else if (whence == 2) {
        /* seek relative to end of file */

        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero end-relative seeks");
            goto fail;
        }

        res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Forget everything that was read ahead. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = PyObject_CallMethod(self->decoder, "reset", NULL);
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        /* The result of the underlying seek (or its failure) is returned
           as is. */
        res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
        Py_XDECREF(cookieObj);
        return res;
    }
    else if (whence != 0) {
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be 0, 1 or 2)", whence);
        goto fail;
    }

    cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        /* If repr() itself fails, its exception is the one reported. */
        PyObject *repr = PyObject_Repr(cookieObj);
        if (repr != NULL) {
            PyErr_Format(PyExc_ValueError,
                         "negative seek position %s",
                         PyString_AS_STRING(repr));
            Py_DECREF(repr);
        }
        goto fail;
    }

    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodObjArgs(self->buffer,
                                     _PyIO_str_seek, posobj, NULL);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = PyObject_CallMethod(
            self->buffer, "read", "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        if (!PyBytes_Check(input_chunk)) {
            PyErr_Format(PyExc_TypeError,
                         "underlying read() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(input_chunk)->tp_name);
            Py_DECREF(input_chunk);
            goto fail;
        }

        /* "N" transfers our reference to input_chunk into the tuple (also
           on failure, hence no DECREF below).  The pointer is still used
           afterwards; it stays valid through the reference now held by
           self->snapshot. */
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (snapshot == NULL) {
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);

        /* NOTE(review): unlike the setstate call above, self->decoder is
           used here without a NULL check -- confirm a cookie with
           chars_to_skip != 0 cannot reach this point on a wrapper that has
           no decoder. */
        decoded = PyObject_CallMethod(self->decoder, "decode",
                                      "Oi", input_chunk, (int)cookie.need_eof);

        if (check_decoded(decoded) < 0)
            goto fail;

        textiowrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_IOError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: the snapshot is the start point itself, with
           no pending input. */
        snapshot = Py_BuildValue("is", cookie.dec_flags, "");
        if (snapshot == NULL)
            goto fail;
        Py_XSETREF(self->snapshot, snapshot);
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2198
/* tell(): report the current position of the text stream.
 *
 * When there is no decoder or no snapshot, the result is simply whatever
 * the underlying buffer's tell() returned.  Otherwise the result is a
 * cookie produced by textiowrapper_build_cookie() from the fields computed
 * below (start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip).
 *
 * Raises IOError if the stream is not seekable, if telling has been
 * disabled, or if the logical position cannot be reconstructed.  Returns
 * NULL with an exception set on any failure.
 *
 * The decoder is walked forward from the snapshot point during the
 * computation; its state is saved first and put back with setstate() on
 * both the success path and the `fail` path.
 */
static PyObject *
textiowrapper_tell(textio *self, PyObject *args)
{
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    PyObject *saved_state = NULL;
    char *input, *input_end;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        PyErr_SetString(PyExc_IOError,
                        "underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_IOError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* posobj and saved_state are still NULL here, so returning directly
     * (instead of going through `fail`) releases nothing less. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
    if (posobj == NULL)
        goto fail;

    /* Without a decoder or a snapshot the buffer position is the answer;
     * ownership of posobj passes to the caller. */
    if (self->decoder == NULL || self->snapshot == NULL) {
        assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    /* next_input comes out of the snapshot tuple via the "O" format and is
     * never DECREF'd in this function. */
    if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        Py_DECREF(posobj);
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Starting from the snapshot position, we will walk the decoder
     * forward until it gives us enough decoded characters.
     */
    /* From this point on saved_state is non-NULL, which is what makes the
     * `fail` path restore the decoder. */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

    /* Note our initial start point. */
    if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
        goto fail;

    /* Feed the decoder one byte at a time.  As we go, note the
     * nearest "safe start point" before the current location
     * (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    while (input < input_end) {
        PyObject *state;
        char *dec_buffer;
        Py_ssize_t dec_buffer_len;
        int dec_flags;

        /* Decode exactly one byte starting at `input`. */
        PyObject *decoded = PyObject_CallMethod(
            self->decoder, "decode", "s#", input, (Py_ssize_t)1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_SIZE(decoded);
        Py_DECREF(decoded);

        cookie.bytes_to_feed += 1;

        state = PyObject_CallMethodObjArgs(self->decoder,
                                           _PyIO_str_getstate, NULL);
        if (state == NULL)
            goto fail;
        /* Only dec_buffer_len and dec_flags are consulted below;
         * dec_buffer itself is not read after state is released. */
        if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
            Py_DECREF(state);
            goto fail;
        }
        Py_DECREF(state);

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        /* Break before advancing `input`, so that leaving the loop this
         * way never satisfies the `input == input_end` test below. */
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = PyObject_CallMethod(
            self->decoder, "decode", "si", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_SIZE(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_IOError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

    /* finally */
    /* Success path: drop posobj and put the decoder back into the state
     * captured before the walk. */
    Py_XDECREF(posobj);
    res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

  fail:
    Py_XDECREF(posobj);
    if (saved_state) {
        PyObject *type, *value, *traceback;
        /* Set the pending error aside while setstate() runs, then hand it
         * back through _PyErr_ReplaceException().
         * NOTE(review): presumably the fetched error takes precedence over
         * anything setstate() raised -- confirm against the helper. */
        PyErr_Fetch(&type, &value, &traceback);

        res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
        _PyErr_ReplaceException(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2364
2365 static PyObject *
textiowrapper_truncate(textio * self,PyObject * args)2366 textiowrapper_truncate(textio *self, PyObject *args)
2367 {
2368 PyObject *pos = Py_None;
2369 PyObject *res;
2370
2371 CHECK_ATTACHED(self)
2372 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2373 return NULL;
2374 }
2375
2376 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2377 if (res == NULL)
2378 return NULL;
2379 Py_DECREF(res);
2380
2381 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2382 }
2383
2384 static PyObject *
textiowrapper_repr(textio * self)2385 textiowrapper_repr(textio *self)
2386 {
2387 PyObject *nameobj, *res;
2388 PyObject *namerepr = NULL, *encrepr = NULL;
2389
2390 CHECK_INITIALIZED(self);
2391
2392 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2393 if (nameobj == NULL) {
2394 if (PyErr_ExceptionMatches(PyExc_Exception))
2395 PyErr_Clear();
2396 else
2397 goto error;
2398 encrepr = PyObject_Repr(self->encoding);
2399 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2400 PyString_AS_STRING(encrepr));
2401 }
2402 else {
2403 encrepr = PyObject_Repr(self->encoding);
2404 namerepr = PyObject_Repr(nameobj);
2405 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2406 PyString_AS_STRING(namerepr),
2407 PyString_AS_STRING(encrepr));
2408 Py_DECREF(nameobj);
2409 }
2410 Py_XDECREF(namerepr);
2411 Py_XDECREF(encrepr);
2412 return res;
2413
2414 error:
2415 Py_XDECREF(namerepr);
2416 Py_XDECREF(encrepr);
2417 return NULL;
2418 }
2419
2420
2421 /* Inquiries */
2422
2423 static PyObject *
textiowrapper_fileno(textio * self,PyObject * args)2424 textiowrapper_fileno(textio *self, PyObject *args)
2425 {
2426 CHECK_ATTACHED(self);
2427 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2428 }
2429
2430 static PyObject *
textiowrapper_seekable(textio * self,PyObject * args)2431 textiowrapper_seekable(textio *self, PyObject *args)
2432 {
2433 CHECK_ATTACHED(self);
2434 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2435 }
2436
2437 static PyObject *
textiowrapper_readable(textio * self,PyObject * args)2438 textiowrapper_readable(textio *self, PyObject *args)
2439 {
2440 CHECK_ATTACHED(self);
2441 return PyObject_CallMethod(self->buffer, "readable", NULL);
2442 }
2443
2444 static PyObject *
textiowrapper_writable(textio * self,PyObject * args)2445 textiowrapper_writable(textio *self, PyObject *args)
2446 {
2447 CHECK_ATTACHED(self);
2448 return PyObject_CallMethod(self->buffer, "writable", NULL);
2449 }
2450
2451 static PyObject *
textiowrapper_isatty(textio * self,PyObject * args)2452 textiowrapper_isatty(textio *self, PyObject *args)
2453 {
2454 CHECK_ATTACHED(self);
2455 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2456 }
2457
2458 static PyObject *
textiowrapper_flush(textio * self,PyObject * args)2459 textiowrapper_flush(textio *self, PyObject *args)
2460 {
2461 CHECK_ATTACHED(self);
2462 CHECK_CLOSED(self);
2463 self->telling = self->seekable;
2464 if (_textiowrapper_writeflush(self) < 0)
2465 return NULL;
2466 return PyObject_CallMethod(self->buffer, "flush", NULL);
2467 }
2468
2469 static PyObject *
textiowrapper_close(textio * self,PyObject * args)2470 textiowrapper_close(textio *self, PyObject *args)
2471 {
2472 PyObject *res;
2473 int r;
2474 CHECK_ATTACHED(self);
2475
2476 res = textiowrapper_closed_get(self, NULL);
2477 if (res == NULL)
2478 return NULL;
2479 r = PyObject_IsTrue(res);
2480 Py_DECREF(res);
2481 if (r < 0)
2482 return NULL;
2483
2484 if (r > 0) {
2485 Py_RETURN_NONE; /* stream already closed */
2486 }
2487 else {
2488 PyObject *exc = NULL, *val, *tb;
2489 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2490 if (res == NULL)
2491 PyErr_Fetch(&exc, &val, &tb);
2492 else
2493 Py_DECREF(res);
2494
2495 res = PyObject_CallMethod(self->buffer, "close", NULL);
2496 if (exc != NULL) {
2497 _PyErr_ReplaceException(exc, val, tb);
2498 Py_CLEAR(res);
2499 }
2500 return res;
2501 }
2502 }
2503
2504 static PyObject *
textiowrapper_iternext(textio * self)2505 textiowrapper_iternext(textio *self)
2506 {
2507 PyObject *line;
2508
2509 CHECK_ATTACHED(self);
2510
2511 self->telling = 0;
2512 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2513 /* Skip method call overhead for speed */
2514 line = _textiowrapper_readline(self, -1);
2515 }
2516 else {
2517 line = PyObject_CallMethodObjArgs((PyObject *)self,
2518 _PyIO_str_readline, NULL);
2519 if (line && !PyUnicode_Check(line)) {
2520 PyErr_Format(PyExc_IOError,
2521 "readline() should have returned an str object, "
2522 "not '%.200s'", Py_TYPE(line)->tp_name);
2523 Py_DECREF(line);
2524 return NULL;
2525 }
2526 }
2527
2528 if (line == NULL)
2529 return NULL;
2530
2531 if (PyUnicode_GET_SIZE(line) == 0) {
2532 /* Reached EOF or would have blocked */
2533 Py_DECREF(line);
2534 Py_CLEAR(self->snapshot);
2535 self->telling = self->seekable;
2536 return NULL;
2537 }
2538
2539 return line;
2540 }
2541
2542 static PyObject *
textiowrapper_name_get(textio * self,void * context)2543 textiowrapper_name_get(textio *self, void *context)
2544 {
2545 CHECK_ATTACHED(self);
2546 return PyObject_GetAttrString(self->buffer, "name");
2547 }
2548
2549 static PyObject *
textiowrapper_closed_get(textio * self,void * context)2550 textiowrapper_closed_get(textio *self, void *context)
2551 {
2552 CHECK_ATTACHED(self);
2553 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2554 }
2555
2556 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)2557 textiowrapper_newlines_get(textio *self, void *context)
2558 {
2559 PyObject *res;
2560 CHECK_ATTACHED(self);
2561 if (self->decoder == NULL)
2562 Py_RETURN_NONE;
2563 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2564 if (res == NULL) {
2565 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2566 PyErr_Clear();
2567 Py_RETURN_NONE;
2568 }
2569 else {
2570 return NULL;
2571 }
2572 }
2573 return res;
2574 }
2575
2576 static PyObject *
textiowrapper_errors_get(textio * self,void * context)2577 textiowrapper_errors_get(textio *self, void *context)
2578 {
2579 CHECK_INITIALIZED(self);
2580 Py_INCREF(self->errors);
2581 return self->errors;
2582 }
2583
2584 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)2585 textiowrapper_chunk_size_get(textio *self, void *context)
2586 {
2587 CHECK_ATTACHED(self);
2588 return PyLong_FromSsize_t(self->chunk_size);
2589 }
2590
2591 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)2592 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2593 {
2594 Py_ssize_t n;
2595 CHECK_ATTACHED_INT(self);
2596 if (arg == NULL) {
2597 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
2598 return -1;
2599 }
2600 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2601 if (n == -1 && PyErr_Occurred())
2602 return -1;
2603 if (n <= 0) {
2604 PyErr_SetString(PyExc_ValueError,
2605 "a strictly positive integer is required");
2606 return -1;
2607 }
2608 self->chunk_size = n;
2609 return 0;
2610 }
2611
2612 static PyMethodDef textiowrapper_methods[] = {
2613 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2614 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2615 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2616 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2617 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2618 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2619
2620 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2621 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2622 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2623 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2624 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2625
2626 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2627 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2628 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2629 {NULL, NULL}
2630 };
2631
2632 static PyMemberDef textiowrapper_members[] = {
2633 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2634 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2635 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2636 {NULL}
2637 };
2638
2639 static PyGetSetDef textiowrapper_getset[] = {
2640 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2641 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2642 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2643 */
2644 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2645 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2646 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2647 (setter)textiowrapper_chunk_size_set, NULL},
2648 {NULL}
2649 };
2650
/* Type object for _io.TextIOWrapper.
 *
 * This is a positional initializer: each value is matched to a slot purely
 * by its position, and the trailing comment names the slot it fills.
 * The type is subclassable and GC-enabled (see tp_flags), supports weak
 * references and an instance dict via the offsets given below, and is
 * iterated through tp_iternext.  tp_base is left as 0 here.
 */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
2692