1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h" // PyInterpreterState.fs_codec
12 #include "pycore_object.h"
13 #include "pycore_pystate.h" // _PyInterpreterState_GET()
14 #include "structmember.h" // PyMemberDef
15 #include "_iomodule.h"
16
17 /*[clinic input]
18 module _io
19 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
20 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
21 [clinic start generated code]*/
22 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
23
24 _Py_IDENTIFIER(close);
25 _Py_IDENTIFIER(_dealloc_warn);
26 _Py_IDENTIFIER(decode);
27 _Py_IDENTIFIER(fileno);
28 _Py_IDENTIFIER(flush);
29 _Py_IDENTIFIER(getpreferredencoding);
30 _Py_IDENTIFIER(isatty);
31 _Py_IDENTIFIER(mode);
32 _Py_IDENTIFIER(name);
33 _Py_IDENTIFIER(raw);
34 _Py_IDENTIFIER(read);
35 _Py_IDENTIFIER(readable);
36 _Py_IDENTIFIER(replace);
37 _Py_IDENTIFIER(reset);
38 _Py_IDENTIFIER(seek);
39 _Py_IDENTIFIER(seekable);
40 _Py_IDENTIFIER(setstate);
41 _Py_IDENTIFIER(strict);
42 _Py_IDENTIFIER(tell);
43 _Py_IDENTIFIER(writable);
44
45 /* TextIOBase */
46
/* Class docstring for _io._TextIOBase; referenced as tp_doc by
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
54
55 static PyObject *
_unsupported(const char * message)56 _unsupported(const char *message)
57 {
58 _PyIO_State *state = IO_STATE();
59 if (state != NULL)
60 PyErr_SetString(state->unsupported_operation, message);
61 return NULL;
62 }
63
64 PyDoc_STRVAR(textiobase_detach_doc,
65 "Separate the underlying buffer from the TextIOBase and return it.\n"
66 "\n"
67 "After the underlying buffer has been detached, the TextIO is in an\n"
68 "unusable state.\n"
69 );
70
71 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))72 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
73 {
74 return _unsupported("detach");
75 }
76
77 PyDoc_STRVAR(textiobase_read_doc,
78 "Read at most n characters from stream.\n"
79 "\n"
80 "Read from underlying buffer until we have n characters or we hit EOF.\n"
81 "If n is negative or omitted, read until EOF.\n"
82 );
83
84 static PyObject *
textiobase_read(PyObject * self,PyObject * args)85 textiobase_read(PyObject *self, PyObject *args)
86 {
87 return _unsupported("read");
88 }
89
90 PyDoc_STRVAR(textiobase_readline_doc,
91 "Read until newline or EOF.\n"
92 "\n"
93 "Returns an empty string if EOF is hit immediately.\n"
94 );
95
96 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)97 textiobase_readline(PyObject *self, PyObject *args)
98 {
99 return _unsupported("readline");
100 }
101
102 PyDoc_STRVAR(textiobase_write_doc,
103 "Write string to stream.\n"
104 "Returns the number of characters written (which is always equal to\n"
105 "the length of the string).\n"
106 );
107
108 static PyObject *
textiobase_write(PyObject * self,PyObject * args)109 textiobase_write(PyObject *self, PyObject *args)
110 {
111 return _unsupported("write");
112 }
113
114 PyDoc_STRVAR(textiobase_encoding_doc,
115 "Encoding of the text stream.\n"
116 "\n"
117 "Subclasses should override.\n"
118 );
119
120 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)121 textiobase_encoding_get(PyObject *self, void *context)
122 {
123 Py_RETURN_NONE;
124 }
125
126 PyDoc_STRVAR(textiobase_newlines_doc,
127 "Line endings translated so far.\n"
128 "\n"
129 "Only line endings translated during reading are considered.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)135 textiobase_newlines_get(PyObject *self, void *context)
136 {
137 Py_RETURN_NONE;
138 }
139
140 PyDoc_STRVAR(textiobase_errors_doc,
141 "The error setting of the decoder or encoder.\n"
142 "\n"
143 "Subclasses should override.\n"
144 );
145
146 static PyObject *
textiobase_errors_get(PyObject * self,void * context)147 textiobase_errors_get(PyObject *self, void *context)
148 {
149 Py_RETURN_NONE;
150 }
151
152
153 static PyMethodDef textiobase_methods[] = {
154 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
155 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
156 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
157 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
158 {NULL, NULL}
159 };
160
161 static PyGetSetDef textiobase_getset[] = {
162 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
163 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
164 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
165 {NULL}
166 };
167
168 PyTypeObject PyTextIOBase_Type = {
169 PyVarObject_HEAD_INIT(NULL, 0)
170 "_io._TextIOBase", /*tp_name*/
171 0, /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 0, /*tp_dealloc*/
174 0, /*tp_vectorcall_offset*/
175 0, /*tp_getattr*/
176 0, /*tp_setattr*/
177 0, /*tp_as_async*/
178 0, /*tp_repr*/
179 0, /*tp_as_number*/
180 0, /*tp_as_sequence*/
181 0, /*tp_as_mapping*/
182 0, /*tp_hash */
183 0, /*tp_call*/
184 0, /*tp_str*/
185 0, /*tp_getattro*/
186 0, /*tp_setattro*/
187 0, /*tp_as_buffer*/
188 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
189 textiobase_doc, /* tp_doc */
190 0, /* tp_traverse */
191 0, /* tp_clear */
192 0, /* tp_richcompare */
193 0, /* tp_weaklistoffset */
194 0, /* tp_iter */
195 0, /* tp_iternext */
196 textiobase_methods, /* tp_methods */
197 0, /* tp_members */
198 textiobase_getset, /* tp_getset */
199 &PyIOBase_Type, /* tp_base */
200 0, /* tp_dict */
201 0, /* tp_descr_get */
202 0, /* tp_descr_set */
203 0, /* tp_dictoffset */
204 0, /* tp_init */
205 0, /* tp_alloc */
206 0, /* tp_new */
207 0, /* tp_free */
208 0, /* tp_is_gc */
209 0, /* tp_bases */
210 0, /* tp_mro */
211 0, /* tp_cache */
212 0, /* tp_subclasses */
213 0, /* tp_weaklist */
214 0, /* tp_del */
215 0, /* tp_version_tag */
216 0, /* tp_finalize */
217 };
218
219
220 /* IncrementalNewlineDecoder */
221
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped incremental decoder.  Py_None means decode() receives text
       directly; NULL is treated by decode() as "__init__ not called". */
    PyObject *decoder;
    /* Error handler name object passed to (or defaulted by) __init__. */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from the previous decode(). */
    unsigned int pendingcr: 1;
    /* Non-zero when '\r' and '\r\n' are rewritten to '\n'. */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF recorded so far. */
    unsigned int seennl: 3;
} nldecoder_object;
230
231 /*[clinic input]
232 _io.IncrementalNewlineDecoder.__init__
233 decoder: object
234 translate: int
235 errors: object(c_default="NULL") = "strict"
236
237 Codec used when reading a file in universal newlines mode.
238
239 It wraps another incremental decoder, translating \r\n and \r into \n.
240 It also records the types of newlines encountered. When used with
241 translate=False, it ensures that the newline sequence is returned in
242 one piece. When used with decoder=None, it expects unicode strings as
243 decode input and translates newlines without first invoking an external
244 decoder.
245 [clinic start generated code]*/
246
247 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)248 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249 PyObject *decoder, int translate,
250 PyObject *errors)
251 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
252 {
253 self->decoder = decoder;
254 Py_INCREF(decoder);
255
256 if (errors == NULL) {
257 self->errors = _PyUnicode_FromId(&PyId_strict);
258 if (self->errors == NULL)
259 return -1;
260 }
261 else {
262 self->errors = errors;
263 }
264 Py_INCREF(self->errors);
265
266 self->translate = translate ? 1 : 0;
267 self->seennl = 0;
268 self->pendingcr = 0;
269
270 return 0;
271 }
272
273 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)274 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
275 {
276 Py_CLEAR(self->decoder);
277 Py_CLEAR(self->errors);
278 Py_TYPE(self)->tp_free((PyObject *)self);
279 }
280
281 static int
check_decoded(PyObject * decoded)282 check_decoded(PyObject *decoded)
283 {
284 if (decoded == NULL)
285 return -1;
286 if (!PyUnicode_Check(decoded)) {
287 PyErr_Format(PyExc_TypeError,
288 "decoder should return a string result, not '%.200s'",
289 Py_TYPE(decoded)->tp_name);
290 Py_DECREF(decoded);
291 return -1;
292 }
293 if (PyUnicode_READY(decoded) < 0) {
294 Py_DECREF(decoded);
295 return -1;
296 }
297 return 0;
298 }
299
/* Bit flags stored in nldecoder_object.seennl: one bit per newline
   convention encountered while decoding. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
304
/* Decode `input` through the wrapped decoder and post-process newlines.

   myself: an nldecoder_object (cast without a type check).
   input:  passed to the wrapped decoder's decode(); used directly as the
           decoded text when the wrapped decoder is Py_None.
   final:  non-zero when no more input will follow.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  Side effects on self: pendingcr is updated when a trailing '\r' is
   held back or re-inserted, and seennl accumulates the newline kinds
   encountered. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ has stored something in it */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the decoded text. */
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself when it rejects it, so a
       plain return (not `goto error`) is correct here. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Created with output's own max char value, so the data of
           `output` can be copied over unit for unit below. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is the size in bytes of one code unit: skip the '\r'
           just written and copy output_len units after it. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte data a matching byte is a matching character;
                   for wider kinds the byte hit must be confirmed by
                   scanning whole characters. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no explicit bound on i here; this
                           appears to rely on a terminating character
                           <= '\n' after the data -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                /* NOTE(review): same unbounded-read pattern as above,
                   stopping at any character <= '\r' -- confirm. */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* "\r\n" counts as one CRLF, not as CR followed by LF */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text ("\r\n" -> "\n",
               "\r" -> "\n"), so `len` units are enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means c came from index len, i.e. from
                   beyond the last real character. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was already released above, so return
               directly instead of jumping to `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
505
506 /*[clinic input]
507 _io.IncrementalNewlineDecoder.decode
508 input: object
509 final: bool(accept={int}) = False
510 [clinic start generated code]*/
511
512 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)513 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
514 PyObject *input, int final)
515 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
516 {
517 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
518 }
519
520 /*[clinic input]
521 _io.IncrementalNewlineDecoder.getstate
522 [clinic start generated code]*/
523
524 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)525 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
526 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
527 {
528 PyObject *buffer;
529 unsigned long long flag;
530
531 if (self->decoder != Py_None) {
532 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
533 _PyIO_str_getstate);
534 if (state == NULL)
535 return NULL;
536 if (!PyTuple_Check(state)) {
537 PyErr_SetString(PyExc_TypeError,
538 "illegal decoder state");
539 Py_DECREF(state);
540 return NULL;
541 }
542 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
543 &buffer, &flag))
544 {
545 Py_DECREF(state);
546 return NULL;
547 }
548 Py_INCREF(buffer);
549 Py_DECREF(state);
550 }
551 else {
552 buffer = PyBytes_FromString("");
553 flag = 0;
554 }
555 flag <<= 1;
556 if (self->pendingcr)
557 flag |= 1;
558 return Py_BuildValue("NK", buffer, flag);
559 }
560
561 /*[clinic input]
562 _io.IncrementalNewlineDecoder.setstate
563 state: object
564 /
565 [clinic start generated code]*/
566
567 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)568 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
569 PyObject *state)
570 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
571 {
572 PyObject *buffer;
573 unsigned long long flag;
574
575 if (!PyTuple_Check(state)) {
576 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
577 return NULL;
578 }
579 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
580 &buffer, &flag))
581 {
582 return NULL;
583 }
584
585 self->pendingcr = (int) (flag & 1);
586 flag >>= 1;
587
588 if (self->decoder != Py_None)
589 return _PyObject_CallMethodId(self->decoder,
590 &PyId_setstate, "((OK))", buffer, flag);
591 else
592 Py_RETURN_NONE;
593 }
594
595 /*[clinic input]
596 _io.IncrementalNewlineDecoder.reset
597 [clinic start generated code]*/
598
599 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)600 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
601 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
602 {
603 self->seennl = 0;
604 self->pendingcr = 0;
605 if (self->decoder != Py_None)
606 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
607 else
608 Py_RETURN_NONE;
609 }
610
611 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)612 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
613 {
614 switch (self->seennl) {
615 case SEEN_CR:
616 return PyUnicode_FromString("\r");
617 case SEEN_LF:
618 return PyUnicode_FromString("\n");
619 case SEEN_CRLF:
620 return PyUnicode_FromString("\r\n");
621 case SEEN_CR | SEEN_LF:
622 return Py_BuildValue("ss", "\r", "\n");
623 case SEEN_CR | SEEN_CRLF:
624 return Py_BuildValue("ss", "\r", "\r\n");
625 case SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("ss", "\n", "\r\n");
627 case SEEN_CR | SEEN_LF | SEEN_CRLF:
628 return Py_BuildValue("sss", "\r", "\n", "\r\n");
629 default:
630 Py_RETURN_NONE;
631 }
632
633 }
634
635 /* TextIOWrapper */
636
/* Signature shared by the specialized encoding functions stored in
   textio.encodefunc. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;

/* Forward declaration; the definition is not part of this section. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
701
702 /* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705 static PyObject *
ascii_encode(textio * self,PyObject * text)706 ascii_encode(textio *self, PyObject *text)
707 {
708 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
709 }
710
711 static PyObject *
utf16be_encode(textio * self,PyObject * text)712 utf16be_encode(textio *self, PyObject *text)
713 {
714 return _PyUnicode_EncodeUTF16(text,
715 PyUnicode_AsUTF8(self->errors), 1);
716 }
717
718 static PyObject *
utf16le_encode(textio * self,PyObject * text)719 utf16le_encode(textio *self, PyObject *text)
720 {
721 return _PyUnicode_EncodeUTF16(text,
722 PyUnicode_AsUTF8(self->errors), -1);
723 }
724
725 static PyObject *
utf16_encode(textio * self,PyObject * text)726 utf16_encode(textio *self, PyObject *text)
727 {
728 if (!self->encoding_start_of_stream) {
729 /* Skip the BOM and use native byte ordering */
730 #if PY_BIG_ENDIAN
731 return utf16be_encode(self, text);
732 #else
733 return utf16le_encode(self, text);
734 #endif
735 }
736 return _PyUnicode_EncodeUTF16(text,
737 PyUnicode_AsUTF8(self->errors), 0);
738 }
739
740 static PyObject *
utf32be_encode(textio * self,PyObject * text)741 utf32be_encode(textio *self, PyObject *text)
742 {
743 return _PyUnicode_EncodeUTF32(text,
744 PyUnicode_AsUTF8(self->errors), 1);
745 }
746
747 static PyObject *
utf32le_encode(textio * self,PyObject * text)748 utf32le_encode(textio *self, PyObject *text)
749 {
750 return _PyUnicode_EncodeUTF32(text,
751 PyUnicode_AsUTF8(self->errors), -1);
752 }
753
754 static PyObject *
utf32_encode(textio * self,PyObject * text)755 utf32_encode(textio *self, PyObject *text)
756 {
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759 #if PY_BIG_ENDIAN
760 return utf32be_encode(self, text);
761 #else
762 return utf32le_encode(self, text);
763 #endif
764 }
765 return _PyUnicode_EncodeUTF32(text,
766 PyUnicode_AsUTF8(self->errors), 0);
767 }
768
769 static PyObject *
utf8_encode(textio * self,PyObject * text)770 utf8_encode(textio *self, PyObject *text)
771 {
772 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
773 }
774
775 static PyObject *
latin1_encode(textio * self,PyObject * text)776 latin1_encode(textio *self, PyObject *text)
777 {
778 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
779 }
780
781 // Return true when encoding can be skipped when text is ascii.
782 static inline int
is_asciicompat_encoding(encodefunc_t f)783 is_asciicompat_encoding(encodefunc_t f)
784 {
785 return f == (encodefunc_t) ascii_encode
786 || f == (encodefunc_t) latin1_encode
787 || f == (encodefunc_t) utf8_encode;
788 }
789
790 /* Map normalized encoding names onto the specialized encoding funcs */
791
792 typedef struct {
793 const char *name;
794 encodefunc_t encodefunc;
795 } encodefuncentry;
796
797 static const encodefuncentry encodefuncs[] = {
798 {"ascii", (encodefunc_t) ascii_encode},
799 {"iso8859-1", (encodefunc_t) latin1_encode},
800 {"utf-8", (encodefunc_t) utf8_encode},
801 {"utf-16-be", (encodefunc_t) utf16be_encode},
802 {"utf-16-le", (encodefunc_t) utf16le_encode},
803 {"utf-16", (encodefunc_t) utf16_encode},
804 {"utf-32-be", (encodefunc_t) utf32be_encode},
805 {"utf-32-le", (encodefunc_t) utf32le_encode},
806 {"utf-32", (encodefunc_t) utf32_encode},
807 {NULL, NULL}
808 };
809
810 static int
validate_newline(const char * newline)811 validate_newline(const char *newline)
812 {
813 if (newline && newline[0] != '\0'
814 && !(newline[0] == '\n' && newline[1] == '\0')
815 && !(newline[0] == '\r' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
817 PyErr_Format(PyExc_ValueError,
818 "illegal newline value: %s", newline);
819 return -1;
820 }
821 return 0;
822 }
823
824 static int
set_newline(textio * self,const char * newline)825 set_newline(textio *self, const char *newline)
826 {
827 PyObject *old = self->readnl;
828 if (newline == NULL) {
829 self->readnl = NULL;
830 }
831 else {
832 self->readnl = PyUnicode_FromString(newline);
833 if (self->readnl == NULL) {
834 self->readnl = old;
835 return -1;
836 }
837 }
838 self->readuniversal = (newline == NULL || newline[0] == '\0');
839 self->readtranslate = (newline == NULL);
840 self->writetranslate = (newline == NULL || newline[0] != '\0');
841 if (!self->readuniversal && self->readnl != NULL) {
842 // validate_newline() accepts only ASCII newlines.
843 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
844 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
845 if (strcmp(self->writenl, "\n") == 0) {
846 self->writenl = NULL;
847 }
848 }
849 else {
850 #ifdef MS_WINDOWS
851 self->writenl = "\r\n";
852 #else
853 self->writenl = NULL;
854 #endif
855 }
856 Py_XDECREF(old);
857 return 0;
858 }
859
860 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)861 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
862 const char *errors)
863 {
864 PyObject *res;
865 int r;
866
867 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
868 if (res == NULL)
869 return -1;
870
871 r = PyObject_IsTrue(res);
872 Py_DECREF(res);
873 if (r == -1)
874 return -1;
875
876 if (r != 1)
877 return 0;
878
879 Py_CLEAR(self->decoder);
880 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
881 if (self->decoder == NULL)
882 return -1;
883
884 if (self->readuniversal) {
885 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
886 (PyObject *)&PyIncrementalNewlineDecoder_Type,
887 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
888 if (incrementalDecoder == NULL)
889 return -1;
890 Py_CLEAR(self->decoder);
891 self->decoder = incrementalDecoder;
892 }
893
894 return 0;
895 }
896
897 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)898 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
899 {
900 PyObject *chars;
901
902 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
903 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
904 else
905 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
906 eof ? Py_True : Py_False, NULL);
907
908 if (check_decoded(chars) < 0)
909 // check_decoded already decreases refcount
910 return NULL;
911
912 return chars;
913 }
914
915 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)916 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
917 const char *errors)
918 {
919 PyObject *res;
920 int r;
921
922 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
923 if (res == NULL)
924 return -1;
925
926 r = PyObject_IsTrue(res);
927 Py_DECREF(res);
928 if (r == -1)
929 return -1;
930
931 if (r != 1)
932 return 0;
933
934 Py_CLEAR(self->encoder);
935 self->encodefunc = NULL;
936 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
937 if (self->encoder == NULL)
938 return -1;
939
940 /* Get the normalized named of the codec */
941 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
942 return -1;
943 }
944 if (res != NULL && PyUnicode_Check(res)) {
945 const encodefuncentry *e = encodefuncs;
946 while (e->name != NULL) {
947 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
948 self->encodefunc = e->encodefunc;
949 break;
950 }
951 e++;
952 }
953 }
954 Py_XDECREF(res);
955
956 return 0;
957 }
958
959 static int
_textiowrapper_fix_encoder_state(textio * self)960 _textiowrapper_fix_encoder_state(textio *self)
961 {
962 if (!self->seekable || !self->encoder) {
963 return 0;
964 }
965
966 self->encoding_start_of_stream = 1;
967
968 PyObject *cookieObj = PyObject_CallMethodNoArgs(
969 self->buffer, _PyIO_str_tell);
970 if (cookieObj == NULL) {
971 return -1;
972 }
973
974 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
975 Py_DECREF(cookieObj);
976 if (cmp < 0) {
977 return -1;
978 }
979
980 if (cmp == 0) {
981 self->encoding_start_of_stream = 0;
982 PyObject *res = PyObject_CallMethodOneArg(
983 self->encoder, _PyIO_str_setstate, _PyLong_Zero);
984 if (res == NULL) {
985 return -1;
986 }
987 Py_DECREF(res);
988 }
989
990 return 0;
991 }
992
993 static int
io_check_errors(PyObject * errors)994 io_check_errors(PyObject *errors)
995 {
996 assert(errors != NULL && errors != Py_None);
997
998 PyInterpreterState *interp = _PyInterpreterState_GET();
999 #ifndef Py_DEBUG
1000 /* In release mode, only check in development mode (-X dev) */
1001 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1002 return 0;
1003 }
1004 #else
1005 /* Always check in debug mode */
1006 #endif
1007
1008 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1009 before_PyUnicode_InitEncodings() is called. */
1010 if (!interp->unicode.fs_codec.encoding) {
1011 return 0;
1012 }
1013
1014 Py_ssize_t name_length;
1015 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1016 if (name == NULL) {
1017 return -1;
1018 }
1019 if (strlen(name) != (size_t)name_length) {
1020 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1021 return -1;
1022 }
1023 PyObject *handler = PyCodec_LookupError(name);
1024 if (handler != NULL) {
1025 Py_DECREF(handler);
1026 return 0;
1027 }
1028 return -1;
1029 }
1030
1031
1032
1033 /*[clinic input]
1034 _io.TextIOWrapper.__init__
1035 buffer: object
1036 encoding: str(accept={str, NoneType}) = None
1037 errors: object = None
1038 newline: str(accept={str, NoneType}) = None
1039 line_buffering: bool(accept={int}) = False
1040 write_through: bool(accept={int}) = False
1041
1042 Character and line based layer over a BufferedIOBase object, buffer.
1043
1044 encoding gives the name of the encoding that the stream will be
1045 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1046
1047 errors determines the strictness of encoding and decoding (see
1048 help(codecs.Codec) or the documentation for codecs.register) and
1049 defaults to "strict".
1050
1051 newline controls how line endings are handled. It can be None, '',
1052 '\n', '\r', and '\r\n'. It works as follows:
1053
1054 * On input, if newline is None, universal newlines mode is
1055 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1056 these are translated into '\n' before being returned to the
1057 caller. If it is '', universal newline mode is enabled, but line
1058 endings are returned to the caller untranslated. If it has any of
1059 the other legal values, input lines are only terminated by the given
1060 string, and the line ending is returned to the caller untranslated.
1061
1062 * On output, if newline is None, any '\n' characters written are
1063 translated to the system default line separator, os.linesep. If
1064 newline is '' or '\n', no translation takes place. If newline is any
1065 of the other legal values, any '\n' characters written are translated
1066 to the given string.
1067
1068 If line_buffering is True, a call to flush is implied when a call to
1069 write contains a newline character.
1070 [clinic start generated code]*/
1071
1072 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1073 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1074 const char *encoding, PyObject *errors,
1075 const char *newline, int line_buffering,
1076 int write_through)
1077 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1078 {
1079 PyObject *raw, *codec_info = NULL;
1080 _PyIO_State *state = NULL;
1081 PyObject *res;
1082 int r;
1083
1084 self->ok = 0;
1085 self->detached = 0;
1086
1087 if (errors == Py_None) {
1088 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1089 if (errors == NULL) {
1090 return -1;
1091 }
1092 }
1093 else if (!PyUnicode_Check(errors)) {
1094 // Check 'errors' argument here because Argument Clinic doesn't support
1095 // 'str(accept={str, NoneType})' converter.
1096 PyErr_Format(
1097 PyExc_TypeError,
1098 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1099 Py_TYPE(errors)->tp_name);
1100 return -1;
1101 }
1102 else if (io_check_errors(errors)) {
1103 return -1;
1104 }
1105
1106 if (validate_newline(newline) < 0) {
1107 return -1;
1108 }
1109
1110 Py_CLEAR(self->buffer);
1111 Py_CLEAR(self->encoding);
1112 Py_CLEAR(self->encoder);
1113 Py_CLEAR(self->decoder);
1114 Py_CLEAR(self->readnl);
1115 Py_CLEAR(self->decoded_chars);
1116 Py_CLEAR(self->pending_bytes);
1117 Py_CLEAR(self->snapshot);
1118 Py_CLEAR(self->errors);
1119 Py_CLEAR(self->raw);
1120 self->decoded_chars_used = 0;
1121 self->pending_bytes_count = 0;
1122 self->encodefunc = NULL;
1123 self->b2cratio = 0.0;
1124
1125 if (encoding == NULL) {
1126 /* Try os.device_encoding(fileno) */
1127 PyObject *fileno;
1128 state = IO_STATE();
1129 if (state == NULL)
1130 goto error;
1131 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
1132 /* Ignore only AttributeError and UnsupportedOperation */
1133 if (fileno == NULL) {
1134 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1135 PyErr_ExceptionMatches(state->unsupported_operation)) {
1136 PyErr_Clear();
1137 }
1138 else {
1139 goto error;
1140 }
1141 }
1142 else {
1143 int fd = _PyLong_AsInt(fileno);
1144 Py_DECREF(fileno);
1145 if (fd == -1 && PyErr_Occurred()) {
1146 goto error;
1147 }
1148
1149 self->encoding = _Py_device_encoding(fd);
1150 if (self->encoding == NULL)
1151 goto error;
1152 else if (!PyUnicode_Check(self->encoding))
1153 Py_CLEAR(self->encoding);
1154 }
1155 }
1156 if (encoding == NULL && self->encoding == NULL) {
1157 PyObject *locale_module = _PyIO_get_locale_module(state);
1158 if (locale_module == NULL)
1159 goto catch_ImportError;
1160 self->encoding = _PyObject_CallMethodIdOneArg(
1161 locale_module, &PyId_getpreferredencoding, Py_False);
1162 Py_DECREF(locale_module);
1163 if (self->encoding == NULL) {
1164 catch_ImportError:
1165 /*
1166 Importing locale can raise an ImportError because of
1167 _functools, and locale.getpreferredencoding can raise an
1168 ImportError if _locale is not available. These will happen
1169 during module building.
1170 */
1171 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1172 PyErr_Clear();
1173 self->encoding = PyUnicode_FromString("ascii");
1174 }
1175 else
1176 goto error;
1177 }
1178 else if (!PyUnicode_Check(self->encoding))
1179 Py_CLEAR(self->encoding);
1180 }
1181 if (self->encoding != NULL) {
1182 encoding = PyUnicode_AsUTF8(self->encoding);
1183 if (encoding == NULL)
1184 goto error;
1185 }
1186 else if (encoding != NULL) {
1187 self->encoding = PyUnicode_FromString(encoding);
1188 if (self->encoding == NULL)
1189 goto error;
1190 }
1191 else {
1192 PyErr_SetString(PyExc_OSError,
1193 "could not determine default encoding");
1194 goto error;
1195 }
1196
1197 /* Check we have been asked for a real text encoding */
1198 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1199 if (codec_info == NULL) {
1200 Py_CLEAR(self->encoding);
1201 goto error;
1202 }
1203
1204 /* XXX: Failures beyond this point have the potential to leak elements
1205 * of the partially constructed object (like self->encoding)
1206 */
1207
1208 Py_INCREF(errors);
1209 self->errors = errors;
1210 self->chunk_size = 8192;
1211 self->line_buffering = line_buffering;
1212 self->write_through = write_through;
1213 if (set_newline(self, newline) < 0) {
1214 goto error;
1215 }
1216
1217 self->buffer = buffer;
1218 Py_INCREF(buffer);
1219
1220 /* Build the decoder object */
1221 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222 goto error;
1223
1224 /* Build the encoder object */
1225 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1226 goto error;
1227
1228 /* Finished sorting out the codec details */
1229 Py_CLEAR(codec_info);
1230
1231 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1232 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1233 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1234 {
1235 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1236 goto error;
1237 /* Cache the raw FileIO object to speed up 'closed' checks */
1238 if (raw != NULL) {
1239 if (Py_IS_TYPE(raw, &PyFileIO_Type))
1240 self->raw = raw;
1241 else
1242 Py_DECREF(raw);
1243 }
1244 }
1245
1246 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
1247 if (res == NULL)
1248 goto error;
1249 r = PyObject_IsTrue(res);
1250 Py_DECREF(res);
1251 if (r < 0)
1252 goto error;
1253 self->seekable = self->telling = r;
1254
1255 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1256 if (r < 0) {
1257 goto error;
1258 }
1259 Py_XDECREF(res);
1260 self->has_read1 = r;
1261
1262 self->encoding_start_of_stream = 0;
1263 if (_textiowrapper_fix_encoder_state(self) < 0) {
1264 goto error;
1265 }
1266
1267 self->ok = 1;
1268 return 0;
1269
1270 error:
1271 Py_XDECREF(codec_info);
1272 return -1;
1273 }
1274
1275 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1276 * -1 on error.
1277 */
1278 static int
convert_optional_bool(PyObject * obj,int default_value)1279 convert_optional_bool(PyObject *obj, int default_value)
1280 {
1281 long v;
1282 if (obj == Py_None) {
1283 v = default_value;
1284 }
1285 else {
1286 v = PyLong_AsLong(obj);
1287 if (v == -1 && PyErr_Occurred())
1288 return -1;
1289 }
1290 return v != 0;
1291 }
1292
1293 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1294 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1295 PyObject *errors, int newline_changed)
1296 {
1297 /* Use existing settings where new settings are not specified */
1298 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1299 return 0; // no change
1300 }
1301
1302 if (encoding == Py_None) {
1303 encoding = self->encoding;
1304 if (errors == Py_None) {
1305 errors = self->errors;
1306 }
1307 }
1308 else if (errors == Py_None) {
1309 errors = _PyUnicode_FromId(&PyId_strict);
1310 if (errors == NULL) {
1311 return -1;
1312 }
1313 }
1314
1315 const char *c_errors = PyUnicode_AsUTF8(errors);
1316 if (c_errors == NULL) {
1317 return -1;
1318 }
1319
1320 // Create new encoder & decoder
1321 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1322 PyUnicode_AsUTF8(encoding), "codecs.open()");
1323 if (codec_info == NULL) {
1324 return -1;
1325 }
1326 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328 Py_DECREF(codec_info);
1329 return -1;
1330 }
1331 Py_DECREF(codec_info);
1332
1333 Py_INCREF(encoding);
1334 Py_INCREF(errors);
1335 Py_SETREF(self->encoding, encoding);
1336 Py_SETREF(self->errors, errors);
1337
1338 return _textiowrapper_fix_encoder_state(self);
1339 }
1340
1341 /*[clinic input]
1342 _io.TextIOWrapper.reconfigure
1343 *
1344 encoding: object = None
1345 errors: object = None
1346 newline as newline_obj: object(c_default="NULL") = None
1347 line_buffering as line_buffering_obj: object = None
1348 write_through as write_through_obj: object = None
1349
1350 Reconfigure the text stream with new parameters.
1351
1352 This also does an implicit stream flush.
1353
1354 [clinic start generated code]*/
1355
1356 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1357 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1358 PyObject *errors, PyObject *newline_obj,
1359 PyObject *line_buffering_obj,
1360 PyObject *write_through_obj)
1361 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1362 {
1363 int line_buffering;
1364 int write_through;
1365 const char *newline = NULL;
1366
1367 /* Check if something is in the read buffer */
1368 if (self->decoded_chars != NULL) {
1369 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1370 _unsupported("It is not possible to set the encoding or newline "
1371 "of stream after the first read");
1372 return NULL;
1373 }
1374 }
1375
1376 if (newline_obj != NULL && newline_obj != Py_None) {
1377 newline = PyUnicode_AsUTF8(newline_obj);
1378 if (newline == NULL || validate_newline(newline) < 0) {
1379 return NULL;
1380 }
1381 }
1382
1383 line_buffering = convert_optional_bool(line_buffering_obj,
1384 self->line_buffering);
1385 write_through = convert_optional_bool(write_through_obj,
1386 self->write_through);
1387 if (line_buffering < 0 || write_through < 0) {
1388 return NULL;
1389 }
1390
1391 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1392 if (res == NULL) {
1393 return NULL;
1394 }
1395 Py_DECREF(res);
1396 self->b2cratio = 0;
1397
1398 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1399 return NULL;
1400 }
1401
1402 if (textiowrapper_change_encoding(
1403 self, encoding, errors, newline_obj != NULL) < 0) {
1404 return NULL;
1405 }
1406
1407 self->line_buffering = line_buffering;
1408 self->write_through = write_through;
1409 Py_RETURN_NONE;
1410 }
1411
1412 static int
textiowrapper_clear(textio * self)1413 textiowrapper_clear(textio *self)
1414 {
1415 self->ok = 0;
1416 Py_CLEAR(self->buffer);
1417 Py_CLEAR(self->encoding);
1418 Py_CLEAR(self->encoder);
1419 Py_CLEAR(self->decoder);
1420 Py_CLEAR(self->readnl);
1421 Py_CLEAR(self->decoded_chars);
1422 Py_CLEAR(self->pending_bytes);
1423 Py_CLEAR(self->snapshot);
1424 Py_CLEAR(self->errors);
1425 Py_CLEAR(self->raw);
1426
1427 Py_CLEAR(self->dict);
1428 return 0;
1429 }
1430
1431 static void
textiowrapper_dealloc(textio * self)1432 textiowrapper_dealloc(textio *self)
1433 {
1434 self->finalizing = 1;
1435 if (_PyIOBase_finalize((PyObject *) self) < 0)
1436 return;
1437 self->ok = 0;
1438 _PyObject_GC_UNTRACK(self);
1439 if (self->weakreflist != NULL)
1440 PyObject_ClearWeakRefs((PyObject *)self);
1441 textiowrapper_clear(self);
1442 Py_TYPE(self)->tp_free((PyObject *)self);
1443 }
1444
1445 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1446 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1447 {
1448 Py_VISIT(self->buffer);
1449 Py_VISIT(self->encoding);
1450 Py_VISIT(self->encoder);
1451 Py_VISIT(self->decoder);
1452 Py_VISIT(self->readnl);
1453 Py_VISIT(self->decoded_chars);
1454 Py_VISIT(self->pending_bytes);
1455 Py_VISIT(self->snapshot);
1456 Py_VISIT(self->errors);
1457 Py_VISIT(self->raw);
1458
1459 Py_VISIT(self->dict);
1460 return 0;
1461 }
1462
1463 static PyObject *
1464 textiowrapper_closed_get(textio *self, void *context);
1465
/* Return NULL from the enclosing function, with ValueError set, when the
 * stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper with a cached raw FileIO object the closed flag is
 * read directly through _PyFileIO_closed(); otherwise the `closed` getter
 * is consulted.  Any other type goes through _PyIOBase_check_closed().
 *
 * The argument is parenthesised at every use and the temporaries carry a
 * leading underscore so that they cannot capture names used in `self`.
 */
#define CHECK_CLOSED(self) \
    do { \
        int _r; \
        PyObject *_res; \
        if (Py_IS_TYPE((self), &PyTextIOWrapper_Type)) { \
            if ((self)->raw != NULL) \
                _r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                _r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_r < 0) \
                    return NULL; \
            } \
            if (_r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1492
/* Return NULL from the enclosing function, with ValueError set, unless
 * the object has been successfully initialised (self->ok > 0).
 * Wrapped in do { } while (0) so that it expands to a single statement
 * and cannot pair with a following `else`.
 */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1499
/* Return NULL from the enclosing function, with ValueError set, unless
 * the object is initialised and its underlying buffer has not been
 * detached.  Wrapped in do { } while (0) so that it expands to a single
 * statement and cannot pair with a following `else`.
 */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1507
/* Variant of CHECK_ATTACHED for functions returning int: returns -1, with
 * ValueError set, when the object is uninitialised or detached.
 * Wrapped in do { } while (0) so that it expands to a single statement
 * and cannot pair with a following `else`.
 */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1518
1519
1520 /*[clinic input]
1521 _io.TextIOWrapper.detach
1522 [clinic start generated code]*/
1523
1524 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1525 _io_TextIOWrapper_detach_impl(textio *self)
1526 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1527 {
1528 PyObject *buffer, *res;
1529 CHECK_ATTACHED(self);
1530 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
1531 if (res == NULL)
1532 return NULL;
1533 Py_DECREF(res);
1534 buffer = self->buffer;
1535 self->buffer = NULL;
1536 self->detached = 1;
1537 return buffer;
1538 }
1539
1540 /* Flush the internal write buffer. This doesn't explicitly flush the
1541 underlying buffered object, though. */
1542 static int
_textiowrapper_writeflush(textio * self)1543 _textiowrapper_writeflush(textio *self)
1544 {
1545 if (self->pending_bytes == NULL)
1546 return 0;
1547
1548 PyObject *pending = self->pending_bytes;
1549 PyObject *b;
1550
1551 if (PyBytes_Check(pending)) {
1552 b = pending;
1553 Py_INCREF(b);
1554 }
1555 else if (PyUnicode_Check(pending)) {
1556 assert(PyUnicode_IS_ASCII(pending));
1557 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1558 b = PyBytes_FromStringAndSize(
1559 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1560 if (b == NULL) {
1561 return -1;
1562 }
1563 }
1564 else {
1565 assert(PyList_Check(pending));
1566 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1567 if (b == NULL) {
1568 return -1;
1569 }
1570
1571 char *buf = PyBytes_AsString(b);
1572 Py_ssize_t pos = 0;
1573
1574 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1575 PyObject *obj = PyList_GET_ITEM(pending, i);
1576 char *src;
1577 Py_ssize_t len;
1578 if (PyUnicode_Check(obj)) {
1579 assert(PyUnicode_IS_ASCII(obj));
1580 src = PyUnicode_DATA(obj);
1581 len = PyUnicode_GET_LENGTH(obj);
1582 }
1583 else {
1584 assert(PyBytes_Check(obj));
1585 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1586 Py_DECREF(b);
1587 return -1;
1588 }
1589 }
1590 memcpy(buf + pos, src, len);
1591 pos += len;
1592 }
1593 assert(pos == self->pending_bytes_count);
1594 }
1595
1596 self->pending_bytes_count = 0;
1597 self->pending_bytes = NULL;
1598 Py_DECREF(pending);
1599
1600 PyObject *ret;
1601 do {
1602 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
1603 } while (ret == NULL && _PyIO_trap_eintr());
1604 Py_DECREF(b);
1605 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1606 // when an error occurred.
1607 if (ret == NULL)
1608 return -1;
1609 Py_DECREF(ret);
1610 return 0;
1611 }
1612
1613 /*[clinic input]
1614 _io.TextIOWrapper.write
1615 text: unicode
1616 /
1617 [clinic start generated code]*/
1618
1619 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1620 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1621 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1622 {
1623 PyObject *ret;
1624 PyObject *b;
1625 Py_ssize_t textlen;
1626 int haslf = 0;
1627 int needflush = 0, text_needflush = 0;
1628
1629 if (PyUnicode_READY(text) == -1)
1630 return NULL;
1631
1632 CHECK_ATTACHED(self);
1633 CHECK_CLOSED(self);
1634
1635 if (self->encoder == NULL)
1636 return _unsupported("not writable");
1637
1638 Py_INCREF(text);
1639
1640 textlen = PyUnicode_GET_LENGTH(text);
1641
1642 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1643 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1644 haslf = 1;
1645
1646 if (haslf && self->writetranslate && self->writenl != NULL) {
1647 PyObject *newtext = _PyObject_CallMethodId(
1648 text, &PyId_replace, "ss", "\n", self->writenl);
1649 Py_DECREF(text);
1650 if (newtext == NULL)
1651 return NULL;
1652 text = newtext;
1653 }
1654
1655 if (self->write_through)
1656 text_needflush = 1;
1657 if (self->line_buffering &&
1658 (haslf ||
1659 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1660 needflush = 1;
1661
1662 /* XXX What if we were just reading? */
1663 if (self->encodefunc != NULL) {
1664 if (PyUnicode_IS_ASCII(text) &&
1665 // See bpo-43260
1666 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1667 is_asciicompat_encoding(self->encodefunc)) {
1668 b = text;
1669 Py_INCREF(b);
1670 }
1671 else {
1672 b = (*self->encodefunc)((PyObject *) self, text);
1673 }
1674 self->encoding_start_of_stream = 0;
1675 }
1676 else {
1677 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
1678 }
1679
1680 Py_DECREF(text);
1681 if (b == NULL)
1682 return NULL;
1683 if (b != text && !PyBytes_Check(b)) {
1684 PyErr_Format(PyExc_TypeError,
1685 "encoder should return a bytes object, not '%.200s'",
1686 Py_TYPE(b)->tp_name);
1687 Py_DECREF(b);
1688 return NULL;
1689 }
1690
1691 Py_ssize_t bytes_len;
1692 if (b == text) {
1693 bytes_len = PyUnicode_GET_LENGTH(b);
1694 }
1695 else {
1696 bytes_len = PyBytes_GET_SIZE(b);
1697 }
1698
1699 if (self->pending_bytes == NULL) {
1700 self->pending_bytes_count = 0;
1701 self->pending_bytes = b;
1702 }
1703 else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1704 // Prevent to concatenate more than chunk_size data.
1705 if (_textiowrapper_writeflush(self) < 0) {
1706 Py_DECREF(b);
1707 return NULL;
1708 }
1709 self->pending_bytes = b;
1710 }
1711 else if (!PyList_CheckExact(self->pending_bytes)) {
1712 PyObject *list = PyList_New(2);
1713 if (list == NULL) {
1714 Py_DECREF(b);
1715 return NULL;
1716 }
1717 PyList_SET_ITEM(list, 0, self->pending_bytes);
1718 PyList_SET_ITEM(list, 1, b);
1719 self->pending_bytes = list;
1720 }
1721 else {
1722 if (PyList_Append(self->pending_bytes, b) < 0) {
1723 Py_DECREF(b);
1724 return NULL;
1725 }
1726 Py_DECREF(b);
1727 }
1728
1729 self->pending_bytes_count += bytes_len;
1730 if (self->pending_bytes_count >= self->chunk_size || needflush ||
1731 text_needflush) {
1732 if (_textiowrapper_writeflush(self) < 0)
1733 return NULL;
1734 }
1735
1736 if (needflush) {
1737 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
1738 if (ret == NULL)
1739 return NULL;
1740 Py_DECREF(ret);
1741 }
1742
1743 textiowrapper_set_decoded_chars(self, NULL);
1744 Py_CLEAR(self->snapshot);
1745
1746 if (self->decoder) {
1747 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
1748 if (ret == NULL)
1749 return NULL;
1750 Py_DECREF(ret);
1751 }
1752
1753 return PyLong_FromSsize_t(textlen);
1754 }
1755
1756 /* Steal a reference to chars and store it in the decoded_char buffer;
1757 */
1758 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1759 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1760 {
1761 Py_XSETREF(self->decoded_chars, chars);
1762 self->decoded_chars_used = 0;
1763 }
1764
1765 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1766 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1767 {
1768 PyObject *chars;
1769 Py_ssize_t avail;
1770
1771 if (self->decoded_chars == NULL)
1772 return PyUnicode_FromStringAndSize(NULL, 0);
1773
1774 /* decoded_chars is guaranteed to be "ready". */
1775 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1776 - self->decoded_chars_used);
1777
1778 assert(avail >= 0);
1779
1780 if (n < 0 || n > avail)
1781 n = avail;
1782
1783 if (self->decoded_chars_used > 0 || n < avail) {
1784 chars = PyUnicode_Substring(self->decoded_chars,
1785 self->decoded_chars_used,
1786 self->decoded_chars_used + n);
1787 if (chars == NULL)
1788 return NULL;
1789 }
1790 else {
1791 chars = self->decoded_chars;
1792 Py_INCREF(chars);
1793 }
1794
1795 self->decoded_chars_used += n;
1796 return chars;
1797 }
1798
1799 /* Read and decode the next chunk of data from the BufferedReader.
1800 */
1801 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1802 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1803 {
1804 PyObject *dec_buffer = NULL;
1805 PyObject *dec_flags = NULL;
1806 PyObject *input_chunk = NULL;
1807 Py_buffer input_chunk_buf;
1808 PyObject *decoded_chars, *chunk_size;
1809 Py_ssize_t nbytes, nchars;
1810 int eof;
1811
1812 /* The return value is True unless EOF was reached. The decoded string is
1813 * placed in self._decoded_chars (replacing its previous value). The
1814 * entire input chunk is sent to the decoder, though some of it may remain
1815 * buffered in the decoder, yet to be converted.
1816 */
1817
1818 if (self->decoder == NULL) {
1819 _unsupported("not readable");
1820 return -1;
1821 }
1822
1823 if (self->telling) {
1824 /* To prepare for tell(), we need to snapshot a point in the file
1825 * where the decoder's input buffer is empty.
1826 */
1827 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1828 _PyIO_str_getstate);
1829 if (state == NULL)
1830 return -1;
1831 /* Given this, we know there was a valid snapshot point
1832 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1833 */
1834 if (!PyTuple_Check(state)) {
1835 PyErr_SetString(PyExc_TypeError,
1836 "illegal decoder state");
1837 Py_DECREF(state);
1838 return -1;
1839 }
1840 if (!PyArg_ParseTuple(state,
1841 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1842 {
1843 Py_DECREF(state);
1844 return -1;
1845 }
1846
1847 if (!PyBytes_Check(dec_buffer)) {
1848 PyErr_Format(PyExc_TypeError,
1849 "illegal decoder state: the first item should be a "
1850 "bytes object, not '%.200s'",
1851 Py_TYPE(dec_buffer)->tp_name);
1852 Py_DECREF(state);
1853 return -1;
1854 }
1855 Py_INCREF(dec_buffer);
1856 Py_INCREF(dec_flags);
1857 Py_DECREF(state);
1858 }
1859
1860 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1861 if (size_hint > 0) {
1862 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1863 }
1864 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1865 if (chunk_size == NULL)
1866 goto fail;
1867
1868 input_chunk = PyObject_CallMethodOneArg(self->buffer,
1869 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1870 chunk_size);
1871 Py_DECREF(chunk_size);
1872 if (input_chunk == NULL)
1873 goto fail;
1874
1875 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1876 PyErr_Format(PyExc_TypeError,
1877 "underlying %s() should have returned a bytes-like object, "
1878 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1879 Py_TYPE(input_chunk)->tp_name);
1880 goto fail;
1881 }
1882
1883 nbytes = input_chunk_buf.len;
1884 eof = (nbytes == 0);
1885
1886 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1887 PyBuffer_Release(&input_chunk_buf);
1888 if (decoded_chars == NULL)
1889 goto fail;
1890
1891 textiowrapper_set_decoded_chars(self, decoded_chars);
1892 nchars = PyUnicode_GET_LENGTH(decoded_chars);
1893 if (nchars > 0)
1894 self->b2cratio = (double) nbytes / nchars;
1895 else
1896 self->b2cratio = 0.0;
1897 if (nchars > 0)
1898 eof = 0;
1899
1900 if (self->telling) {
1901 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1902 * next input to be decoded is dec_buffer + input_chunk.
1903 */
1904 PyObject *next_input = dec_buffer;
1905 PyBytes_Concat(&next_input, input_chunk);
1906 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1907 if (next_input == NULL) {
1908 goto fail;
1909 }
1910 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1911 if (snapshot == NULL) {
1912 dec_flags = NULL;
1913 goto fail;
1914 }
1915 Py_XSETREF(self->snapshot, snapshot);
1916 }
1917 Py_DECREF(input_chunk);
1918
1919 return (eof == 0);
1920
1921 fail:
1922 Py_XDECREF(dec_buffer);
1923 Py_XDECREF(dec_flags);
1924 Py_XDECREF(input_chunk);
1925 return -1;
1926 }
1927
1928 /*[clinic input]
1929 _io.TextIOWrapper.read
1930 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1931 /
1932 [clinic start generated code]*/
1933
1934 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1935 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1936 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1937 {
1938 PyObject *result = NULL, *chunks = NULL;
1939
1940 CHECK_ATTACHED(self);
1941 CHECK_CLOSED(self);
1942
1943 if (self->decoder == NULL)
1944 return _unsupported("not readable");
1945
1946 if (_textiowrapper_writeflush(self) < 0)
1947 return NULL;
1948
1949 if (n < 0) {
1950 /* Read everything */
1951 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
1952 PyObject *decoded;
1953 if (bytes == NULL)
1954 goto fail;
1955
1956 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1957 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1958 bytes, 1);
1959 else
1960 decoded = PyObject_CallMethodObjArgs(
1961 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1962 Py_DECREF(bytes);
1963 if (check_decoded(decoded) < 0)
1964 goto fail;
1965
1966 result = textiowrapper_get_decoded_chars(self, -1);
1967
1968 if (result == NULL) {
1969 Py_DECREF(decoded);
1970 return NULL;
1971 }
1972
1973 PyUnicode_AppendAndDel(&result, decoded);
1974 if (result == NULL)
1975 goto fail;
1976
1977 textiowrapper_set_decoded_chars(self, NULL);
1978 Py_CLEAR(self->snapshot);
1979 return result;
1980 }
1981 else {
1982 int res = 1;
1983 Py_ssize_t remaining = n;
1984
1985 result = textiowrapper_get_decoded_chars(self, n);
1986 if (result == NULL)
1987 goto fail;
1988 if (PyUnicode_READY(result) == -1)
1989 goto fail;
1990 remaining -= PyUnicode_GET_LENGTH(result);
1991
1992 /* Keep reading chunks until we have n characters to return */
1993 while (remaining > 0) {
1994 res = textiowrapper_read_chunk(self, remaining);
1995 if (res < 0) {
1996 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1997 when EINTR occurs so we needn't do it ourselves. */
1998 if (_PyIO_trap_eintr()) {
1999 continue;
2000 }
2001 goto fail;
2002 }
2003 if (res == 0) /* EOF */
2004 break;
2005 if (chunks == NULL) {
2006 chunks = PyList_New(0);
2007 if (chunks == NULL)
2008 goto fail;
2009 }
2010 if (PyUnicode_GET_LENGTH(result) > 0 &&
2011 PyList_Append(chunks, result) < 0)
2012 goto fail;
2013 Py_DECREF(result);
2014 result = textiowrapper_get_decoded_chars(self, remaining);
2015 if (result == NULL)
2016 goto fail;
2017 remaining -= PyUnicode_GET_LENGTH(result);
2018 }
2019 if (chunks != NULL) {
2020 if (result != NULL && PyList_Append(chunks, result) < 0)
2021 goto fail;
2022 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
2023 if (result == NULL)
2024 goto fail;
2025 Py_CLEAR(chunks);
2026 }
2027 return result;
2028 }
2029 fail:
2030 Py_XDECREF(result);
2031 Py_XDECREF(chunks);
2032 return NULL;
2033 }
2034
2035
2036 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2037 that is to the NUL character. Otherwise the function will produce
2038 incorrect results. */
2039 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2040 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2041 {
2042 if (kind == PyUnicode_1BYTE_KIND) {
2043 assert(ch < 256);
2044 return (char *) memchr((const void *) s, (char) ch, end - s);
2045 }
2046 for (;;) {
2047 while (PyUnicode_READ(kind, s, 0) > ch)
2048 s += kind;
2049 if (PyUnicode_READ(kind, s, 0) == ch)
2050 return s;
2051 if (s == end)
2052 return NULL;
2053 s += kind;
2054 }
2055 }
2056
2057 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2058 _PyIO_find_line_ending(
2059 int translated, int universal, PyObject *readnl,
2060 int kind, const char *start, const char *end, Py_ssize_t *consumed)
2061 {
2062 Py_ssize_t len = (end - start)/kind;
2063
2064 if (translated) {
2065 /* Newlines are already translated, only search for \n */
2066 const char *pos = find_control_char(kind, start, end, '\n');
2067 if (pos != NULL)
2068 return (pos - start)/kind + 1;
2069 else {
2070 *consumed = len;
2071 return -1;
2072 }
2073 }
2074 else if (universal) {
2075 /* Universal newline search. Find any of \r, \r\n, \n
2076 * The decoder ensures that \r\n are not split in two pieces
2077 */
2078 const char *s = start;
2079 for (;;) {
2080 Py_UCS4 ch;
2081 /* Fast path for non-control chars. The loop always ends
2082 since the Unicode string is NUL-terminated. */
2083 while (PyUnicode_READ(kind, s, 0) > '\r')
2084 s += kind;
2085 if (s >= end) {
2086 *consumed = len;
2087 return -1;
2088 }
2089 ch = PyUnicode_READ(kind, s, 0);
2090 s += kind;
2091 if (ch == '\n')
2092 return (s - start)/kind;
2093 if (ch == '\r') {
2094 if (PyUnicode_READ(kind, s, 0) == '\n')
2095 return (s - start)/kind + 1;
2096 else
2097 return (s - start)/kind;
2098 }
2099 }
2100 }
2101 else {
2102 /* Non-universal mode. */
2103 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2104 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2105 /* Assume that readnl is an ASCII character. */
2106 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2107 if (readnl_len == 1) {
2108 const char *pos = find_control_char(kind, start, end, nl[0]);
2109 if (pos != NULL)
2110 return (pos - start)/kind + 1;
2111 *consumed = len;
2112 return -1;
2113 }
2114 else {
2115 const char *s = start;
2116 const char *e = end - (readnl_len - 1)*kind;
2117 const char *pos;
2118 if (e < s)
2119 e = s;
2120 while (s < e) {
2121 Py_ssize_t i;
2122 const char *pos = find_control_char(kind, s, end, nl[0]);
2123 if (pos == NULL || pos >= e)
2124 break;
2125 for (i = 1; i < readnl_len; i++) {
2126 if (PyUnicode_READ(kind, pos, i) != nl[i])
2127 break;
2128 }
2129 if (i == readnl_len)
2130 return (pos - start)/kind + readnl_len;
2131 s = pos + kind;
2132 }
2133 pos = find_control_char(kind, e, end, nl[0]);
2134 if (pos == NULL)
2135 *consumed = len;
2136 else
2137 *consumed = (pos - start)/kind;
2138 return -1;
2139 }
2140 }
2141 }
2142
/* Read one line from the stream and return it as a new str reference.

   `limit` caps the number of characters returned; a negative value means
   no cap.  Pending writes are flushed before reading.  At end of file the
   data accumulated so far is returned (the empty string if there is none).
   Returns NULL with an exception set on error. */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    /* line:      string currently being scanned (owned reference)
       chunks:    list of already-scanned pieces that contain no line ending
       remaining: tail of the previous buffer that could not be put aside
                  yet and must be prepended to the next decoded chunk */
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Total number of characters already stored in `chunks`. */
    chunked = 0;

    while (1) {
        const char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer directly, starting at the first
               character that has not been handed out yet. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover from the previous buffer;
               offset_to_buffer records how many characters of `line`
               come before the start of the current decoded buffer. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        /* A non-negative result is the offset, relative to `start`, just
           past the line ending; otherwise `consumed` is the number of
           characters that can safely be put aside. */
        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            endpos += start;
            /* Found a line ending, but truncate if it lies past the limit */
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        /* Only take a substring when `line` is not already exactly the
           result. */
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Leftover that was never followed by more data (loop ended at
           EOF): it belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2305
2306 /*[clinic input]
2307 _io.TextIOWrapper.readline
2308 size: Py_ssize_t = -1
2309 /
2310 [clinic start generated code]*/
2311
2312 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2313 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2314 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2315 {
2316 CHECK_ATTACHED(self);
2317 return _textiowrapper_readline(self, size);
2318 }
2319
2320 /* Seek and Tell */
2321
/* Unpacked form of the opaque integer exchanged by tell() and seek(). */
typedef struct {
    Py_off_t start_pos;   /* position in the underlying buffer of the
                             "safe start point" to seek back to */
    int dec_flags;        /* flags handed to decoder.setstate() there */
    int bytes_to_feed;    /* bytes to read from the buffer and feed to
                             the decoder after seeking */
    int chars_to_skip;    /* decoded characters to skip after feeding */
    char need_eof;        /* nonzero: pass final=True when decoding */
} cookie_type;
2329
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   respectively.
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
*/

/* Sum of the field sizes: the fields are laid out back to back. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2361
2362 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2363 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2364 {
2365 unsigned char buffer[COOKIE_BUF_LEN];
2366 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2367 if (cookieLong == NULL)
2368 return -1;
2369
2370 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2371 PY_LITTLE_ENDIAN, 0) < 0) {
2372 Py_DECREF(cookieLong);
2373 return -1;
2374 }
2375 Py_DECREF(cookieLong);
2376
2377 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2378 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2379 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2380 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2381 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2382
2383 return 0;
2384 }
2385
2386 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2387 textiowrapper_build_cookie(cookie_type *cookie)
2388 {
2389 unsigned char buffer[COOKIE_BUF_LEN];
2390
2391 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2392 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2393 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2394 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2395 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2396
2397 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2398 PY_LITTLE_ENDIAN, 0);
2399 }
2400
2401 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2402 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2403 {
2404 PyObject *res;
2405 /* When seeking to the start of the stream, we call decoder.reset()
2406 rather than decoder.getstate().
2407 This is for a few decoders such as utf-16 for which the state value
2408 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2409 utf-16, that we are expecting a BOM).
2410 */
2411 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2412 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
2413 else
2414 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2415 "((yi))", "", cookie->dec_flags);
2416 if (res == NULL)
2417 return -1;
2418 Py_DECREF(res);
2419 return 0;
2420 }
2421
2422 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2423 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2424 {
2425 PyObject *res;
2426 if (start_of_stream) {
2427 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
2428 self->encoding_start_of_stream = 1;
2429 }
2430 else {
2431 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
2432 _PyLong_Zero);
2433 self->encoding_start_of_stream = 0;
2434 }
2435 if (res == NULL)
2436 return -1;
2437 Py_DECREF(res);
2438 return 0;
2439 }
2440
2441 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2442 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2443 {
2444 /* Same as _textiowrapper_decoder_setstate() above. */
2445 return _textiowrapper_encoder_reset(
2446 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2447 }
2448
2449 /*[clinic input]
2450 _io.TextIOWrapper.seek
2451 cookie as cookieObj: object
2452 whence: int = 0
2453 /
2454 [clinic start generated code]*/
2455
2456 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2457 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2458 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2459 {
2460 PyObject *posobj;
2461 cookie_type cookie;
2462 PyObject *res;
2463 int cmp;
2464 PyObject *snapshot;
2465
2466 CHECK_ATTACHED(self);
2467 CHECK_CLOSED(self);
2468
2469 Py_INCREF(cookieObj);
2470
2471 if (!self->seekable) {
2472 _unsupported("underlying stream is not seekable");
2473 goto fail;
2474 }
2475
2476 switch (whence) {
2477 case SEEK_CUR:
2478 /* seek relative to current position */
2479 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2480 if (cmp < 0)
2481 goto fail;
2482
2483 if (cmp == 0) {
2484 _unsupported("can't do nonzero cur-relative seeks");
2485 goto fail;
2486 }
2487
2488 /* Seeking to the current position should attempt to
2489 * sync the underlying buffer with the current position.
2490 */
2491 Py_DECREF(cookieObj);
2492 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
2493 if (cookieObj == NULL)
2494 goto fail;
2495 break;
2496
2497 case SEEK_END:
2498 /* seek relative to end of file */
2499 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2500 if (cmp < 0)
2501 goto fail;
2502
2503 if (cmp == 0) {
2504 _unsupported("can't do nonzero end-relative seeks");
2505 goto fail;
2506 }
2507
2508 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
2509 if (res == NULL)
2510 goto fail;
2511 Py_DECREF(res);
2512
2513 textiowrapper_set_decoded_chars(self, NULL);
2514 Py_CLEAR(self->snapshot);
2515 if (self->decoder) {
2516 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
2517 if (res == NULL)
2518 goto fail;
2519 Py_DECREF(res);
2520 }
2521
2522 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2523 Py_CLEAR(cookieObj);
2524 if (res == NULL)
2525 goto fail;
2526 if (self->encoder) {
2527 /* If seek() == 0, we are at the start of stream, otherwise not */
2528 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2529 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2530 Py_DECREF(res);
2531 goto fail;
2532 }
2533 }
2534 return res;
2535
2536 case SEEK_SET:
2537 break;
2538
2539 default:
2540 PyErr_Format(PyExc_ValueError,
2541 "invalid whence (%d, should be %d, %d or %d)", whence,
2542 SEEK_SET, SEEK_CUR, SEEK_END);
2543 goto fail;
2544 }
2545
2546 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2547 if (cmp < 0)
2548 goto fail;
2549
2550 if (cmp == 1) {
2551 PyErr_Format(PyExc_ValueError,
2552 "negative seek position %R", cookieObj);
2553 goto fail;
2554 }
2555
2556 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2557 if (res == NULL)
2558 goto fail;
2559 Py_DECREF(res);
2560
2561 /* The strategy of seek() is to go back to the safe start point
2562 * and replay the effect of read(chars_to_skip) from there.
2563 */
2564 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2565 goto fail;
2566
2567 /* Seek back to the safe start point. */
2568 posobj = PyLong_FromOff_t(cookie.start_pos);
2569 if (posobj == NULL)
2570 goto fail;
2571 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
2572 Py_DECREF(posobj);
2573 if (res == NULL)
2574 goto fail;
2575 Py_DECREF(res);
2576
2577 textiowrapper_set_decoded_chars(self, NULL);
2578 Py_CLEAR(self->snapshot);
2579
2580 /* Restore the decoder to its state from the safe start point. */
2581 if (self->decoder) {
2582 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2583 goto fail;
2584 }
2585
2586 if (cookie.chars_to_skip) {
2587 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2588 PyObject *input_chunk = _PyObject_CallMethodId(
2589 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2590 PyObject *decoded;
2591
2592 if (input_chunk == NULL)
2593 goto fail;
2594
2595 if (!PyBytes_Check(input_chunk)) {
2596 PyErr_Format(PyExc_TypeError,
2597 "underlying read() should have returned a bytes "
2598 "object, not '%.200s'",
2599 Py_TYPE(input_chunk)->tp_name);
2600 Py_DECREF(input_chunk);
2601 goto fail;
2602 }
2603
2604 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2605 if (snapshot == NULL) {
2606 goto fail;
2607 }
2608 Py_XSETREF(self->snapshot, snapshot);
2609
2610 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2611 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2612
2613 if (check_decoded(decoded) < 0)
2614 goto fail;
2615
2616 textiowrapper_set_decoded_chars(self, decoded);
2617
2618 /* Skip chars_to_skip of the decoded characters. */
2619 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2620 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2621 goto fail;
2622 }
2623 self->decoded_chars_used = cookie.chars_to_skip;
2624 }
2625 else {
2626 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2627 if (snapshot == NULL)
2628 goto fail;
2629 Py_XSETREF(self->snapshot, snapshot);
2630 }
2631
2632 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2633 if (self->encoder) {
2634 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2635 goto fail;
2636 }
2637 return cookieObj;
2638 fail:
2639 Py_XDECREF(cookieObj);
2640 return NULL;
2641
2642 }
2643
2644 /*[clinic input]
2645 _io.TextIOWrapper.tell
2646 [clinic start generated code]*/
2647
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* tell(): return the current stream position as an opaque integer.

       Without a decoder or snapshot the underlying buffer's tell() result
       is returned as is.  Otherwise the position is reconstructed as a
       cookie (see cookie_type): a safe start point in the buffer plus the
       number of bytes to feed and characters to skip from there.  The
       decoder is replayed over the snapshot bytes to find that point and
       its original state is restored before returning.

       Returns a new reference, or NULL with an exception set. */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    const char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        /* Nothing decoded is pending: the buffer position is the answer. */
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    Py_DECREF(posobj);
    /* Conversion failures are only detectable through the error indicator */
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference into the snapshot tuple. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
                                             _PyIO_str_getstate);
    if (saved_state == NULL)
        goto fail;

/* Fetch the decoder state into dec_buffer_len (length of its pending bytes)
   and dec_flags; jumps to `fail` if the state is malformed. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
            _PyIO_str_getstate); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* Feed `len` bytes starting at `start` to the decoder and store the number
   of characters it produced in `res`; jumps to `fail` on error. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* No usable point found: start over from the snapshot itself */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                        "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Put the decoder back into the state it had on entry. */
    res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    if (saved_state) {
        /* Restore the decoder state without losing the pending exception;
           a failure while restoring is chained onto it. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2874
2875 /*[clinic input]
2876 _io.TextIOWrapper.truncate
2877 pos: object = None
2878 /
2879 [clinic start generated code]*/
2880
2881 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2882 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2883 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2884 {
2885 PyObject *res;
2886
2887 CHECK_ATTACHED(self)
2888
2889 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
2890 if (res == NULL)
2891 return NULL;
2892 Py_DECREF(res);
2893
2894 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
2895 }
2896
2897 static PyObject *
textiowrapper_repr(textio * self)2898 textiowrapper_repr(textio *self)
2899 {
2900 PyObject *nameobj, *modeobj, *res, *s;
2901 int status;
2902
2903 CHECK_INITIALIZED(self);
2904
2905 res = PyUnicode_FromString("<_io.TextIOWrapper");
2906 if (res == NULL)
2907 return NULL;
2908
2909 status = Py_ReprEnter((PyObject *)self);
2910 if (status != 0) {
2911 if (status > 0) {
2912 PyErr_Format(PyExc_RuntimeError,
2913 "reentrant call inside %s.__repr__",
2914 Py_TYPE(self)->tp_name);
2915 }
2916 goto error;
2917 }
2918 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2919 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2920 goto error;
2921 }
2922 /* Ignore ValueError raised if the underlying stream was detached */
2923 PyErr_Clear();
2924 }
2925 if (nameobj != NULL) {
2926 s = PyUnicode_FromFormat(" name=%R", nameobj);
2927 Py_DECREF(nameobj);
2928 if (s == NULL)
2929 goto error;
2930 PyUnicode_AppendAndDel(&res, s);
2931 if (res == NULL)
2932 goto error;
2933 }
2934 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2935 goto error;
2936 }
2937 if (modeobj != NULL) {
2938 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2939 Py_DECREF(modeobj);
2940 if (s == NULL)
2941 goto error;
2942 PyUnicode_AppendAndDel(&res, s);
2943 if (res == NULL)
2944 goto error;
2945 }
2946 s = PyUnicode_FromFormat("%U encoding=%R>",
2947 res, self->encoding);
2948 Py_DECREF(res);
2949 if (status == 0) {
2950 Py_ReprLeave((PyObject *)self);
2951 }
2952 return s;
2953
2954 error:
2955 Py_XDECREF(res);
2956 if (status == 0) {
2957 Py_ReprLeave((PyObject *)self);
2958 }
2959 return NULL;
2960 }
2961
2962
2963 /* Inquiries */
2964
2965 /*[clinic input]
2966 _io.TextIOWrapper.fileno
2967 [clinic start generated code]*/
2968
2969 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2970 _io_TextIOWrapper_fileno_impl(textio *self)
2971 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2972 {
2973 CHECK_ATTACHED(self);
2974 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
2975 }
2976
2977 /*[clinic input]
2978 _io.TextIOWrapper.seekable
2979 [clinic start generated code]*/
2980
2981 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2982 _io_TextIOWrapper_seekable_impl(textio *self)
2983 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2984 {
2985 CHECK_ATTACHED(self);
2986 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
2987 }
2988
2989 /*[clinic input]
2990 _io.TextIOWrapper.readable
2991 [clinic start generated code]*/
2992
2993 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2994 _io_TextIOWrapper_readable_impl(textio *self)
2995 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2996 {
2997 CHECK_ATTACHED(self);
2998 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
2999 }
3000
3001 /*[clinic input]
3002 _io.TextIOWrapper.writable
3003 [clinic start generated code]*/
3004
3005 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)3006 _io_TextIOWrapper_writable_impl(textio *self)
3007 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
3008 {
3009 CHECK_ATTACHED(self);
3010 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
3011 }
3012
3013 /*[clinic input]
3014 _io.TextIOWrapper.isatty
3015 [clinic start generated code]*/
3016
3017 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)3018 _io_TextIOWrapper_isatty_impl(textio *self)
3019 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
3020 {
3021 CHECK_ATTACHED(self);
3022 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
3023 }
3024
3025 /*[clinic input]
3026 _io.TextIOWrapper.flush
3027 [clinic start generated code]*/
3028
3029 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3030 _io_TextIOWrapper_flush_impl(textio *self)
3031 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3032 {
3033 CHECK_ATTACHED(self);
3034 CHECK_CLOSED(self);
3035 self->telling = self->seekable;
3036 if (_textiowrapper_writeflush(self) < 0)
3037 return NULL;
3038 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
3039 }
3040
3041 /*[clinic input]
3042 _io.TextIOWrapper.close
3043 [clinic start generated code]*/
3044
3045 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3046 _io_TextIOWrapper_close_impl(textio *self)
3047 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3048 {
3049 PyObject *res;
3050 int r;
3051 CHECK_ATTACHED(self);
3052
3053 res = textiowrapper_closed_get(self, NULL);
3054 if (res == NULL)
3055 return NULL;
3056 r = PyObject_IsTrue(res);
3057 Py_DECREF(res);
3058 if (r < 0)
3059 return NULL;
3060
3061 if (r > 0) {
3062 Py_RETURN_NONE; /* stream already closed */
3063 }
3064 else {
3065 PyObject *exc = NULL, *val, *tb;
3066 if (self->finalizing) {
3067 res = _PyObject_CallMethodIdOneArg(self->buffer,
3068 &PyId__dealloc_warn,
3069 (PyObject *)self);
3070 if (res)
3071 Py_DECREF(res);
3072 else
3073 PyErr_Clear();
3074 }
3075 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
3076 if (res == NULL)
3077 PyErr_Fetch(&exc, &val, &tb);
3078 else
3079 Py_DECREF(res);
3080
3081 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
3082 if (exc != NULL) {
3083 _PyErr_ChainExceptions(exc, val, tb);
3084 Py_CLEAR(res);
3085 }
3086 return res;
3087 }
3088 }
3089
3090 static PyObject *
textiowrapper_iternext(textio * self)3091 textiowrapper_iternext(textio *self)
3092 {
3093 PyObject *line;
3094
3095 CHECK_ATTACHED(self);
3096
3097 self->telling = 0;
3098 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3099 /* Skip method call overhead for speed */
3100 line = _textiowrapper_readline(self, -1);
3101 }
3102 else {
3103 line = PyObject_CallMethodNoArgs((PyObject *)self,
3104 _PyIO_str_readline);
3105 if (line && !PyUnicode_Check(line)) {
3106 PyErr_Format(PyExc_OSError,
3107 "readline() should have returned a str object, "
3108 "not '%.200s'", Py_TYPE(line)->tp_name);
3109 Py_DECREF(line);
3110 return NULL;
3111 }
3112 }
3113
3114 if (line == NULL || PyUnicode_READY(line) == -1)
3115 return NULL;
3116
3117 if (PyUnicode_GET_LENGTH(line) == 0) {
3118 /* Reached EOF or would have blocked */
3119 Py_DECREF(line);
3120 Py_CLEAR(self->snapshot);
3121 self->telling = self->seekable;
3122 return NULL;
3123 }
3124
3125 return line;
3126 }
3127
3128 static PyObject *
textiowrapper_name_get(textio * self,void * context)3129 textiowrapper_name_get(textio *self, void *context)
3130 {
3131 CHECK_ATTACHED(self);
3132 return _PyObject_GetAttrId(self->buffer, &PyId_name);
3133 }
3134
3135 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3136 textiowrapper_closed_get(textio *self, void *context)
3137 {
3138 CHECK_ATTACHED(self);
3139 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3140 }
3141
3142 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3143 textiowrapper_newlines_get(textio *self, void *context)
3144 {
3145 PyObject *res;
3146 CHECK_ATTACHED(self);
3147 if (self->decoder == NULL ||
3148 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3149 {
3150 Py_RETURN_NONE;
3151 }
3152 return res;
3153 }
3154
3155 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3156 textiowrapper_errors_get(textio *self, void *context)
3157 {
3158 CHECK_INITIALIZED(self);
3159 Py_INCREF(self->errors);
3160 return self->errors;
3161 }
3162
3163 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3164 textiowrapper_chunk_size_get(textio *self, void *context)
3165 {
3166 CHECK_ATTACHED(self);
3167 return PyLong_FromSsize_t(self->chunk_size);
3168 }
3169
3170 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3171 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3172 {
3173 Py_ssize_t n;
3174 CHECK_ATTACHED_INT(self);
3175 if (arg == NULL) {
3176 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3177 return -1;
3178 }
3179 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3180 if (n == -1 && PyErr_Occurred())
3181 return -1;
3182 if (n <= 0) {
3183 PyErr_SetString(PyExc_ValueError,
3184 "a strictly positive integer is required");
3185 return -1;
3186 }
3187 self->chunk_size = n;
3188 return 0;
3189 }
3190
3191 #include "clinic/textio.c.h"
3192
/* Method table of IncrementalNewlineDecoder.  The *_METHODDEF entries are
   macros expected to come from the "clinic/textio.c.h" include above. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3200
/* Computed attributes of IncrementalNewlineDecoder: `newlines` has a
   getter only (no setter is registered). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3205
/* Type object for _io.IncrementalNewlineDecoder.  The type is subclassable
   (Py_TPFLAGS_BASETYPE) and does not register tp_traverse/tp_clear. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3246
3247
3248 static PyMethodDef textiowrapper_methods[] = {
3249 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3250 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3251 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3252 _IO_TEXTIOWRAPPER_READ_METHODDEF
3253 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3254 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3255 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3256
3257 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3258 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3259 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3260 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3261 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3262
3263 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3264 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3265 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3266 {NULL, NULL}
3267 };
3268
3269 static PyMemberDef textiowrapper_members[] = {
3270 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3271 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3272 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3273 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3274 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3275 {NULL}
3276 };
3277
3278 static PyGetSetDef textiowrapper_getset[] = {
3279 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3280 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3281 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3282 */
3283 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3284 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3285 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3286 (setter)textiowrapper_chunk_size_set, NULL},
3287 {NULL}
3288 };
3289
3290 PyTypeObject PyTextIOWrapper_Type = {
3291 PyVarObject_HEAD_INIT(NULL, 0)
3292 "_io.TextIOWrapper", /*tp_name*/
3293 sizeof(textio), /*tp_basicsize*/
3294 0, /*tp_itemsize*/
3295 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3296 0, /*tp_vectorcall_offset*/
3297 0, /*tp_getattr*/
3298 0, /*tps_etattr*/
3299 0, /*tp_as_async*/
3300 (reprfunc)textiowrapper_repr,/*tp_repr*/
3301 0, /*tp_as_number*/
3302 0, /*tp_as_sequence*/
3303 0, /*tp_as_mapping*/
3304 0, /*tp_hash */
3305 0, /*tp_call*/
3306 0, /*tp_str*/
3307 0, /*tp_getattro*/
3308 0, /*tp_setattro*/
3309 0, /*tp_as_buffer*/
3310 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3311 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
3312 _io_TextIOWrapper___init____doc__, /* tp_doc */
3313 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3314 (inquiry)textiowrapper_clear, /* tp_clear */
3315 0, /* tp_richcompare */
3316 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3317 0, /* tp_iter */
3318 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3319 textiowrapper_methods, /* tp_methods */
3320 textiowrapper_members, /* tp_members */
3321 textiowrapper_getset, /* tp_getset */
3322 0, /* tp_base */
3323 0, /* tp_dict */
3324 0, /* tp_descr_get */
3325 0, /* tp_descr_set */
3326 offsetof(textio, dict), /*tp_dictoffset*/
3327 _io_TextIOWrapper___init__, /* tp_init */
3328 0, /* tp_alloc */
3329 PyType_GenericNew, /* tp_new */
3330 0, /* tp_free */
3331 0, /* tp_is_gc */
3332 0, /* tp_bases */
3333 0, /* tp_mro */
3334 0, /* tp_cache */
3335 0, /* tp_subclasses */
3336 0, /* tp_weaklist */
3337 0, /* tp_del */
3338 0, /* tp_version_tag */
3339 0, /* tp_finalize */
3340 };
3341