1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_object.h"
12 #include "structmember.h"
13 #include "_iomodule.h"
14
15 /*[clinic input]
16 module _io
17 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18 class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Identifiers for the attribute and method names used in this file.
   Each one is referenced as &PyId_<name> when calling the *Id helper
   functions, e.g. _PyObject_CallMethodId(obj, &PyId_readable, NULL). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
42
43 /* TextIOBase */
44
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
52
53 static PyObject *
_unsupported(const char * message)54 _unsupported(const char *message)
55 {
56 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
59 return NULL;
60 }
61
62 PyDoc_STRVAR(textiobase_detach_doc,
63 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))70 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
71 {
72 return _unsupported("detach");
73 }
74
75 PyDoc_STRVAR(textiobase_read_doc,
76 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82 static PyObject *
textiobase_read(PyObject * self,PyObject * args)83 textiobase_read(PyObject *self, PyObject *args)
84 {
85 return _unsupported("read");
86 }
87
88 PyDoc_STRVAR(textiobase_readline_doc,
89 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)95 textiobase_readline(PyObject *self, PyObject *args)
96 {
97 return _unsupported("readline");
98 }
99
100 PyDoc_STRVAR(textiobase_write_doc,
101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106 static PyObject *
textiobase_write(PyObject * self,PyObject * args)107 textiobase_write(PyObject *self, PyObject *args)
108 {
109 return _unsupported("write");
110 }
111
112 PyDoc_STRVAR(textiobase_encoding_doc,
113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)119 textiobase_encoding_get(PyObject *self, void *context)
120 {
121 Py_RETURN_NONE;
122 }
123
124 PyDoc_STRVAR(textiobase_newlines_doc,
125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)133 textiobase_newlines_get(PyObject *self, void *context)
134 {
135 Py_RETURN_NONE;
136 }
137
138 PyDoc_STRVAR(textiobase_errors_doc,
139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144 static PyObject *
textiobase_errors_get(PyObject * self,void * context)145 textiobase_errors_get(PyObject *self, void *context)
146 {
147 Py_RETURN_NONE;
148 }
149
150
151 static PyMethodDef textiobase_methods[] = {
152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
156 {NULL, NULL}
157 };
158
159 static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
163 {NULL}
164 };
165
166 PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_vectorcall_offset*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_as_async*/
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
187 textiobase_doc, /* tp_doc */
188 0, /* tp_traverse */
189 0, /* tp_clear */
190 0, /* tp_richcompare */
191 0, /* tp_weaklistoffset */
192 0, /* tp_iter */
193 0, /* tp_iternext */
194 textiobase_methods, /* tp_methods */
195 0, /* tp_members */
196 textiobase_getset, /* tp_getset */
197 &PyIOBase_Type, /* tp_base */
198 0, /* tp_dict */
199 0, /* tp_descr_get */
200 0, /* tp_descr_set */
201 0, /* tp_dictoffset */
202 0, /* tp_init */
203 0, /* tp_alloc */
204 0, /* tp_new */
205 0, /* tp_free */
206 0, /* tp_is_gc */
207 0, /* tp_bases */
208 0, /* tp_mro */
209 0, /* tp_cache */
210 0, /* tp_subclasses */
211 0, /* tp_weaklist */
212 0, /* tp_del */
213 0, /* tp_version_tag */
214 0, /* tp_finalize */
215 };
216
217
218 /* IncrementalNewlineDecoder */
219
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped decoder.  Py_None means decode() takes its input as already
       decoded text; NULL is treated by decode() as "__init__ not called". */
    PyObject *decoder;
    /* Error handler name stored by __init__ ("strict" when omitted). */
    PyObject *errors;
    /* Set when a trailing \r was held back from the previous decode(). */
    unsigned int pendingcr: 1;
    /* Non-zero when \r and \r\n are to be rewritten as \n. */
    unsigned int translate: 1;
    /* Bitmask of the SEEN_* flags recorded so far. */
    unsigned int seennl: 3;
} nldecoder_object;
228
229 /*[clinic input]
230 _io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235 Codec used when reading a file in universal newlines mode.
236
237 It wraps another incremental decoder, translating \r\n and \r into \n.
238 It also records the types of newlines encountered. When used with
239 translate=False, it ensures that the newline sequence is returned in
240 one piece. When used with decoder=None, it expects unicode strings as
241 decode input and translates newlines without first invoking an external
242 decoder.
243 [clinic start generated code]*/
244
245 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)246 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
250 {
251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
255 self->errors = _PyUnicode_FromId(&PyId_strict);
256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
260 self->errors = errors;
261 }
262 Py_INCREF(self->errors);
263
264 self->translate = translate ? 1 : 0;
265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269 }
270
271 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)272 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
273 {
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277 }
278
279 static int
check_decoded(PyObject * decoded)280 check_decoded(PyObject *decoded)
281 {
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
295 return 0;
296 }
297
/* Bit flags stored in nldecoder_object.seennl: one bit per newline
   convention encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CRLF | SEEN_LF | SEEN_CR)
302
/* Core of IncrementalNewlineDecoder.decode().

   myself -- the nldecoder_object, passed as a PyObject *.
   input  -- handed to the wrapped decoder's decode() method, or used
             directly as the decoded text when the decoder is Py_None.
   final  -- non-zero when no more input follows; a trailing \r is then
             not held back.

   Steps: decode, re-attach a \r held back by the previous call, hold back
   a new trailing \r (unless final), then record which newline kinds occur
   and, if translation is enabled, rewrite \r and \r\n as \n.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ has stored something in it */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text to process. */
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself on failure, so a plain
       return (not `goto error`) is correct here. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Allocated with output's max char value; the copy below treats
           both buffers as having the same character width (`kind`). */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `out + kind` skips the one character slot just written. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left as it was. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           For multi-byte kinds a hit is not necessarily a real \r
           character; it only sends us to the exact scans below.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* With 1-byte characters a matching byte is a real \n. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    /* NOTE(review): this loop can read index `len`; it
                       presumably relies on the string buffer ending with
                       a zero terminator - confirm. */
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record-only mode: the text is returned unchanged. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell \r\n from \r. */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                /* Every flag is set; further scanning cannot add any. */
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translation mode: copy into a scratch buffer, replacing
               \r and \r\n by \n while recording what was seen. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Each input character yields at most one output character,
               so `len` characters of room suffice. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was at index `len`,
                   i.e. past the last real character. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was already released above. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503
504 /*[clinic input]
505 _io.IncrementalNewlineDecoder.decode
506 input: object
507 final: bool(accept={int}) = False
508 [clinic start generated code]*/
509
510 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)511 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
513 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
514 {
515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516 }
517
518 /*[clinic input]
519 _io.IncrementalNewlineDecoder.getstate
520 [clinic start generated code]*/
521
522 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)523 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
525 {
526 PyObject *buffer;
527 unsigned long long flag;
528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557 }
558
559 /*[clinic input]
560 _io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563 [clinic start generated code]*/
564
565 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)566 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
569 {
570 PyObject *buffer;
571 unsigned long long flag;
572
573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
575 return NULL;
576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
582
583 self->pendingcr = (int) (flag & 1);
584 flag >>= 1;
585
586 if (self->decoder != Py_None)
587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
589 else
590 Py_RETURN_NONE;
591 }
592
593 /*[clinic input]
594 _io.IncrementalNewlineDecoder.reset
595 [clinic start generated code]*/
596
597 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)598 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
600 {
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607 }
608
609 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)610 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
611 {
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631 }
632
633 /* TextIOWrapper */
634
/* Common function-pointer type for the specialized encoders below.
   They are declared taking a textio * first argument and are cast to
   this type when stored or compared. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696
/* Forward declaration; the definition appears elsewhere in this file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
700 /* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703 static PyObject *
ascii_encode(textio * self,PyObject * text)704 ascii_encode(textio *self, PyObject *text)
705 {
706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
707 }
708
709 static PyObject *
utf16be_encode(textio * self,PyObject * text)710 utf16be_encode(textio *self, PyObject *text)
711 {
712 return _PyUnicode_EncodeUTF16(text,
713 PyUnicode_AsUTF8(self->errors), 1);
714 }
715
716 static PyObject *
utf16le_encode(textio * self,PyObject * text)717 utf16le_encode(textio *self, PyObject *text)
718 {
719 return _PyUnicode_EncodeUTF16(text,
720 PyUnicode_AsUTF8(self->errors), -1);
721 }
722
723 static PyObject *
utf16_encode(textio * self,PyObject * text)724 utf16_encode(textio *self, PyObject *text)
725 {
726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
728 #if PY_BIG_ENDIAN
729 return utf16be_encode(self, text);
730 #else
731 return utf16le_encode(self, text);
732 #endif
733 }
734 return _PyUnicode_EncodeUTF16(text,
735 PyUnicode_AsUTF8(self->errors), 0);
736 }
737
738 static PyObject *
utf32be_encode(textio * self,PyObject * text)739 utf32be_encode(textio *self, PyObject *text)
740 {
741 return _PyUnicode_EncodeUTF32(text,
742 PyUnicode_AsUTF8(self->errors), 1);
743 }
744
745 static PyObject *
utf32le_encode(textio * self,PyObject * text)746 utf32le_encode(textio *self, PyObject *text)
747 {
748 return _PyUnicode_EncodeUTF32(text,
749 PyUnicode_AsUTF8(self->errors), -1);
750 }
751
752 static PyObject *
utf32_encode(textio * self,PyObject * text)753 utf32_encode(textio *self, PyObject *text)
754 {
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
757 #if PY_BIG_ENDIAN
758 return utf32be_encode(self, text);
759 #else
760 return utf32le_encode(self, text);
761 #endif
762 }
763 return _PyUnicode_EncodeUTF32(text,
764 PyUnicode_AsUTF8(self->errors), 0);
765 }
766
767 static PyObject *
utf8_encode(textio * self,PyObject * text)768 utf8_encode(textio *self, PyObject *text)
769 {
770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
771 }
772
773 static PyObject *
latin1_encode(textio * self,PyObject * text)774 latin1_encode(textio *self, PyObject *text)
775 {
776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
777 }
778
779 // Return true when encoding can be skipped when text is ascii.
780 static inline int
is_asciicompat_encoding(encodefunc_t f)781 is_asciicompat_encoding(encodefunc_t f)
782 {
783 return f == (encodefunc_t) ascii_encode
784 || f == (encodefunc_t) latin1_encode
785 || f == (encodefunc_t) utf8_encode;
786 }
787
788 /* Map normalized encoding names onto the specialized encoding funcs */
789
/* One row of the encodefuncs table: a codec name and the specialized
   encoder to use for it. */
typedef struct {
    const char *name;         /* name compared against the codec's name */
    encodefunc_t encodefunc;  /* specialized encoder for that codec */
} encodefuncentry;
794
795 static const encodefuncentry encodefuncs[] = {
796 {"ascii", (encodefunc_t) ascii_encode},
797 {"iso8859-1", (encodefunc_t) latin1_encode},
798 {"utf-8", (encodefunc_t) utf8_encode},
799 {"utf-16-be", (encodefunc_t) utf16be_encode},
800 {"utf-16-le", (encodefunc_t) utf16le_encode},
801 {"utf-16", (encodefunc_t) utf16_encode},
802 {"utf-32-be", (encodefunc_t) utf32be_encode},
803 {"utf-32-le", (encodefunc_t) utf32le_encode},
804 {"utf-32", (encodefunc_t) utf32_encode},
805 {NULL, NULL}
806 };
807
808 static int
validate_newline(const char * newline)809 validate_newline(const char *newline)
810 {
811 if (newline && newline[0] != '\0'
812 && !(newline[0] == '\n' && newline[1] == '\0')
813 && !(newline[0] == '\r' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815 PyErr_Format(PyExc_ValueError,
816 "illegal newline value: %s", newline);
817 return -1;
818 }
819 return 0;
820 }
821
822 static int
set_newline(textio * self,const char * newline)823 set_newline(textio *self, const char *newline)
824 {
825 PyObject *old = self->readnl;
826 if (newline == NULL) {
827 self->readnl = NULL;
828 }
829 else {
830 self->readnl = PyUnicode_FromString(newline);
831 if (self->readnl == NULL) {
832 self->readnl = old;
833 return -1;
834 }
835 }
836 self->readuniversal = (newline == NULL || newline[0] == '\0');
837 self->readtranslate = (newline == NULL);
838 self->writetranslate = (newline == NULL || newline[0] != '\0');
839 if (!self->readuniversal && self->readnl != NULL) {
840 // validate_newline() accepts only ASCII newlines.
841 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843 if (strcmp(self->writenl, "\n") == 0) {
844 self->writenl = NULL;
845 }
846 }
847 else {
848 #ifdef MS_WINDOWS
849 self->writenl = "\r\n";
850 #else
851 self->writenl = NULL;
852 #endif
853 }
854 Py_XDECREF(old);
855 return 0;
856 }
857
858 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)859 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860 const char *errors)
861 {
862 PyObject *res;
863 int r;
864
865 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
866 if (res == NULL)
867 return -1;
868
869 r = PyObject_IsTrue(res);
870 Py_DECREF(res);
871 if (r == -1)
872 return -1;
873
874 if (r != 1)
875 return 0;
876
877 Py_CLEAR(self->decoder);
878 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879 if (self->decoder == NULL)
880 return -1;
881
882 if (self->readuniversal) {
883 PyObject *incrementalDecoder = PyObject_CallFunction(
884 (PyObject *)&PyIncrementalNewlineDecoder_Type,
885 "Oi", self->decoder, (int)self->readtranslate);
886 if (incrementalDecoder == NULL)
887 return -1;
888 Py_CLEAR(self->decoder);
889 self->decoder = incrementalDecoder;
890 }
891
892 return 0;
893 }
894
895 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)896 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897 {
898 PyObject *chars;
899
900 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902 else
903 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904 eof ? Py_True : Py_False, NULL);
905
906 if (check_decoded(chars) < 0)
907 // check_decoded already decreases refcount
908 return NULL;
909
910 return chars;
911 }
912
913 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)914 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915 const char *errors)
916 {
917 PyObject *res;
918 int r;
919
920 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
921 if (res == NULL)
922 return -1;
923
924 r = PyObject_IsTrue(res);
925 Py_DECREF(res);
926 if (r == -1)
927 return -1;
928
929 if (r != 1)
930 return 0;
931
932 Py_CLEAR(self->encoder);
933 self->encodefunc = NULL;
934 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935 if (self->encoder == NULL)
936 return -1;
937
938 /* Get the normalized named of the codec */
939 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940 return -1;
941 }
942 if (res != NULL && PyUnicode_Check(res)) {
943 const encodefuncentry *e = encodefuncs;
944 while (e->name != NULL) {
945 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946 self->encodefunc = e->encodefunc;
947 break;
948 }
949 e++;
950 }
951 }
952 Py_XDECREF(res);
953
954 return 0;
955 }
956
957 static int
_textiowrapper_fix_encoder_state(textio * self)958 _textiowrapper_fix_encoder_state(textio *self)
959 {
960 if (!self->seekable || !self->encoder) {
961 return 0;
962 }
963
964 self->encoding_start_of_stream = 1;
965
966 PyObject *cookieObj = PyObject_CallMethodObjArgs(
967 self->buffer, _PyIO_str_tell, NULL);
968 if (cookieObj == NULL) {
969 return -1;
970 }
971
972 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973 Py_DECREF(cookieObj);
974 if (cmp < 0) {
975 return -1;
976 }
977
978 if (cmp == 0) {
979 self->encoding_start_of_stream = 0;
980 PyObject *res = PyObject_CallMethodObjArgs(
981 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
982 if (res == NULL) {
983 return -1;
984 }
985 Py_DECREF(res);
986 }
987
988 return 0;
989 }
990
991 /*[clinic input]
992 _io.TextIOWrapper.__init__
993 buffer: object
994 encoding: str(accept={str, NoneType}) = None
995 errors: object = None
996 newline: str(accept={str, NoneType}) = None
997 line_buffering: bool(accept={int}) = False
998 write_through: bool(accept={int}) = False
999
1000 Character and line based layer over a BufferedIOBase object, buffer.
1001
1002 encoding gives the name of the encoding that the stream will be
1003 decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1004
1005 errors determines the strictness of encoding and decoding (see
1006 help(codecs.Codec) or the documentation for codecs.register) and
1007 defaults to "strict".
1008
1009 newline controls how line endings are handled. It can be None, '',
1010 '\n', '\r', and '\r\n'. It works as follows:
1011
1012 * On input, if newline is None, universal newlines mode is
1013 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1014 these are translated into '\n' before being returned to the
1015 caller. If it is '', universal newline mode is enabled, but line
1016 endings are returned to the caller untranslated. If it has any of
1017 the other legal values, input lines are only terminated by the given
1018 string, and the line ending is returned to the caller untranslated.
1019
1020 * On output, if newline is None, any '\n' characters written are
1021 translated to the system default line separator, os.linesep. If
1022 newline is '' or '\n', no translation takes place. If newline is any
1023 of the other legal values, any '\n' characters written are translated
1024 to the given string.
1025
1026 If line_buffering is True, a call to flush is implied when a call to
1027 write contains a newline character.
1028 [clinic start generated code]*/
1029
1030 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1031 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1032 const char *encoding, PyObject *errors,
1033 const char *newline, int line_buffering,
1034 int write_through)
1035 /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
1036 {
1037 PyObject *raw, *codec_info = NULL;
1038 _PyIO_State *state = NULL;
1039 PyObject *res;
1040 int r;
1041
1042 self->ok = 0;
1043 self->detached = 0;
1044
1045 if (errors == Py_None) {
1046 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1047 if (errors == NULL) {
1048 return -1;
1049 }
1050 }
1051 else if (!PyUnicode_Check(errors)) {
1052 // Check 'errors' argument here because Argument Clinic doesn't support
1053 // 'str(accept={str, NoneType})' converter.
1054 PyErr_Format(
1055 PyExc_TypeError,
1056 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1057 errors->ob_type->tp_name);
1058 return -1;
1059 }
1060
1061 if (validate_newline(newline) < 0) {
1062 return -1;
1063 }
1064
1065 Py_CLEAR(self->buffer);
1066 Py_CLEAR(self->encoding);
1067 Py_CLEAR(self->encoder);
1068 Py_CLEAR(self->decoder);
1069 Py_CLEAR(self->readnl);
1070 Py_CLEAR(self->decoded_chars);
1071 Py_CLEAR(self->pending_bytes);
1072 Py_CLEAR(self->snapshot);
1073 Py_CLEAR(self->errors);
1074 Py_CLEAR(self->raw);
1075 self->decoded_chars_used = 0;
1076 self->pending_bytes_count = 0;
1077 self->encodefunc = NULL;
1078 self->b2cratio = 0.0;
1079
1080 if (encoding == NULL) {
1081 /* Try os.device_encoding(fileno) */
1082 PyObject *fileno;
1083 state = IO_STATE();
1084 if (state == NULL)
1085 goto error;
1086 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
1087 /* Ignore only AttributeError and UnsupportedOperation */
1088 if (fileno == NULL) {
1089 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1090 PyErr_ExceptionMatches(state->unsupported_operation)) {
1091 PyErr_Clear();
1092 }
1093 else {
1094 goto error;
1095 }
1096 }
1097 else {
1098 int fd = _PyLong_AsInt(fileno);
1099 Py_DECREF(fileno);
1100 if (fd == -1 && PyErr_Occurred()) {
1101 goto error;
1102 }
1103
1104 self->encoding = _Py_device_encoding(fd);
1105 if (self->encoding == NULL)
1106 goto error;
1107 else if (!PyUnicode_Check(self->encoding))
1108 Py_CLEAR(self->encoding);
1109 }
1110 }
1111 if (encoding == NULL && self->encoding == NULL) {
1112 PyObject *locale_module = _PyIO_get_locale_module(state);
1113 if (locale_module == NULL)
1114 goto catch_ImportError;
1115 self->encoding = _PyObject_CallMethodIdObjArgs(
1116 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
1117 Py_DECREF(locale_module);
1118 if (self->encoding == NULL) {
1119 catch_ImportError:
1120 /*
1121 Importing locale can raise an ImportError because of
1122 _functools, and locale.getpreferredencoding can raise an
1123 ImportError if _locale is not available. These will happen
1124 during module building.
1125 */
1126 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1127 PyErr_Clear();
1128 self->encoding = PyUnicode_FromString("ascii");
1129 }
1130 else
1131 goto error;
1132 }
1133 else if (!PyUnicode_Check(self->encoding))
1134 Py_CLEAR(self->encoding);
1135 }
1136 if (self->encoding != NULL) {
1137 encoding = PyUnicode_AsUTF8(self->encoding);
1138 if (encoding == NULL)
1139 goto error;
1140 }
1141 else if (encoding != NULL) {
1142 self->encoding = PyUnicode_FromString(encoding);
1143 if (self->encoding == NULL)
1144 goto error;
1145 }
1146 else {
1147 PyErr_SetString(PyExc_OSError,
1148 "could not determine default encoding");
1149 goto error;
1150 }
1151
1152 /* Check we have been asked for a real text encoding */
1153 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1154 if (codec_info == NULL) {
1155 Py_CLEAR(self->encoding);
1156 goto error;
1157 }
1158
1159 /* XXX: Failures beyond this point have the potential to leak elements
1160 * of the partially constructed object (like self->encoding)
1161 */
1162
1163 Py_INCREF(errors);
1164 self->errors = errors;
1165 self->chunk_size = 8192;
1166 self->line_buffering = line_buffering;
1167 self->write_through = write_through;
1168 if (set_newline(self, newline) < 0) {
1169 goto error;
1170 }
1171
1172 self->buffer = buffer;
1173 Py_INCREF(buffer);
1174
1175 /* Build the decoder object */
1176 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1177 goto error;
1178
1179 /* Build the encoder object */
1180 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1181 goto error;
1182
1183 /* Finished sorting out the codec details */
1184 Py_CLEAR(codec_info);
1185
1186 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1187 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1188 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1189 {
1190 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1191 goto error;
1192 /* Cache the raw FileIO object to speed up 'closed' checks */
1193 if (raw != NULL) {
1194 if (Py_TYPE(raw) == &PyFileIO_Type)
1195 self->raw = raw;
1196 else
1197 Py_DECREF(raw);
1198 }
1199 }
1200
1201 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
1202 if (res == NULL)
1203 goto error;
1204 r = PyObject_IsTrue(res);
1205 Py_DECREF(res);
1206 if (r < 0)
1207 goto error;
1208 self->seekable = self->telling = r;
1209
1210 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1211 if (r < 0) {
1212 goto error;
1213 }
1214 Py_XDECREF(res);
1215 self->has_read1 = r;
1216
1217 self->encoding_start_of_stream = 0;
1218 if (_textiowrapper_fix_encoder_state(self) < 0) {
1219 goto error;
1220 }
1221
1222 self->ok = 1;
1223 return 0;
1224
1225 error:
1226 Py_XDECREF(codec_info);
1227 return -1;
1228 }
1229
1230 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1231 * -1 on error.
1232 */
1233 static int
convert_optional_bool(PyObject * obj,int default_value)1234 convert_optional_bool(PyObject *obj, int default_value)
1235 {
1236 long v;
1237 if (obj == Py_None) {
1238 v = default_value;
1239 }
1240 else {
1241 v = PyLong_AsLong(obj);
1242 if (v == -1 && PyErr_Occurred())
1243 return -1;
1244 }
1245 return v != 0;
1246 }
1247
1248 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1249 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1250 PyObject *errors, int newline_changed)
1251 {
1252 /* Use existing settings where new settings are not specified */
1253 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1254 return 0; // no change
1255 }
1256
1257 if (encoding == Py_None) {
1258 encoding = self->encoding;
1259 if (errors == Py_None) {
1260 errors = self->errors;
1261 }
1262 }
1263 else if (errors == Py_None) {
1264 errors = _PyUnicode_FromId(&PyId_strict);
1265 if (errors == NULL) {
1266 return -1;
1267 }
1268 }
1269
1270 const char *c_errors = PyUnicode_AsUTF8(errors);
1271 if (c_errors == NULL) {
1272 return -1;
1273 }
1274
1275 // Create new encoder & decoder
1276 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1277 PyUnicode_AsUTF8(encoding), "codecs.open()");
1278 if (codec_info == NULL) {
1279 return -1;
1280 }
1281 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1282 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1283 Py_DECREF(codec_info);
1284 return -1;
1285 }
1286 Py_DECREF(codec_info);
1287
1288 Py_INCREF(encoding);
1289 Py_INCREF(errors);
1290 Py_SETREF(self->encoding, encoding);
1291 Py_SETREF(self->errors, errors);
1292
1293 return _textiowrapper_fix_encoder_state(self);
1294 }
1295
1296 /*[clinic input]
1297 _io.TextIOWrapper.reconfigure
1298 *
1299 encoding: object = None
1300 errors: object = None
1301 newline as newline_obj: object(c_default="NULL") = None
1302 line_buffering as line_buffering_obj: object = None
1303 write_through as write_through_obj: object = None
1304
1305 Reconfigure the text stream with new parameters.
1306
1307 This also does an implicit stream flush.
1308
1309 [clinic start generated code]*/
1310
1311 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1312 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1313 PyObject *errors, PyObject *newline_obj,
1314 PyObject *line_buffering_obj,
1315 PyObject *write_through_obj)
1316 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1317 {
1318 int line_buffering;
1319 int write_through;
1320 const char *newline = NULL;
1321
1322 /* Check if something is in the read buffer */
1323 if (self->decoded_chars != NULL) {
1324 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1325 _unsupported("It is not possible to set the encoding or newline "
1326 "of stream after the first read");
1327 return NULL;
1328 }
1329 }
1330
1331 if (newline_obj != NULL && newline_obj != Py_None) {
1332 newline = PyUnicode_AsUTF8(newline_obj);
1333 if (newline == NULL || validate_newline(newline) < 0) {
1334 return NULL;
1335 }
1336 }
1337
1338 line_buffering = convert_optional_bool(line_buffering_obj,
1339 self->line_buffering);
1340 write_through = convert_optional_bool(write_through_obj,
1341 self->write_through);
1342 if (line_buffering < 0 || write_through < 0) {
1343 return NULL;
1344 }
1345
1346 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1347 if (res == NULL) {
1348 return NULL;
1349 }
1350 Py_DECREF(res);
1351 self->b2cratio = 0;
1352
1353 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1354 return NULL;
1355 }
1356
1357 if (textiowrapper_change_encoding(
1358 self, encoding, errors, newline_obj != NULL) < 0) {
1359 return NULL;
1360 }
1361
1362 self->line_buffering = line_buffering;
1363 self->write_through = write_through;
1364 Py_RETURN_NONE;
1365 }
1366
/* Release every object reference held by the wrapper and mark it as
 * unusable (ok = 0).  Called from textiowrapper_dealloc(); it clears the
 * same set of members that textiowrapper_traverse() visits.
 * Always returns 0.
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1385
1386 static void
textiowrapper_dealloc(textio * self)1387 textiowrapper_dealloc(textio *self)
1388 {
1389 self->finalizing = 1;
1390 if (_PyIOBase_finalize((PyObject *) self) < 0)
1391 return;
1392 self->ok = 0;
1393 _PyObject_GC_UNTRACK(self);
1394 if (self->weakreflist != NULL)
1395 PyObject_ClearWeakRefs((PyObject *)self);
1396 textiowrapper_clear(self);
1397 Py_TYPE(self)->tp_free((PyObject *)self);
1398 }
1399
/* GC traversal: report every object reference held by the wrapper to
 * `visit`.  Py_VISIT returns early from this function if the callback
 * returns a non-zero value; otherwise 0 is returned.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1417
/* Forward declaration: the CHECK_CLOSED macro below calls this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* This macro takes some shortcuts to make the common case faster.
 *
 * It makes the enclosing function return NULL (with an exception set) when
 * the stream is closed or when the check itself fails:
 *   - for an exact TextIOWrapper with a cached raw object, the closed state
 *     is read directly through _PyFileIO_closed(self->raw);
 *   - for an exact TextIOWrapper without a cached raw object, the `closed`
 *     getter is called and its truth value tested;
 *   - for any other type, the check is delegated to _PyIOBase_check_closed().
 * Only usable in functions that return a pointer.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1447
/* Make the enclosing function set ValueError and return NULL when the
 * object has not been successfully initialized (self->ok <= 0).
 * NOTE: this expands to a bare `if` statement rather than a
 * do { } while (0) block, so use it only as a standalone statement.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }
1454
/* Make the enclosing function set ValueError and return NULL when the
 * object is uninitialized (via CHECK_INITIALIZED) or when its underlying
 * buffer has been detached.
 * NOTE: this expands to two consecutive `if` statements, so use it only as
 * a standalone statement, never as the unbraced body of another statement.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1462
/* Same checks and messages as CHECK_ATTACHED, for functions that return an
 * int: on an uninitialized or detached object the enclosing function sets
 * ValueError and returns -1.
 * NOTE: this expands to a bare `if` / `else if` statement, so use it only
 * as a standalone statement.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1473
1474
1475 /*[clinic input]
1476 _io.TextIOWrapper.detach
1477 [clinic start generated code]*/
1478
1479 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1480 _io_TextIOWrapper_detach_impl(textio *self)
1481 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1482 {
1483 PyObject *buffer, *res;
1484 CHECK_ATTACHED(self);
1485 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1486 if (res == NULL)
1487 return NULL;
1488 Py_DECREF(res);
1489 buffer = self->buffer;
1490 self->buffer = NULL;
1491 self->detached = 1;
1492 return buffer;
1493 }
1494
1495 /* Flush the internal write buffer. This doesn't explicitly flush the
1496 underlying buffered object, though. */
1497 static int
_textiowrapper_writeflush(textio * self)1498 _textiowrapper_writeflush(textio *self)
1499 {
1500 if (self->pending_bytes == NULL)
1501 return 0;
1502
1503 PyObject *pending = self->pending_bytes;
1504 PyObject *b;
1505
1506 if (PyBytes_Check(pending)) {
1507 b = pending;
1508 Py_INCREF(b);
1509 }
1510 else if (PyUnicode_Check(pending)) {
1511 assert(PyUnicode_IS_ASCII(pending));
1512 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1513 b = PyBytes_FromStringAndSize(
1514 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1515 if (b == NULL) {
1516 return -1;
1517 }
1518 }
1519 else {
1520 assert(PyList_Check(pending));
1521 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1522 if (b == NULL) {
1523 return -1;
1524 }
1525
1526 char *buf = PyBytes_AsString(b);
1527 Py_ssize_t pos = 0;
1528
1529 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1530 PyObject *obj = PyList_GET_ITEM(pending, i);
1531 char *src;
1532 Py_ssize_t len;
1533 if (PyUnicode_Check(obj)) {
1534 assert(PyUnicode_IS_ASCII(obj));
1535 src = PyUnicode_DATA(obj);
1536 len = PyUnicode_GET_LENGTH(obj);
1537 }
1538 else {
1539 assert(PyBytes_Check(obj));
1540 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1541 Py_DECREF(b);
1542 return -1;
1543 }
1544 }
1545 memcpy(buf + pos, src, len);
1546 pos += len;
1547 }
1548 assert(pos == self->pending_bytes_count);
1549 }
1550
1551 self->pending_bytes_count = 0;
1552 self->pending_bytes = NULL;
1553 Py_DECREF(pending);
1554
1555 PyObject *ret;
1556 do {
1557 ret = PyObject_CallMethodObjArgs(self->buffer,
1558 _PyIO_str_write, b, NULL);
1559 } while (ret == NULL && _PyIO_trap_eintr());
1560 Py_DECREF(b);
1561 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1562 // when an error occurred.
1563 if (ret == NULL)
1564 return -1;
1565 Py_DECREF(ret);
1566 return 0;
1567 }
1568
1569 /*[clinic input]
1570 _io.TextIOWrapper.write
1571 text: unicode
1572 /
1573 [clinic start generated code]*/
1574
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and queue it in self->pending_bytes, flushing the queue
     * to the underlying buffer when it reaches chunk_size, when
     * write_through is set, or when line buffering requires it.
     *
     * Returns the length of the original text (before newline translation)
     * as an int, or NULL with an exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference to `text`: it may be replaced by a translated copy. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer is actually needed. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    /* Translate '\n' to the configured line ending. */
    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush: push pending data to the buffer after this write.
     * needflush: additionally call flush() on the buffer (line buffering and
     * the text contains '\n' or '\r'). */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) &&
                // See bpo-43260
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
                is_asciicompat_encoding(self->encodefunc)) {
            /* Fast path: queue the ASCII str itself instead of encoding it;
             * _textiowrapper_writeflush() accepts str items. */
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);
    }

    /* From here on `text` is only compared by pointer (b == text), which
     * identifies the fast path above; it is not dereferenced again. */
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* Queue `b`.  pending_bytes is a single object for the first chunk and
     * becomes a list once a second chunk is added.  In every branch the
     * reference to `b` is either stored in the queue or released. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
        // Prevent to concatenate more than chunk_size data.
        if (_textiowrapper_writeflush(self) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_SET_ITEM steals both references. */
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* A write discards any read-ahead state: decoded characters, the
     * tell() snapshot and the decoder's internal state. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1712
1713 /* Steal a reference to chars and store it in the decoded_char buffer;
1714 */
1715 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1716 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1717 {
1718 Py_XSETREF(self->decoded_chars, chars);
1719 self->decoded_chars_used = 0;
1720 }
1721
1722 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1723 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1724 {
1725 PyObject *chars;
1726 Py_ssize_t avail;
1727
1728 if (self->decoded_chars == NULL)
1729 return PyUnicode_FromStringAndSize(NULL, 0);
1730
1731 /* decoded_chars is guaranteed to be "ready". */
1732 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1733 - self->decoded_chars_used);
1734
1735 assert(avail >= 0);
1736
1737 if (n < 0 || n > avail)
1738 n = avail;
1739
1740 if (self->decoded_chars_used > 0 || n < avail) {
1741 chars = PyUnicode_Substring(self->decoded_chars,
1742 self->decoded_chars_used,
1743 self->decoded_chars_used + n);
1744 if (chars == NULL)
1745 return NULL;
1746 }
1747 else {
1748 chars = self->decoded_chars;
1749 Py_INCREF(chars);
1750 }
1751
1752 self->decoded_chars_used += n;
1753 return chars;
1754 }
1755
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * size_hint is a number of characters wanted by the caller; when positive
 * it is scaled by the last observed bytes-per-character ratio and the read
 * asks for max(self->chunk_size, scaled hint) bytes.
 *
 * Returns 1 if data was obtained, 0 at EOF (nothing read and nothing
 * decoded), and -1 with an exception set on error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* dec_buffer and dec_flags are borrowed from `state` here. */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        /* Turn the borrowed references into owned ones before releasing
         * the tuple that holds them. */
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* read1() is used when the buffer has it, read() otherwise. */
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate the object and to get its
     * length; the decoder is given the object itself. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* Ownership of decoded_chars moves to self; it stays alive through
     * self->decoded_chars for the length query below. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* An empty read that still produced characters (flushed out of the
     * decoder by the final call) is not reported as EOF yet. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" steals dec_flags and next_input, even when the call fails,
         * hence dec_flags is forgotten before jumping to `fail`. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1885
1886 /*[clinic input]
1887 _io.TextIOWrapper.read
1888 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1889 /
1890 [clinic start generated code]*/
1891
static PyObject *
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
{
    /* Read and return at most `n` characters as a str; a negative `n`
     * reads until EOF.  Pending writes are flushed first.
     * Returns a new reference, or NULL with an exception set.
     */
    PyObject *result = NULL, *chunks = NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->decoder == NULL)
        return _unsupported("not readable");

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    if (n < 0) {
        /* Read everything */
        PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
        PyObject *decoded;
        if (bytes == NULL)
            goto fail;

        /* Decode everything in one final call; the built-in newline decoder
         * is called directly instead of through a method lookup. */
        if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
                                                          bytes, 1);
        else
            decoded = PyObject_CallMethodObjArgs(
                self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
        Py_DECREF(bytes);
        if (check_decoded(decoded) < 0)
            goto fail;

        /* Characters decoded earlier but not yet consumed come first. */
        result = textiowrapper_get_decoded_chars(self, -1);

        if (result == NULL) {
            Py_DECREF(decoded);
            return NULL;
        }

        /* Consumes the reference to `decoded`; `result` becomes NULL on
         * failure. */
        PyUnicode_AppendAndDel(&result, decoded);
        if (result == NULL)
            goto fail;

        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        return result;
    }
    else {
        int res = 1;
        Py_ssize_t remaining = n;

        /* Start with whatever is already decoded. */
        result = textiowrapper_get_decoded_chars(self, n);
        if (result == NULL)
            goto fail;
        if (PyUnicode_READY(result) == -1)
            goto fail;
        remaining -= PyUnicode_GET_LENGTH(result);

        /* Keep reading chunks until we have n characters to return */
        while (remaining > 0) {
            res = textiowrapper_read_chunk(self, remaining);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto fail;
            }
            if (res == 0)  /* EOF */
                break;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto fail;
            }
            /* Set the previous piece aside (empty pieces are skipped) and
             * fetch the next one from the freshly decoded chunk. */
            if (PyUnicode_GET_LENGTH(result) > 0 &&
                PyList_Append(chunks, result) < 0)
                goto fail;
            Py_DECREF(result);
            result = textiowrapper_get_decoded_chars(self, remaining);
            if (result == NULL)
                goto fail;
            remaining -= PyUnicode_GET_LENGTH(result);
        }
        /* More than one piece was collected: join them into the result. */
        if (chunks != NULL) {
            if (result != NULL && PyList_Append(chunks, result) < 0)
                goto fail;
            Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
            if (result == NULL)
                goto fail;
            Py_CLEAR(chunks);
        }
        return result;
    }
  fail:
    Py_XDECREF(result);
    Py_XDECREF(chunks);
    return NULL;
}
1992
1993
1994 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1995 that is to the NUL character. Otherwise the function will produce
1996 incorrect results. */
1997 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)1998 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1999 {
2000 if (kind == PyUnicode_1BYTE_KIND) {
2001 assert(ch < 256);
2002 return (char *) memchr((const void *) s, (char) ch, end - s);
2003 }
2004 for (;;) {
2005 while (PyUnicode_READ(kind, s, 0) > ch)
2006 s += kind;
2007 if (PyUnicode_READ(kind, s, 0) == ch)
2008 return s;
2009 if (s == end)
2010 return NULL;
2011 s += kind;
2012 }
2013 }
2014
/* Search the character data [start, end) of the given `kind` for a line
 * ending.
 *
 * Three modes, tested in this order:
 *   - translated: only '\n' is a line ending;
 *   - universal:  '\r', '\r\n' and '\n' are line endings;
 *   - otherwise:  the line ending is the string `readnl`.
 *
 * Returns the number of characters from `start` up to and including the
 * line ending when one is found.  Otherwise returns -1 and stores in
 * *consumed the number of characters that hold no part of a line ending;
 * *consumed is not written when a line ending is found.
 *
 * As with find_control_char(), `end` must point to the NUL terminator of
 * the string: the scans below read the character at `end`.
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = (end - start)/kind;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        const char *pos = find_control_char(kind, start, end, '\n');
        if (pos != NULL)
            return (pos - start)/kind + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        const char *s = start;
        for (;;) {
            Py_UCS4 ch;
            /* Fast path for non-control chars. The loop always ends
               since the Unicode string is NUL-terminated. */
            while (PyUnicode_READ(kind, s, 0) > '\r')
                s += kind;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = PyUnicode_READ(kind, s, 0);
            s += kind;
            if (ch == '\n')
                return (s - start)/kind;
            if (ch == '\r') {
                /* "\r\n" counts as a single line ending. */
                if (PyUnicode_READ(kind, s, 0) == '\n')
                    return (s - start)/kind + 1;
                else
                    return (s - start)/kind;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
        Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
        /* Assume that readnl is an ASCII character. */
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
        if (readnl_len == 1) {
            const char *pos = find_control_char(kind, start, end, nl[0]);
            if (pos != NULL)
                return (pos - start)/kind + 1;
            *consumed = len;
            return -1;
        }
        else {
            const char *s = start;
            /* `e` is the last position at which a complete readnl could
             * still start inside the data. */
            const char *e = end - (readnl_len - 1)*kind;
            const char *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this declaration shadows the outer `pos`. */
                const char *pos = find_control_char(kind, s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                for (i = 1; i < readnl_len; i++) {
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return (pos - start)/kind + readnl_len;
                s = pos + kind;
            }
            /* No complete line ending.  If the tail contains the first
             * character of readnl, only the data before it is reported as
             * consumed, since it may be the start of a line ending. */
            pos = find_control_char(kind, e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = (pos - start)/kind;
            return -1;
        }
    }
}
2100
/* Read one line from the stream and return it as a new str reference.
 *
 * `limit` is the maximum number of characters to return; a negative value
 * means no limit.  The empty string is returned at EOF.  Pending writes are
 * flushed first.  Returns NULL with an exception set on error, including
 * when the stream is closed.
 *
 * The line is assembled from up to three parts: `chunks` (pieces from
 * earlier buffers that hold no part of a line ending), `remaining` (the
 * tail of an earlier buffer that may be the start of a line ending) and
 * `line` (the buffer currently being scanned).
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters already set aside in `chunks`. */
    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer directly, from the read position. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the left-over tail of the previous buffer;
             * offset_to_buffer records where the new buffer starts inside
             * `line`. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found; make endpos relative to `line` and clip
             * it to the limit if needed. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        /* Slice only when the line is not the whole of `line`. */
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    /* A left-over tail that was never followed by more data (EOF) is part
     * of the result. */
    if (remaining != NULL) {
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all: return the shared empty string. */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2263
2264 /*[clinic input]
2265 _io.TextIOWrapper.readline
2266 size: Py_ssize_t = -1
2267 /
2268 [clinic start generated code]*/
2269
2270 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2271 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2272 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2273 {
2274 CHECK_ATTACHED(self);
2275 return _textiowrapper_readline(self, size);
2276 }
2277
2278 /* Seek and Tell */
2279
/* Unpacked form of the position cookie produced by tell() and consumed by
   seek().  The fields are packed into / unpacked from a single Python int
   by textiowrapper_build_cookie() and textiowrapper_parse_cookie(). */
typedef struct {
    Py_off_t start_pos;     /* position seek() passes to buffer.seek() */
    int dec_flags;          /* decoder flags restored via decoder.setstate() */
    int bytes_to_feed;      /* number of bytes seek() reads and decodes */
    int chars_to_skip;      /* decoded characters seek() marks as consumed */
    char need_eof;          /* passed as the `final` argument of decode() */
} cookie_type;
2287
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   respectively.
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields will be stored.
 */

/* Total size of the packed cookie: one Py_off_t, three ints and one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2319
2320 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2321 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2322 {
2323 unsigned char buffer[COOKIE_BUF_LEN];
2324 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2325 if (cookieLong == NULL)
2326 return -1;
2327
2328 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2329 PY_LITTLE_ENDIAN, 0) < 0) {
2330 Py_DECREF(cookieLong);
2331 return -1;
2332 }
2333 Py_DECREF(cookieLong);
2334
2335 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2336 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2337 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2338 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2339 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2340
2341 return 0;
2342 }
2343
2344 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2345 textiowrapper_build_cookie(cookie_type *cookie)
2346 {
2347 unsigned char buffer[COOKIE_BUF_LEN];
2348
2349 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2350 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2351 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2352 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2353 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2354
2355 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2356 PY_LITTLE_ENDIAN, 0);
2357 }
2358
2359 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2360 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2361 {
2362 PyObject *res;
2363 /* When seeking to the start of the stream, we call decoder.reset()
2364 rather than decoder.getstate().
2365 This is for a few decoders such as utf-16 for which the state value
2366 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2367 utf-16, that we are expecting a BOM).
2368 */
2369 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2370 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2371 else
2372 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2373 "((yi))", "", cookie->dec_flags);
2374 if (res == NULL)
2375 return -1;
2376 Py_DECREF(res);
2377 return 0;
2378 }
2379
2380 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2381 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2382 {
2383 PyObject *res;
2384 if (start_of_stream) {
2385 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2386 self->encoding_start_of_stream = 1;
2387 }
2388 else {
2389 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2390 _PyLong_Zero, NULL);
2391 self->encoding_start_of_stream = 0;
2392 }
2393 if (res == NULL)
2394 return -1;
2395 Py_DECREF(res);
2396 return 0;
2397 }
2398
2399 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2400 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2401 {
2402 /* Same as _textiowrapper_decoder_setstate() above. */
2403 return _textiowrapper_encoder_reset(
2404 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2405 }
2406
2407 /*[clinic input]
2408 _io.TextIOWrapper.seek
2409 cookie as cookieObj: object
2410 whence: int = 0
2411 /
2412 [clinic start generated code]*/
2413
2414 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2415 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2416 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2417 {
2418 PyObject *posobj;
2419 cookie_type cookie;
2420 PyObject *res;
2421 int cmp;
2422 PyObject *snapshot;
2423
2424 CHECK_ATTACHED(self);
2425 CHECK_CLOSED(self);
2426
2427 Py_INCREF(cookieObj);
2428
2429 if (!self->seekable) {
2430 _unsupported("underlying stream is not seekable");
2431 goto fail;
2432 }
2433
2434 switch (whence) {
2435 case SEEK_CUR:
2436 /* seek relative to current position */
2437 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2438 if (cmp < 0)
2439 goto fail;
2440
2441 if (cmp == 0) {
2442 _unsupported("can't do nonzero cur-relative seeks");
2443 goto fail;
2444 }
2445
2446 /* Seeking to the current position should attempt to
2447 * sync the underlying buffer with the current position.
2448 */
2449 Py_DECREF(cookieObj);
2450 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2451 if (cookieObj == NULL)
2452 goto fail;
2453 break;
2454
2455 case SEEK_END:
2456 /* seek relative to end of file */
2457 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2458 if (cmp < 0)
2459 goto fail;
2460
2461 if (cmp == 0) {
2462 _unsupported("can't do nonzero end-relative seeks");
2463 goto fail;
2464 }
2465
2466 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2467 if (res == NULL)
2468 goto fail;
2469 Py_DECREF(res);
2470
2471 textiowrapper_set_decoded_chars(self, NULL);
2472 Py_CLEAR(self->snapshot);
2473 if (self->decoder) {
2474 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2475 if (res == NULL)
2476 goto fail;
2477 Py_DECREF(res);
2478 }
2479
2480 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2481 Py_CLEAR(cookieObj);
2482 if (res == NULL)
2483 goto fail;
2484 if (self->encoder) {
2485 /* If seek() == 0, we are at the start of stream, otherwise not */
2486 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2487 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2488 Py_DECREF(res);
2489 goto fail;
2490 }
2491 }
2492 return res;
2493
2494 case SEEK_SET:
2495 break;
2496
2497 default:
2498 PyErr_Format(PyExc_ValueError,
2499 "invalid whence (%d, should be %d, %d or %d)", whence,
2500 SEEK_SET, SEEK_CUR, SEEK_END);
2501 goto fail;
2502 }
2503
2504 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2505 if (cmp < 0)
2506 goto fail;
2507
2508 if (cmp == 1) {
2509 PyErr_Format(PyExc_ValueError,
2510 "negative seek position %R", cookieObj);
2511 goto fail;
2512 }
2513
2514 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2515 if (res == NULL)
2516 goto fail;
2517 Py_DECREF(res);
2518
2519 /* The strategy of seek() is to go back to the safe start point
2520 * and replay the effect of read(chars_to_skip) from there.
2521 */
2522 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2523 goto fail;
2524
2525 /* Seek back to the safe start point. */
2526 posobj = PyLong_FromOff_t(cookie.start_pos);
2527 if (posobj == NULL)
2528 goto fail;
2529 res = PyObject_CallMethodObjArgs(self->buffer,
2530 _PyIO_str_seek, posobj, NULL);
2531 Py_DECREF(posobj);
2532 if (res == NULL)
2533 goto fail;
2534 Py_DECREF(res);
2535
2536 textiowrapper_set_decoded_chars(self, NULL);
2537 Py_CLEAR(self->snapshot);
2538
2539 /* Restore the decoder to its state from the safe start point. */
2540 if (self->decoder) {
2541 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2542 goto fail;
2543 }
2544
2545 if (cookie.chars_to_skip) {
2546 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2547 PyObject *input_chunk = _PyObject_CallMethodId(
2548 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2549 PyObject *decoded;
2550
2551 if (input_chunk == NULL)
2552 goto fail;
2553
2554 if (!PyBytes_Check(input_chunk)) {
2555 PyErr_Format(PyExc_TypeError,
2556 "underlying read() should have returned a bytes "
2557 "object, not '%.200s'",
2558 Py_TYPE(input_chunk)->tp_name);
2559 Py_DECREF(input_chunk);
2560 goto fail;
2561 }
2562
2563 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2564 if (snapshot == NULL) {
2565 goto fail;
2566 }
2567 Py_XSETREF(self->snapshot, snapshot);
2568
2569 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2570 "Oi", input_chunk, (int)cookie.need_eof);
2571
2572 if (check_decoded(decoded) < 0)
2573 goto fail;
2574
2575 textiowrapper_set_decoded_chars(self, decoded);
2576
2577 /* Skip chars_to_skip of the decoded characters. */
2578 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2579 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2580 goto fail;
2581 }
2582 self->decoded_chars_used = cookie.chars_to_skip;
2583 }
2584 else {
2585 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2586 if (snapshot == NULL)
2587 goto fail;
2588 Py_XSETREF(self->snapshot, snapshot);
2589 }
2590
2591 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2592 if (self->encoder) {
2593 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2594 goto fail;
2595 }
2596 return cookieObj;
2597 fail:
2598 Py_XDECREF(cookieObj);
2599 return NULL;
2600
2601 }
2602
2603 /*[clinic input]
2604 _io.TextIOWrapper.tell
2605 [clinic start generated code]*/
2606
/* Return a cookie describing the current logical position of the stream.

   When there is no decoder or no snapshot, the result is simply whatever
   buffer.tell() returned.  Otherwise the function works out a "safe start
   point" (a byte position at which the decoder has nothing buffered) at or
   before the current position, plus the number of bytes to feed and
   characters to skip from there, and packs these into a cookie with
   textiowrapper_build_cookie().

   The decoder is driven through several trial decodes while searching; its
   original state is saved first and restored on both the success path
   (`finally`) and the error path (`fail`).

   Returns a new reference, or NULL with an exception set. */
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    /* Push pending writes down before asking the buffer for its position. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        /* Nothing has been decoded ahead: the raw position is the answer. */
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    /* posobj is released here; the `fail` path does not touch it. */
    Py_DECREF(posobj);
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference owned by the snapshot tuple. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0) {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodObjArgs(self->decoder,
                                             _PyIO_str_getstate, NULL);
    if (saved_state == NULL)
        goto fail;

/* Query decoder.getstate(), validate that it is a (bytes, int) tuple and
   store the results in dec_buffer_len and dec_flags.  Jumps to `fail` on
   any error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
            _PyIO_str_getstate, NULL); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* Feed `len` bytes starting at `start` to decoder.decode() and store the
   number of characters produced in `res`.  Jumps to `fail` on error. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethodId( \
            self->decoder, &PyId_decode, "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    /* Initial guess from the bytes-to-chars ratio stored on the wrapper. */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            /* The step doubles on each consecutive overshoot. */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* The fast search found nothing: start over from the snapshot. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethodId(
            self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state it had on entry. */
    res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    if (saved_state) {
        /* Restore the decoder state while keeping the pending exception;
           an error raised by setstate() is chained onto it. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2833
2834 /*[clinic input]
2835 _io.TextIOWrapper.truncate
2836 pos: object = None
2837 /
2838 [clinic start generated code]*/
2839
2840 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2841 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2842 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2843 {
2844 PyObject *res;
2845
2846 CHECK_ATTACHED(self)
2847
2848 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2849 if (res == NULL)
2850 return NULL;
2851 Py_DECREF(res);
2852
2853 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
2854 }
2855
2856 static PyObject *
textiowrapper_repr(textio * self)2857 textiowrapper_repr(textio *self)
2858 {
2859 PyObject *nameobj, *modeobj, *res, *s;
2860 int status;
2861
2862 CHECK_INITIALIZED(self);
2863
2864 res = PyUnicode_FromString("<_io.TextIOWrapper");
2865 if (res == NULL)
2866 return NULL;
2867
2868 status = Py_ReprEnter((PyObject *)self);
2869 if (status != 0) {
2870 if (status > 0) {
2871 PyErr_Format(PyExc_RuntimeError,
2872 "reentrant call inside %s.__repr__",
2873 Py_TYPE(self)->tp_name);
2874 }
2875 goto error;
2876 }
2877 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2878 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2879 goto error;
2880 }
2881 /* Ignore ValueError raised if the underlying stream was detached */
2882 PyErr_Clear();
2883 }
2884 if (nameobj != NULL) {
2885 s = PyUnicode_FromFormat(" name=%R", nameobj);
2886 Py_DECREF(nameobj);
2887 if (s == NULL)
2888 goto error;
2889 PyUnicode_AppendAndDel(&res, s);
2890 if (res == NULL)
2891 goto error;
2892 }
2893 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2894 goto error;
2895 }
2896 if (modeobj != NULL) {
2897 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2898 Py_DECREF(modeobj);
2899 if (s == NULL)
2900 goto error;
2901 PyUnicode_AppendAndDel(&res, s);
2902 if (res == NULL)
2903 goto error;
2904 }
2905 s = PyUnicode_FromFormat("%U encoding=%R>",
2906 res, self->encoding);
2907 Py_DECREF(res);
2908 if (status == 0) {
2909 Py_ReprLeave((PyObject *)self);
2910 }
2911 return s;
2912
2913 error:
2914 Py_XDECREF(res);
2915 if (status == 0) {
2916 Py_ReprLeave((PyObject *)self);
2917 }
2918 return NULL;
2919 }
2920
2921
2922 /* Inquiries */
2923
2924 /*[clinic input]
2925 _io.TextIOWrapper.fileno
2926 [clinic start generated code]*/
2927
2928 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2929 _io_TextIOWrapper_fileno_impl(textio *self)
2930 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2931 {
2932 CHECK_ATTACHED(self);
2933 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
2934 }
2935
2936 /*[clinic input]
2937 _io.TextIOWrapper.seekable
2938 [clinic start generated code]*/
2939
2940 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2941 _io_TextIOWrapper_seekable_impl(textio *self)
2942 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2943 {
2944 CHECK_ATTACHED(self);
2945 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
2946 }
2947
2948 /*[clinic input]
2949 _io.TextIOWrapper.readable
2950 [clinic start generated code]*/
2951
2952 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2953 _io_TextIOWrapper_readable_impl(textio *self)
2954 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2955 {
2956 CHECK_ATTACHED(self);
2957 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
2958 }
2959
2960 /*[clinic input]
2961 _io.TextIOWrapper.writable
2962 [clinic start generated code]*/
2963
2964 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2965 _io_TextIOWrapper_writable_impl(textio *self)
2966 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2967 {
2968 CHECK_ATTACHED(self);
2969 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
2970 }
2971
2972 /*[clinic input]
2973 _io.TextIOWrapper.isatty
2974 [clinic start generated code]*/
2975
2976 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2977 _io_TextIOWrapper_isatty_impl(textio *self)
2978 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2979 {
2980 CHECK_ATTACHED(self);
2981 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
2982 }
2983
2984 /*[clinic input]
2985 _io.TextIOWrapper.flush
2986 [clinic start generated code]*/
2987
2988 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)2989 _io_TextIOWrapper_flush_impl(textio *self)
2990 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
2991 {
2992 CHECK_ATTACHED(self);
2993 CHECK_CLOSED(self);
2994 self->telling = self->seekable;
2995 if (_textiowrapper_writeflush(self) < 0)
2996 return NULL;
2997 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
2998 }
2999
3000 /*[clinic input]
3001 _io.TextIOWrapper.close
3002 [clinic start generated code]*/
3003
3004 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3005 _io_TextIOWrapper_close_impl(textio *self)
3006 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3007 {
3008 PyObject *res;
3009 int r;
3010 CHECK_ATTACHED(self);
3011
3012 res = textiowrapper_closed_get(self, NULL);
3013 if (res == NULL)
3014 return NULL;
3015 r = PyObject_IsTrue(res);
3016 Py_DECREF(res);
3017 if (r < 0)
3018 return NULL;
3019
3020 if (r > 0) {
3021 Py_RETURN_NONE; /* stream already closed */
3022 }
3023 else {
3024 PyObject *exc = NULL, *val, *tb;
3025 if (self->finalizing) {
3026 res = _PyObject_CallMethodIdObjArgs(self->buffer,
3027 &PyId__dealloc_warn,
3028 self, NULL);
3029 if (res)
3030 Py_DECREF(res);
3031 else
3032 PyErr_Clear();
3033 }
3034 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
3035 if (res == NULL)
3036 PyErr_Fetch(&exc, &val, &tb);
3037 else
3038 Py_DECREF(res);
3039
3040 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3041 if (exc != NULL) {
3042 _PyErr_ChainExceptions(exc, val, tb);
3043 Py_CLEAR(res);
3044 }
3045 return res;
3046 }
3047 }
3048
3049 static PyObject *
textiowrapper_iternext(textio * self)3050 textiowrapper_iternext(textio *self)
3051 {
3052 PyObject *line;
3053
3054 CHECK_ATTACHED(self);
3055
3056 self->telling = 0;
3057 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3058 /* Skip method call overhead for speed */
3059 line = _textiowrapper_readline(self, -1);
3060 }
3061 else {
3062 line = PyObject_CallMethodObjArgs((PyObject *)self,
3063 _PyIO_str_readline, NULL);
3064 if (line && !PyUnicode_Check(line)) {
3065 PyErr_Format(PyExc_OSError,
3066 "readline() should have returned a str object, "
3067 "not '%.200s'", Py_TYPE(line)->tp_name);
3068 Py_DECREF(line);
3069 return NULL;
3070 }
3071 }
3072
3073 if (line == NULL || PyUnicode_READY(line) == -1)
3074 return NULL;
3075
3076 if (PyUnicode_GET_LENGTH(line) == 0) {
3077 /* Reached EOF or would have blocked */
3078 Py_DECREF(line);
3079 Py_CLEAR(self->snapshot);
3080 self->telling = self->seekable;
3081 return NULL;
3082 }
3083
3084 return line;
3085 }
3086
3087 static PyObject *
textiowrapper_name_get(textio * self,void * context)3088 textiowrapper_name_get(textio *self, void *context)
3089 {
3090 CHECK_ATTACHED(self);
3091 return _PyObject_GetAttrId(self->buffer, &PyId_name);
3092 }
3093
3094 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3095 textiowrapper_closed_get(textio *self, void *context)
3096 {
3097 CHECK_ATTACHED(self);
3098 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3099 }
3100
3101 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3102 textiowrapper_newlines_get(textio *self, void *context)
3103 {
3104 PyObject *res;
3105 CHECK_ATTACHED(self);
3106 if (self->decoder == NULL ||
3107 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3108 {
3109 Py_RETURN_NONE;
3110 }
3111 return res;
3112 }
3113
3114 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3115 textiowrapper_errors_get(textio *self, void *context)
3116 {
3117 CHECK_INITIALIZED(self);
3118 Py_INCREF(self->errors);
3119 return self->errors;
3120 }
3121
3122 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3123 textiowrapper_chunk_size_get(textio *self, void *context)
3124 {
3125 CHECK_ATTACHED(self);
3126 return PyLong_FromSsize_t(self->chunk_size);
3127 }
3128
3129 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3130 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3131 {
3132 Py_ssize_t n;
3133 CHECK_ATTACHED_INT(self);
3134 if (arg == NULL) {
3135 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3136 return -1;
3137 }
3138 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3139 if (n == -1 && PyErr_Occurred())
3140 return -1;
3141 if (n <= 0) {
3142 PyErr_SetString(PyExc_ValueError,
3143 "a strictly positive integer is required");
3144 return -1;
3145 }
3146 self->chunk_size = n;
3147 return 0;
3148 }
3149
3150 #include "clinic/textio.c.h"
3151
/* Method table for _io.IncrementalNewlineDecoder.  The entries are the
   *_METHODDEF macros provided by the generated clinic/textio.c.h header;
   the table is terminated by a NULL sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3159
/* Computed attributes of _io.IncrementalNewlineDecoder: a single read-only
   `newlines` attribute (no setter is registered). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
3164
/* Type object for _io.IncrementalNewlineDecoder.  Slots are filled in
   positionally; the type is subclassable (Py_TPFLAGS_BASETYPE), registers
   no GC traverse/clear slots, and uses PyType_GenericNew for tp_new. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3205
3206
/* Method table for _io.TextIOWrapper.  The entries are the *_METHODDEF
   macros provided by the generated clinic/textio.c.h header, grouped as
   core I/O operations, inquiries, and positioning; the table is terminated
   by a NULL sentinel. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3227
/* Struct fields of `textio` exposed directly as Python attributes.  All are
   READONLY except `_finalizing`, which is registered with flags 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
3236
/* Computed attributes of _io.TextIOWrapper.  Only `_CHUNK_SIZE` has a
   setter; the others are read-only.  The `mode` entry is commented out, so
   no `mode` getter is registered here. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
3248
3249 PyTypeObject PyTextIOWrapper_Type = {
3250 PyVarObject_HEAD_INIT(NULL, 0)
3251 "_io.TextIOWrapper", /*tp_name*/
3252 sizeof(textio), /*tp_basicsize*/
3253 0, /*tp_itemsize*/
3254 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3255 0, /*tp_vectorcall_offset*/
3256 0, /*tp_getattr*/
3257 0, /*tps_etattr*/
3258 0, /*tp_as_async*/
3259 (reprfunc)textiowrapper_repr,/*tp_repr*/
3260 0, /*tp_as_number*/
3261 0, /*tp_as_sequence*/
3262 0, /*tp_as_mapping*/
3263 0, /*tp_hash */
3264 0, /*tp_call*/
3265 0, /*tp_str*/
3266 0, /*tp_getattro*/
3267 0, /*tp_setattro*/
3268 0, /*tp_as_buffer*/
3269 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3270 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
3271 _io_TextIOWrapper___init____doc__, /* tp_doc */
3272 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3273 (inquiry)textiowrapper_clear, /* tp_clear */
3274 0, /* tp_richcompare */
3275 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3276 0, /* tp_iter */
3277 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3278 textiowrapper_methods, /* tp_methods */
3279 textiowrapper_members, /* tp_members */
3280 textiowrapper_getset, /* tp_getset */
3281 0, /* tp_base */
3282 0, /* tp_dict */
3283 0, /* tp_descr_get */
3284 0, /* tp_descr_set */
3285 offsetof(textio, dict), /*tp_dictoffset*/
3286 _io_TextIOWrapper___init__, /* tp_init */
3287 0, /* tp_alloc */
3288 PyType_GenericNew, /* tp_new */
3289 0, /* tp_free */
3290 0, /* tp_is_gc */
3291 0, /* tp_bases */
3292 0, /* tp_mro */
3293 0, /* tp_cache */
3294 0, /* tp_subclasses */
3295 0, /* tp_weaklist */
3296 0, /* tp_del */
3297 0, /* tp_version_tag */
3298 0, /* tp_finalize */
3299 };
3300