1""" 2Python implementation of the io module. 3""" 4 5import os 6import abc 7import codecs 8import errno 9import stat 10import sys 11# Import _thread instead of threading to reduce startup cost 12from _thread import allocate_lock as Lock 13if sys.platform in {'win32', 'cygwin'}: 14 from msvcrt import setmode as _setmode 15else: 16 _setmode = None 17 18import io 19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 20 21valid_seek_flags = {0, 1, 2} # Hardwired values 22if hasattr(os, 'SEEK_HOLE') : 23 valid_seek_flags.add(os.SEEK_HOLE) 24 valid_seek_flags.add(os.SEEK_DATA) 25 26# open() uses st_blksize whenever we can 27DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 28 29# NOTE: Base classes defined here are registered with the "official" ABCs 30# defined in io.py. We don't use real inheritance though, because we don't want 31# to inherit the C implementations. 32 33# Rebind for compatibility 34BlockingIOError = BlockingIOError 35 36# Does io.IOBase finalizer log the exception if the close() method fails? 37# The exception is ignored silently by default in release build. 38_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) 39# Does open() check its 'errors' argument? 40_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE 41 42 43def text_encoding(encoding, stacklevel=2): 44 """ 45 A helper function to choose the text encoding. 46 47 When encoding is not None, just return it. 48 Otherwise, return the default text encoding (i.e. "locale"). 49 50 This function emits an EncodingWarning if *encoding* is None and 51 sys.flags.warn_default_encoding is true. 52 53 This can be used in APIs with an encoding=None parameter 54 that pass it to TextIOWrapper or open. 55 However, please consider using encoding="utf-8" for new APIs. 
56 """ 57 if encoding is None: 58 encoding = "locale" 59 if sys.flags.warn_default_encoding: 60 import warnings 61 warnings.warn("'encoding' argument not specified.", 62 EncodingWarning, stacklevel + 1) 63 return encoding 64 65 66# Wrapper for builtins.open 67# 68# Trick so that open() won't become a bound method when stored 69# as a class variable (as dbm.dumb does). 70# 71# See init_set_builtins_open() in Python/pylifecycle.c. 72@staticmethod 73def open(file, mode="r", buffering=-1, encoding=None, errors=None, 74 newline=None, closefd=True, opener=None): 75 76 r"""Open file and return a stream. Raise OSError upon failure. 77 78 file is either a text or byte string giving the name (and the path 79 if the file isn't in the current working directory) of the file to 80 be opened or an integer file descriptor of the file to be 81 wrapped. (If a file descriptor is given, it is closed when the 82 returned I/O object is closed, unless closefd is set to False.) 83 84 mode is an optional string that specifies the mode in which the file is 85 opened. It defaults to 'r' which means open for reading in text mode. Other 86 common values are 'w' for writing (truncating the file if it already 87 exists), 'x' for exclusive creation of a new file, and 'a' for appending 88 (which on some Unix systems, means that all writes append to the end of the 89 file regardless of the current seek position). In text mode, if encoding is 90 not specified the encoding used is platform dependent. (For reading and 91 writing raw bytes use binary mode and leave encoding unspecified.) 
# Wrapper for builtins.open
#
# Trick so that open() won't become a bound method when stored
# as a class variable (as dbm.dumb does).
#
# See init_set_builtins_open() in Python/pylifecycle.c.
@staticmethod
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
         newline=None, closefd=True, opener=None):

    r"""Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file is
    opened. It defaults to 'r' which means open for reading in text mode. Other
    common values are 'w' for writing (truncating the file if it already
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
    (which on some Unix systems, means that all writes append to the end of the
    file regardless of the current seek position). In text mode, if encoding is
    not specified the encoding used is platform dependent. (For reading and
    writing raw bytes use binary mode and leave encoding unspecified.) The
    available modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'x'       create a new file and open it for writing
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (deprecated)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
    raises a `FileExistsError` if the file already exists.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    'U' mode is deprecated and will raise an exception in future versions
    of Python. It has no effect in Python 3. Use newline to control
    universal newlines mode.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the str name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline is a string controlling how universal newlines works (it only
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
    as follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    closefd is a bool. If closefd is False, the underlying file descriptor will
    be kept open when the file is closed. This does not work when a file name is
    given and must be True in that case.

    The newly created file is non-inheritable.

    A custom opener can be used by passing a callable as *opener*. The
    underlying file descriptor for the file object is then obtained by calling
    *opener* with (*file*, *flags*). *opener* must return an open file
    descriptor (passing os.open as *opener* results in functionality similar to
    passing None).

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # Validate argument types up front so callers get TypeError, not an
    # obscure failure deep inside FileIO/TextIOWrapper.
    if not isinstance(file, int):
        file = os.fspath(file)
    if not isinstance(file, (str, bytes, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, str):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, str):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, str):
        raise TypeError("invalid errors: %r" % errors)
    # A mode flag may appear at most once; the set/len comparison catches
    # duplicates as well as unknown characters.
    modes = set(mode)
    if modes - set("axrwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    creating = "x" in modes
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if creating or writing or appending or updating:
            raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if creating + reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (creating or reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    if binary and buffering == 1:
        import warnings
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
                      "mode, the default buffer size will be used",
                      RuntimeWarning, 2)
    raw = FileIO(file,
                 (creating and "x" or "") +
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd, opener=opener)
    # From here on, 'result' tracks the outermost object built so far so
    # the except clause below can close it on any failure.
    result = raw
    try:
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            # Default policy: prefer the device's block size, falling back
            # on DEFAULT_BUFFER_SIZE when fstat/st_blksize is unavailable.
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return result
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif creating or writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        result = buffer
        if binary:
            return result
        encoding = text_encoding(encoding)
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
        result = text
        text.mode = mode
        return result
    except:
        # Close whatever was constructed before re-raising, so no file
        # descriptor is leaked when a later wrapping step fails.
        result.close()
        raise
# Define a default pure-Python implementation for open_code()
# that does not allow hooks.  Warn on first use.  Defined for tests.
def _open_code_with_warning(path):
    """Open *path* with mode ``'rb'`` for use as executable code.

    ``path`` should be an absolute path.

    Runtimes that support it can hook open_code() to give embedders
    control over code files; this pure-Python fallback supports no such
    hooks and warns about that on every call.
    """
    import warnings
    warnings.warn("_pyio.open_code() may not be using hooks",
                  RuntimeWarning, 2)
    return open(path, "rb")


# Prefer the real, hookable implementation when the C accelerator
# provides one; otherwise fall back to the warning stub above.
try:
    open_code = io.open_code
except AttributeError:
    open_code = _open_code_with_warning


def __getattr__(name):
    """Module-level getattr providing the deprecated OpenWrapper alias."""
    if name != "OpenWrapper":
        raise AttributeError(name)
    # bpo-43680: Until Python 3.9, _pyio.open was not a static method and
    # builtins.open was set to OpenWrapper to not become a bound method
    # when set to a class variable. _io.open is a built-in function whereas
    # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now
    # a static method, and builtins.open() is now io.open().
    import warnings
    warnings.warn('OpenWrapper is deprecated, use open instead',
                  DeprecationWarning, stacklevel=2)
    global OpenWrapper
    OpenWrapper = open
    return OpenWrapper


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
try:
    UnsupportedOperation = io.UnsupportedOperation
except AttributeError:
    class UnsupportedOperation(OSError, ValueError):
        pass
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an OSError exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # Default implementation: a relative seek of 0 reveals the position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot accidentally
    # shadow the closed-state flag.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark closed even if flush() raised, matching the C
                # implementation's behavior.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        if _IOBASE_EMITS_UNRAISABLE:
            # Debug/dev-mode builds let a failing close() surface as an
            # unraisable exception instead of being swallowed.
            self.close()
        else:
            # The try/except block is in case this is called at program
            # exit time, when it's possible that globals have already been
            # deleted, and then the close() call might fail.  Since
            # there's nothing we can do about such failures and they annoy
            # the end users, we suppress the traceback.
            try:
                self.close()
            except:
                pass

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead for a newline so each
            # read() call can grab up to a whole line at once.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    n = min(n, size)
                return n
        else:
            # No peek(): fall back to reading one byte at a time.
            def nreadahead():
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)

io.IOBase.register(IOBase)
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is expressed in terms of readinto(): a subclass that wants
    # read() support only needs to supply readinto() as its primitive.
    # readinto() is usually the more efficient of the two.

    # (Providing the reverse default -- readinto() written in terms of
    # read() -- would be tempting, but a subclass implementing neither
    # would then recurse endlessly, so we don't.)

    def read(self, size=-1):
        """Read and return up to *size* bytes (an int).

        Returns b'' on EOF, or None if the stream is in non-blocking
        mode and no data is available.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        buf = bytearray(size.__index__())
        count = self.readinto(buf)
        if count is None:
            # Non-blocking stream with nothing to read.
            return None
        del buf[count:]
        return bytes(buf)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        pieces = bytearray()
        while True:
            chunk = self.read(DEFAULT_BUFFER_SIZE)
            if not chunk:
                # chunk is b'' (EOF) or None (would block); propagate it
                # unchanged when nothing at all was accumulated.
                return bytes(pieces) if pieces else chunk
            pieces += chunk

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object *b*.

        Returns the number of bytes read as an int (0 for EOF), or None
        if the stream is in non-blocking mode and no data is available.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of *b* in bytes.
        """
        self._unsupported("write")

io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Unlike RawIOBase, the read() method here supports omitting the size
    argument and has no default implementation deferring to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation wraps a RawIOBase implementation rather
    than inheriting from one.
    """

    def read(self, size=-1):
        """Read and return up to *size* bytes (an int).

        When the argument is omitted, None, or negative, all data up to
        EOF is read and returned.

        When the argument is positive and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  For interactive
        raw streams at most one raw read is issued, and a short result
        does not imply that EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to *size* bytes (an int), issuing at most one
        read() system call.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object *b*.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call.

        Returns the number of bytes read as an int (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no data
        at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Normalise the target to a flat view of unsigned bytes so that
        # len() and slice assignment behave uniformly for any buffer.
        if not isinstance(b, memoryview):
            b = memoryview(b)
        b = b.cast('B')

        data = self.read1(len(b)) if read1 else self.read(len(b))
        n = len(data)
        b[:n] = data
        return n

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Returns the number of bytes written, which is always the length
        of *b* in bytes.

        Raises BlockingIOError if the buffer is full and the underlying
        raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")

io.BufferedIOBase.register(BufferedIOBase)
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase over an underlying raw stream.

    Most requests are forwarded to the wrapped raw stream; the read(),
    readinto() and write() primitives are deliberately *not* provided
    and must come from the concrete subclass.
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        # Forward to the raw stream and sanity-check what it reports.
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        result = self.raw.tell()
        if result < 0:
            raise OSError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        self._checkClosed()
        self._checkWritable()

        # Buffered and raw views of the file position must agree before
        # the size changes, so flush first.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                # Close the raw stream even when flushing failed.
                self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        modname = self.__class__.__module__
        clsname = self.__class__.__qualname__
        try:
            name = self.name
        except AttributeError:
            # Raw stream has no name (e.g. wrapping a BytesIO).
            return "<{}.{}>".format(modname, clsname)
        return "<{}.{} name={!r}>".format(modname, clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Initialize _buffer as early as possible: __del__() calls close(),
    # which touches it even if __init__ never ran to completion.
    _buffer = None

    def __init__(self, initial_bytes=None):
        content = bytearray()
        if initial_bytes is not None:
            content += initial_bytes
        self._buffer = content
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                as_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = as_index()
        if size < 0:
            size = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        end = min(len(self._buffer), self._pos + size)
        data = bytes(self._buffer[self._pos:end])
        self._pos = end
        return data

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            count = view.nbytes  # Size of any bytes-like object
        if count == 0:
            return 0
        start = self._pos
        if start > len(self._buffer):
            # A previous seek() went past the end: pad the gap between
            # the current end of the buffer and the write position with
            # null bytes.
            self._buffer += b'\x00' * (start - len(self._buffer))
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            as_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        else:
            pos = as_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        return self._pos

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                as_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            else:
                pos = as_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
return True 1047 1048 def seekable(self): 1049 if self.closed: 1050 raise ValueError("I/O operation on closed file.") 1051 return True 1052 1053 1054class BufferedReader(_BufferedIOMixin): 1055 1056 """BufferedReader(raw[, buffer_size]) 1057 1058 A buffer for a readable, sequential BaseRawIO object. 1059 1060 The constructor creates a BufferedReader for the given readable raw 1061 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE 1062 is used. 1063 """ 1064 1065 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1066 """Create a new buffered reader using the given readable raw IO object. 1067 """ 1068 if not raw.readable(): 1069 raise OSError('"raw" argument must be readable.') 1070 1071 _BufferedIOMixin.__init__(self, raw) 1072 if buffer_size <= 0: 1073 raise ValueError("invalid buffer size") 1074 self.buffer_size = buffer_size 1075 self._reset_read_buf() 1076 self._read_lock = Lock() 1077 1078 def readable(self): 1079 return self.raw.readable() 1080 1081 def _reset_read_buf(self): 1082 self._read_buf = b"" 1083 self._read_pos = 0 1084 1085 def read(self, size=None): 1086 """Read size bytes. 1087 1088 Returns exactly size bytes of data unless the underlying raw IO 1089 stream reaches EOF or if the call would block in non-blocking 1090 mode. If size is negative, read until EOF or until read() would 1091 block. 1092 """ 1093 if size is not None and size < -1: 1094 raise ValueError("invalid number of bytes to read") 1095 with self._read_lock: 1096 return self._read_unlocked(size) 1097 1098 def _read_unlocked(self, n=None): 1099 nodata_val = b"" 1100 empty_values = (b"", None) 1101 buf = self._read_buf 1102 pos = self._read_pos 1103 1104 # Special case for when the number of bytes to read is unspecified. 
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        # Caller must hold self._read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top up the buffer with at most one raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* with at most one system call."""

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer
                avail = min(len(self._read_buf) - self._read_pos, len(buf))
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        # Position of the raw stream, minus data still held in the buffer.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # Adjust a relative seek for data still held in the buffer.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos

class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
1255 """ 1256 1257 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1258 if not raw.writable(): 1259 raise OSError('"raw" argument must be writable.') 1260 1261 _BufferedIOMixin.__init__(self, raw) 1262 if buffer_size <= 0: 1263 raise ValueError("invalid buffer size") 1264 self.buffer_size = buffer_size 1265 self._write_buf = bytearray() 1266 self._write_lock = Lock() 1267 1268 def writable(self): 1269 return self.raw.writable() 1270 1271 def write(self, b): 1272 if isinstance(b, str): 1273 raise TypeError("can't write str to binary stream") 1274 with self._write_lock: 1275 if self.closed: 1276 raise ValueError("write to closed file") 1277 # XXX we can implement some more tricks to try and avoid 1278 # partial writes 1279 if len(self._write_buf) > self.buffer_size: 1280 # We're full, so let's pre-flush the buffer. (This may 1281 # raise BlockingIOError with characters_written == 0.) 1282 self._flush_unlocked() 1283 before = len(self._write_buf) 1284 self._write_buf.extend(b) 1285 written = len(self._write_buf) - before 1286 if len(self._write_buf) > self.buffer_size: 1287 try: 1288 self._flush_unlocked() 1289 except BlockingIOError as e: 1290 if len(self._write_buf) > self.buffer_size: 1291 # We've hit the buffer_size. We have to accept a partial 1292 # write and cut back our buffer. 
1293 overage = len(self._write_buf) - self.buffer_size 1294 written -= overage 1295 self._write_buf = self._write_buf[:self.buffer_size] 1296 raise BlockingIOError(e.errno, e.strerror, written) 1297 return written 1298 1299 def truncate(self, pos=None): 1300 with self._write_lock: 1301 self._flush_unlocked() 1302 if pos is None: 1303 pos = self.raw.tell() 1304 return self.raw.truncate(pos) 1305 1306 def flush(self): 1307 with self._write_lock: 1308 self._flush_unlocked() 1309 1310 def _flush_unlocked(self): 1311 if self.closed: 1312 raise ValueError("flush on closed file") 1313 while self._write_buf: 1314 try: 1315 n = self.raw.write(self._write_buf) 1316 except BlockingIOError: 1317 raise RuntimeError("self.raw should implement RawIOBase: it " 1318 "should not raise BlockingIOError") 1319 if n is None: 1320 raise BlockingIOError( 1321 errno.EAGAIN, 1322 "write could not complete without blocking", 0) 1323 if n > len(self._write_buf) or n < 0: 1324 raise OSError("write() returned incorrect number of bytes") 1325 del self._write_buf[:n] 1326 1327 def tell(self): 1328 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1329 1330 def seek(self, pos, whence=0): 1331 if whence not in valid_seek_flags: 1332 raise ValueError("invalid whence value") 1333 with self._write_lock: 1334 self._flush_unlocked() 1335 return _BufferedIOMixin.seek(self, pos, whence) 1336 1337 def close(self): 1338 with self._write_lock: 1339 if self.raw is None or self.closed: 1340 return 1341 # We have to release the lock and call self.flush() (which will 1342 # probably just re-take the lock) in case flush has been overridden in 1343 # a subclass or the user set self.flush to something. This is the same 1344 # behavior as the C implementation. 
1345 try: 1346 # may raise BlockingIOError or BrokenPipeError etc 1347 self.flush() 1348 finally: 1349 with self._write_lock: 1350 self.raw.close() 1351 1352 1353class BufferedRWPair(BufferedIOBase): 1354 1355 """A buffered reader and writer object together. 1356 1357 A buffered reader object and buffered writer object put together to 1358 form a sequential IO object that can read and write. This is typically 1359 used with a socket or two-way pipe. 1360 1361 reader and writer are RawIOBase objects that are readable and 1362 writeable respectively. If the buffer_size is omitted it defaults to 1363 DEFAULT_BUFFER_SIZE. 1364 """ 1365 1366 # XXX The usefulness of this (compared to having two separate IO 1367 # objects) is questionable. 1368 1369 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 1370 """Constructor. 1371 1372 The arguments are two RawIO instances. 1373 """ 1374 if not reader.readable(): 1375 raise OSError('"reader" argument must be readable.') 1376 1377 if not writer.writable(): 1378 raise OSError('"writer" argument must be writable.') 1379 1380 self.reader = BufferedReader(reader, buffer_size) 1381 self.writer = BufferedWriter(writer, buffer_size) 1382 1383 def read(self, size=-1): 1384 if size is None: 1385 size = -1 1386 return self.reader.read(size) 1387 1388 def readinto(self, b): 1389 return self.reader.readinto(b) 1390 1391 def write(self, b): 1392 return self.writer.write(b) 1393 1394 def peek(self, size=0): 1395 return self.reader.peek(size) 1396 1397 def read1(self, size=-1): 1398 return self.reader.read1(size) 1399 1400 def readinto1(self, b): 1401 return self.reader.readinto1(b) 1402 1403 def readable(self): 1404 return self.reader.readable() 1405 1406 def writable(self): 1407 return self.writer.writable() 1408 1409 def flush(self): 1410 return self.writer.flush() 1411 1412 def close(self): 1413 try: 1414 self.writer.close() 1415 finally: 1416 self.reader.close() 1417 1418 def isatty(self): 1419 return 
self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise OSError("seek() returned invalid position")
        return pos

    def tell(self):
        # Pending writes take precedence over buffered read-ahead.
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    # Read methods flush pending writes first so reads see current data.

    def read(self, size=None):
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class FileIO(RawIOBase):
    # Default (class-level) state; instance attributes shadow these once
    # __init__ succeeds.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None   # tri-state: None = not yet probed
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending. The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing. A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        # Translate the mode string into readable/writable flags and
        # os.open() flag bits.
        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Track a descriptor we opened ourselves so it can be closed if
        # anything below fails.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        # Emit a ResourceWarning if the file was never closed explicitly.
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        # FileIO objects wrap an OS resource and cannot be pickled.
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most
size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available. Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # Size the first read to the remaining file length (+1 so a
            # single read can also observe EOF); unseekable files fall
            # back to DEFAULT_BUFFER_SIZE.
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # The file grew while reading; enlarge the target size
                # geometrically.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto()."""
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned. In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument offset is a byte count. Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int. Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        The current file position is changed to the value of size.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations. close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the result.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no public constructor.
    """

    def read(self, size=-1):
        """Read at most size characters from stream, where size is an int.

        Read from underlying buffer until we have size characters or we hit EOF.
        If size is negative or omitted, read until EOF.

        Returns a string.
1858 """ 1859 self._unsupported("read") 1860 1861 def write(self, s): 1862 """Write string s to stream and returning an int.""" 1863 self._unsupported("write") 1864 1865 def truncate(self, pos=None): 1866 """Truncate size to pos, where pos is an int.""" 1867 self._unsupported("truncate") 1868 1869 def readline(self): 1870 """Read until newline or EOF. 1871 1872 Returns an empty string if EOF is hit immediately. 1873 """ 1874 self._unsupported("readline") 1875 1876 def detach(self): 1877 """ 1878 Separate the underlying buffer from the TextIOBase and return it. 1879 1880 After the underlying buffer has been detached, the TextIO is in an 1881 unusable state. 1882 """ 1883 self._unsupported("detach") 1884 1885 @property 1886 def encoding(self): 1887 """Subclasses should override.""" 1888 return None 1889 1890 @property 1891 def newlines(self): 1892 """Line endings translated so far. 1893 1894 Only line endings translated during reading are considered. 1895 1896 Subclasses should override. 1897 """ 1898 return None 1899 1900 @property 1901 def errors(self): 1902 """Error setting of the decoder or encoder. 1903 1904 Subclasses should override.""" 1905 return None 1906 1907io.TextIOBase.register(TextIOBase) 1908 1909 1910class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 1911 r"""Codec used when reading a file in universal newlines mode. It wraps 1912 another incremental decoder, translating \r\n and \r into \n. It also 1913 records the types of newlines encountered. When used with 1914 translate=False, it ensures that the newline sequence is returned in 1915 one piece. 
1916 """ 1917 def __init__(self, decoder, translate, errors='strict'): 1918 codecs.IncrementalDecoder.__init__(self, errors=errors) 1919 self.translate = translate 1920 self.decoder = decoder 1921 self.seennl = 0 1922 self.pendingcr = False 1923 1924 def decode(self, input, final=False): 1925 # decode input (with the eventual \r from a previous pass) 1926 if self.decoder is None: 1927 output = input 1928 else: 1929 output = self.decoder.decode(input, final=final) 1930 if self.pendingcr and (output or final): 1931 output = "\r" + output 1932 self.pendingcr = False 1933 1934 # retain last \r even when not translating data: 1935 # then readline() is sure to get \r\n in one pass 1936 if output.endswith("\r") and not final: 1937 output = output[:-1] 1938 self.pendingcr = True 1939 1940 # Record which newlines are read 1941 crlf = output.count('\r\n') 1942 cr = output.count('\r') - crlf 1943 lf = output.count('\n') - crlf 1944 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1945 | (crlf and self._CRLF) 1946 1947 if self.translate: 1948 if crlf: 1949 output = output.replace("\r\n", "\n") 1950 if cr: 1951 output = output.replace("\r", "\n") 1952 1953 return output 1954 1955 def getstate(self): 1956 if self.decoder is None: 1957 buf = b"" 1958 flag = 0 1959 else: 1960 buf, flag = self.decoder.getstate() 1961 flag <<= 1 1962 if self.pendingcr: 1963 flag |= 1 1964 return buf, flag 1965 1966 def setstate(self, state): 1967 buf, flag = state 1968 self.pendingcr = bool(flag & 1) 1969 if self.decoder is not None: 1970 self.decoder.setstate((buf, flag >> 1)) 1971 1972 def reset(self): 1973 self.seennl = 0 1974 self.pendingcr = False 1975 if self.decoder is not None: 1976 self.decoder.reset() 1977 1978 _LF = 1 1979 _CR = 2 1980 _CRLF = 4 1981 1982 @property 1983 def newlines(self): 1984 return (None, 1985 "\n", 1986 "\r", 1987 ("\r", "\n"), 1988 "\r\n", 1989 ("\n", "\r\n"), 1990 ("\r", "\r\n"), 1991 ("\r", "\n", "\r\n") 1992 )[self.seennl] 1993 1994 1995class 
TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding(False).

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
    handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 2048

    # Initialize _buffer as soon as possible since it's used by __del__()
    # which calls close()
    _buffer = None

    # The write_through argument has no effect here since this
    # implementation always writes through. The argument is present only
    # so that the signature can match the signature of the C version.
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False, write_through=False):
        self._check_newline(newline)
        encoding = text_encoding(encoding)

        if encoding == "locale":
            # Prefer the device encoding for streams attached to a console.
            try:
                encoding = os.device_encoding(buffer.fileno()) or "locale"
            except (AttributeError, UnsupportedOperation):
                pass

        if encoding == "locale":
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "utf-8"
            else:
                encoding = locale.getpreferredencoding(False)

        if not isinstance(encoding, str):
            raise ValueError("invalid encoding: %r" % encoding)

        if not codecs.lookup(encoding)._is_text_encoding:
            msg = ("%r is not a text encoding; "
                   "use codecs.open() to handle arbitrary codecs")
            raise LookupError(msg % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, str):
                raise ValueError("invalid errors: %r" % errors)
            if _CHECK_ERRORS:
                codecs.lookup_error(errors)

        self._buffer = buffer
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()
        self._has_read1 = hasattr(self.buffer, 'read1')
        self._configure(encoding, errors, newline,
                        line_buffering, write_through)

    def _check_newline(self, newline):
        # Validate the newline argument shared by __init__/reconfigure.
        if newline is not None and not isinstance(newline, str):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))

    def _configure(self, encoding=None, errors=None, newline=None,
                   line_buffering=False, write_through=False):
        # (Re)initialize the codec-related state; called from __init__
        # and reconfigure().
        self._encoding = encoding
        self._errors = errors
        self._encoder = None
        self._decoder = None
        self._b2cratio = 0.0

        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep

        self._line_buffering = line_buffering
        self._write_through = write_through

        # don't write a BOM in the middle of a file
        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point. We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        result = "<{}.{}".format(self.__class__.__module__,
                                 self.__class__.__qualname__)
        try:
            name = self.name
        except AttributeError:
            pass
        else:
            result += " name={0!r}".format(name)
        try:
            mode = self.mode
        except AttributeError:
            pass
        else:
            result += " mode={0!r}".format(mode)
        return result + " encoding={0!r}>".format(self.encoding)

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    @property
    def write_through(self):
        return self._write_through

    @property
    def buffer(self):
        return self._buffer

    def reconfigure(self, *,
                    encoding=None, errors=None, newline=Ellipsis,
                    line_buffering=None, write_through=None):
        """Reconfigure the text stream with new parameters.

        This also flushes the stream.
        """
        # Ellipsis is the "not given" sentinel for newline because None
        # is a meaningful value (universal newlines).
        if (self._decoder is not None
                and (encoding is not None or errors is not None
                     or newline is not Ellipsis)):
            raise UnsupportedOperation(
                "It is not possible to set the encoding or newline of stream "
                "after the first read")

        if errors is None:
            if encoding is None:
                errors = self._errors
            else:
                errors = 'strict'
        elif not isinstance(errors, str):
            raise TypeError("invalid errors: %r" % errors)

        if encoding is None:
            encoding = self._encoding
        else:
            if not isinstance(encoding, str):
                raise TypeError("invalid encoding: %r" % encoding)

        if newline is Ellipsis:
            newline = self._readnl
        self._check_newline(newline)

        if line_buffering is None:
            line_buffering = self.line_buffering
        if write_through is None:
            write_through = self.write_through

        self.flush()
        self._configure(encoding, errors, newline,
                        line_buffering, write_through)

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        if self.buffer is not None and not self.closed:
            try:
                self.flush()
            finally:
                self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        'Write data, where s is a str'
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, str):
            raise TypeError("can't write %s to text stream" %
s.__class__.__name__) 2236 length = len(s) 2237 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2238 if haslf and self._writetranslate and self._writenl != "\n": 2239 s = s.replace("\n", self._writenl) 2240 encoder = self._encoder or self._get_encoder() 2241 # XXX What if we were just reading? 2242 b = encoder.encode(s) 2243 self.buffer.write(b) 2244 if self._line_buffering and (haslf or "\r" in s): 2245 self.flush() 2246 self._set_decoded_chars('') 2247 self._snapshot = None 2248 if self._decoder: 2249 self._decoder.reset() 2250 return length 2251 2252 def _get_encoder(self): 2253 make_encoder = codecs.getincrementalencoder(self._encoding) 2254 self._encoder = make_encoder(self._errors) 2255 return self._encoder 2256 2257 def _get_decoder(self): 2258 make_decoder = codecs.getincrementaldecoder(self._encoding) 2259 decoder = make_decoder(self._errors) 2260 if self._readuniversal: 2261 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2262 self._decoder = decoder 2263 return decoder 2264 2265 # The following three methods implement an ADT for _decoded_chars. 2266 # Text returned from the decoder is buffered here until the client 2267 # requests it by calling our read() or readline() method. 
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer.

        Returns up to *n* characters (all remaining ones if n is None)
        and advances the consumption offset accordingly.
        """
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer by *n* already-consumed chars."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        if self._has_read1:
            # read1() avoids blocking for more data than is ready.
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        else:
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
        eof = not input_chunk
        decoded_chars = self._decoder.decode(input_chunk, eof)
        self._set_decoded_chars(decoded_chars)
        if decoded_chars:
            # Track bytes-per-char so tell() can estimate byte offsets.
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
        else:
            self._b2cratio = 0.0

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=False, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        # The five fields are packed into 64-bit lanes of one big integer.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: peel off one 64-bit lane at a time.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip

    def tell(self):
        """Return an opaque cookie from which seek() can restore the
        current logical position (see _pack_cookie for its contents)."""
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Fast search for an acceptable start point, close to our
            # current pos.
            # Rationale: calling decoder.decode() has a large overhead
            # regardless of chunk size; we want the number of such calls to
            # be O(1) in most situations (common decoders, sensible input).
            # Actually, it will be exactly 1 for fixed-size codecs (all
            # 8-bit codecs, also UTF-16 and UTF-32).
            skip_bytes = int(self._b2cratio * chars_to_skip)
            skip_back = 1
            assert skip_bytes <= len(next_input)
            while skip_bytes > 0:
                decoder.setstate((b'', dec_flags))
                # Decode up to temptative start point
                n = len(decoder.decode(next_input[:skip_bytes]))
                if n <= chars_to_skip:
                    b, d = decoder.getstate()
                    if not b:
                        # Before pos and no bytes buffered in decoder => OK
                        dec_flags = d
                        chars_to_skip -= n
                        break
                    # Skip back by buffered amount and reset heuristic
                    skip_bytes -= len(b)
                    skip_back = 1
                else:
                    # We're too far ahead, skip back a bit
                    skip_bytes -= skip_back
                    skip_back = skip_back * 2
            else:
                # Loop exhausted without finding a safe point: restart
                # the walk from the snapshot itself.
                skip_bytes = 0
                decoder.setstate((b'', dec_flags))

            # Note our initial start point.
            start_pos = position + skip_bytes
            start_flags = dec_flags
            if chars_to_skip == 0:
                # We haven't moved from the start point.
                return self._pack_cookie(start_pos, start_flags)

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            bytes_fed = 0
            need_eof = False
            # Chars decoded since `start_pos`
            chars_decoded = 0
            for i in range(skip_bytes, len(next_input)):
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_input[i:i+1]))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = True
                if chars_decoded < chars_to_skip:
                    raise OSError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Always restore the decoder: tell() must not disturb reading.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate the underlying buffer at *pos* (default: current tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def detach(self):
        """Separate and return the binary buffer; this wrapper becomes unusable."""
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self._buffer
        self._buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Seek to a position previously returned by tell() (whence=0),
        or perform a zero-offset relative/end seek."""
        def _reset_encoder(position):
            """Reset the encoder (merely useful for proper BOM handling)"""
            try:
                encoder = self._encoder or self._get_encoder()
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
            else:
                if position != 0:
                    # Non-zero position: suppress any further BOM output.
                    encoder.setstate(0)
                else:
                    encoder.reset()

        if self.closed:
            # NOTE(review): message says "tell" but this is seek(); the C
            # implementation raises "seek of closed file" -- confirm
            # before changing the string (it is observable behavior).
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if whence == SEEK_CUR:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        elif whence == SEEK_END:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, whence)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            _reset_encoder(position)
            return position
        if whence != 0:
            raise ValueError("unsupported whence (%r)" % (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        _reset_encoder(cookie)
        return cookie

    def read(self, size=None):
        """Read and return at most *size* characters (all of them if
        size is None or negative)."""
        self._checkReadable()
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        decoder = self._decoder or self._get_decoder()
        if size < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result

    def __next__(self):
        # Iteration disables tell() (see tell()'s OSError) until EOF.
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, size=None):
        """Read and return one line, up to *size* characters if given."""
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if size >= 0 and len(line) >= size:
                endpos = size  # reached length size
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if size >= 0 and endpos > size:
            endpos = size  # don't exceed size

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        # Newline kinds seen so far, as tracked by IncrementalNewlineDecoder.
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        # surrogatepass lets lone surrogates round-trip through the
        # in-memory utf-8 buffer.
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="surrogatepass",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is not None:
            if not isinstance(initial_value, str):
                raise TypeError("initial_value must be str or None, not {0}"
                                .format(type(initial_value).__name__))
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire buffer contents as a str, without moving
        the current position."""
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            # Restore decoder state so in-progress reads are unaffected.
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # The utf-8/surrogatepass codec is an implementation detail.
        return None

    @property
    def encoding(self):
        # The utf-8/surrogatepass codec is an implementation detail.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")