1#!/usr/local/bin/python3.8 2#------------------------------------------------------------------- 3# tarfile.py 4#------------------------------------------------------------------- 5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de> 6# All rights reserved. 7# 8# Permission is hereby granted, free of charge, to any person 9# obtaining a copy of this software and associated documentation 10# files (the "Software"), to deal in the Software without 11# restriction, including without limitation the rights to use, 12# copy, modify, merge, publish, distribute, sublicense, and/or sell 13# copies of the Software, and to permit persons to whom the 14# Software is furnished to do so, subject to the following 15# conditions: 16# 17# The above copyright notice and this permission notice shall be 18# included in all copies or substantial portions of the Software. 19# 20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27# OTHER DEALINGS IN THE SOFTWARE. 28# 29"""Read from and write to tar format archives. 30""" 31 32version = "0.9.0" 33__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" 34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." 
#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re

# pwd/grp are POSIX-only; fall back to None so the module still
# imports on platforms (e.g. Windows) that lack them.
try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None

# os.symlink on Windows prior to 6.0 raises NotImplementedError
symlink_exception = (AttributeError, NotImplementedError)
try:
    # OSError (winerror=1314) will be raised if the caller does not hold the
    # SeCreateSymbolicLinkPrivilege privilege
    symlink_exception += (OSError,)
except NameError:
    pass

# from tarfile import *
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]

#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()

#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------

def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

       The result is truncated to `length' bytes and padded with NULs
       up to exactly `length' bytes.
    """
    s = s.encode(encoding, errors)
    return s[:length] + (length - len(s)) * NUL

def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

       Everything from the first NUL byte on is discarded.
    """
    p = s.find(b"\0")
    if p != -1:
        s = s[:p]
    return s.decode(encoding, errors)

def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256 encoding: big-endian digits after the marker byte;
        # a 0o377 marker means the value is negative (two's complement).
        n = 0
        for i in range(len(s) - 1):
            n <<= 8
            n += s[i + 1]
        if s[0] == 0o377:
            n = -(256 ** (len(s) - 1) - n)
    else:
        # Plain POSIX octal digits terminated by a NUL byte.
        try:
            s = nts(s, "ascii", "strict")
            n = int(s.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n

def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.
    """
    # 148 bytes before the chksum field, 8 skipped chksum bytes
    # (accounted for by the constant 256 == 8 * ord(" ")), 356 after.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.
       Raise `exception' if fewer than `length' bytes are available.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return

def _safe_print(s):
    # Round-trip through stdout's encoding so unencodable characters are
    # backslash-escaped instead of raising.
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
307 """ 308 309 def __init__(self, name, mode): 310 mode = { 311 "r": os.O_RDONLY, 312 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 313 }[mode] 314 if hasattr(os, "O_BINARY"): 315 mode |= os.O_BINARY 316 self.fd = os.open(name, mode, 0o666) 317 318 def close(self): 319 os.close(self.fd) 320 321 def read(self, size): 322 return os.read(self.fd, size) 323 324 def write(self, s): 325 os.write(self.fd, s) 326 327class _Stream: 328 """Class that serves as an adapter between TarFile and 329 a stream-like object. The stream-like object only 330 needs to have a read() or write() method and is accessed 331 blockwise. Use of gzip or bzip2 compression is possible. 332 A stream-like object could be for example: sys.stdin, 333 sys.stdout, a socket, a tape device etc. 334 335 _Stream is intended to be used only internally. 336 """ 337 338 def __init__(self, name, mode, comptype, fileobj, bufsize): 339 """Construct a _Stream object. 340 """ 341 self._extfileobj = True 342 if fileobj is None: 343 fileobj = _LowLevelFile(name, mode) 344 self._extfileobj = False 345 346 if comptype == '*': 347 # Enable transparent compression detection for the 348 # stream interface 349 fileobj = _StreamProxy(fileobj) 350 comptype = fileobj.getcomptype() 351 352 self.name = name or "" 353 self.mode = mode 354 self.comptype = comptype 355 self.fileobj = fileobj 356 self.bufsize = bufsize 357 self.buf = b"" 358 self.pos = 0 359 self.closed = False 360 361 try: 362 if comptype == "gz": 363 try: 364 import zlib 365 except ImportError: 366 raise CompressionError("zlib module is not available") 367 self.zlib = zlib 368 self.crc = zlib.crc32(b"") 369 if mode == "r": 370 self._init_read_gz() 371 self.exception = zlib.error 372 else: 373 self._init_write_gz() 374 375 elif comptype == "bz2": 376 try: 377 import bz2 378 except ImportError: 379 raise CompressionError("bz2 module is not available") 380 if mode == "r": 381 self.dbuf = b"" 382 self.cmp = bz2.BZ2Decompressor() 383 self.exception = OSError 384 else: 
385 self.cmp = bz2.BZ2Compressor() 386 387 elif comptype == "xz": 388 try: 389 import lzma 390 except ImportError: 391 raise CompressionError("lzma module is not available") 392 if mode == "r": 393 self.dbuf = b"" 394 self.cmp = lzma.LZMADecompressor() 395 self.exception = lzma.LZMAError 396 else: 397 self.cmp = lzma.LZMACompressor() 398 399 elif comptype != "tar": 400 raise CompressionError("unknown compression type %r" % comptype) 401 402 except: 403 if not self._extfileobj: 404 self.fileobj.close() 405 self.closed = True 406 raise 407 408 def __del__(self): 409 if hasattr(self, "closed") and not self.closed: 410 self.close() 411 412 def _init_write_gz(self): 413 """Initialize for writing with gzip compression. 414 """ 415 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, 416 -self.zlib.MAX_WBITS, 417 self.zlib.DEF_MEM_LEVEL, 418 0) 419 timestamp = struct.pack("<L", int(time.time())) 420 self.__write(b"\037\213\010\010" + timestamp + b"\002\377") 421 if self.name.endswith(".gz"): 422 self.name = self.name[:-3] 423 # Honor "directory components removed" from RFC1952 424 self.name = os.path.basename(self.name) 425 # RFC1952 says we must use ISO-8859-1 for the FNAME field. 426 self.__write(self.name.encode("iso-8859-1", "replace") + NUL) 427 428 def write(self, s): 429 """Write string s to the stream. 430 """ 431 if self.comptype == "gz": 432 self.crc = self.zlib.crc32(s, self.crc) 433 self.pos += len(s) 434 if self.comptype != "tar": 435 s = self.cmp.compress(s) 436 self.__write(s) 437 438 def __write(self, s): 439 """Write string s to the stream if a whole new block 440 is ready to be written. 441 """ 442 self.buf += s 443 while len(self.buf) > self.bufsize: 444 self.fileobj.write(self.buf[:self.bufsize]) 445 self.buf = self.buf[self.bufsize:] 446 447 def close(self): 448 """Close the _Stream object. No operation should be 449 done on it afterwards. 
450 """ 451 if self.closed: 452 return 453 454 self.closed = True 455 try: 456 if self.mode == "w" and self.comptype != "tar": 457 self.buf += self.cmp.flush() 458 459 if self.mode == "w" and self.buf: 460 self.fileobj.write(self.buf) 461 self.buf = b"" 462 if self.comptype == "gz": 463 self.fileobj.write(struct.pack("<L", self.crc)) 464 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF)) 465 finally: 466 if not self._extfileobj: 467 self.fileobj.close() 468 469 def _init_read_gz(self): 470 """Initialize for reading a gzip compressed fileobj. 471 """ 472 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 473 self.dbuf = b"" 474 475 # taken from gzip.GzipFile with some alterations 476 if self.__read(2) != b"\037\213": 477 raise ReadError("not a gzip file") 478 if self.__read(1) != b"\010": 479 raise CompressionError("unsupported compression method") 480 481 flag = ord(self.__read(1)) 482 self.__read(6) 483 484 if flag & 4: 485 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) 486 self.read(xlen) 487 if flag & 8: 488 while True: 489 s = self.__read(1) 490 if not s or s == NUL: 491 break 492 if flag & 16: 493 while True: 494 s = self.__read(1) 495 if not s or s == NUL: 496 break 497 if flag & 2: 498 self.__read(2) 499 500 def tell(self): 501 """Return the stream's file pointer position. 502 """ 503 return self.pos 504 505 def seek(self, pos=0): 506 """Set the stream's file pointer to pos. Negative seeking 507 is forbidden. 508 """ 509 if pos - self.pos >= 0: 510 blocks, remainder = divmod(pos - self.pos, self.bufsize) 511 for i in range(blocks): 512 self.read(self.bufsize) 513 self.read(remainder) 514 else: 515 raise StreamError("seeking backwards is not allowed") 516 return self.pos 517 518 def read(self, size): 519 """Return the next size number of bytes from the stream.""" 520 assert size is not None 521 buf = self._read(size) 522 self.pos += len(buf) 523 return buf 524 525 def _read(self, size): 526 """Return size bytes from the stream. 
527 """ 528 if self.comptype == "tar": 529 return self.__read(size) 530 531 c = len(self.dbuf) 532 t = [self.dbuf] 533 while c < size: 534 # Skip underlying buffer to avoid unaligned double buffering. 535 if self.buf: 536 buf = self.buf 537 self.buf = b"" 538 else: 539 buf = self.fileobj.read(self.bufsize) 540 if not buf: 541 break 542 try: 543 buf = self.cmp.decompress(buf) 544 except self.exception: 545 raise ReadError("invalid compressed data") 546 t.append(buf) 547 c += len(buf) 548 t = b"".join(t) 549 self.dbuf = t[size:] 550 return t[:size] 551 552 def __read(self, size): 553 """Return size bytes from stream. If internal buffer is empty, 554 read another block from the stream. 555 """ 556 c = len(self.buf) 557 t = [self.buf] 558 while c < size: 559 buf = self.fileobj.read(self.bufsize) 560 if not buf: 561 break 562 t.append(buf) 563 c += len(buf) 564 t = b"".join(t) 565 self.buf = t[size:] 566 return t[:size] 567# class _Stream 568 569class _StreamProxy(object): 570 """Small proxy class that enables transparent compression 571 detection for the Stream interface (mode 'r|*'). 572 """ 573 574 def __init__(self, fileobj): 575 self.fileobj = fileobj 576 self.buf = self.fileobj.read(BLOCKSIZE) 577 578 def read(self, size): 579 self.read = self.fileobj.read 580 return self.buf 581 582 def getcomptype(self): 583 if self.buf.startswith(b"\x1f\x8b\x08"): 584 return "gz" 585 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY": 586 return "bz2" 587 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")): 588 return "xz" 589 else: 590 return "tar" 591 592 def close(self): 593 self.fileobj.close() 594# class StreamProxy 595 596#------------------------ 597# Extraction file object 598#------------------------ 599class _FileInFile(object): 600 """A thin wrapper around an existing file object that 601 provides a part of its data as an individual file 602 object. 
603 """ 604 605 def __init__(self, fileobj, offset, size, blockinfo=None): 606 self.fileobj = fileobj 607 self.offset = offset 608 self.size = size 609 self.position = 0 610 self.name = getattr(fileobj, "name", None) 611 self.closed = False 612 613 if blockinfo is None: 614 blockinfo = [(0, size)] 615 616 # Construct a map with data and zero blocks. 617 self.map_index = 0 618 self.map = [] 619 lastpos = 0 620 realpos = self.offset 621 for offset, size in blockinfo: 622 if offset > lastpos: 623 self.map.append((False, lastpos, offset, None)) 624 self.map.append((True, offset, offset + size, realpos)) 625 realpos += size 626 lastpos = offset + size 627 if lastpos < self.size: 628 self.map.append((False, lastpos, self.size, None)) 629 630 def flush(self): 631 pass 632 633 def readable(self): 634 return True 635 636 def writable(self): 637 return False 638 639 def seekable(self): 640 return self.fileobj.seekable() 641 642 def tell(self): 643 """Return the current file position. 644 """ 645 return self.position 646 647 def seek(self, position, whence=io.SEEK_SET): 648 """Seek to a position in the file. 649 """ 650 if whence == io.SEEK_SET: 651 self.position = min(max(position, 0), self.size) 652 elif whence == io.SEEK_CUR: 653 if position < 0: 654 self.position = max(self.position + position, 0) 655 else: 656 self.position = min(self.position + position, self.size) 657 elif whence == io.SEEK_END: 658 self.position = max(min(self.size + position, self.size), 0) 659 else: 660 raise ValueError("Invalid argument") 661 return self.position 662 663 def read(self, size=None): 664 """Read data from the file. 
665 """ 666 if size is None: 667 size = self.size - self.position 668 else: 669 size = min(size, self.size - self.position) 670 671 buf = b"" 672 while size > 0: 673 while True: 674 data, start, stop, offset = self.map[self.map_index] 675 if start <= self.position < stop: 676 break 677 else: 678 self.map_index += 1 679 if self.map_index == len(self.map): 680 self.map_index = 0 681 length = min(size, stop - self.position) 682 if data: 683 self.fileobj.seek(offset + (self.position - start)) 684 b = self.fileobj.read(length) 685 if len(b) != length: 686 raise ReadError("unexpected end of data") 687 buf += b 688 else: 689 buf += NUL * length 690 size -= length 691 self.position += length 692 return buf 693 694 def readinto(self, b): 695 buf = self.read(len(b)) 696 b[:len(buf)] = buf 697 return len(buf) 698 699 def close(self): 700 self.closed = True 701#class _FileInFile 702 703class ExFileObject(io.BufferedReader): 704 705 def __init__(self, tarfile, tarinfo): 706 fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data, 707 tarinfo.size, tarinfo.sparse) 708 super().__init__(fileobj) 709#class ExFileObject 710 711#------------------ 712# Exported Classes 713#------------------ 714class TarInfo(object): 715 """Informational class which holds the details about an 716 archive member given by a tar header block. 717 TarInfo objects are returned by TarFile.getmember(), 718 TarFile.getmembers() and TarFile.gettarinfo() and are 719 usually created internally. 720 """ 721 722 __slots__ = dict( 723 name = 'Name of the archive member.', 724 mode = 'Permission bits.', 725 uid = 'User ID of the user who originally stored this member.', 726 gid = 'Group ID of the user who originally stored this member.', 727 size = 'Size in bytes.', 728 mtime = 'Time of last modification.', 729 chksum = 'Header checksum.', 730 type = ('File type. 
type is usually one of these constants: ' 731 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, ' 732 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'), 733 linkname = ('Name of the target file name, which is only present ' 734 'in TarInfo objects of type LNKTYPE and SYMTYPE.'), 735 uname = 'User name.', 736 gname = 'Group name.', 737 devmajor = 'Device major number.', 738 devminor = 'Device minor number.', 739 offset = 'The tar header starts here.', 740 offset_data = "The file's data starts here.", 741 pax_headers = ('A dictionary containing key-value pairs of an ' 742 'associated pax extended header.'), 743 sparse = 'Sparse member information.', 744 tarfile = None, 745 _sparse_structs = None, 746 _link_target = None, 747 ) 748 749 def __init__(self, name=""): 750 """Construct a TarInfo object. name is the optional name 751 of the member. 752 """ 753 self.name = name # member name 754 self.mode = 0o644 # file permissions 755 self.uid = 0 # user id 756 self.gid = 0 # group id 757 self.size = 0 # file size 758 self.mtime = 0 # modification time 759 self.chksum = 0 # header checksum 760 self.type = REGTYPE # member type 761 self.linkname = "" # link name 762 self.uname = "" # user name 763 self.gname = "" # group name 764 self.devmajor = 0 # device major number 765 self.devminor = 0 # device minor number 766 767 self.offset = 0 # the tar header starts here 768 self.offset_data = 0 # the file's data starts here 769 770 self.sparse = None # sparse member information 771 self.pax_headers = {} # pax header information 772 773 @property 774 def path(self): 775 'In pax headers, "name" is called "path".' 776 return self.name 777 778 @path.setter 779 def path(self, name): 780 self.name = name 781 782 @property 783 def linkpath(self): 784 'In pax headers, "linkname" is called "linkpath".' 
785 return self.linkname 786 787 @linkpath.setter 788 def linkpath(self, linkname): 789 self.linkname = linkname 790 791 def __repr__(self): 792 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 793 794 def get_info(self): 795 """Return the TarInfo's attributes as a dictionary. 796 """ 797 info = { 798 "name": self.name, 799 "mode": self.mode & 0o7777, 800 "uid": self.uid, 801 "gid": self.gid, 802 "size": self.size, 803 "mtime": self.mtime, 804 "chksum": self.chksum, 805 "type": self.type, 806 "linkname": self.linkname, 807 "uname": self.uname, 808 "gname": self.gname, 809 "devmajor": self.devmajor, 810 "devminor": self.devminor 811 } 812 813 if info["type"] == DIRTYPE and not info["name"].endswith("/"): 814 info["name"] += "/" 815 816 return info 817 818 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): 819 """Return a tar header as a string of 512 byte blocks. 820 """ 821 info = self.get_info() 822 823 if format == USTAR_FORMAT: 824 return self.create_ustar_header(info, encoding, errors) 825 elif format == GNU_FORMAT: 826 return self.create_gnu_header(info, encoding, errors) 827 elif format == PAX_FORMAT: 828 return self.create_pax_header(info, encoding) 829 else: 830 raise ValueError("invalid format") 831 832 def create_ustar_header(self, info, encoding, errors): 833 """Return the object as a ustar header block. 834 """ 835 info["magic"] = POSIX_MAGIC 836 837 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: 838 raise ValueError("linkname is too long") 839 840 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: 841 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors) 842 843 return self._create_header(info, USTAR_FORMAT, encoding, errors) 844 845 def create_gnu_header(self, info, encoding, errors): 846 """Return the object as a GNU header block sequence. 
847 """ 848 info["magic"] = GNU_MAGIC 849 850 buf = b"" 851 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: 852 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) 853 854 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: 855 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) 856 857 return buf + self._create_header(info, GNU_FORMAT, encoding, errors) 858 859 def create_pax_header(self, info, encoding): 860 """Return the object as a ustar header block. If it cannot be 861 represented this way, prepend a pax extended header sequence 862 with supplement information. 863 """ 864 info["magic"] = POSIX_MAGIC 865 pax_headers = self.pax_headers.copy() 866 867 # Test string fields for values that exceed the field length or cannot 868 # be represented in ASCII encoding. 869 for name, hname, length in ( 870 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), 871 ("uname", "uname", 32), ("gname", "gname", 32)): 872 873 if hname in pax_headers: 874 # The pax header has priority. 875 continue 876 877 # Try to encode the string as ASCII. 878 try: 879 info[name].encode("ascii", "strict") 880 except UnicodeEncodeError: 881 pax_headers[hname] = info[name] 882 continue 883 884 if len(info[name]) > length: 885 pax_headers[hname] = info[name] 886 887 # Test number fields for values that exceed the field limit or values 888 # that like to be stored as float. 889 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 890 if name in pax_headers: 891 # The pax header has priority. Avoid overflow. 892 info[name] = 0 893 continue 894 895 val = info[name] 896 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): 897 pax_headers[name] = str(val) 898 info[name] = 0 899 900 # Create a pax extended header if necessary. 
901 if pax_headers: 902 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) 903 else: 904 buf = b"" 905 906 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") 907 908 @classmethod 909 def create_pax_global_header(cls, pax_headers): 910 """Return the object as a pax global header block sequence. 911 """ 912 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") 913 914 def _posix_split_name(self, name, encoding, errors): 915 """Split a name longer than 100 chars into a prefix 916 and a name part. 917 """ 918 components = name.split("/") 919 for i in range(1, len(components)): 920 prefix = "/".join(components[:i]) 921 name = "/".join(components[i:]) 922 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ 923 len(name.encode(encoding, errors)) <= LENGTH_NAME: 924 break 925 else: 926 raise ValueError("name is too long") 927 928 return prefix, name 929 930 @staticmethod 931 def _create_header(info, format, encoding, errors): 932 """Return a header block. info is a dictionary with file 933 information, format must be one of the *_FORMAT constants. 
934 """ 935 parts = [ 936 stn(info.get("name", ""), 100, encoding, errors), 937 itn(info.get("mode", 0) & 0o7777, 8, format), 938 itn(info.get("uid", 0), 8, format), 939 itn(info.get("gid", 0), 8, format), 940 itn(info.get("size", 0), 12, format), 941 itn(info.get("mtime", 0), 12, format), 942 b" ", # checksum field 943 info.get("type", REGTYPE), 944 stn(info.get("linkname", ""), 100, encoding, errors), 945 info.get("magic", POSIX_MAGIC), 946 stn(info.get("uname", ""), 32, encoding, errors), 947 stn(info.get("gname", ""), 32, encoding, errors), 948 itn(info.get("devmajor", 0), 8, format), 949 itn(info.get("devminor", 0), 8, format), 950 stn(info.get("prefix", ""), 155, encoding, errors) 951 ] 952 953 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) 954 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 955 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:] 956 return buf 957 958 @staticmethod 959 def _create_payload(payload): 960 """Return the string payload filled with zero bytes 961 up to the next 512 byte border. 962 """ 963 blocks, remainder = divmod(len(payload), BLOCKSIZE) 964 if remainder > 0: 965 payload += (BLOCKSIZE - remainder) * NUL 966 return payload 967 968 @classmethod 969 def _create_gnu_long_header(cls, name, type, encoding, errors): 970 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 971 for name. 972 """ 973 name = name.encode(encoding, errors) + NUL 974 975 info = {} 976 info["name"] = "././@LongLink" 977 info["type"] = type 978 info["size"] = len(name) 979 info["magic"] = GNU_MAGIC 980 981 # create extended header + name blocks. 982 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ 983 cls._create_payload(name) 984 985 @classmethod 986 def _create_pax_generic_header(cls, pax_headers, type, encoding): 987 """Return a POSIX.1-2008 extended or global header sequence 988 that contains a list of keyword, value pairs. The values 989 must be strings. 
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            # The decimal length field of a pax record counts its own digits
            # as well, so iterate until the total record length stabilizes.
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # The slice offsets below follow the fixed ustar header layout.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each 512 byte extension block holds up to 21 (offset, numbytes)
            # pairs of 12 octal digits each; byte 504 flags another block.
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            # A zero length field would make no progress and loop forever.
            if length == 0:
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        # GNU.sparse.map is a comma-separated list of alternating
        # offset and numbytes decimal values.
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        # The sparse map precedes the file data: first a line with the
        # number of (offset, numbytes) pairs, then one number per line.
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.type in REGULAR_TYPES

    def isfile(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.isreg()

    def isdir(self):
        'Return True if it is a directory.'
        return self.type == DIRTYPE

    def issym(self):
        'Return True if it is a symbolic link.'
        return self.type == SYMTYPE

    def islnk(self):
        'Return True if it is a hard link.'
        return self.type == LNKTYPE

    def ischr(self):
        'Return True if it is a character device.'
        return self.type == CHRTYPE

    def isblk(self):
        'Return True if it is a block device.'
        return self.type == BLKTYPE

    def isfifo(self):
        'Return True if it is a FIFO.'
        return self.type == FIFOTYPE

    def issparse(self):
        'Return True if the member has GNU sparse extents recorded.'
        return self.sparse is not None

    def isdev(self):
        'Return True if it is one of character device, block device or FIFO.'
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo

class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                # The binary mode of fileobj takes precedence.
                self._mode = fileobj.mode
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e))

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any failure, close a file object we opened ourselves
            # before re-raising.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    #--------------------------------------------------------------------------
    # Below are the classmethods which act as alternate constructors to the
    # TarFile class. The open() method is the only one that is needed for
    # public use; it is the "super"-constructor and is able to select an
    # adequate "sub"-constructor for a particular compression using the mapping
    # from OPEN_METH.
    #
    # This concept allows one to subclass TarFile without losing the comfort of
    # the super-constructor. A sub-constructor is registered and made available
    # by adding it to the mapping in OPEN_METH.

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # The sort key puts compressed openers first (False < True), so
            # plain taropen is only tried as the last resort.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError):
                    # Rewind so the next opener sees the data from the start.
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")

    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        if mode not in ("r", "a", "w", "x"):
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        return cls(name, mode, fileobj, **kwargs)

    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available")

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError:
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file")
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available")

        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError):
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available")

        fileobj = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (LZMAError, EOFError):
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file")
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    # All *open() methods are registered here.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }

    #--------------------------------------------------------------------------
    # The public methods which TarFile provides:

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                blocks, remainder = divmod(self.offset, RECORDSIZE)
                if remainder > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.
        """
        tarinfo = self._getmember(name)
        if tarinfo is None:
            raise KeyError("filename %r not found" % name)
        return tarinfo

    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if not self._loaded:    # if we want to obtain a list of
            self._load()        # all members, we first have to
                                # scan the whole archive.
        return self.members

    def getnames(self):
        """Return the members of the archive as a list of their names. It has
           the same order as the list returned by getmembers().
        """
        return [tarinfo.name for tarinfo in self.getmembers()]

    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by `name', or
           specified as a file object `fileobj' with a file descriptor. If
           given, `arcname' specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        # The drive letter (if any) is discarded from the archive name.
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other unsupported file types are not archived.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo

    def list(self, verbose=True, *, members=None):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced. `members' is optional and must be a subset of the
           list returned by getmembers().
        """
        self._check()

        if members is None:
            members = self
        for tarinfo in members:
            if verbose:
                _safe_print(stat.filemode(tarinfo.mode))
                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
                                       tarinfo.gname or tarinfo.gid))
                if tarinfo.ischr() or tarinfo.isblk():
                    _safe_print("%10s" %
                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
                else:
                    _safe_print("%10d" % tarinfo.size)
                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
                            % time.localtime(tarinfo.mtime)[:6])

            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

            if verbose:
                if tarinfo.issym():
                    _safe_print("-> " + tarinfo.linkname)
                if tarinfo.islnk():
                    _safe_print("link to " + tarinfo.linkname)
            print()

    def add(self, name, arcname=None, recursive=True, *, filter=None):
        """Add the file `name' to the archive. `name' may be any type of file
           (directory, fifo, symbolic link, etc.). If given, `arcname'
           specifies an alternative name for the file in the archive.
           Directories are added recursively by default. This can be avoided by
           setting `recursive' to False. `filter' is a function
           that expects a TarInfo object argument and returns the changed
           TarInfo object, if it returns None the TarInfo object will be
           excluded from the archive.
        """
        self._check("awx")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Change or exclude the TarInfo object.
        if filter is not None:
            tarinfo = filter(tarinfo)
            if tarinfo is None:
                self._dbg(2, "tarfile: Excluded %r" % name)
                return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            with bltn_open(name, "rb") as f:
                self.addfile(tarinfo, f)

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                # Sort for a deterministic member order across filesystems.
                for f in sorted(os.listdir(name)):
                    self.add(os.path.join(name, f), os.path.join(arcname, f),
                            recursive, filter=filter)

        else:
            # Non-regular, non-directory members (symlinks, fifos, devices)
            # carry only a header, no payload.
            self.addfile(tarinfo)

    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
           given, it should be a binary file, and tarinfo.size bytes are read
           from it and added to the archive. You can create TarInfo objects
           directly, or by using gettarinfo().
        """
        self._check("awx")

        # Work on a copy so the caller's TarInfo object is never mutated.
        tarinfo = copy.copy(tarinfo)

        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
        self.fileobj.write(buf)
        self.offset += len(buf)
        bufsize=self.copybufsize
        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                # Pad the last partial block with NULs up to BLOCKSIZE.
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)

    def extractall(self, path=".", members=None, *, numeric_owner=False):
        """Extract all members from the archive to the current working
           directory and set owner, modification time and permissions on
           directories afterwards. `path' specifies a different directory
           to extract to. `members' is optional and must be a subset of the
           list returned by getmembers(). If `numeric_owner` is True, only
           the numbers for user/group names are used and not the names.
        """
        # NOTE(review): member names are joined onto `path` unchecked (see
        # extract()/_extract_member()); an archive with absolute or "../"
        # names can write outside `path` (CVE-2007-4559).  Only extract
        # archives from trusted sources.
        directories = []

        if members is None:
            members = self

        for tarinfo in members:
            if tarinfo.isdir():
                # Extract directories with a safe mode.
                directories.append(tarinfo)
                tarinfo = copy.copy(tarinfo)
                tarinfo.mode = 0o700
            # Do not set_attrs directories, as we will do that further down
            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                         numeric_owner=numeric_owner)

        # Reverse sort directories.
        # Deepest-first order, so times/modes of a parent directory are not
        # clobbered by extracting its children afterwards.
        directories.sort(key=lambda a: a.name)
        directories.reverse()

        # Set correct owner, mtime and filemode on directories.
        for tarinfo in directories:
            dirpath = os.path.join(path, tarinfo.name)
            try:
                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
                self.utime(tarinfo, dirpath)
                self.chmod(tarinfo, dirpath)
            except ExtractError as e:
                if self.errorlevel > 1:
                    raise
                else:
                    self._dbg(1, "tarfile: %s" % e)

    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a TarInfo object. You can
           specify a different directory using `path'. File attributes (owner,
           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
           is True, only the numbers for user/group names are used and not
           the names.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                                 set_attrs=set_attrs,
                                 numeric_owner=numeric_owner)
        except OSError as e:
            # errorlevel 0: OS errors are only logged; 1+: re-raised.
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError as e:
            # errorlevel <= 1: non-fatal extraction problems are only logged.
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)

    def extractfile(self, member):
        """Extract a member from the archive as a file object. `member' may be
           a filename or a TarInfo object. If `member' is a regular file or a
           link, an io.BufferedReader object is returned. Otherwise, None is
           returned.
        """
        self._check("r")

        if isinstance(member, str):
            tarinfo = self.getmember(member)
        else:
            tarinfo = member

        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
            # Members with unknown types are treated as regular files.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._find_link_target(tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None

    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                        numeric_owner=False):
        """Extract the TarInfo object tarinfo to a physical
           file called targetpath.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        targetpath = targetpath.rstrip("/")
        targetpath = targetpath.replace("/", os.sep)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Create directories that are not part of the archive with
            # default permissions.
            os.makedirs(upperdirs)

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch on the member type; each make*() can be overridden in a
        # subclass to customize extraction behaviour.
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        if set_attrs:
            self.chown(tarinfo, targetpath, numeric_owner)
            if not tarinfo.issym():
                # os.chmod/os.utime would act on the link target, so
                # skip them for symbolic links.
                self.chmod(tarinfo, targetpath)
                self.utime(tarinfo, targetpath)

    #--------------------------------------------------------------------------
    # Below are the different file methods. They are called via
    # _extract_member() when extract() is called. They can be replaced in a
    # subclass to implement other functionality.

    def makedir(self, tarinfo, targetpath):
        """Make a directory called targetpath.
        """
        try:
            # Use a safe mode for the directory, the real mode is set
            # later in _extract_member().
            os.mkdir(targetpath, 0o700)
        except FileExistsError:
            pass

    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        source.seek(tarinfo.offset_data)
        bufsize = self.copybufsize
        with bltn_open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Write each data run at its offset, then extend the file
                # to its full (sparse) size.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError, bufsize)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)

    def makeunknown(self, tarinfo, targetpath):
        """Make a file from a TarInfo object with an unknown type
           at targetpath.
        """
        self.makefile(tarinfo, targetpath)
        self._dbg(1, "tarfile: Unknown file type %r, " \
                     "extracted as regular file." % tarinfo.type)

    def makefifo(self, tarinfo, targetpath):
        """Make a fifo called targetpath.
        """
        if hasattr(os, "mkfifo"):
            os.mkfifo(targetpath)
        else:
            raise ExtractError("fifo not supported by system")

    def makedev(self, tarinfo, targetpath):
        """Make a character or block device called targetpath.
        """
        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
            raise ExtractError("special devices not supported by system")

        mode = tarinfo.mode
        if tarinfo.isblk():
            mode |= stat.S_IFBLK
        else:
            mode |= stat.S_IFCHR

        os.mknod(targetpath, mode,
                 os.makedev(tarinfo.devmajor, tarinfo.devminor))

    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
          (platform limitation), we try to make a copy of the referenced file
          instead of a link.
        """
        try:
            # For systems that support symbolic and hard links.
            if tarinfo.issym():
                if os.path.lexists(targetpath):
                    # Avoid FileExistsError on following os.symlink.
                    os.unlink(targetpath)
                os.symlink(tarinfo.linkname, targetpath)
            else:
                # See extract().
                if os.path.exists(tarinfo._link_target):
                    os.link(tarinfo._link_target, targetpath)
                else:
                    self._extract_member(self._find_link_target(tarinfo),
                                         targetpath)
        except symlink_exception:
            # Link creation is unsupported or not permitted here: fall back
            # to extracting a copy of the link's target from the archive.
            try:
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
            except KeyError:
                raise ExtractError("unable to resolve link inside archive")

    def chown(self, tarinfo, targetpath, numeric_owner):
        """Set owner of targetpath according to tarinfo. If numeric_owner
           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
           is False, fall back to .gid/.uid when the search based on name
           fails.
        """
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            g = tarinfo.gid
            u = tarinfo.uid
            if not numeric_owner:
                try:
                    if grp:
                        g = grp.getgrnam(tarinfo.gname)[2]
                except KeyError:
                    pass
                try:
                    if pwd:
                        u = pwd.getpwnam(tarinfo.uname)[2]
                except KeyError:
                    pass
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    os.chown(targetpath, u, g)
            except OSError:
                raise ExtractError("could not change owner")

    def chmod(self, tarinfo, targetpath):
        """Set file permissions of targetpath according to tarinfo.
        """
        try:
            os.chmod(targetpath, tarinfo.mode)
        except OSError:
            raise ExtractError("could not change mode")

    def utime(self, tarinfo, targetpath):
        """Set modification time of targetpath according to tarinfo.
        """
        if not hasattr(os, 'utime'):
            return
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except OSError:
            raise ExtractError("could not change modification time")

    #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
           TarFile is opened for reading. Return None if there is no more
           available.
        """
        self._check("ra")
        if self.firstmember is not None:
            m = self.firstmember
            self.firstmember = None
            return m

        # Advance the file pointer.
        if self.offset != self.fileobj.tell():
            # seek(offset - 1) + read(1) also verifies that the underlying
            # file really extends up to self.offset.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

        # Read the next block.
        tarinfo = None
        while True:
            try:
                tarinfo = self.tarinfo.fromtarfile(self)
            except EOFHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
            except InvalidHeaderError as e:
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: %s" % (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                elif self.offset == 0:
                    raise ReadError(str(e))
            except EmptyHeaderError:
                if self.offset == 0:
                    raise ReadError("empty file")
            except TruncatedHeaderError as e:
                if self.offset == 0:
                    raise ReadError(str(e))
            except SubsequentHeaderError as e:
                raise ReadError(str(e))
            # Header errors swallowed above (past offset 0, ignore_zeros off)
            # intentionally fall through here with tarinfo left as None,
            # which is treated as the end of the archive below.
            break

        if tarinfo is not None:
            self.members.append(tarinfo)
        else:
            self._loaded = True

        return tarinfo

    #--------------------------------------------------------------------------
    # Little helper methods:

    def _getmember(self, name, tarinfo=None, normalize=False):
        """Find an archive member by name from bottom to top.
           If tarinfo is given, it is used as the starting point.
        """
        # Ensure that all members have been loaded.
        members = self.getmembers()

        # Limit the member search list up to tarinfo.
        if tarinfo is not None:
            members = members[:members.index(tarinfo)]

        if normalize:
            name = os.path.normpath(name)

        # Search bottom-up so the most recently added member wins when an
        # archive contains the same name more than once.
        for member in reversed(members):
            if normalize:
                member_name = os.path.normpath(member.name)
            else:
                member_name = member.name

            if name == member_name:
                return member

    def _load(self):
        """Read through the entire archive file and look for readable
           members.
        """
        while True:
            tarinfo = self.next()
            if tarinfo is None:
                break
        self._loaded = True

    def _check(self, mode=None):
        """Check if TarFile is still open, and if the operation's mode
           corresponds to TarFile's mode.
        """
        if self.closed:
            raise OSError("%s is closed" % self.__class__.__name__)
        if mode is not None and self.mode not in mode:
            raise OSError("bad operation for mode %r" % self.mode)

    def _find_link_target(self, tarinfo):
        """Find the target member of a symlink or hardlink member in the
           archive.
        """
        if tarinfo.issym():
            # Always search the entire archive.
            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
            limit = None
        else:
            # Search the archive before the link, because a hard link is
            # just a reference to an already archived file.
            linkname = tarinfo.linkname
            limit = tarinfo

        member = self._getmember(linkname, tarinfo=limit, normalize=True)
        if member is None:
            raise KeyError("linkname %r not found" % linkname)
        return member

    def __iter__(self):
        """Provide an iterator object.
        """
        if self._loaded:
            # All members already read: just replay the cached list.
            yield from self.members
            return

        # Yield items using TarFile's next() method.
2420 # When all members have been read, set TarFile as _loaded. 2421 index = 0 2422 # Fix for SF #1100429: Under rare circumstances it can 2423 # happen that getmembers() is called during iteration, 2424 # which will have already exhausted the next() method. 2425 if self.firstmember is not None: 2426 tarinfo = self.next() 2427 index += 1 2428 yield tarinfo 2429 2430 while True: 2431 if index < len(self.members): 2432 tarinfo = self.members[index] 2433 elif not self._loaded: 2434 tarinfo = self.next() 2435 if not tarinfo: 2436 self._loaded = True 2437 return 2438 else: 2439 return 2440 index += 1 2441 yield tarinfo 2442 2443 def _dbg(self, level, msg): 2444 """Write debugging output to sys.stderr. 2445 """ 2446 if level <= self.debug: 2447 print(msg, file=sys.stderr) 2448 2449 def __enter__(self): 2450 self._check() 2451 return self 2452 2453 def __exit__(self, type, value, traceback): 2454 if type is None: 2455 self.close() 2456 else: 2457 # An exception occurred. We must not call close() because 2458 # it would try to write end-of-archive blocks and padding. 2459 if not self._extfileobj: 2460 self.fileobj.close() 2461 self.closed = True 2462 2463#-------------------- 2464# exported functions 2465#-------------------- 2466def is_tarfile(name): 2467 """Return True if name points to a tar archive that we 2468 are able to handle, else return False. 2469 """ 2470 try: 2471 t = open(name) 2472 t.close() 2473 return True 2474 except TarError: 2475 return False 2476 2477open = TarFile.open 2478 2479 2480def main(): 2481 import argparse 2482 2483 description = 'A simple command-line interface for tarfile module.' 
2484 parser = argparse.ArgumentParser(description=description) 2485 parser.add_argument('-v', '--verbose', action='store_true', default=False, 2486 help='Verbose output') 2487 group = parser.add_mutually_exclusive_group(required=True) 2488 group.add_argument('-l', '--list', metavar='<tarfile>', 2489 help='Show listing of a tarfile') 2490 group.add_argument('-e', '--extract', nargs='+', 2491 metavar=('<tarfile>', '<output_dir>'), 2492 help='Extract tarfile into target dir') 2493 group.add_argument('-c', '--create', nargs='+', 2494 metavar=('<name>', '<file>'), 2495 help='Create tarfile from sources') 2496 group.add_argument('-t', '--test', metavar='<tarfile>', 2497 help='Test if a tarfile is valid') 2498 args = parser.parse_args() 2499 2500 if args.test is not None: 2501 src = args.test 2502 if is_tarfile(src): 2503 with open(src, 'r') as tar: 2504 tar.getmembers() 2505 print(tar.getmembers(), file=sys.stderr) 2506 if args.verbose: 2507 print('{!r} is a tar archive.'.format(src)) 2508 else: 2509 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2510 2511 elif args.list is not None: 2512 src = args.list 2513 if is_tarfile(src): 2514 with TarFile.open(src, 'r:*') as tf: 2515 tf.list(verbose=args.verbose) 2516 else: 2517 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2518 2519 elif args.extract is not None: 2520 if len(args.extract) == 1: 2521 src = args.extract[0] 2522 curdir = os.curdir 2523 elif len(args.extract) == 2: 2524 src, curdir = args.extract 2525 else: 2526 parser.exit(1, parser.format_help()) 2527 2528 if is_tarfile(src): 2529 with TarFile.open(src, 'r:*') as tf: 2530 tf.extractall(path=curdir) 2531 if args.verbose: 2532 if curdir == '.': 2533 msg = '{!r} file is extracted.'.format(src) 2534 else: 2535 msg = ('{!r} file is extracted ' 2536 'into {!r} directory.').format(src, curdir) 2537 print(msg) 2538 else: 2539 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2540 2541 elif args.create is not None: 2542 tar_name = 
args.create.pop(0) 2543 _, ext = os.path.splitext(tar_name) 2544 compressions = { 2545 # gz 2546 '.gz': 'gz', 2547 '.tgz': 'gz', 2548 # xz 2549 '.xz': 'xz', 2550 '.txz': 'xz', 2551 # bz2 2552 '.bz2': 'bz2', 2553 '.tbz': 'bz2', 2554 '.tbz2': 'bz2', 2555 '.tb2': 'bz2', 2556 } 2557 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' 2558 tar_files = args.create 2559 2560 with TarFile.open(tar_name, tar_mode) as tf: 2561 for file_name in tar_files: 2562 tf.add(file_name) 2563 2564 if args.verbose: 2565 print('{!r} file created.'.format(tar_name)) 2566 2567if __name__ == '__main__': 2568 main() 2569