1#!/usr/local/bin/python3.8
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6# All rights reserved.
7#
8# Permission  is  hereby granted,  free  of charge,  to  any person
9# obtaining a  copy of  this software  and associated documentation
10# files  (the  "Software"),  to   deal  in  the  Software   without
11# restriction,  including  without limitation  the  rights to  use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies  of  the  Software,  and to  permit  persons  to  whom the
14# Software  is  furnished  to  do  so,  subject  to  the  following
15# conditions:
16#
17# The above copyright  notice and this  permission notice shall  be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version     = "0.9.0"
33__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49
50try:
51    import pwd
52except ImportError:
53    pwd = None
54try:
55    import grp
56except ImportError:
57    grp = None
58
59# os.symlink on Windows prior to 6.0 raises NotImplementedError
60symlink_exception = (AttributeError, NotImplementedError)
61try:
62    # OSError (winerror=1314) will be raised if the caller does not hold the
63    # SeCreateSymbolicLinkPrivilege privilege
64    symlink_exception += (OSError,)
65except NameError:
66    pass
67
68# from tarfile import *
69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
70           "CompressionError", "StreamError", "ExtractError", "HeaderError",
71           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
72           "DEFAULT_FORMAT", "open"]
73
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
77NUL = b"\0"                     # the null character
78BLOCKSIZE = 512                 # length of processing blocks
79RECORDSIZE = BLOCKSIZE * 20     # length of records
80GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
81POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
82
83LENGTH_NAME = 100               # maximum length of a filename
84LENGTH_LINK = 100               # maximum length of a linkname
85LENGTH_PREFIX = 155             # maximum length of the prefix field
86
87REGTYPE = b"0"                  # regular file
88AREGTYPE = b"\0"                # regular file
89LNKTYPE = b"1"                  # link (inside tarfile)
90SYMTYPE = b"2"                  # symbolic link
91CHRTYPE = b"3"                  # character special device
92BLKTYPE = b"4"                  # block special device
93DIRTYPE = b"5"                  # directory
94FIFOTYPE = b"6"                 # fifo special device
95CONTTYPE = b"7"                 # contiguous file
96
97GNUTYPE_LONGNAME = b"L"         # GNU tar longname
98GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
99GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
100
101XHDTYPE = b"x"                  # POSIX.1-2001 extended header
102XGLTYPE = b"g"                  # POSIX.1-2001 global header
103SOLARIS_XHDTYPE = b"X"          # Solaris extended header
104
105USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
106GNU_FORMAT = 1                  # GNU tar format
107PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
108DEFAULT_FORMAT = PAX_FORMAT
109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
113# File types that tarfile supports:
114SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
115                   SYMTYPE, DIRTYPE, FIFOTYPE,
116                   CONTTYPE, CHRTYPE, BLKTYPE,
117                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
118                   GNUTYPE_SPARSE)
119
120# File types that will be treated as a regular file.
121REGULAR_TYPES = (REGTYPE, AREGTYPE,
122                 CONTTYPE, GNUTYPE_SPARSE)
123
124# File types that are part of the GNU tar format.
125GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
126             GNUTYPE_SPARSE)
127
128# Fields from a pax header that override a TarInfo attribute.
129PAX_FIELDS = ("path", "linkpath", "size", "mtime",
130              "uid", "gid", "uname", "gname")
131
132# Fields from a pax header that are affected by hdrcharset.
133PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
134
135# Fields in a pax header that are numbers, all other fields
136# are treated as strings.
137PAX_NUMBER_FIELDS = {
138    "atime": float,
139    "ctime": float,
140    "mtime": float,
141    "uid": int,
142    "gid": int,
143    "size": int
144}
145
146#---------------------------------------------------------
147# initialization
148#---------------------------------------------------------
149if os.name == "nt":
150    ENCODING = "utf-8"
151else:
152    ENCODING = sys.getfilesystemencoding()
153
154#---------------------------------------------------------
155# Some useful functions
156#---------------------------------------------------------
157
158def stn(s, length, encoding, errors):
159    """Convert a string to a null-terminated bytes object.
160    """
161    s = s.encode(encoding, errors)
162    return s[:length] + (length - len(s)) * NUL
163
164def nts(s, encoding, errors):
165    """Convert a null-terminated bytes object to a string.
166    """
167    p = s.find(b"\0")
168    if p != -1:
169        s = s[:p]
170    return s.decode(encoding, errors)
171
172def nti(s):
173    """Convert a number field to a python number.
174    """
175    # There are two possible encodings for a number field, see
176    # itn() below.
177    if s[0] in (0o200, 0o377):
178        n = 0
179        for i in range(len(s) - 1):
180            n <<= 8
181            n += s[i + 1]
182        if s[0] == 0o377:
183            n = -(256 ** (len(s) - 1) - n)
184    else:
185        try:
186            s = nts(s, "ascii", "strict")
187            n = int(s.strip() or "0", 8)
188        except ValueError:
189            raise InvalidHeaderError("invalid header")
190    return n
191
192def itn(n, digits=8, format=DEFAULT_FORMAT):
193    """Convert a python number to a number field.
194    """
195    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
196    # octal digits followed by a null-byte, this allows values up to
197    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
198    # that if necessary. A leading 0o200 or 0o377 byte indicate this
199    # particular encoding, the following digits-1 bytes are a big-endian
200    # base-256 representation. This allows values up to (256**(digits-1))-1.
201    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
202    # number.
203    n = int(n)
204    if 0 <= n < 8 ** (digits - 1):
205        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
206    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
207        if n >= 0:
208            s = bytearray([0o200])
209        else:
210            s = bytearray([0o377])
211            n = 256 ** digits + n
212
213        for i in range(digits - 1):
214            s.insert(1, n & 0o377)
215            n >>= 8
216    else:
217        raise ValueError("overflow in number field")
218
219    return s
220
221def calc_chksums(buf):
222    """Calculate the checksum for a member's header by summing up all
223       characters except for the chksum field which is treated as if
224       it was filled with spaces. According to the GNU tar sources,
225       some tars (Sun and NeXT) calculate chksum with signed char,
226       which will be different if there are chars in the buffer with
227       the high bit set. So we calculate two checksums, unsigned and
228       signed.
229    """
230    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
231    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
232    return unsigned_chksum, signed_chksum
233
234def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
235    """Copy length bytes from fileobj src to fileobj dst.
236       If length is None, copy the entire content.
237    """
238    bufsize = bufsize or 16 * 1024
239    if length == 0:
240        return
241    if length is None:
242        shutil.copyfileobj(src, dst, bufsize)
243        return
244
245    blocks, remainder = divmod(length, bufsize)
246    for b in range(blocks):
247        buf = src.read(bufsize)
248        if len(buf) < bufsize:
249            raise exception("unexpected end of data")
250        dst.write(buf)
251
252    if remainder != 0:
253        buf = src.read(remainder)
254        if len(buf) < remainder:
255            raise exception("unexpected end of data")
256        dst.write(buf)
257    return
258
259def _safe_print(s):
260    encoding = getattr(sys.stdout, 'encoding', None)
261    if encoding is not None:
262        s = s.encode(encoding, 'backslashreplace').decode(encoding)
263    print(s, end=' ')
264
265
266class TarError(Exception):
267    """Base exception."""
268    pass
269class ExtractError(TarError):
270    """General exception for extract errors."""
271    pass
272class ReadError(TarError):
273    """Exception for unreadable tar archives."""
274    pass
275class CompressionError(TarError):
276    """Exception for unavailable compression methods."""
277    pass
278class StreamError(TarError):
279    """Exception for unsupported operations on stream-like TarFiles."""
280    pass
281class HeaderError(TarError):
282    """Base exception for header errors."""
283    pass
284class EmptyHeaderError(HeaderError):
285    """Exception for empty headers."""
286    pass
287class TruncatedHeaderError(HeaderError):
288    """Exception for truncated headers."""
289    pass
290class EOFHeaderError(HeaderError):
291    """Exception for end of file headers."""
292    pass
293class InvalidHeaderError(HeaderError):
294    """Exception for invalid headers."""
295    pass
296class SubsequentHeaderError(HeaderError):
297    """Exception for missing and invalid extended headers."""
298    pass
299
300#---------------------------
301# internal stream interface
302#---------------------------
303class _LowLevelFile:
304    """Low-level file object. Supports reading and writing.
305       It is used instead of a regular file object for streaming
306       access.
307    """
308
309    def __init__(self, name, mode):
310        mode = {
311            "r": os.O_RDONLY,
312            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
313        }[mode]
314        if hasattr(os, "O_BINARY"):
315            mode |= os.O_BINARY
316        self.fd = os.open(name, mode, 0o666)
317
318    def close(self):
319        os.close(self.fd)
320
321    def read(self, size):
322        return os.read(self.fd, size)
323
324    def write(self, s):
325        os.write(self.fd, s)
326
327class _Stream:
328    """Class that serves as an adapter between TarFile and
329       a stream-like object.  The stream-like object only
330       needs to have a read() or write() method and is accessed
331       blockwise.  Use of gzip or bzip2 compression is possible.
332       A stream-like object could be for example: sys.stdin,
333       sys.stdout, a socket, a tape device etc.
334
335       _Stream is intended to be used only internally.
336    """
337
338    def __init__(self, name, mode, comptype, fileobj, bufsize):
339        """Construct a _Stream object.
340        """
341        self._extfileobj = True
342        if fileobj is None:
343            fileobj = _LowLevelFile(name, mode)
344            self._extfileobj = False
345
346        if comptype == '*':
347            # Enable transparent compression detection for the
348            # stream interface
349            fileobj = _StreamProxy(fileobj)
350            comptype = fileobj.getcomptype()
351
352        self.name     = name or ""
353        self.mode     = mode
354        self.comptype = comptype
355        self.fileobj  = fileobj
356        self.bufsize  = bufsize
357        self.buf      = b""
358        self.pos      = 0
359        self.closed   = False
360
361        try:
362            if comptype == "gz":
363                try:
364                    import zlib
365                except ImportError:
366                    raise CompressionError("zlib module is not available")
367                self.zlib = zlib
368                self.crc = zlib.crc32(b"")
369                if mode == "r":
370                    self._init_read_gz()
371                    self.exception = zlib.error
372                else:
373                    self._init_write_gz()
374
375            elif comptype == "bz2":
376                try:
377                    import bz2
378                except ImportError:
379                    raise CompressionError("bz2 module is not available")
380                if mode == "r":
381                    self.dbuf = b""
382                    self.cmp = bz2.BZ2Decompressor()
383                    self.exception = OSError
384                else:
385                    self.cmp = bz2.BZ2Compressor()
386
387            elif comptype == "xz":
388                try:
389                    import lzma
390                except ImportError:
391                    raise CompressionError("lzma module is not available")
392                if mode == "r":
393                    self.dbuf = b""
394                    self.cmp = lzma.LZMADecompressor()
395                    self.exception = lzma.LZMAError
396                else:
397                    self.cmp = lzma.LZMACompressor()
398
399            elif comptype != "tar":
400                raise CompressionError("unknown compression type %r" % comptype)
401
402        except:
403            if not self._extfileobj:
404                self.fileobj.close()
405            self.closed = True
406            raise
407
408    def __del__(self):
409        if hasattr(self, "closed") and not self.closed:
410            self.close()
411
412    def _init_write_gz(self):
413        """Initialize for writing with gzip compression.
414        """
415        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
416                                            -self.zlib.MAX_WBITS,
417                                            self.zlib.DEF_MEM_LEVEL,
418                                            0)
419        timestamp = struct.pack("<L", int(time.time()))
420        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
421        if self.name.endswith(".gz"):
422            self.name = self.name[:-3]
423        # Honor "directory components removed" from RFC1952
424        self.name = os.path.basename(self.name)
425        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
426        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
427
428    def write(self, s):
429        """Write string s to the stream.
430        """
431        if self.comptype == "gz":
432            self.crc = self.zlib.crc32(s, self.crc)
433        self.pos += len(s)
434        if self.comptype != "tar":
435            s = self.cmp.compress(s)
436        self.__write(s)
437
438    def __write(self, s):
439        """Write string s to the stream if a whole new block
440           is ready to be written.
441        """
442        self.buf += s
443        while len(self.buf) > self.bufsize:
444            self.fileobj.write(self.buf[:self.bufsize])
445            self.buf = self.buf[self.bufsize:]
446
447    def close(self):
448        """Close the _Stream object. No operation should be
449           done on it afterwards.
450        """
451        if self.closed:
452            return
453
454        self.closed = True
455        try:
456            if self.mode == "w" and self.comptype != "tar":
457                self.buf += self.cmp.flush()
458
459            if self.mode == "w" and self.buf:
460                self.fileobj.write(self.buf)
461                self.buf = b""
462                if self.comptype == "gz":
463                    self.fileobj.write(struct.pack("<L", self.crc))
464                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
465        finally:
466            if not self._extfileobj:
467                self.fileobj.close()
468
469    def _init_read_gz(self):
470        """Initialize for reading a gzip compressed fileobj.
471        """
472        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
473        self.dbuf = b""
474
475        # taken from gzip.GzipFile with some alterations
476        if self.__read(2) != b"\037\213":
477            raise ReadError("not a gzip file")
478        if self.__read(1) != b"\010":
479            raise CompressionError("unsupported compression method")
480
481        flag = ord(self.__read(1))
482        self.__read(6)
483
484        if flag & 4:
485            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
486            self.read(xlen)
487        if flag & 8:
488            while True:
489                s = self.__read(1)
490                if not s or s == NUL:
491                    break
492        if flag & 16:
493            while True:
494                s = self.__read(1)
495                if not s or s == NUL:
496                    break
497        if flag & 2:
498            self.__read(2)
499
500    def tell(self):
501        """Return the stream's file pointer position.
502        """
503        return self.pos
504
505    def seek(self, pos=0):
506        """Set the stream's file pointer to pos. Negative seeking
507           is forbidden.
508        """
509        if pos - self.pos >= 0:
510            blocks, remainder = divmod(pos - self.pos, self.bufsize)
511            for i in range(blocks):
512                self.read(self.bufsize)
513            self.read(remainder)
514        else:
515            raise StreamError("seeking backwards is not allowed")
516        return self.pos
517
518    def read(self, size):
519        """Return the next size number of bytes from the stream."""
520        assert size is not None
521        buf = self._read(size)
522        self.pos += len(buf)
523        return buf
524
525    def _read(self, size):
526        """Return size bytes from the stream.
527        """
528        if self.comptype == "tar":
529            return self.__read(size)
530
531        c = len(self.dbuf)
532        t = [self.dbuf]
533        while c < size:
534            # Skip underlying buffer to avoid unaligned double buffering.
535            if self.buf:
536                buf = self.buf
537                self.buf = b""
538            else:
539                buf = self.fileobj.read(self.bufsize)
540                if not buf:
541                    break
542            try:
543                buf = self.cmp.decompress(buf)
544            except self.exception:
545                raise ReadError("invalid compressed data")
546            t.append(buf)
547            c += len(buf)
548        t = b"".join(t)
549        self.dbuf = t[size:]
550        return t[:size]
551
552    def __read(self, size):
553        """Return size bytes from stream. If internal buffer is empty,
554           read another block from the stream.
555        """
556        c = len(self.buf)
557        t = [self.buf]
558        while c < size:
559            buf = self.fileobj.read(self.bufsize)
560            if not buf:
561                break
562            t.append(buf)
563            c += len(buf)
564        t = b"".join(t)
565        self.buf = t[size:]
566        return t[:size]
567# class _Stream
568
569class _StreamProxy(object):
570    """Small proxy class that enables transparent compression
571       detection for the Stream interface (mode 'r|*').
572    """
573
574    def __init__(self, fileobj):
575        self.fileobj = fileobj
576        self.buf = self.fileobj.read(BLOCKSIZE)
577
578    def read(self, size):
579        self.read = self.fileobj.read
580        return self.buf
581
582    def getcomptype(self):
583        if self.buf.startswith(b"\x1f\x8b\x08"):
584            return "gz"
585        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
586            return "bz2"
587        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
588            return "xz"
589        else:
590            return "tar"
591
592    def close(self):
593        self.fileobj.close()
594# class StreamProxy
595
596#------------------------
597# Extraction file object
598#------------------------
599class _FileInFile(object):
600    """A thin wrapper around an existing file object that
601       provides a part of its data as an individual file
602       object.
603    """
604
605    def __init__(self, fileobj, offset, size, blockinfo=None):
606        self.fileobj = fileobj
607        self.offset = offset
608        self.size = size
609        self.position = 0
610        self.name = getattr(fileobj, "name", None)
611        self.closed = False
612
613        if blockinfo is None:
614            blockinfo = [(0, size)]
615
616        # Construct a map with data and zero blocks.
617        self.map_index = 0
618        self.map = []
619        lastpos = 0
620        realpos = self.offset
621        for offset, size in blockinfo:
622            if offset > lastpos:
623                self.map.append((False, lastpos, offset, None))
624            self.map.append((True, offset, offset + size, realpos))
625            realpos += size
626            lastpos = offset + size
627        if lastpos < self.size:
628            self.map.append((False, lastpos, self.size, None))
629
630    def flush(self):
631        pass
632
633    def readable(self):
634        return True
635
636    def writable(self):
637        return False
638
639    def seekable(self):
640        return self.fileobj.seekable()
641
642    def tell(self):
643        """Return the current file position.
644        """
645        return self.position
646
647    def seek(self, position, whence=io.SEEK_SET):
648        """Seek to a position in the file.
649        """
650        if whence == io.SEEK_SET:
651            self.position = min(max(position, 0), self.size)
652        elif whence == io.SEEK_CUR:
653            if position < 0:
654                self.position = max(self.position + position, 0)
655            else:
656                self.position = min(self.position + position, self.size)
657        elif whence == io.SEEK_END:
658            self.position = max(min(self.size + position, self.size), 0)
659        else:
660            raise ValueError("Invalid argument")
661        return self.position
662
663    def read(self, size=None):
664        """Read data from the file.
665        """
666        if size is None:
667            size = self.size - self.position
668        else:
669            size = min(size, self.size - self.position)
670
671        buf = b""
672        while size > 0:
673            while True:
674                data, start, stop, offset = self.map[self.map_index]
675                if start <= self.position < stop:
676                    break
677                else:
678                    self.map_index += 1
679                    if self.map_index == len(self.map):
680                        self.map_index = 0
681            length = min(size, stop - self.position)
682            if data:
683                self.fileobj.seek(offset + (self.position - start))
684                b = self.fileobj.read(length)
685                if len(b) != length:
686                    raise ReadError("unexpected end of data")
687                buf += b
688            else:
689                buf += NUL * length
690            size -= length
691            self.position += length
692        return buf
693
694    def readinto(self, b):
695        buf = self.read(len(b))
696        b[:len(buf)] = buf
697        return len(buf)
698
699    def close(self):
700        self.closed = True
701#class _FileInFile
702
703class ExFileObject(io.BufferedReader):
704
705    def __init__(self, tarfile, tarinfo):
706        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
707                tarinfo.size, tarinfo.sparse)
708        super().__init__(fileobj)
709#class ExFileObject
710
711#------------------
712# Exported Classes
713#------------------
714class TarInfo(object):
715    """Informational class which holds the details about an
716       archive member given by a tar header block.
717       TarInfo objects are returned by TarFile.getmember(),
718       TarFile.getmembers() and TarFile.gettarinfo() and are
719       usually created internally.
720    """
721
722    __slots__ = dict(
723        name = 'Name of the archive member.',
724        mode = 'Permission bits.',
725        uid = 'User ID of the user who originally stored this member.',
726        gid = 'Group ID of the user who originally stored this member.',
727        size = 'Size in bytes.',
728        mtime = 'Time of last modification.',
729        chksum = 'Header checksum.',
730        type = ('File type. type is usually one of these constants: '
731                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
732                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
733        linkname = ('Name of the target file name, which is only present '
734                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
735        uname = 'User name.',
736        gname = 'Group name.',
737        devmajor = 'Device major number.',
738        devminor = 'Device minor number.',
739        offset = 'The tar header starts here.',
740        offset_data = "The file's data starts here.",
741        pax_headers = ('A dictionary containing key-value pairs of an '
742                       'associated pax extended header.'),
743        sparse = 'Sparse member information.',
744        tarfile = None,
745        _sparse_structs = None,
746        _link_target = None,
747        )
748
749    def __init__(self, name=""):
750        """Construct a TarInfo object. name is the optional name
751           of the member.
752        """
753        self.name = name        # member name
754        self.mode = 0o644       # file permissions
755        self.uid = 0            # user id
756        self.gid = 0            # group id
757        self.size = 0           # file size
758        self.mtime = 0          # modification time
759        self.chksum = 0         # header checksum
760        self.type = REGTYPE     # member type
761        self.linkname = ""      # link name
762        self.uname = ""         # user name
763        self.gname = ""         # group name
764        self.devmajor = 0       # device major number
765        self.devminor = 0       # device minor number
766
767        self.offset = 0         # the tar header starts here
768        self.offset_data = 0    # the file's data starts here
769
770        self.sparse = None      # sparse member information
771        self.pax_headers = {}   # pax header information
772
773    @property
774    def path(self):
775        'In pax headers, "name" is called "path".'
776        return self.name
777
778    @path.setter
779    def path(self, name):
780        self.name = name
781
782    @property
783    def linkpath(self):
784        'In pax headers, "linkname" is called "linkpath".'
785        return self.linkname
786
787    @linkpath.setter
788    def linkpath(self, linkname):
789        self.linkname = linkname
790
791    def __repr__(self):
792        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
793
794    def get_info(self):
795        """Return the TarInfo's attributes as a dictionary.
796        """
797        info = {
798            "name":     self.name,
799            "mode":     self.mode & 0o7777,
800            "uid":      self.uid,
801            "gid":      self.gid,
802            "size":     self.size,
803            "mtime":    self.mtime,
804            "chksum":   self.chksum,
805            "type":     self.type,
806            "linkname": self.linkname,
807            "uname":    self.uname,
808            "gname":    self.gname,
809            "devmajor": self.devmajor,
810            "devminor": self.devminor
811        }
812
813        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
814            info["name"] += "/"
815
816        return info
817
818    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
819        """Return a tar header as a string of 512 byte blocks.
820        """
821        info = self.get_info()
822
823        if format == USTAR_FORMAT:
824            return self.create_ustar_header(info, encoding, errors)
825        elif format == GNU_FORMAT:
826            return self.create_gnu_header(info, encoding, errors)
827        elif format == PAX_FORMAT:
828            return self.create_pax_header(info, encoding)
829        else:
830            raise ValueError("invalid format")
831
832    def create_ustar_header(self, info, encoding, errors):
833        """Return the object as a ustar header block.
834        """
835        info["magic"] = POSIX_MAGIC
836
837        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
838            raise ValueError("linkname is too long")
839
840        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
841            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
842
843        return self._create_header(info, USTAR_FORMAT, encoding, errors)
844
845    def create_gnu_header(self, info, encoding, errors):
846        """Return the object as a GNU header block sequence.
847        """
848        info["magic"] = GNU_MAGIC
849
850        buf = b""
851        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
852            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
853
854        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
855            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
856
857        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
858
859    def create_pax_header(self, info, encoding):
860        """Return the object as a ustar header block. If it cannot be
861           represented this way, prepend a pax extended header sequence
862           with supplement information.
863        """
864        info["magic"] = POSIX_MAGIC
865        pax_headers = self.pax_headers.copy()
866
867        # Test string fields for values that exceed the field length or cannot
868        # be represented in ASCII encoding.
869        for name, hname, length in (
870                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
871                ("uname", "uname", 32), ("gname", "gname", 32)):
872
873            if hname in pax_headers:
874                # The pax header has priority.
875                continue
876
877            # Try to encode the string as ASCII.
878            try:
879                info[name].encode("ascii", "strict")
880            except UnicodeEncodeError:
881                pax_headers[hname] = info[name]
882                continue
883
884            if len(info[name]) > length:
885                pax_headers[hname] = info[name]
886
887        # Test number fields for values that exceed the field limit or values
888        # that like to be stored as float.
889        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
890            if name in pax_headers:
891                # The pax header has priority. Avoid overflow.
892                info[name] = 0
893                continue
894
895            val = info[name]
896            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
897                pax_headers[name] = str(val)
898                info[name] = 0
899
900        # Create a pax extended header if necessary.
901        if pax_headers:
902            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
903        else:
904            buf = b""
905
906        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
907
908    @classmethod
909    def create_pax_global_header(cls, pax_headers):
910        """Return the object as a pax global header block sequence.
911        """
912        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
913
914    def _posix_split_name(self, name, encoding, errors):
915        """Split a name longer than 100 chars into a prefix
916           and a name part.
917        """
918        components = name.split("/")
919        for i in range(1, len(components)):
920            prefix = "/".join(components[:i])
921            name = "/".join(components[i:])
922            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
923                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
924                break
925        else:
926            raise ValueError("name is too long")
927
928        return prefix, name
929
930    @staticmethod
931    def _create_header(info, format, encoding, errors):
932        """Return a header block. info is a dictionary with file
933           information, format must be one of the *_FORMAT constants.
934        """
935        parts = [
936            stn(info.get("name", ""), 100, encoding, errors),
937            itn(info.get("mode", 0) & 0o7777, 8, format),
938            itn(info.get("uid", 0), 8, format),
939            itn(info.get("gid", 0), 8, format),
940            itn(info.get("size", 0), 12, format),
941            itn(info.get("mtime", 0), 12, format),
942            b"        ", # checksum field
943            info.get("type", REGTYPE),
944            stn(info.get("linkname", ""), 100, encoding, errors),
945            info.get("magic", POSIX_MAGIC),
946            stn(info.get("uname", ""), 32, encoding, errors),
947            stn(info.get("gname", ""), 32, encoding, errors),
948            itn(info.get("devmajor", 0), 8, format),
949            itn(info.get("devminor", 0), 8, format),
950            stn(info.get("prefix", ""), 155, encoding, errors)
951        ]
952
953        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
954        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
955        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
956        return buf
957
958    @staticmethod
959    def _create_payload(payload):
960        """Return the string payload filled with zero bytes
961           up to the next 512 byte border.
962        """
963        blocks, remainder = divmod(len(payload), BLOCKSIZE)
964        if remainder > 0:
965            payload += (BLOCKSIZE - remainder) * NUL
966        return payload
967
968    @classmethod
969    def _create_gnu_long_header(cls, name, type, encoding, errors):
970        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
971           for name.
972        """
973        name = name.encode(encoding, errors) + NUL
974
975        info = {}
976        info["name"] = "././@LongLink"
977        info["type"] = type
978        info["size"] = len(name)
979        info["magic"] = GNU_MAGIC
980
981        # create extended header + name blocks.
982        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
983                cls._create_payload(name)
984
985    @classmethod
986    def _create_pax_generic_header(cls, pax_headers, type, encoding):
987        """Return a POSIX.1-2008 extended or global header sequence
988           that contains a list of keyword, value pairs. The values
989           must be strings.
990        """
991        # Check if one of the fields contains surrogate characters and thereby
992        # forces hdrcharset=BINARY, see _proc_pax() for more information.
993        binary = False
994        for keyword, value in pax_headers.items():
995            try:
996                value.encode("utf-8", "strict")
997            except UnicodeEncodeError:
998                binary = True
999                break
1000
1001        records = b""
1002        if binary:
1003            # Put the hdrcharset field at the beginning of the header.
1004            records += b"21 hdrcharset=BINARY\n"
1005
1006        for keyword, value in pax_headers.items():
1007            keyword = keyword.encode("utf-8")
1008            if binary:
1009                # Try to restore the original byte representation of `value'.
1010                # Needless to say, that the encoding must match the string.
1011                value = value.encode(encoding, "surrogateescape")
1012            else:
1013                value = value.encode("utf-8")
1014
1015            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1016            n = p = 0
1017            while True:
1018                n = l + len(str(p))
1019                if n == p:
1020                    break
1021                p = n
1022            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1023
1024        # We use a hardcoded "././@PaxHeader" name like star does
1025        # instead of the one that POSIX recommends.
1026        info = {}
1027        info["name"] = "././@PaxHeader"
1028        info["type"] = type
1029        info["size"] = len(records)
1030        info["magic"] = POSIX_MAGIC
1031
1032        # Create pax header + record blocks.
1033        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1034                cls._create_payload(records)
1035
1036    @classmethod
1037    def frombuf(cls, buf, encoding, errors):
1038        """Construct a TarInfo object from a 512 byte bytes object.
1039        """
1040        if len(buf) == 0:
1041            raise EmptyHeaderError("empty header")
1042        if len(buf) != BLOCKSIZE:
1043            raise TruncatedHeaderError("truncated header")
1044        if buf.count(NUL) == BLOCKSIZE:
1045            raise EOFHeaderError("end of file header")
1046
1047        chksum = nti(buf[148:156])
1048        if chksum not in calc_chksums(buf):
1049            raise InvalidHeaderError("bad checksum")
1050
1051        obj = cls()
1052        obj.name = nts(buf[0:100], encoding, errors)
1053        obj.mode = nti(buf[100:108])
1054        obj.uid = nti(buf[108:116])
1055        obj.gid = nti(buf[116:124])
1056        obj.size = nti(buf[124:136])
1057        obj.mtime = nti(buf[136:148])
1058        obj.chksum = chksum
1059        obj.type = buf[156:157]
1060        obj.linkname = nts(buf[157:257], encoding, errors)
1061        obj.uname = nts(buf[265:297], encoding, errors)
1062        obj.gname = nts(buf[297:329], encoding, errors)
1063        obj.devmajor = nti(buf[329:337])
1064        obj.devminor = nti(buf[337:345])
1065        prefix = nts(buf[345:500], encoding, errors)
1066
1067        # Old V7 tar format represents a directory as a regular
1068        # file with a trailing slash.
1069        if obj.type == AREGTYPE and obj.name.endswith("/"):
1070            obj.type = DIRTYPE
1071
1072        # The old GNU sparse format occupies some of the unused
1073        # space in the buffer for up to 4 sparse structures.
1074        # Save them for later processing in _proc_sparse().
1075        if obj.type == GNUTYPE_SPARSE:
1076            pos = 386
1077            structs = []
1078            for i in range(4):
1079                try:
1080                    offset = nti(buf[pos:pos + 12])
1081                    numbytes = nti(buf[pos + 12:pos + 24])
1082                except ValueError:
1083                    break
1084                structs.append((offset, numbytes))
1085                pos += 24
1086            isextended = bool(buf[482])
1087            origsize = nti(buf[483:495])
1088            obj._sparse_structs = (structs, isextended, origsize)
1089
1090        # Remove redundant slashes from directories.
1091        if obj.isdir():
1092            obj.name = obj.name.rstrip("/")
1093
1094        # Reconstruct a ustar longname.
1095        if prefix and obj.type not in GNU_TYPES:
1096            obj.name = prefix + "/" + obj.name
1097        return obj
1098
1099    @classmethod
1100    def fromtarfile(cls, tarfile):
1101        """Return the next TarInfo object from TarFile object
1102           tarfile.
1103        """
1104        buf = tarfile.fileobj.read(BLOCKSIZE)
1105        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1106        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1107        return obj._proc_member(tarfile)
1108
1109    #--------------------------------------------------------------------------
1110    # The following are methods that are called depending on the type of a
1111    # member. The entry point is _proc_member() which can be overridden in a
1112    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1113    # implement the following
1114    # operations:
1115    # 1. Set self.offset_data to the position where the data blocks begin,
1116    #    if there is data that follows.
1117    # 2. Set tarfile.offset to the position where the next member's header will
1118    #    begin.
1119    # 3. Return self or another valid TarInfo object.
1120    def _proc_member(self, tarfile):
1121        """Choose the right processing method depending on
1122           the type and call it.
1123        """
1124        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1125            return self._proc_gnulong(tarfile)
1126        elif self.type == GNUTYPE_SPARSE:
1127            return self._proc_sparse(tarfile)
1128        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1129            return self._proc_pax(tarfile)
1130        else:
1131            return self._proc_builtin(tarfile)
1132
1133    def _proc_builtin(self, tarfile):
1134        """Process a builtin type or an unknown type which
1135           will be treated as a regular file.
1136        """
1137        self.offset_data = tarfile.fileobj.tell()
1138        offset = self.offset_data
1139        if self.isreg() or self.type not in SUPPORTED_TYPES:
1140            # Skip the following data blocks.
1141            offset += self._block(self.size)
1142        tarfile.offset = offset
1143
1144        # Patch the TarInfo object with saved global
1145        # header information.
1146        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1147
1148        return self
1149
1150    def _proc_gnulong(self, tarfile):
1151        """Process the blocks that hold a GNU longname
1152           or longlink member.
1153        """
1154        buf = tarfile.fileobj.read(self._block(self.size))
1155
1156        # Fetch the next header and process it.
1157        try:
1158            next = self.fromtarfile(tarfile)
1159        except HeaderError:
1160            raise SubsequentHeaderError("missing or bad subsequent header")
1161
1162        # Patch the TarInfo object from the next header with
1163        # the longname information.
1164        next.offset = self.offset
1165        if self.type == GNUTYPE_LONGNAME:
1166            next.name = nts(buf, tarfile.encoding, tarfile.errors)
1167        elif self.type == GNUTYPE_LONGLINK:
1168            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1169
1170        return next
1171
1172    def _proc_sparse(self, tarfile):
1173        """Process a GNU sparse header plus extra headers.
1174        """
1175        # We already collected some sparse structures in frombuf().
1176        structs, isextended, origsize = self._sparse_structs
1177        del self._sparse_structs
1178
1179        # Collect sparse structures from extended header blocks.
1180        while isextended:
1181            buf = tarfile.fileobj.read(BLOCKSIZE)
1182            pos = 0
1183            for i in range(21):
1184                try:
1185                    offset = nti(buf[pos:pos + 12])
1186                    numbytes = nti(buf[pos + 12:pos + 24])
1187                except ValueError:
1188                    break
1189                if offset and numbytes:
1190                    structs.append((offset, numbytes))
1191                pos += 24
1192            isextended = bool(buf[504])
1193        self.sparse = structs
1194
1195        self.offset_data = tarfile.fileobj.tell()
1196        tarfile.offset = self.offset_data + self._block(self.size)
1197        self.size = origsize
1198        return self
1199
1200    def _proc_pax(self, tarfile):
1201        """Process an extended or global header as described in
1202           POSIX.1-2008.
1203        """
1204        # Read the header information.
1205        buf = tarfile.fileobj.read(self._block(self.size))
1206
1207        # A pax header stores supplemental information for either
1208        # the following file (extended) or all following files
1209        # (global).
1210        if self.type == XGLTYPE:
1211            pax_headers = tarfile.pax_headers
1212        else:
1213            pax_headers = tarfile.pax_headers.copy()
1214
1215        # Check if the pax header contains a hdrcharset field. This tells us
1216        # the encoding of the path, linkpath, uname and gname fields. Normally,
1217        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1218        # implementations are allowed to store them as raw binary strings if
1219        # the translation to UTF-8 fails.
1220        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1221        if match is not None:
1222            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1223
1224        # For the time being, we don't care about anything other than "BINARY".
1225        # The only other value that is currently allowed by the standard is
1226        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1227        hdrcharset = pax_headers.get("hdrcharset")
1228        if hdrcharset == "BINARY":
1229            encoding = tarfile.encoding
1230        else:
1231            encoding = "utf-8"
1232
1233        # Parse pax header information. A record looks like that:
1234        # "%d %s=%s\n" % (length, keyword, value). length is the size
1235        # of the complete record including the length field itself and
1236        # the newline. keyword and value are both UTF-8 encoded strings.
1237        regex = re.compile(br"(\d+) ([^=]+)=")
1238        pos = 0
1239        while True:
1240            match = regex.match(buf, pos)
1241            if not match:
1242                break
1243
1244            length, keyword = match.groups()
1245            length = int(length)
1246            if length == 0:
1247                raise InvalidHeaderError("invalid header")
1248            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1249
1250            # Normally, we could just use "utf-8" as the encoding and "strict"
1251            # as the error handler, but we better not take the risk. For
1252            # example, GNU tar <= 1.23 is known to store filenames it cannot
1253            # translate to UTF-8 as raw strings (unfortunately without a
1254            # hdrcharset=BINARY header).
1255            # We first try the strict standard encoding, and if that fails we
1256            # fall back on the user's encoding and error handler.
1257            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1258                    tarfile.errors)
1259            if keyword in PAX_NAME_FIELDS:
1260                value = self._decode_pax_field(value, encoding, tarfile.encoding,
1261                        tarfile.errors)
1262            else:
1263                value = self._decode_pax_field(value, "utf-8", "utf-8",
1264                        tarfile.errors)
1265
1266            pax_headers[keyword] = value
1267            pos += length
1268
1269        # Fetch the next header.
1270        try:
1271            next = self.fromtarfile(tarfile)
1272        except HeaderError:
1273            raise SubsequentHeaderError("missing or bad subsequent header")
1274
1275        # Process GNU sparse information.
1276        if "GNU.sparse.map" in pax_headers:
1277            # GNU extended sparse format version 0.1.
1278            self._proc_gnusparse_01(next, pax_headers)
1279
1280        elif "GNU.sparse.size" in pax_headers:
1281            # GNU extended sparse format version 0.0.
1282            self._proc_gnusparse_00(next, pax_headers, buf)
1283
1284        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1285            # GNU extended sparse format version 1.0.
1286            self._proc_gnusparse_10(next, pax_headers, tarfile)
1287
1288        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1289            # Patch the TarInfo object with the extended header info.
1290            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1291            next.offset = self.offset
1292
1293            if "size" in pax_headers:
1294                # If the extended header replaces the size field,
1295                # we need to recalculate the offset where the next
1296                # header starts.
1297                offset = next.offset_data
1298                if next.isreg() or next.type not in SUPPORTED_TYPES:
1299                    offset += next._block(next.size)
1300                tarfile.offset = offset
1301
1302        return next
1303
1304    def _proc_gnusparse_00(self, next, pax_headers, buf):
1305        """Process a GNU tar extended sparse header, version 0.0.
1306        """
1307        offsets = []
1308        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1309            offsets.append(int(match.group(1)))
1310        numbytes = []
1311        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1312            numbytes.append(int(match.group(1)))
1313        next.sparse = list(zip(offsets, numbytes))
1314
1315    def _proc_gnusparse_01(self, next, pax_headers):
1316        """Process a GNU tar extended sparse header, version 0.1.
1317        """
1318        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1319        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1320
1321    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1322        """Process a GNU tar extended sparse header, version 1.0.
1323        """
1324        fields = None
1325        sparse = []
1326        buf = tarfile.fileobj.read(BLOCKSIZE)
1327        fields, buf = buf.split(b"\n", 1)
1328        fields = int(fields)
1329        while len(sparse) < fields * 2:
1330            if b"\n" not in buf:
1331                buf += tarfile.fileobj.read(BLOCKSIZE)
1332            number, buf = buf.split(b"\n", 1)
1333            sparse.append(int(number))
1334        next.offset_data = tarfile.fileobj.tell()
1335        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1336
1337    def _apply_pax_info(self, pax_headers, encoding, errors):
1338        """Replace fields with supplemental information from a previous
1339           pax extended or global header.
1340        """
1341        for keyword, value in pax_headers.items():
1342            if keyword == "GNU.sparse.name":
1343                setattr(self, "path", value)
1344            elif keyword == "GNU.sparse.size":
1345                setattr(self, "size", int(value))
1346            elif keyword == "GNU.sparse.realsize":
1347                setattr(self, "size", int(value))
1348            elif keyword in PAX_FIELDS:
1349                if keyword in PAX_NUMBER_FIELDS:
1350                    try:
1351                        value = PAX_NUMBER_FIELDS[keyword](value)
1352                    except ValueError:
1353                        value = 0
1354                if keyword == "path":
1355                    value = value.rstrip("/")
1356                setattr(self, keyword, value)
1357
1358        self.pax_headers = pax_headers.copy()
1359
1360    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1361        """Decode a single field from a pax record.
1362        """
1363        try:
1364            return value.decode(encoding, "strict")
1365        except UnicodeDecodeError:
1366            return value.decode(fallback_encoding, fallback_errors)
1367
1368    def _block(self, count):
1369        """Round up a byte count by BLOCKSIZE and return it,
1370           e.g. _block(834) => 1024.
1371        """
1372        blocks, remainder = divmod(count, BLOCKSIZE)
1373        if remainder:
1374            blocks += 1
1375        return blocks * BLOCKSIZE
1376
1377    def isreg(self):
1378        'Return True if the Tarinfo object is a regular file.'
1379        return self.type in REGULAR_TYPES
1380
1381    def isfile(self):
1382        'Return True if the Tarinfo object is a regular file.'
1383        return self.isreg()
1384
1385    def isdir(self):
1386        'Return True if it is a directory.'
1387        return self.type == DIRTYPE
1388
1389    def issym(self):
1390        'Return True if it is a symbolic link.'
1391        return self.type == SYMTYPE
1392
1393    def islnk(self):
1394        'Return True if it is a hard link.'
1395        return self.type == LNKTYPE
1396
1397    def ischr(self):
1398        'Return True if it is a character device.'
1399        return self.type == CHRTYPE
1400
1401    def isblk(self):
1402        'Return True if it is a block device.'
1403        return self.type == BLKTYPE
1404
1405    def isfifo(self):
1406        'Return True if it is a FIFO.'
1407        return self.type == FIFOTYPE
1408
1409    def issparse(self):
1410        return self.sparse is not None
1411
1412    def isdev(self):
1413        'Return True if it is one of character device, block device or FIFO.'
1414        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1415# class TarInfo
1416
1417class TarFile(object):
1418    """The TarFile Class provides an interface to tar archives.
1419    """
1420
1421    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1422
1423    dereference = False         # If true, add content of linked file to the
1424                                # tar file, else the link.
1425
1426    ignore_zeros = False        # If true, skips empty or invalid blocks and
1427                                # continues processing.
1428
1429    errorlevel = 1              # If 0, fatal errors only appear in debug
1430                                # messages (if debug >= 0). If > 0, errors
1431                                # are passed to the caller as exceptions.
1432
1433    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1434
1435    encoding = ENCODING         # Encoding for 8-bit character strings.
1436
1437    errors = None               # Error handler for unicode conversion.
1438
1439    tarinfo = TarInfo           # The default TarInfo class to use.
1440
1441    fileobject = ExFileObject   # The file-object for extractfile().
1442
1443    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1444            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1445            errors="surrogateescape", pax_headers=None, debug=None,
1446            errorlevel=None, copybufsize=None):
1447        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1448           read from an existing archive, 'a' to append data to an existing
1449           file or 'w' to create a new file overwriting an existing one. `mode'
1450           defaults to 'r'.
1451           If `fileobj' is given, it is used for reading or writing data. If it
1452           can be determined, `mode' is overridden by `fileobj's mode.
1453           `fileobj' is not closed, when TarFile is closed.
1454        """
1455        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1456        if mode not in modes:
1457            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1458        self.mode = mode
1459        self._mode = modes[mode]
1460
1461        if not fileobj:
1462            if self.mode == "a" and not os.path.exists(name):
1463                # Create nonexistent files in append mode.
1464                self.mode = "w"
1465                self._mode = "wb"
1466            fileobj = bltn_open(name, self._mode)
1467            self._extfileobj = False
1468        else:
1469            if (name is None and hasattr(fileobj, "name") and
1470                isinstance(fileobj.name, (str, bytes))):
1471                name = fileobj.name
1472            if hasattr(fileobj, "mode"):
1473                self._mode = fileobj.mode
1474            self._extfileobj = True
1475        self.name = os.path.abspath(name) if name else None
1476        self.fileobj = fileobj
1477
1478        # Init attributes.
1479        if format is not None:
1480            self.format = format
1481        if tarinfo is not None:
1482            self.tarinfo = tarinfo
1483        if dereference is not None:
1484            self.dereference = dereference
1485        if ignore_zeros is not None:
1486            self.ignore_zeros = ignore_zeros
1487        if encoding is not None:
1488            self.encoding = encoding
1489        self.errors = errors
1490
1491        if pax_headers is not None and self.format == PAX_FORMAT:
1492            self.pax_headers = pax_headers
1493        else:
1494            self.pax_headers = {}
1495
1496        if debug is not None:
1497            self.debug = debug
1498        if errorlevel is not None:
1499            self.errorlevel = errorlevel
1500
1501        # Init datastructures.
1502        self.copybufsize = copybufsize
1503        self.closed = False
1504        self.members = []       # list of members as TarInfo objects
1505        self._loaded = False    # flag if all members have been read
1506        self.offset = self.fileobj.tell()
1507                                # current position in the archive file
1508        self.inodes = {}        # dictionary caching the inodes of
1509                                # archive members already added
1510
1511        try:
1512            if self.mode == "r":
1513                self.firstmember = None
1514                self.firstmember = self.next()
1515
1516            if self.mode == "a":
1517                # Move to the end of the archive,
1518                # before the first empty block.
1519                while True:
1520                    self.fileobj.seek(self.offset)
1521                    try:
1522                        tarinfo = self.tarinfo.fromtarfile(self)
1523                        self.members.append(tarinfo)
1524                    except EOFHeaderError:
1525                        self.fileobj.seek(self.offset)
1526                        break
1527                    except HeaderError as e:
1528                        raise ReadError(str(e))
1529
1530            if self.mode in ("a", "w", "x"):
1531                self._loaded = True
1532
1533                if self.pax_headers:
1534                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1535                    self.fileobj.write(buf)
1536                    self.offset += len(buf)
1537        except:
1538            if not self._extfileobj:
1539                self.fileobj.close()
1540            self.closed = True
1541            raise
1542
1543    #--------------------------------------------------------------------------
1544    # Below are the classmethods which act as alternate constructors to the
1545    # TarFile class. The open() method is the only one that is needed for
1546    # public use; it is the "super"-constructor and is able to select an
1547    # adequate "sub"-constructor for a particular compression using the mapping
1548    # from OPEN_METH.
1549    #
1550    # This concept allows one to subclass TarFile without losing the comfort of
1551    # the super-constructor. A sub-constructor is registered and made available
1552    # by adding it to the mapping in OPEN_METH.
1553
1554    @classmethod
1555    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1556        """Open a tar archive for reading, writing or appending. Return
1557           an appropriate TarFile class.
1558
1559           mode:
1560           'r' or 'r:*' open for reading with transparent compression
1561           'r:'         open for reading exclusively uncompressed
1562           'r:gz'       open for reading with gzip compression
1563           'r:bz2'      open for reading with bzip2 compression
1564           'r:xz'       open for reading with lzma compression
1565           'a' or 'a:'  open for appending, creating the file if necessary
1566           'w' or 'w:'  open for writing without compression
1567           'w:gz'       open for writing with gzip compression
1568           'w:bz2'      open for writing with bzip2 compression
1569           'w:xz'       open for writing with lzma compression
1570
1571           'x' or 'x:'  create a tarfile exclusively without compression, raise
1572                        an exception if the file is already created
1573           'x:gz'       create a gzip compressed tarfile, raise an exception
1574                        if the file is already created
1575           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1576                        if the file is already created
1577           'x:xz'       create an lzma compressed tarfile, raise an exception
1578                        if the file is already created
1579
1580           'r|*'        open a stream of tar blocks with transparent compression
1581           'r|'         open an uncompressed stream of tar blocks for reading
1582           'r|gz'       open a gzip compressed stream of tar blocks
1583           'r|bz2'      open a bzip2 compressed stream of tar blocks
1584           'r|xz'       open an lzma compressed stream of tar blocks
1585           'w|'         open an uncompressed stream for writing
1586           'w|gz'       open a gzip compressed stream for writing
1587           'w|bz2'      open a bzip2 compressed stream for writing
1588           'w|xz'       open an lzma compressed stream for writing
1589        """
1590
1591        if not name and not fileobj:
1592            raise ValueError("nothing to open")
1593
1594        if mode in ("r", "r:*"):
1595            # Find out which *open() is appropriate for opening the file.
1596            def not_compressed(comptype):
1597                return cls.OPEN_METH[comptype] == 'taropen'
1598            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1599                func = getattr(cls, cls.OPEN_METH[comptype])
1600                if fileobj is not None:
1601                    saved_pos = fileobj.tell()
1602                try:
1603                    return func(name, "r", fileobj, **kwargs)
1604                except (ReadError, CompressionError):
1605                    if fileobj is not None:
1606                        fileobj.seek(saved_pos)
1607                    continue
1608            raise ReadError("file could not be opened successfully")
1609
1610        elif ":" in mode:
1611            filemode, comptype = mode.split(":", 1)
1612            filemode = filemode or "r"
1613            comptype = comptype or "tar"
1614
1615            # Select the *open() function according to
1616            # given compression.
1617            if comptype in cls.OPEN_METH:
1618                func = getattr(cls, cls.OPEN_METH[comptype])
1619            else:
1620                raise CompressionError("unknown compression type %r" % comptype)
1621            return func(name, filemode, fileobj, **kwargs)
1622
1623        elif "|" in mode:
1624            filemode, comptype = mode.split("|", 1)
1625            filemode = filemode or "r"
1626            comptype = comptype or "tar"
1627
1628            if filemode not in ("r", "w"):
1629                raise ValueError("mode must be 'r' or 'w'")
1630
1631            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1632            try:
1633                t = cls(name, filemode, stream, **kwargs)
1634            except:
1635                stream.close()
1636                raise
1637            t._extfileobj = False
1638            return t
1639
1640        elif mode in ("a", "w", "x"):
1641            return cls.taropen(name, mode, fileobj, **kwargs)
1642
1643        raise ValueError("undiscernible mode")
1644
1645    @classmethod
1646    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1647        """Open uncompressed tar archive name for reading or writing.
1648        """
1649        if mode not in ("r", "a", "w", "x"):
1650            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1651        return cls(name, mode, fileobj, **kwargs)
1652
1653    @classmethod
1654    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1655        """Open gzip compressed tar archive name for reading or writing.
1656           Appending is not allowed.
1657        """
1658        if mode not in ("r", "w", "x"):
1659            raise ValueError("mode must be 'r', 'w' or 'x'")
1660
1661        try:
1662            from gzip import GzipFile
1663        except ImportError:
1664            raise CompressionError("gzip module is not available")
1665
1666        try:
1667            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1668        except OSError:
1669            if fileobj is not None and mode == 'r':
1670                raise ReadError("not a gzip file")
1671            raise
1672
1673        try:
1674            t = cls.taropen(name, mode, fileobj, **kwargs)
1675        except OSError:
1676            fileobj.close()
1677            if mode == 'r':
1678                raise ReadError("not a gzip file")
1679            raise
1680        except:
1681            fileobj.close()
1682            raise
1683        t._extfileobj = False
1684        return t
1685
1686    @classmethod
1687    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1688        """Open bzip2 compressed tar archive name for reading or writing.
1689           Appending is not allowed.
1690        """
1691        if mode not in ("r", "w", "x"):
1692            raise ValueError("mode must be 'r', 'w' or 'x'")
1693
1694        try:
1695            from bz2 import BZ2File
1696        except ImportError:
1697            raise CompressionError("bz2 module is not available")
1698
1699        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1700
1701        try:
1702            t = cls.taropen(name, mode, fileobj, **kwargs)
1703        except (OSError, EOFError):
1704            fileobj.close()
1705            if mode == 'r':
1706                raise ReadError("not a bzip2 file")
1707            raise
1708        except:
1709            fileobj.close()
1710            raise
1711        t._extfileobj = False
1712        return t
1713
1714    @classmethod
1715    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1716        """Open lzma compressed tar archive name for reading or writing.
1717           Appending is not allowed.
1718        """
1719        if mode not in ("r", "w", "x"):
1720            raise ValueError("mode must be 'r', 'w' or 'x'")
1721
1722        try:
1723            from lzma import LZMAFile, LZMAError
1724        except ImportError:
1725            raise CompressionError("lzma module is not available")
1726
1727        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1728
1729        try:
1730            t = cls.taropen(name, mode, fileobj, **kwargs)
1731        except (LZMAError, EOFError):
1732            fileobj.close()
1733            if mode == 'r':
1734                raise ReadError("not an lzma file")
1735            raise
1736        except:
1737            fileobj.close()
1738            raise
1739        t._extfileobj = False
1740        return t
1741
1742    # All *open() methods are registered here.
1743    OPEN_METH = {
1744        "tar": "taropen",   # uncompressed tar
1745        "gz":  "gzopen",    # gzip compressed tar
1746        "bz2": "bz2open",   # bzip2 compressed tar
1747        "xz":  "xzopen"     # lzma compressed tar
1748    }
1749
1750    #--------------------------------------------------------------------------
1751    # The public methods which TarFile provides:
1752
1753    def close(self):
1754        """Close the TarFile. In write-mode, two finishing zero blocks are
1755           appended to the archive.
1756        """
1757        if self.closed:
1758            return
1759
1760        self.closed = True
1761        try:
1762            if self.mode in ("a", "w", "x"):
1763                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1764                self.offset += (BLOCKSIZE * 2)
1765                # fill up the end with zero-blocks
1766                # (like option -b20 for tar does)
1767                blocks, remainder = divmod(self.offset, RECORDSIZE)
1768                if remainder > 0:
1769                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1770        finally:
1771            if not self._extfileobj:
1772                self.fileobj.close()
1773
1774    def getmember(self, name):
1775        """Return a TarInfo object for member `name'. If `name' can not be
1776           found in the archive, KeyError is raised. If a member occurs more
1777           than once in the archive, its last occurrence is assumed to be the
1778           most up-to-date version.
1779        """
1780        tarinfo = self._getmember(name)
1781        if tarinfo is None:
1782            raise KeyError("filename %r not found" % name)
1783        return tarinfo
1784
1785    def getmembers(self):
1786        """Return the members of the archive as a list of TarInfo objects. The
1787           list has the same order as the members in the archive.
1788        """
1789        self._check()
1790        if not self._loaded:    # if we want to obtain a list of
1791            self._load()        # all members, we first have to
1792                                # scan the whole archive.
1793        return self.members
1794
1795    def getnames(self):
1796        """Return the members of the archive as a list of their names. It has
1797           the same order as the list returned by getmembers().
1798        """
1799        return [tarinfo.name for tarinfo in self.getmembers()]
1800
1801    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1802        """Create a TarInfo object from the result of os.stat or equivalent
1803           on an existing file. The file is either named by `name', or
1804           specified as a file object `fileobj' with a file descriptor. If
1805           given, `arcname' specifies an alternative name for the file in the
1806           archive, otherwise, the name is taken from the 'name' attribute of
1807           'fileobj', or the 'name' argument. The name should be a text
1808           string.
1809        """
1810        self._check("awx")
1811
1812        # When fileobj is given, replace name by
1813        # fileobj's real name.
1814        if fileobj is not None:
1815            name = fileobj.name
1816
1817        # Building the name of the member in the archive.
1818        # Backward slashes are converted to forward slashes,
1819        # Absolute paths are turned to relative paths.
1820        if arcname is None:
1821            arcname = name
1822        drv, arcname = os.path.splitdrive(arcname)
1823        arcname = arcname.replace(os.sep, "/")
1824        arcname = arcname.lstrip("/")
1825
1826        # Now, fill the TarInfo object with
1827        # information specific for the file.
1828        tarinfo = self.tarinfo()
1829        tarinfo.tarfile = self  # Not needed
1830
1831        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
1832        if fileobj is None:
1833            if not self.dereference:
1834                statres = os.lstat(name)
1835            else:
1836                statres = os.stat(name)
1837        else:
1838            statres = os.fstat(fileobj.fileno())
1839        linkname = ""
1840
1841        stmd = statres.st_mode
1842        if stat.S_ISREG(stmd):
1843            inode = (statres.st_ino, statres.st_dev)
1844            if not self.dereference and statres.st_nlink > 1 and \
1845                    inode in self.inodes and arcname != self.inodes[inode]:
1846                # Is it a hardlink to an already
1847                # archived file?
1848                type = LNKTYPE
1849                linkname = self.inodes[inode]
1850            else:
1851                # The inode is added only if its valid.
1852                # For win32 it is always 0.
1853                type = REGTYPE
1854                if inode[0]:
1855                    self.inodes[inode] = arcname
1856        elif stat.S_ISDIR(stmd):
1857            type = DIRTYPE
1858        elif stat.S_ISFIFO(stmd):
1859            type = FIFOTYPE
1860        elif stat.S_ISLNK(stmd):
1861            type = SYMTYPE
1862            linkname = os.readlink(name)
1863        elif stat.S_ISCHR(stmd):
1864            type = CHRTYPE
1865        elif stat.S_ISBLK(stmd):
1866            type = BLKTYPE
1867        else:
1868            return None
1869
1870        # Fill the TarInfo object with all
1871        # information we can get.
1872        tarinfo.name = arcname
1873        tarinfo.mode = stmd
1874        tarinfo.uid = statres.st_uid
1875        tarinfo.gid = statres.st_gid
1876        if type == REGTYPE:
1877            tarinfo.size = statres.st_size
1878        else:
1879            tarinfo.size = 0
1880        tarinfo.mtime = statres.st_mtime
1881        tarinfo.type = type
1882        tarinfo.linkname = linkname
1883        if pwd:
1884            try:
1885                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1886            except KeyError:
1887                pass
1888        if grp:
1889            try:
1890                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1891            except KeyError:
1892                pass
1893
1894        if type in (CHRTYPE, BLKTYPE):
1895            if hasattr(os, "major") and hasattr(os, "minor"):
1896                tarinfo.devmajor = os.major(statres.st_rdev)
1897                tarinfo.devminor = os.minor(statres.st_rdev)
1898        return tarinfo
1899
1900    def list(self, verbose=True, *, members=None):
1901        """Print a table of contents to sys.stdout. If `verbose' is False, only
1902           the names of the members are printed. If it is True, an `ls -l'-like
1903           output is produced. `members' is optional and must be a subset of the
1904           list returned by getmembers().
1905        """
1906        self._check()
1907
1908        if members is None:
1909            members = self
1910        for tarinfo in members:
1911            if verbose:
1912                _safe_print(stat.filemode(tarinfo.mode))
1913                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1914                                       tarinfo.gname or tarinfo.gid))
1915                if tarinfo.ischr() or tarinfo.isblk():
1916                    _safe_print("%10s" %
1917                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
1918                else:
1919                    _safe_print("%10d" % tarinfo.size)
1920                _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1921                            % time.localtime(tarinfo.mtime)[:6])
1922
1923            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
1924
1925            if verbose:
1926                if tarinfo.issym():
1927                    _safe_print("-> " + tarinfo.linkname)
1928                if tarinfo.islnk():
1929                    _safe_print("link to " + tarinfo.linkname)
1930            print()
1931
1932    def add(self, name, arcname=None, recursive=True, *, filter=None):
1933        """Add the file `name' to the archive. `name' may be any type of file
1934           (directory, fifo, symbolic link, etc.). If given, `arcname'
1935           specifies an alternative name for the file in the archive.
1936           Directories are added recursively by default. This can be avoided by
1937           setting `recursive' to False. `filter' is a function
1938           that expects a TarInfo object argument and returns the changed
1939           TarInfo object, if it returns None the TarInfo object will be
1940           excluded from the archive.
1941        """
1942        self._check("awx")
1943
1944        if arcname is None:
1945            arcname = name
1946
1947        # Skip if somebody tries to archive the archive...
1948        if self.name is not None and os.path.abspath(name) == self.name:
1949            self._dbg(2, "tarfile: Skipped %r" % name)
1950            return
1951
1952        self._dbg(1, name)
1953
1954        # Create a TarInfo object from the file.
1955        tarinfo = self.gettarinfo(name, arcname)
1956
1957        if tarinfo is None:
1958            self._dbg(1, "tarfile: Unsupported type %r" % name)
1959            return
1960
1961        # Change or exclude the TarInfo object.
1962        if filter is not None:
1963            tarinfo = filter(tarinfo)
1964            if tarinfo is None:
1965                self._dbg(2, "tarfile: Excluded %r" % name)
1966                return
1967
1968        # Append the tar header and data to the archive.
1969        if tarinfo.isreg():
1970            with bltn_open(name, "rb") as f:
1971                self.addfile(tarinfo, f)
1972
1973        elif tarinfo.isdir():
1974            self.addfile(tarinfo)
1975            if recursive:
1976                for f in sorted(os.listdir(name)):
1977                    self.add(os.path.join(name, f), os.path.join(arcname, f),
1978                            recursive, filter=filter)
1979
1980        else:
1981            self.addfile(tarinfo)
1982
1983    def addfile(self, tarinfo, fileobj=None):
1984        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1985           given, it should be a binary file, and tarinfo.size bytes are read
1986           from it and added to the archive. You can create TarInfo objects
1987           directly, or by using gettarinfo().
1988        """
1989        self._check("awx")
1990
1991        tarinfo = copy.copy(tarinfo)
1992
1993        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1994        self.fileobj.write(buf)
1995        self.offset += len(buf)
1996        bufsize=self.copybufsize
1997        # If there's data to follow, append it.
1998        if fileobj is not None:
1999            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2000            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2001            if remainder > 0:
2002                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2003                blocks += 1
2004            self.offset += blocks * BLOCKSIZE
2005
2006        self.members.append(tarinfo)
2007
2008    def extractall(self, path=".", members=None, *, numeric_owner=False):
2009        """Extract all members from the archive to the current working
2010           directory and set owner, modification time and permissions on
2011           directories afterwards. `path' specifies a different directory
2012           to extract to. `members' is optional and must be a subset of the
2013           list returned by getmembers(). If `numeric_owner` is True, only
2014           the numbers for user/group names are used and not the names.
2015        """
2016        directories = []
2017
2018        if members is None:
2019            members = self
2020
2021        for tarinfo in members:
2022            if tarinfo.isdir():
2023                # Extract directories with a safe mode.
2024                directories.append(tarinfo)
2025                tarinfo = copy.copy(tarinfo)
2026                tarinfo.mode = 0o700
2027            # Do not set_attrs directories, as we will do that further down
2028            self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
2029                         numeric_owner=numeric_owner)
2030
2031        # Reverse sort directories.
2032        directories.sort(key=lambda a: a.name)
2033        directories.reverse()
2034
2035        # Set correct owner, mtime and filemode on directories.
2036        for tarinfo in directories:
2037            dirpath = os.path.join(path, tarinfo.name)
2038            try:
2039                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2040                self.utime(tarinfo, dirpath)
2041                self.chmod(tarinfo, dirpath)
2042            except ExtractError as e:
2043                if self.errorlevel > 1:
2044                    raise
2045                else:
2046                    self._dbg(1, "tarfile: %s" % e)
2047
2048    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
2049        """Extract a member from the archive to the current working directory,
2050           using its full name. Its file information is extracted as accurately
2051           as possible. `member' may be a filename or a TarInfo object. You can
2052           specify a different directory using `path'. File attributes (owner,
2053           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2054           is True, only the numbers for user/group names are used and not
2055           the names.
2056        """
2057        self._check("r")
2058
2059        if isinstance(member, str):
2060            tarinfo = self.getmember(member)
2061        else:
2062            tarinfo = member
2063
2064        # Prepare the link target for makelink().
2065        if tarinfo.islnk():
2066            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2067
2068        try:
2069            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2070                                 set_attrs=set_attrs,
2071                                 numeric_owner=numeric_owner)
2072        except OSError as e:
2073            if self.errorlevel > 0:
2074                raise
2075            else:
2076                if e.filename is None:
2077                    self._dbg(1, "tarfile: %s" % e.strerror)
2078                else:
2079                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2080        except ExtractError as e:
2081            if self.errorlevel > 1:
2082                raise
2083            else:
2084                self._dbg(1, "tarfile: %s" % e)
2085
2086    def extractfile(self, member):
2087        """Extract a member from the archive as a file object. `member' may be
2088           a filename or a TarInfo object. If `member' is a regular file or a
2089           link, an io.BufferedReader object is returned. Otherwise, None is
2090           returned.
2091        """
2092        self._check("r")
2093
2094        if isinstance(member, str):
2095            tarinfo = self.getmember(member)
2096        else:
2097            tarinfo = member
2098
2099        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2100            # Members with unknown types are treated as regular files.
2101            return self.fileobject(self, tarinfo)
2102
2103        elif tarinfo.islnk() or tarinfo.issym():
2104            if isinstance(self.fileobj, _Stream):
2105                # A small but ugly workaround for the case that someone tries
2106                # to extract a (sym)link as a file-object from a non-seekable
2107                # stream of tar blocks.
2108                raise StreamError("cannot extract (sym)link as file object")
2109            else:
2110                # A (sym)link's file object is its target's file object.
2111                return self.extractfile(self._find_link_target(tarinfo))
2112        else:
2113            # If there's no data associated with the member (directory, chrdev,
2114            # blkdev, etc.), return None instead of a file object.
2115            return None
2116
2117    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2118                        numeric_owner=False):
2119        """Extract the TarInfo object tarinfo to a physical
2120           file called targetpath.
2121        """
2122        # Fetch the TarInfo object for the given name
2123        # and build the destination pathname, replacing
2124        # forward slashes to platform specific separators.
2125        targetpath = targetpath.rstrip("/")
2126        targetpath = targetpath.replace("/", os.sep)
2127
2128        # Create all upper directories.
2129        upperdirs = os.path.dirname(targetpath)
2130        if upperdirs and not os.path.exists(upperdirs):
2131            # Create directories that are not part of the archive with
2132            # default permissions.
2133            os.makedirs(upperdirs)
2134
2135        if tarinfo.islnk() or tarinfo.issym():
2136            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2137        else:
2138            self._dbg(1, tarinfo.name)
2139
2140        if tarinfo.isreg():
2141            self.makefile(tarinfo, targetpath)
2142        elif tarinfo.isdir():
2143            self.makedir(tarinfo, targetpath)
2144        elif tarinfo.isfifo():
2145            self.makefifo(tarinfo, targetpath)
2146        elif tarinfo.ischr() or tarinfo.isblk():
2147            self.makedev(tarinfo, targetpath)
2148        elif tarinfo.islnk() or tarinfo.issym():
2149            self.makelink(tarinfo, targetpath)
2150        elif tarinfo.type not in SUPPORTED_TYPES:
2151            self.makeunknown(tarinfo, targetpath)
2152        else:
2153            self.makefile(tarinfo, targetpath)
2154
2155        if set_attrs:
2156            self.chown(tarinfo, targetpath, numeric_owner)
2157            if not tarinfo.issym():
2158                self.chmod(tarinfo, targetpath)
2159                self.utime(tarinfo, targetpath)
2160
2161    #--------------------------------------------------------------------------
2162    # Below are the different file methods. They are called via
2163    # _extract_member() when extract() is called. They can be replaced in a
2164    # subclass to implement other functionality.
2165
2166    def makedir(self, tarinfo, targetpath):
2167        """Make a directory called targetpath.
2168        """
2169        try:
2170            # Use a safe mode for the directory, the real mode is set
2171            # later in _extract_member().
2172            os.mkdir(targetpath, 0o700)
2173        except FileExistsError:
2174            pass
2175
2176    def makefile(self, tarinfo, targetpath):
2177        """Make a file called targetpath.
2178        """
2179        source = self.fileobj
2180        source.seek(tarinfo.offset_data)
2181        bufsize = self.copybufsize
2182        with bltn_open(targetpath, "wb") as target:
2183            if tarinfo.sparse is not None:
2184                for offset, size in tarinfo.sparse:
2185                    target.seek(offset)
2186                    copyfileobj(source, target, size, ReadError, bufsize)
2187                target.seek(tarinfo.size)
2188                target.truncate()
2189            else:
2190                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2191
2192    def makeunknown(self, tarinfo, targetpath):
2193        """Make a file from a TarInfo object with an unknown type
2194           at targetpath.
2195        """
2196        self.makefile(tarinfo, targetpath)
2197        self._dbg(1, "tarfile: Unknown file type %r, " \
2198                     "extracted as regular file." % tarinfo.type)
2199
2200    def makefifo(self, tarinfo, targetpath):
2201        """Make a fifo called targetpath.
2202        """
2203        if hasattr(os, "mkfifo"):
2204            os.mkfifo(targetpath)
2205        else:
2206            raise ExtractError("fifo not supported by system")
2207
2208    def makedev(self, tarinfo, targetpath):
2209        """Make a character or block device called targetpath.
2210        """
2211        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2212            raise ExtractError("special devices not supported by system")
2213
2214        mode = tarinfo.mode
2215        if tarinfo.isblk():
2216            mode |= stat.S_IFBLK
2217        else:
2218            mode |= stat.S_IFCHR
2219
2220        os.mknod(targetpath, mode,
2221                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2222
2223    def makelink(self, tarinfo, targetpath):
2224        """Make a (symbolic) link called targetpath. If it cannot be created
2225          (platform limitation), we try to make a copy of the referenced file
2226          instead of a link.
2227        """
2228        try:
2229            # For systems that support symbolic and hard links.
2230            if tarinfo.issym():
2231                if os.path.lexists(targetpath):
2232                    # Avoid FileExistsError on following os.symlink.
2233                    os.unlink(targetpath)
2234                os.symlink(tarinfo.linkname, targetpath)
2235            else:
2236                # See extract().
2237                if os.path.exists(tarinfo._link_target):
2238                    os.link(tarinfo._link_target, targetpath)
2239                else:
2240                    self._extract_member(self._find_link_target(tarinfo),
2241                                         targetpath)
2242        except symlink_exception:
2243            try:
2244                self._extract_member(self._find_link_target(tarinfo),
2245                                     targetpath)
2246            except KeyError:
2247                raise ExtractError("unable to resolve link inside archive")
2248
2249    def chown(self, tarinfo, targetpath, numeric_owner):
2250        """Set owner of targetpath according to tarinfo. If numeric_owner
2251           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2252           is False, fall back to .gid/.uid when the search based on name
2253           fails.
2254        """
2255        if hasattr(os, "geteuid") and os.geteuid() == 0:
2256            # We have to be root to do so.
2257            g = tarinfo.gid
2258            u = tarinfo.uid
2259            if not numeric_owner:
2260                try:
2261                    if grp:
2262                        g = grp.getgrnam(tarinfo.gname)[2]
2263                except KeyError:
2264                    pass
2265                try:
2266                    if pwd:
2267                        u = pwd.getpwnam(tarinfo.uname)[2]
2268                except KeyError:
2269                    pass
2270            try:
2271                if tarinfo.issym() and hasattr(os, "lchown"):
2272                    os.lchown(targetpath, u, g)
2273                else:
2274                    os.chown(targetpath, u, g)
2275            except OSError:
2276                raise ExtractError("could not change owner")
2277
2278    def chmod(self, tarinfo, targetpath):
2279        """Set file permissions of targetpath according to tarinfo.
2280        """
2281        try:
2282            os.chmod(targetpath, tarinfo.mode)
2283        except OSError:
2284            raise ExtractError("could not change mode")
2285
2286    def utime(self, tarinfo, targetpath):
2287        """Set modification time of targetpath according to tarinfo.
2288        """
2289        if not hasattr(os, 'utime'):
2290            return
2291        try:
2292            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2293        except OSError:
2294            raise ExtractError("could not change modification time")
2295
2296    #--------------------------------------------------------------------------
2297    def next(self):
2298        """Return the next member of the archive as a TarInfo object, when
2299           TarFile is opened for reading. Return None if there is no more
2300           available.
2301        """
2302        self._check("ra")
2303        if self.firstmember is not None:
2304            m = self.firstmember
2305            self.firstmember = None
2306            return m
2307
2308        # Advance the file pointer.
2309        if self.offset != self.fileobj.tell():
2310            self.fileobj.seek(self.offset - 1)
2311            if not self.fileobj.read(1):
2312                raise ReadError("unexpected end of data")
2313
2314        # Read the next block.
2315        tarinfo = None
2316        while True:
2317            try:
2318                tarinfo = self.tarinfo.fromtarfile(self)
2319            except EOFHeaderError as e:
2320                if self.ignore_zeros:
2321                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2322                    self.offset += BLOCKSIZE
2323                    continue
2324            except InvalidHeaderError as e:
2325                if self.ignore_zeros:
2326                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2327                    self.offset += BLOCKSIZE
2328                    continue
2329                elif self.offset == 0:
2330                    raise ReadError(str(e))
2331            except EmptyHeaderError:
2332                if self.offset == 0:
2333                    raise ReadError("empty file")
2334            except TruncatedHeaderError as e:
2335                if self.offset == 0:
2336                    raise ReadError(str(e))
2337            except SubsequentHeaderError as e:
2338                raise ReadError(str(e))
2339            break
2340
2341        if tarinfo is not None:
2342            self.members.append(tarinfo)
2343        else:
2344            self._loaded = True
2345
2346        return tarinfo
2347
2348    #--------------------------------------------------------------------------
2349    # Little helper methods:
2350
2351    def _getmember(self, name, tarinfo=None, normalize=False):
2352        """Find an archive member by name from bottom to top.
2353           If tarinfo is given, it is used as the starting point.
2354        """
2355        # Ensure that all members have been loaded.
2356        members = self.getmembers()
2357
2358        # Limit the member search list up to tarinfo.
2359        if tarinfo is not None:
2360            members = members[:members.index(tarinfo)]
2361
2362        if normalize:
2363            name = os.path.normpath(name)
2364
2365        for member in reversed(members):
2366            if normalize:
2367                member_name = os.path.normpath(member.name)
2368            else:
2369                member_name = member.name
2370
2371            if name == member_name:
2372                return member
2373
2374    def _load(self):
2375        """Read through the entire archive file and look for readable
2376           members.
2377        """
2378        while True:
2379            tarinfo = self.next()
2380            if tarinfo is None:
2381                break
2382        self._loaded = True
2383
2384    def _check(self, mode=None):
2385        """Check if TarFile is still open, and if the operation's mode
2386           corresponds to TarFile's mode.
2387        """
2388        if self.closed:
2389            raise OSError("%s is closed" % self.__class__.__name__)
2390        if mode is not None and self.mode not in mode:
2391            raise OSError("bad operation for mode %r" % self.mode)
2392
2393    def _find_link_target(self, tarinfo):
2394        """Find the target member of a symlink or hardlink member in the
2395           archive.
2396        """
2397        if tarinfo.issym():
2398            # Always search the entire archive.
2399            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2400            limit = None
2401        else:
2402            # Search the archive before the link, because a hard link is
2403            # just a reference to an already archived file.
2404            linkname = tarinfo.linkname
2405            limit = tarinfo
2406
2407        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2408        if member is None:
2409            raise KeyError("linkname %r not found" % linkname)
2410        return member
2411
2412    def __iter__(self):
2413        """Provide an iterator object.
2414        """
2415        if self._loaded:
2416            yield from self.members
2417            return
2418
2419        # Yield items using TarFile's next() method.
2420        # When all members have been read, set TarFile as _loaded.
2421        index = 0
2422        # Fix for SF #1100429: Under rare circumstances it can
2423        # happen that getmembers() is called during iteration,
2424        # which will have already exhausted the next() method.
2425        if self.firstmember is not None:
2426            tarinfo = self.next()
2427            index += 1
2428            yield tarinfo
2429
2430        while True:
2431            if index < len(self.members):
2432                tarinfo = self.members[index]
2433            elif not self._loaded:
2434                tarinfo = self.next()
2435                if not tarinfo:
2436                    self._loaded = True
2437                    return
2438            else:
2439                return
2440            index += 1
2441            yield tarinfo
2442
2443    def _dbg(self, level, msg):
2444        """Write debugging output to sys.stderr.
2445        """
2446        if level <= self.debug:
2447            print(msg, file=sys.stderr)
2448
2449    def __enter__(self):
2450        self._check()
2451        return self
2452
2453    def __exit__(self, type, value, traceback):
2454        if type is None:
2455            self.close()
2456        else:
2457            # An exception occurred. We must not call close() because
2458            # it would try to write end-of-archive blocks and padding.
2459            if not self._extfileobj:
2460                self.fileobj.close()
2461            self.closed = True
2462
2463#--------------------
2464# exported functions
2465#--------------------
2466def is_tarfile(name):
2467    """Return True if name points to a tar archive that we
2468       are able to handle, else return False.
2469    """
2470    try:
2471        t = open(name)
2472        t.close()
2473        return True
2474    except TarError:
2475        return False
2476
2477open = TarFile.open
2478
2479
2480def main():
2481    import argparse
2482
2483    description = 'A simple command-line interface for tarfile module.'
2484    parser = argparse.ArgumentParser(description=description)
2485    parser.add_argument('-v', '--verbose', action='store_true', default=False,
2486                        help='Verbose output')
2487    group = parser.add_mutually_exclusive_group(required=True)
2488    group.add_argument('-l', '--list', metavar='<tarfile>',
2489                       help='Show listing of a tarfile')
2490    group.add_argument('-e', '--extract', nargs='+',
2491                       metavar=('<tarfile>', '<output_dir>'),
2492                       help='Extract tarfile into target dir')
2493    group.add_argument('-c', '--create', nargs='+',
2494                       metavar=('<name>', '<file>'),
2495                       help='Create tarfile from sources')
2496    group.add_argument('-t', '--test', metavar='<tarfile>',
2497                       help='Test if a tarfile is valid')
2498    args = parser.parse_args()
2499
2500    if args.test is not None:
2501        src = args.test
2502        if is_tarfile(src):
2503            with open(src, 'r') as tar:
2504                tar.getmembers()
2505                print(tar.getmembers(), file=sys.stderr)
2506            if args.verbose:
2507                print('{!r} is a tar archive.'.format(src))
2508        else:
2509            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2510
2511    elif args.list is not None:
2512        src = args.list
2513        if is_tarfile(src):
2514            with TarFile.open(src, 'r:*') as tf:
2515                tf.list(verbose=args.verbose)
2516        else:
2517            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2518
2519    elif args.extract is not None:
2520        if len(args.extract) == 1:
2521            src = args.extract[0]
2522            curdir = os.curdir
2523        elif len(args.extract) == 2:
2524            src, curdir = args.extract
2525        else:
2526            parser.exit(1, parser.format_help())
2527
2528        if is_tarfile(src):
2529            with TarFile.open(src, 'r:*') as tf:
2530                tf.extractall(path=curdir)
2531            if args.verbose:
2532                if curdir == '.':
2533                    msg = '{!r} file is extracted.'.format(src)
2534                else:
2535                    msg = ('{!r} file is extracted '
2536                           'into {!r} directory.').format(src, curdir)
2537                print(msg)
2538        else:
2539            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2540
2541    elif args.create is not None:
2542        tar_name = args.create.pop(0)
2543        _, ext = os.path.splitext(tar_name)
2544        compressions = {
2545            # gz
2546            '.gz': 'gz',
2547            '.tgz': 'gz',
2548            # xz
2549            '.xz': 'xz',
2550            '.txz': 'xz',
2551            # bz2
2552            '.bz2': 'bz2',
2553            '.tbz': 'bz2',
2554            '.tbz2': 'bz2',
2555            '.tb2': 'bz2',
2556        }
2557        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2558        tar_files = args.create
2559
2560        with TarFile.open(tar_name, tar_mode) as tf:
2561            for file_name in tar_files:
2562                tf.add(file_name)
2563
2564        if args.verbose:
2565            print('{!r} file created.'.format(tar_name))
2566
2567if __name__ == '__main__':
2568    main()
2569