1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import functools
8import importlib.util
9import io
10import itertools
11import os
12import posixpath
13import shutil
14import stat
15import struct
16import sys
17import threading
18import time
19import contextlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
43class BadZipFile(Exception):
44    pass
45
46
47class LargeZipFile(Exception):
48    """
49    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50    and those extensions are disabled.
51    """
52
53error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
55
56ZIP64_LIMIT = (1 << 31) - 1
57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
58ZIP_MAX_COMMENT = (1 << 16) - 1
59
60# constants for Zip file compression methods
61ZIP_STORED = 0
62ZIP_DEFLATED = 8
63ZIP_BZIP2 = 12
64ZIP_LZMA = 14
65# Other ZIP compression methods not supported
66
67DEFAULT_VERSION = 20
68ZIP64_VERSION = 45
69BZIP2_VERSION = 46
70LZMA_VERSION = 63
71# we recognize (but not necessarily support) all features up to that version
72MAX_EXTRACT_VERSION = 63
73
74# Below are some formats and associated data for reading/writing headers using
75# the struct module.  The names and structures of headers/records are those used
76# in the PKWARE description of the ZIP file format:
77#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78# (URL valid as of January 2008)
79
80# The "end of central directory" structure, magic number, size, and indices
81# (section V.I in the format document)
82structEndArchive = b"<4s4H2LH"
83stringEndArchive = b"PK\005\006"
84sizeEndCentDir = struct.calcsize(structEndArchive)
85
86_ECD_SIGNATURE = 0
87_ECD_DISK_NUMBER = 1
88_ECD_DISK_START = 2
89_ECD_ENTRIES_THIS_DISK = 3
90_ECD_ENTRIES_TOTAL = 4
91_ECD_SIZE = 5
92_ECD_OFFSET = 6
93_ECD_COMMENT_SIZE = 7
94# These last two indices are not part of the structure as defined in the
95# spec, but they are used internally by this module as a convenience
96_ECD_COMMENT = 8
97_ECD_LOCATION = 9
98
99# The "central directory" structure, magic number, size, and indices
100# of entries in the structure (section V.F in the format document)
101structCentralDir = "<4s4B4HL2L5H2L"
102stringCentralDir = b"PK\001\002"
103sizeCentralDir = struct.calcsize(structCentralDir)
104
105# indexes of entries in the central directory structure
106_CD_SIGNATURE = 0
107_CD_CREATE_VERSION = 1
108_CD_CREATE_SYSTEM = 2
109_CD_EXTRACT_VERSION = 3
110_CD_EXTRACT_SYSTEM = 4
111_CD_FLAG_BITS = 5
112_CD_COMPRESS_TYPE = 6
113_CD_TIME = 7
114_CD_DATE = 8
115_CD_CRC = 9
116_CD_COMPRESSED_SIZE = 10
117_CD_UNCOMPRESSED_SIZE = 11
118_CD_FILENAME_LENGTH = 12
119_CD_EXTRA_FIELD_LENGTH = 13
120_CD_COMMENT_LENGTH = 14
121_CD_DISK_NUMBER_START = 15
122_CD_INTERNAL_FILE_ATTRIBUTES = 16
123_CD_EXTERNAL_FILE_ATTRIBUTES = 17
124_CD_LOCAL_HEADER_OFFSET = 18
125
126# The "local file header" structure, magic number, size, and indices
127# (section V.A in the format document)
128structFileHeader = "<4s2B4HL2L2H"
129stringFileHeader = b"PK\003\004"
130sizeFileHeader = struct.calcsize(structFileHeader)
131
132_FH_SIGNATURE = 0
133_FH_EXTRACT_VERSION = 1
134_FH_EXTRACT_SYSTEM = 2
135_FH_GENERAL_PURPOSE_FLAG_BITS = 3
136_FH_COMPRESSION_METHOD = 4
137_FH_LAST_MOD_TIME = 5
138_FH_LAST_MOD_DATE = 6
139_FH_CRC = 7
140_FH_COMPRESSED_SIZE = 8
141_FH_UNCOMPRESSED_SIZE = 9
142_FH_FILENAME_LENGTH = 10
143_FH_EXTRA_FIELD_LENGTH = 11
144
145# The "Zip64 end of central directory locator" structure, magic number, and size
146structEndArchive64Locator = "<4sLQL"
147stringEndArchive64Locator = b"PK\x06\x07"
148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
149
150# The "Zip64 end of central directory" record, magic number, size, and indices
151# (section V.G in the format document)
152structEndArchive64 = "<4sQ2H2L4Q"
153stringEndArchive64 = b"PK\x06\x06"
154sizeEndCentDir64 = struct.calcsize(structEndArchive64)
155
156_CD64_SIGNATURE = 0
157_CD64_DIRECTORY_RECSIZE = 1
158_CD64_CREATE_VERSION = 2
159_CD64_EXTRACT_VERSION = 3
160_CD64_DISK_NUMBER = 4
161_CD64_DISK_NUMBER_START = 5
162_CD64_NUMBER_ENTRIES_THIS_DISK = 6
163_CD64_NUMBER_ENTRIES_TOTAL = 7
164_CD64_DIRECTORY_SIZE = 8
165_CD64_OFFSET_START_CENTDIR = 9
166
167_DD_SIGNATURE = 0x08074b50
168
169_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
171def _strip_extra(extra, xids):
172    # Remove Extra Fields with specified IDs.
173    unpack = _EXTRA_FIELD_STRUCT.unpack
174    modified = False
175    buffer = []
176    start = i = 0
177    while i + 4 <= len(extra):
178        xid, xlen = unpack(extra[i : i + 4])
179        j = i + 4 + xlen
180        if xid in xids:
181            if i != start:
182                buffer.append(extra[start : i])
183            start = j
184            modified = True
185        i = j
186    if not modified:
187        return extra
188    return b''.join(buffer)
189
190def _check_zipfile(fp):
191    try:
192        if _EndRecData(fp):
193            return True         # file has correct magic number
194    except OSError:
195        pass
196    return False
197
198def is_zipfile(filename):
199    """Quickly see if a file is a ZIP file by checking the magic number.
200
201    The filename argument may be a file or file-like object too.
202    """
203    result = False
204    try:
205        if hasattr(filename, "read"):
206            result = _check_zipfile(fp=filename)
207        else:
208            with open(filename, "rb") as fp:
209                result = _check_zipfile(fp)
210    except OSError:
211        pass
212    return result
213
214def _EndRecData64(fpin, offset, endrec):
215    """
216    Read the ZIP64 end-of-archive records and use that to update endrec
217    """
218    try:
219        fpin.seek(offset - sizeEndCentDir64Locator, 2)
220    except OSError:
221        # If the seek fails, the file is not large enough to contain a ZIP64
222        # end-of-archive record, so just return the end record we were given.
223        return endrec
224
225    data = fpin.read(sizeEndCentDir64Locator)
226    if len(data) != sizeEndCentDir64Locator:
227        return endrec
228    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
229    if sig != stringEndArchive64Locator:
230        return endrec
231
232    if diskno != 0 or disks > 1:
233        raise BadZipFile("zipfiles that span multiple disks are not supported")
234
235    # Assume no 'zip64 extensible data'
236    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
237    data = fpin.read(sizeEndCentDir64)
238    if len(data) != sizeEndCentDir64:
239        return endrec
240    sig, sz, create_version, read_version, disk_num, disk_dir, \
241        dircount, dircount2, dirsize, diroffset = \
242        struct.unpack(structEndArchive64, data)
243    if sig != stringEndArchive64:
244        return endrec
245
246    # Update the original endrec using data from the ZIP64 record
247    endrec[_ECD_SIGNATURE] = sig
248    endrec[_ECD_DISK_NUMBER] = disk_num
249    endrec[_ECD_DISK_START] = disk_dir
250    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
251    endrec[_ECD_ENTRIES_TOTAL] = dircount2
252    endrec[_ECD_SIZE] = dirsize
253    endrec[_ECD_OFFSET] = diroffset
254    return endrec
255
256
257def _EndRecData(fpin):
258    """Return data from the "End of Central Directory" record, or None.
259
260    The data is a list of the nine items in the ZIP "End of central dir"
261    record followed by a tenth item, the file seek offset of this record."""
262
263    # Determine file size
264    fpin.seek(0, 2)
265    filesize = fpin.tell()
266
267    # Check to see if this is ZIP file with no archive comment (the
268    # "end of central directory" structure should be the last item in the
269    # file if this is the case).
270    try:
271        fpin.seek(-sizeEndCentDir, 2)
272    except OSError:
273        return None
274    data = fpin.read()
275    if (len(data) == sizeEndCentDir and
276        data[0:4] == stringEndArchive and
277        data[-2:] == b"\000\000"):
278        # the signature is correct and there's no comment, unpack structure
279        endrec = struct.unpack(structEndArchive, data)
280        endrec=list(endrec)
281
282        # Append a blank comment and record start offset
283        endrec.append(b"")
284        endrec.append(filesize - sizeEndCentDir)
285
286        # Try to read the "Zip64 end of central directory" structure
287        return _EndRecData64(fpin, -sizeEndCentDir, endrec)
288
289    # Either this is not a ZIP file, or it is a ZIP file with an archive
290    # comment.  Search the end of the file for the "end of central directory"
291    # record signature. The comment is the last item in the ZIP file and may be
292    # up to 64K long.  It is assumed that the "end of central directory" magic
293    # number does not appear in the comment.
294    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
295    fpin.seek(maxCommentStart, 0)
296    data = fpin.read()
297    start = data.rfind(stringEndArchive)
298    if start >= 0:
299        # found the magic number; attempt to unpack and interpret
300        recData = data[start:start+sizeEndCentDir]
301        if len(recData) != sizeEndCentDir:
302            # Zip file is corrupted.
303            return None
304        endrec = list(struct.unpack(structEndArchive, recData))
305        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
306        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
307        endrec.append(comment)
308        endrec.append(maxCommentStart + start)
309
310        # Try to read the "Zip64 end of central directory" structure
311        return _EndRecData64(fpin, maxCommentStart + start - filesize,
312                             endrec)
313
314    # Unable to find a valid end of central directory structure
315    return None
316
317
318class ZipInfo (object):
319    """Class with attributes describing each file in the ZIP archive."""
320
321    __slots__ = (
322        'orig_filename',
323        'filename',
324        'date_time',
325        'compress_type',
326        '_compresslevel',
327        'comment',
328        'extra',
329        'create_system',
330        'create_version',
331        'extract_version',
332        'reserved',
333        'flag_bits',
334        'volume',
335        'internal_attr',
336        'external_attr',
337        'header_offset',
338        'CRC',
339        'compress_size',
340        'file_size',
341        '_raw_time',
342    )
343
344    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
345        self.orig_filename = filename   # Original file name in archive
346
347        # Terminate the file name at the first null byte.  Null bytes in file
348        # names are used as tricks by viruses in archives.
349        null_byte = filename.find(chr(0))
350        if null_byte >= 0:
351            filename = filename[0:null_byte]
352        # This is used to ensure paths in generated ZIP files always use
353        # forward slashes as the directory separator, as required by the
354        # ZIP format specification.
355        if os.sep != "/" and os.sep in filename:
356            filename = filename.replace(os.sep, "/")
357
358        self.filename = filename        # Normalized file name
359        self.date_time = date_time      # year, month, day, hour, min, sec
360
361        if date_time[0] < 1980:
362            raise ValueError('ZIP does not support timestamps before 1980')
363
364        # Standard values:
365        self.compress_type = ZIP_STORED # Type of compression for the file
366        self._compresslevel = None      # Level for the compressor
367        self.comment = b""              # Comment for each file
368        self.extra = b""                # ZIP extra data
369        if sys.platform == 'win32':
370            self.create_system = 0          # System which created ZIP archive
371        else:
372            # Assume everything else is unix-y
373            self.create_system = 3          # System which created ZIP archive
374        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
375        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
376        self.reserved = 0               # Must be zero
377        self.flag_bits = 0              # ZIP flag bits
378        self.volume = 0                 # Volume number of file header
379        self.internal_attr = 0          # Internal attributes
380        self.external_attr = 0          # External file attributes
381        # Other attributes are set by class ZipFile:
382        # header_offset         Byte offset to the file header
383        # CRC                   CRC-32 of the uncompressed file
384        # compress_size         Size of the compressed file
385        # file_size             Size of the uncompressed file
386
387    def __repr__(self):
388        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
389        if self.compress_type != ZIP_STORED:
390            result.append(' compress_type=%s' %
391                          compressor_names.get(self.compress_type,
392                                               self.compress_type))
393        hi = self.external_attr >> 16
394        lo = self.external_attr & 0xFFFF
395        if hi:
396            result.append(' filemode=%r' % stat.filemode(hi))
397        if lo:
398            result.append(' external_attr=%#x' % lo)
399        isdir = self.is_dir()
400        if not isdir or self.file_size:
401            result.append(' file_size=%r' % self.file_size)
402        if ((not isdir or self.compress_size) and
403            (self.compress_type != ZIP_STORED or
404             self.file_size != self.compress_size)):
405            result.append(' compress_size=%r' % self.compress_size)
406        result.append('>')
407        return ''.join(result)
408
409    def FileHeader(self, zip64=None):
410        """Return the per-file header as a bytes object."""
411        dt = self.date_time
412        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
413        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
414        if self.flag_bits & 0x08:
415            # Set these to zero because we write them after the file data
416            CRC = compress_size = file_size = 0
417        else:
418            CRC = self.CRC
419            compress_size = self.compress_size
420            file_size = self.file_size
421
422        extra = self.extra
423
424        min_version = 0
425        if zip64 is None:
426            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
427        if zip64:
428            fmt = '<HHQQ'
429            extra = extra + struct.pack(fmt,
430                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
431        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
432            if not zip64:
433                raise LargeZipFile("Filesize would require ZIP64 extensions")
434            # File is larger than what fits into a 4 byte integer,
435            # fall back to the ZIP64 extension
436            file_size = 0xffffffff
437            compress_size = 0xffffffff
438            min_version = ZIP64_VERSION
439
440        if self.compress_type == ZIP_BZIP2:
441            min_version = max(BZIP2_VERSION, min_version)
442        elif self.compress_type == ZIP_LZMA:
443            min_version = max(LZMA_VERSION, min_version)
444
445        self.extract_version = max(min_version, self.extract_version)
446        self.create_version = max(min_version, self.create_version)
447        filename, flag_bits = self._encodeFilenameFlags()
448        header = struct.pack(structFileHeader, stringFileHeader,
449                             self.extract_version, self.reserved, flag_bits,
450                             self.compress_type, dostime, dosdate, CRC,
451                             compress_size, file_size,
452                             len(filename), len(extra))
453        return header + filename + extra
454
455    def _encodeFilenameFlags(self):
456        try:
457            return self.filename.encode('ascii'), self.flag_bits
458        except UnicodeEncodeError:
459            return self.filename.encode('utf-8'), self.flag_bits | 0x800
460
461    def _decodeExtra(self):
462        # Try to decode the extra field.
463        extra = self.extra
464        unpack = struct.unpack
465        while len(extra) >= 4:
466            tp, ln = unpack('<HH', extra[:4])
467            if ln+4 > len(extra):
468                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
469            if tp == 0x0001:
470                if ln >= 24:
471                    counts = unpack('<QQQ', extra[4:28])
472                elif ln == 16:
473                    counts = unpack('<QQ', extra[4:20])
474                elif ln == 8:
475                    counts = unpack('<Q', extra[4:12])
476                elif ln == 0:
477                    counts = ()
478                else:
479                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
480
481                idx = 0
482
483                # ZIP64 extension (large files and/or large archives)
484                if self.file_size in (0xffffffffffffffff, 0xffffffff):
485                    if len(counts) <= idx:
486                        raise BadZipFile(
487                            "Corrupt zip64 extra field. File size not found."
488                        )
489                    self.file_size = counts[idx]
490                    idx += 1
491
492                if self.compress_size == 0xFFFFFFFF:
493                    if len(counts) <= idx:
494                        raise BadZipFile(
495                            "Corrupt zip64 extra field. Compress size not found."
496                        )
497                    self.compress_size = counts[idx]
498                    idx += 1
499
500                if self.header_offset == 0xffffffff:
501                    if len(counts) <= idx:
502                        raise BadZipFile(
503                            "Corrupt zip64 extra field. Header offset not found."
504                        )
505                    old = self.header_offset
506                    self.header_offset = counts[idx]
507                    idx+=1
508
509            extra = extra[ln+4:]
510
511    @classmethod
512    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
513        """Construct an appropriate ZipInfo for a file on the filesystem.
514
515        filename should be the path to a file or directory on the filesystem.
516
517        arcname is the name which it will have within the archive (by default,
518        this will be the same as filename, but without a drive letter and with
519        leading path separators removed).
520        """
521        if isinstance(filename, os.PathLike):
522            filename = os.fspath(filename)
523        st = os.stat(filename)
524        isdir = stat.S_ISDIR(st.st_mode)
525        mtime = time.localtime(st.st_mtime)
526        date_time = mtime[0:6]
527        if not strict_timestamps and date_time[0] < 1980:
528            date_time = (1980, 1, 1, 0, 0, 0)
529        elif not strict_timestamps and date_time[0] > 2107:
530            date_time = (2107, 12, 31, 23, 59, 59)
531        # Create ZipInfo instance to store file information
532        if arcname is None:
533            arcname = filename
534        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
535        while arcname[0] in (os.sep, os.altsep):
536            arcname = arcname[1:]
537        if isdir:
538            arcname += '/'
539        zinfo = cls(arcname, date_time)
540        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
541        if isdir:
542            zinfo.file_size = 0
543            zinfo.external_attr |= 0x10  # MS-DOS directory flag
544        else:
545            zinfo.file_size = st.st_size
546
547        return zinfo
548
549    def is_dir(self):
550        """Return True if this archive member is a directory."""
551        return self.filename[-1] == '/'
552
553
554# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
555# internal keys. We noticed that a direct implementation is faster than
556# relying on binascii.crc32().
557
558_crctable = None
559def _gen_crc(crc):
560    for j in range(8):
561        if crc & 1:
562            crc = (crc >> 1) ^ 0xEDB88320
563        else:
564            crc >>= 1
565    return crc
566
567# ZIP supports a password-based form of encryption. Even though known
568# plaintext attacks have been found against it, it is still useful
569# to be able to get data out of such a file.
570#
571# Usage:
572#     zd = _ZipDecrypter(mypwd)
573#     plain_bytes = zd(cypher_bytes)
574
575def _ZipDecrypter(pwd):
576    key0 = 305419896
577    key1 = 591751049
578    key2 = 878082192
579
580    global _crctable
581    if _crctable is None:
582        _crctable = list(map(_gen_crc, range(256)))
583    crctable = _crctable
584
585    def crc32(ch, crc):
586        """Compute the CRC32 primitive on one byte."""
587        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
588
589    def update_keys(c):
590        nonlocal key0, key1, key2
591        key0 = crc32(c, key0)
592        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
593        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
594        key2 = crc32(key1 >> 24, key2)
595
596    for p in pwd:
597        update_keys(p)
598
599    def decrypter(data):
600        """Decrypt a bytes object."""
601        result = bytearray()
602        append = result.append
603        for c in data:
604            k = key2 | 2
605            c ^= ((k * (k^1)) >> 8) & 0xFF
606            update_keys(c)
607            append(c)
608        return bytes(result)
609
610    return decrypter
611
612
613class LZMACompressor:
614
615    def __init__(self):
616        self._comp = None
617
618    def _init(self):
619        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
620        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
621            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
622        ])
623        return struct.pack('<BBH', 9, 4, len(props)) + props
624
625    def compress(self, data):
626        if self._comp is None:
627            return self._init() + self._comp.compress(data)
628        return self._comp.compress(data)
629
630    def flush(self):
631        if self._comp is None:
632            return self._init() + self._comp.flush()
633        return self._comp.flush()
634
635
636class LZMADecompressor:
637
638    def __init__(self):
639        self._decomp = None
640        self._unconsumed = b''
641        self.eof = False
642
643    def decompress(self, data):
644        if self._decomp is None:
645            self._unconsumed += data
646            if len(self._unconsumed) <= 4:
647                return b''
648            psize, = struct.unpack('<H', self._unconsumed[2:4])
649            if len(self._unconsumed) <= 4 + psize:
650                return b''
651
652            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
653                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
654                                               self._unconsumed[4:4 + psize])
655            ])
656            data = self._unconsumed[4 + psize:]
657            del self._unconsumed
658
659        result = self._decomp.decompress(data)
660        self.eof = self._decomp.eof
661        return result
662
663
664compressor_names = {
665    0: 'store',
666    1: 'shrink',
667    2: 'reduce',
668    3: 'reduce',
669    4: 'reduce',
670    5: 'reduce',
671    6: 'implode',
672    7: 'tokenize',
673    8: 'deflate',
674    9: 'deflate64',
675    10: 'implode',
676    12: 'bzip2',
677    14: 'lzma',
678    18: 'terse',
679    19: 'lz77',
680    97: 'wavpack',
681    98: 'ppmd',
682}
683
684def _check_compression(compression):
685    if compression == ZIP_STORED:
686        pass
687    elif compression == ZIP_DEFLATED:
688        if not zlib:
689            raise RuntimeError(
690                "Compression requires the (missing) zlib module")
691    elif compression == ZIP_BZIP2:
692        if not bz2:
693            raise RuntimeError(
694                "Compression requires the (missing) bz2 module")
695    elif compression == ZIP_LZMA:
696        if not lzma:
697            raise RuntimeError(
698                "Compression requires the (missing) lzma module")
699    else:
700        raise NotImplementedError("That compression method is not supported")
701
702
703def _get_compressor(compress_type, compresslevel=None):
704    if compress_type == ZIP_DEFLATED:
705        if compresslevel is not None:
706            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
707        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
708    elif compress_type == ZIP_BZIP2:
709        if compresslevel is not None:
710            return bz2.BZ2Compressor(compresslevel)
711        return bz2.BZ2Compressor()
712    # compresslevel is ignored for ZIP_LZMA
713    elif compress_type == ZIP_LZMA:
714        return LZMACompressor()
715    else:
716        return None
717
718
719def _get_decompressor(compress_type):
720    _check_compression(compress_type)
721    if compress_type == ZIP_STORED:
722        return None
723    elif compress_type == ZIP_DEFLATED:
724        return zlib.decompressobj(-15)
725    elif compress_type == ZIP_BZIP2:
726        return bz2.BZ2Decompressor()
727    elif compress_type == ZIP_LZMA:
728        return LZMADecompressor()
729    else:
730        descr = compressor_names.get(compress_type)
731        if descr:
732            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
733        else:
734            raise NotImplementedError("compression type %d" % (compress_type,))
735
736
737class _SharedFile:
738    def __init__(self, file, pos, close, lock, writing):
739        self._file = file
740        self._pos = pos
741        self._close = close
742        self._lock = lock
743        self._writing = writing
744        self.seekable = file.seekable
745        self.tell = file.tell
746
747    def seek(self, offset, whence=0):
748        with self._lock:
749            if self._writing():
750                raise ValueError("Can't reposition in the ZIP file while "
751                        "there is an open writing handle on it. "
752                        "Close the writing handle before trying to read.")
753            self._file.seek(offset, whence)
754            self._pos = self._file.tell()
755            return self._pos
756
757    def read(self, n=-1):
758        with self._lock:
759            if self._writing():
760                raise ValueError("Can't read from the ZIP file while there "
761                        "is an open writing handle on it. "
762                        "Close the writing handle before trying to read.")
763            self._file.seek(self._pos)
764            data = self._file.read(n)
765            self._pos = self._file.tell()
766            return data
767
768    def close(self):
769        if self._file is not None:
770            fileobj = self._file
771            self._file = None
772            self._close(fileobj)
773
774# Provide the tell method for unseekable stream
775class _Tellable:
776    def __init__(self, fp):
777        self.fp = fp
778        self.offset = 0
779
780    def write(self, data):
781        n = self.fp.write(data)
782        self.offset += n
783        return n
784
785    def tell(self):
786        return self.offset
787
788    def flush(self):
789        self.fp.flush()
790
791    def close(self):
792        self.fp.close()
793
794
795class ZipExtFile(io.BufferedIOBase):
796    """File-like object for reading an archive member.
797       Is returned by ZipFile.open().
798    """
799
800    # Max size supported by decompressor.
801    MAX_N = 1 << 31 - 1
802
803    # Read from compressed files in 4k blocks.
804    MIN_READ_SIZE = 4096
805
806    # Chunk size to read during seek
807    MAX_SEEK_READ = 1 << 24
808
809    def __init__(self, fileobj, mode, zipinfo, pwd=None,
810                 close_fileobj=False):
811        self._fileobj = fileobj
812        self._pwd = pwd
813        self._close_fileobj = close_fileobj
814
815        self._compress_type = zipinfo.compress_type
816        self._compress_left = zipinfo.compress_size
817        self._left = zipinfo.file_size
818
819        self._decompressor = _get_decompressor(self._compress_type)
820
821        self._eof = False
822        self._readbuffer = b''
823        self._offset = 0
824
825        self.newlines = None
826
827        self.mode = mode
828        self.name = zipinfo.filename
829
830        if hasattr(zipinfo, 'CRC'):
831            self._expected_crc = zipinfo.CRC
832            self._running_crc = crc32(b'')
833        else:
834            self._expected_crc = None
835
836        self._seekable = False
837        try:
838            if fileobj.seekable():
839                self._orig_compress_start = fileobj.tell()
840                self._orig_compress_size = zipinfo.compress_size
841                self._orig_file_size = zipinfo.file_size
842                self._orig_start_crc = self._running_crc
843                self._seekable = True
844        except AttributeError:
845            pass
846
847        self._decrypter = None
848        if pwd:
849            if zipinfo.flag_bits & 0x8:
850                # compare against the file type from extended local headers
851                check_byte = (zipinfo._raw_time >> 8) & 0xff
852            else:
853                # compare against the CRC otherwise
854                check_byte = (zipinfo.CRC >> 24) & 0xff
855            h = self._init_decrypter()
856            if h != check_byte:
857                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
858
859
860    def _init_decrypter(self):
861        self._decrypter = _ZipDecrypter(self._pwd)
862        # The first 12 bytes in the cypher stream is an encryption header
863        #  used to strengthen the algorithm. The first 11 bytes are
864        #  completely random, while the 12th contains the MSB of the CRC,
865        #  or the MSB of the file time depending on the header type
866        #  and is used to check the correctness of the password.
867        header = self._fileobj.read(12)
868        self._compress_left -= 12
869        return self._decrypter(header)[11]
870
871    def __repr__(self):
872        result = ['<%s.%s' % (self.__class__.__module__,
873                              self.__class__.__qualname__)]
874        if not self.closed:
875            result.append(' name=%r mode=%r' % (self.name, self.mode))
876            if self._compress_type != ZIP_STORED:
877                result.append(' compress_type=%s' %
878                              compressor_names.get(self._compress_type,
879                                                   self._compress_type))
880        else:
881            result.append(' [closed]')
882        result.append('>')
883        return ''.join(result)
884
885    def readline(self, limit=-1):
886        """Read and return a line from the stream.
887
888        If limit is specified, at most limit bytes will be read.
889        """
890
891        if limit < 0:
892            # Shortcut common case - newline found in buffer.
893            i = self._readbuffer.find(b'\n', self._offset) + 1
894            if i > 0:
895                line = self._readbuffer[self._offset: i]
896                self._offset = i
897                return line
898
899        return io.BufferedIOBase.readline(self, limit)
900
901    def peek(self, n=1):
902        """Returns buffered bytes without advancing the position."""
903        if n > len(self._readbuffer) - self._offset:
904            chunk = self.read(n)
905            if len(chunk) > self._offset:
906                self._readbuffer = chunk + self._readbuffer[self._offset:]
907                self._offset = 0
908            else:
909                self._offset -= len(chunk)
910
911        # Return up to 512 bytes to reduce allocation overhead for tight loops.
912        return self._readbuffer[self._offset: self._offset + 512]
913
914    def readable(self):
915        return True
916
917    def read(self, n=-1):
918        """Read and return up to n bytes.
919        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
920        """
921        if n is None or n < 0:
922            buf = self._readbuffer[self._offset:]
923            self._readbuffer = b''
924            self._offset = 0
925            while not self._eof:
926                buf += self._read1(self.MAX_N)
927            return buf
928
929        end = n + self._offset
930        if end < len(self._readbuffer):
931            buf = self._readbuffer[self._offset:end]
932            self._offset = end
933            return buf
934
935        n = end - len(self._readbuffer)
936        buf = self._readbuffer[self._offset:]
937        self._readbuffer = b''
938        self._offset = 0
939        while n > 0 and not self._eof:
940            data = self._read1(n)
941            if n < len(data):
942                self._readbuffer = data
943                self._offset = n
944                buf += data[:n]
945                break
946            buf += data
947            n -= len(data)
948        return buf
949
950    def _update_crc(self, newdata):
951        # Update the CRC using the given data.
952        if self._expected_crc is None:
953            # No need to compute the CRC if we don't have a reference value
954            return
955        self._running_crc = crc32(newdata, self._running_crc)
956        # Check the CRC if we're at the end of the file
957        if self._eof and self._running_crc != self._expected_crc:
958            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
959
960    def read1(self, n):
961        """Read up to n bytes with at most one read() system call."""
962
963        if n is None or n < 0:
964            buf = self._readbuffer[self._offset:]
965            self._readbuffer = b''
966            self._offset = 0
967            while not self._eof:
968                data = self._read1(self.MAX_N)
969                if data:
970                    buf += data
971                    break
972            return buf
973
974        end = n + self._offset
975        if end < len(self._readbuffer):
976            buf = self._readbuffer[self._offset:end]
977            self._offset = end
978            return buf
979
980        n = end - len(self._readbuffer)
981        buf = self._readbuffer[self._offset:]
982        self._readbuffer = b''
983        self._offset = 0
984        if n > 0:
985            while not self._eof:
986                data = self._read1(n)
987                if n < len(data):
988                    self._readbuffer = data
989                    self._offset = n
990                    buf += data[:n]
991                    break
992                if data:
993                    buf += data
994                    break
995        return buf
996
997    def _read1(self, n):
998        # Read up to n compressed bytes with at most one read() system call,
999        # decrypt and decompress them.
1000        if self._eof or n <= 0:
1001            return b''
1002
1003        # Read from file.
1004        if self._compress_type == ZIP_DEFLATED:
1005            ## Handle unconsumed data.
1006            data = self._decompressor.unconsumed_tail
1007            if n > len(data):
1008                data += self._read2(n - len(data))
1009        else:
1010            data = self._read2(n)
1011
1012        if self._compress_type == ZIP_STORED:
1013            self._eof = self._compress_left <= 0
1014        elif self._compress_type == ZIP_DEFLATED:
1015            n = max(n, self.MIN_READ_SIZE)
1016            data = self._decompressor.decompress(data, n)
1017            self._eof = (self._decompressor.eof or
1018                         self._compress_left <= 0 and
1019                         not self._decompressor.unconsumed_tail)
1020            if self._eof:
1021                data += self._decompressor.flush()
1022        else:
1023            data = self._decompressor.decompress(data)
1024            self._eof = self._decompressor.eof or self._compress_left <= 0
1025
1026        data = data[:self._left]
1027        self._left -= len(data)
1028        if self._left <= 0:
1029            self._eof = True
1030        self._update_crc(data)
1031        return data
1032
1033    def _read2(self, n):
1034        if self._compress_left <= 0:
1035            return b''
1036
1037        n = max(n, self.MIN_READ_SIZE)
1038        n = min(n, self._compress_left)
1039
1040        data = self._fileobj.read(n)
1041        self._compress_left -= len(data)
1042        if not data:
1043            raise EOFError
1044
1045        if self._decrypter is not None:
1046            data = self._decrypter(data)
1047        return data
1048
1049    def close(self):
1050        try:
1051            if self._close_fileobj:
1052                self._fileobj.close()
1053        finally:
1054            super().close()
1055
1056    def seekable(self):
1057        return self._seekable
1058
1059    def seek(self, offset, whence=0):
1060        if not self._seekable:
1061            raise io.UnsupportedOperation("underlying stream is not seekable")
1062        curr_pos = self.tell()
1063        if whence == 0: # Seek from start of file
1064            new_pos = offset
1065        elif whence == 1: # Seek from current position
1066            new_pos = curr_pos + offset
1067        elif whence == 2: # Seek from EOF
1068            new_pos = self._orig_file_size + offset
1069        else:
1070            raise ValueError("whence must be os.SEEK_SET (0), "
1071                             "os.SEEK_CUR (1), or os.SEEK_END (2)")
1072
1073        if new_pos > self._orig_file_size:
1074            new_pos = self._orig_file_size
1075
1076        if new_pos < 0:
1077            new_pos = 0
1078
1079        read_offset = new_pos - curr_pos
1080        buff_offset = read_offset + self._offset
1081
1082        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1083            # Just move the _offset index if the new position is in the _readbuffer
1084            self._offset = buff_offset
1085            read_offset = 0
1086        elif read_offset < 0:
1087            # Position is before the current position. Reset the ZipExtFile
1088            self._fileobj.seek(self._orig_compress_start)
1089            self._running_crc = self._orig_start_crc
1090            self._compress_left = self._orig_compress_size
1091            self._left = self._orig_file_size
1092            self._readbuffer = b''
1093            self._offset = 0
1094            self._decompressor = _get_decompressor(self._compress_type)
1095            self._eof = False
1096            read_offset = new_pos
1097            if self._decrypter is not None:
1098                self._init_decrypter()
1099
1100        while read_offset > 0:
1101            read_len = min(self.MAX_SEEK_READ, read_offset)
1102            self.read(read_len)
1103            read_offset -= read_len
1104
1105        return self.tell()
1106
1107    def tell(self):
1108        if not self._seekable:
1109            raise io.UnsupportedOperation("underlying stream is not seekable")
1110        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1111        return filepos
1112
1113
1114class _ZipWriteFile(io.BufferedIOBase):
1115    def __init__(self, zf, zinfo, zip64):
1116        self._zinfo = zinfo
1117        self._zip64 = zip64
1118        self._zipfile = zf
1119        self._compressor = _get_compressor(zinfo.compress_type,
1120                                           zinfo._compresslevel)
1121        self._file_size = 0
1122        self._compress_size = 0
1123        self._crc = 0
1124
1125    @property
1126    def _fileobj(self):
1127        return self._zipfile.fp
1128
1129    def writable(self):
1130        return True
1131
1132    def write(self, data):
1133        if self.closed:
1134            raise ValueError('I/O operation on closed file.')
1135        nbytes = len(data)
1136        self._file_size += nbytes
1137        self._crc = crc32(data, self._crc)
1138        if self._compressor:
1139            data = self._compressor.compress(data)
1140            self._compress_size += len(data)
1141        self._fileobj.write(data)
1142        return nbytes
1143
1144    def close(self):
1145        if self.closed:
1146            return
1147        try:
1148            super().close()
1149            # Flush any data from the compressor, and update header info
1150            if self._compressor:
1151                buf = self._compressor.flush()
1152                self._compress_size += len(buf)
1153                self._fileobj.write(buf)
1154                self._zinfo.compress_size = self._compress_size
1155            else:
1156                self._zinfo.compress_size = self._file_size
1157            self._zinfo.CRC = self._crc
1158            self._zinfo.file_size = self._file_size
1159
1160            # Write updated header info
1161            if self._zinfo.flag_bits & 0x08:
1162                # Write CRC and file sizes after the file data
1163                fmt = '<LLQQ' if self._zip64 else '<LLLL'
1164                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1165                    self._zinfo.compress_size, self._zinfo.file_size))
1166                self._zipfile.start_dir = self._fileobj.tell()
1167            else:
1168                if not self._zip64:
1169                    if self._file_size > ZIP64_LIMIT:
1170                        raise RuntimeError(
1171                            'File size unexpectedly exceeded ZIP64 limit')
1172                    if self._compress_size > ZIP64_LIMIT:
1173                        raise RuntimeError(
1174                            'Compressed size unexpectedly exceeded ZIP64 limit')
1175                # Seek backwards and write file header (which will now include
1176                # correct CRC and file sizes)
1177
1178                # Preserve current position in file
1179                self._zipfile.start_dir = self._fileobj.tell()
1180                self._fileobj.seek(self._zinfo.header_offset)
1181                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1182                self._fileobj.seek(self._zipfile.start_dir)
1183
1184            # Successfully written: Add file to our caches
1185            self._zipfile.filelist.append(self._zinfo)
1186            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1187        finally:
1188            self._zipfile._writing = False
1189
1190
1191
1192class ZipFile:
1193    """ Class with methods to open, read, write, close, list zip files.
1194
1195    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1196                compresslevel=None)
1197
1198    file: Either the path to the file, or a file-like object.
1199          If it is a path, the file will be opened and closed by ZipFile.
1200    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1201          or append 'a'.
1202    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1203                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
1204    allowZip64: if True ZipFile will create files with ZIP64 extensions when
1205                needed, otherwise it will raise an exception when this would
1206                be necessary.
1207    compresslevel: None (default for the given compression type) or an integer
1208                   specifying the level to pass to the compressor.
1209                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1210                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
1211                   When using ZIP_BZIP2 integers 1 through 9 are accepted.
1212
1213    """
1214
1215    fp = None                   # Set here since __del__ checks it
1216    _windows_illegal_name_trans_table = None
1217
1218    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1219                 compresslevel=None, *, strict_timestamps=True):
1220        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1221        or append 'a'."""
1222        if mode not in ('r', 'w', 'x', 'a'):
1223            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
1224
1225        _check_compression(compression)
1226
1227        self._allowZip64 = allowZip64
1228        self._didModify = False
1229        self.debug = 0  # Level of printing: 0 through 3
1230        self.NameToInfo = {}    # Find file info given name
1231        self.filelist = []      # List of ZipInfo instances for archive
1232        self.compression = compression  # Method of compression
1233        self.compresslevel = compresslevel
1234        self.mode = mode
1235        self.pwd = None
1236        self._comment = b''
1237        self._strict_timestamps = strict_timestamps
1238
1239        # Check if we were passed a file-like object
1240        if isinstance(file, os.PathLike):
1241            file = os.fspath(file)
1242        if isinstance(file, str):
1243            # No, it's a filename
1244            self._filePassed = 0
1245            self.filename = file
1246            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1247                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
1248            filemode = modeDict[mode]
1249            while True:
1250                try:
1251                    self.fp = io.open(file, filemode)
1252                except OSError:
1253                    if filemode in modeDict:
1254                        filemode = modeDict[filemode]
1255                        continue
1256                    raise
1257                break
1258        else:
1259            self._filePassed = 1
1260            self.fp = file
1261            self.filename = getattr(file, 'name', None)
1262        self._fileRefCnt = 1
1263        self._lock = threading.RLock()
1264        self._seekable = True
1265        self._writing = False
1266
1267        try:
1268            if mode == 'r':
1269                self._RealGetContents()
1270            elif mode in ('w', 'x'):
1271                # set the modified flag so central directory gets written
1272                # even if no files are added to the archive
1273                self._didModify = True
1274                try:
1275                    self.start_dir = self.fp.tell()
1276                except (AttributeError, OSError):
1277                    self.fp = _Tellable(self.fp)
1278                    self.start_dir = 0
1279                    self._seekable = False
1280                else:
1281                    # Some file-like objects can provide tell() but not seek()
1282                    try:
1283                        self.fp.seek(self.start_dir)
1284                    except (AttributeError, OSError):
1285                        self._seekable = False
1286            elif mode == 'a':
1287                try:
1288                    # See if file is a zip file
1289                    self._RealGetContents()
1290                    # seek to start of directory and overwrite
1291                    self.fp.seek(self.start_dir)
1292                except BadZipFile:
1293                    # file is not a zip file, just append
1294                    self.fp.seek(0, 2)
1295
1296                    # set the modified flag so central directory gets written
1297                    # even if no files are added to the archive
1298                    self._didModify = True
1299                    self.start_dir = self.fp.tell()
1300            else:
1301                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
1302        except:
1303            fp = self.fp
1304            self.fp = None
1305            self._fpclose(fp)
1306            raise
1307
1308    def __enter__(self):
1309        return self
1310
1311    def __exit__(self, type, value, traceback):
1312        self.close()
1313
1314    def __repr__(self):
1315        result = ['<%s.%s' % (self.__class__.__module__,
1316                              self.__class__.__qualname__)]
1317        if self.fp is not None:
1318            if self._filePassed:
1319                result.append(' file=%r' % self.fp)
1320            elif self.filename is not None:
1321                result.append(' filename=%r' % self.filename)
1322            result.append(' mode=%r' % self.mode)
1323        else:
1324            result.append(' [closed]')
1325        result.append('>')
1326        return ''.join(result)
1327
1328    def _RealGetContents(self):
1329        """Read in the table of contents for the ZIP file."""
1330        fp = self.fp
1331        try:
1332            endrec = _EndRecData(fp)
1333        except OSError:
1334            raise BadZipFile("File is not a zip file")
1335        if not endrec:
1336            raise BadZipFile("File is not a zip file")
1337        if self.debug > 1:
1338            print(endrec)
1339        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
1340        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
1341        self._comment = endrec[_ECD_COMMENT]    # archive comment
1342
1343        # "concat" is zero, unless zip was concatenated to another file
1344        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
1345        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1346            # If Zip64 extension structures are present, account for them
1347            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1348
1349        if self.debug > 2:
1350            inferred = concat + offset_cd
1351            print("given, inferred, offset", offset_cd, inferred, concat)
1352        # self.start_dir:  Position of start of central directory
1353        self.start_dir = offset_cd + concat
1354        fp.seek(self.start_dir, 0)
1355        data = fp.read(size_cd)
1356        fp = io.BytesIO(data)
1357        total = 0
1358        while total < size_cd:
1359            centdir = fp.read(sizeCentralDir)
1360            if len(centdir) != sizeCentralDir:
1361                raise BadZipFile("Truncated central directory")
1362            centdir = struct.unpack(structCentralDir, centdir)
1363            if centdir[_CD_SIGNATURE] != stringCentralDir:
1364                raise BadZipFile("Bad magic number for central directory")
1365            if self.debug > 2:
1366                print(centdir)
1367            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
1368            flags = centdir[5]
1369            if flags & 0x800:
1370                # UTF-8 file names extension
1371                filename = filename.decode('utf-8')
1372            else:
1373                # Historical ZIP filename encoding
1374                filename = filename.decode('cp437')
1375            # Create ZipInfo instance to store file information
1376            x = ZipInfo(filename)
1377            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1378            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
1379            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
1380            (x.create_version, x.create_system, x.extract_version, x.reserved,
1381             x.flag_bits, x.compress_type, t, d,
1382             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
1383            if x.extract_version > MAX_EXTRACT_VERSION:
1384                raise NotImplementedError("zip file version %.1f" %
1385                                          (x.extract_version / 10))
1386            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1387            # Convert date/time code to (year, month, day, hour, min, sec)
1388            x._raw_time = t
1389            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
1390                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
1391
1392            x._decodeExtra()
1393            x.header_offset = x.header_offset + concat
1394            self.filelist.append(x)
1395            self.NameToInfo[x.filename] = x
1396
1397            # update total bytes read from central directory
1398            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1399                     + centdir[_CD_EXTRA_FIELD_LENGTH]
1400                     + centdir[_CD_COMMENT_LENGTH])
1401
1402            if self.debug > 2:
1403                print("total", total)
1404
1405
1406    def namelist(self):
1407        """Return a list of file names in the archive."""
1408        return [data.filename for data in self.filelist]
1409
1410    def infolist(self):
1411        """Return a list of class ZipInfo instances for files in the
1412        archive."""
1413        return self.filelist
1414
1415    def printdir(self, file=None):
1416        """Print a table of contents for the zip file."""
1417        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
1418              file=file)
1419        for zinfo in self.filelist:
1420            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
1421            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1422                  file=file)
1423
1424    def testzip(self):
1425        """Read all the files and check the CRC."""
1426        chunk_size = 2 ** 20
1427        for zinfo in self.filelist:
1428            try:
1429                # Read by chunks, to avoid an OverflowError or a
1430                # MemoryError with very large embedded files.
1431                with self.open(zinfo.filename, "r") as f:
1432                    while f.read(chunk_size):     # Check CRC-32
1433                        pass
1434            except BadZipFile:
1435                return zinfo.filename
1436
1437    def getinfo(self, name):
1438        """Return the instance of ZipInfo given 'name'."""
1439        info = self.NameToInfo.get(name)
1440        if info is None:
1441            raise KeyError(
1442                'There is no item named %r in the archive' % name)
1443
1444        return info
1445
1446    def setpassword(self, pwd):
1447        """Set default password for encrypted files."""
1448        if pwd and not isinstance(pwd, bytes):
1449            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1450        if pwd:
1451            self.pwd = pwd
1452        else:
1453            self.pwd = None
1454
1455    @property
1456    def comment(self):
1457        """The comment text associated with the ZIP file."""
1458        return self._comment
1459
1460    @comment.setter
1461    def comment(self, comment):
1462        if not isinstance(comment, bytes):
1463            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
1464        # check for valid comment length
1465        if len(comment) > ZIP_MAX_COMMENT:
1466            import warnings
1467            warnings.warn('Archive comment is too long; truncating to %d bytes'
1468                          % ZIP_MAX_COMMENT, stacklevel=2)
1469            comment = comment[:ZIP_MAX_COMMENT]
1470        self._comment = comment
1471        self._didModify = True
1472
1473    def read(self, name, pwd=None):
1474        """Return file bytes for name."""
1475        with self.open(name, "r", pwd) as fp:
1476            return fp.read()
1477
1478    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1479        """Return file-like object for 'name'.
1480
1481        name is a string for the file name within the ZIP file, or a ZipInfo
1482        object.
1483
1484        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1485        write to a file newly added to the archive.
1486
1487        pwd is the password to decrypt files (only used for reading).
1488
1489        When writing, if the file size is not known in advance but may exceed
1490        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1491        files.  If the size is known in advance, it is best to pass a ZipInfo
1492        instance for name, with zinfo.file_size set.
1493        """
1494        if mode not in {"r", "w"}:
1495            raise ValueError('open() requires mode "r" or "w"')
1496        if pwd and not isinstance(pwd, bytes):
1497            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1498        if pwd and (mode == "w"):
1499            raise ValueError("pwd is only supported for reading files")
1500        if not self.fp:
1501            raise ValueError(
1502                "Attempt to use ZIP archive that was already closed")
1503
1504        # Make sure we have an info object
1505        if isinstance(name, ZipInfo):
1506            # 'name' is already an info object
1507            zinfo = name
1508        elif mode == 'w':
1509            zinfo = ZipInfo(name)
1510            zinfo.compress_type = self.compression
1511            zinfo._compresslevel = self.compresslevel
1512        else:
1513            # Get info object for name
1514            zinfo = self.getinfo(name)
1515
1516        if mode == 'w':
1517            return self._open_to_write(zinfo, force_zip64=force_zip64)
1518
1519        if self._writing:
1520            raise ValueError("Can't read from the ZIP file while there "
1521                    "is an open writing handle on it. "
1522                    "Close the writing handle before trying to read.")
1523
1524        # Open for reading:
1525        self._fileRefCnt += 1
1526        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1527                               self._fpclose, self._lock, lambda: self._writing)
1528        try:
1529            # Skip the file header:
1530            fheader = zef_file.read(sizeFileHeader)
1531            if len(fheader) != sizeFileHeader:
1532                raise BadZipFile("Truncated file header")
1533            fheader = struct.unpack(structFileHeader, fheader)
1534            if fheader[_FH_SIGNATURE] != stringFileHeader:
1535                raise BadZipFile("Bad magic number for file header")
1536
1537            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1538            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1539                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1540
1541            if zinfo.flag_bits & 0x20:
1542                # Zip 2.7: compressed patched data
1543                raise NotImplementedError("compressed patched data (flag bit 5)")
1544
1545            if zinfo.flag_bits & 0x40:
1546                # strong encryption
1547                raise NotImplementedError("strong encryption (flag bit 6)")
1548
1549            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
1550                # UTF-8 filename
1551                fname_str = fname.decode("utf-8")
1552            else:
1553                fname_str = fname.decode("cp437")
1554
1555            if fname_str != zinfo.orig_filename:
1556                raise BadZipFile(
1557                    'File name in directory %r and header %r differ.'
1558                    % (zinfo.orig_filename, fname))
1559
1560            # check for encrypted flag & handle password
1561            is_encrypted = zinfo.flag_bits & 0x1
1562            if is_encrypted:
1563                if not pwd:
1564                    pwd = self.pwd
1565                if not pwd:
1566                    raise RuntimeError("File %r is encrypted, password "
1567                                       "required for extraction" % name)
1568            else:
1569                pwd = None
1570
1571            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1572        except:
1573            zef_file.close()
1574            raise
1575
1576    def _open_to_write(self, zinfo, force_zip64=False):
1577        if force_zip64 and not self._allowZip64:
1578            raise ValueError(
1579                "force_zip64 is True, but allowZip64 was False when opening "
1580                "the ZIP file."
1581            )
1582        if self._writing:
1583            raise ValueError("Can't write to the ZIP file while there is "
1584                             "another write handle open on it. "
1585                             "Close the first handle before opening another.")
1586
1587        # Sizes and CRC are overwritten with correct data after processing the file
1588        if not hasattr(zinfo, 'file_size'):
1589            zinfo.file_size = 0
1590        zinfo.compress_size = 0
1591        zinfo.CRC = 0
1592
1593        zinfo.flag_bits = 0x00
1594        if zinfo.compress_type == ZIP_LZMA:
1595            # Compressed data includes an end-of-stream (EOS) marker
1596            zinfo.flag_bits |= 0x02
1597        if not self._seekable:
1598            zinfo.flag_bits |= 0x08
1599
1600        if not zinfo.external_attr:
1601            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1602
1603        # Compressed size can be larger than uncompressed size
1604        zip64 = self._allowZip64 and \
1605                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1606
1607        if self._seekable:
1608            self.fp.seek(self.start_dir)
1609        zinfo.header_offset = self.fp.tell()
1610
1611        self._writecheck(zinfo)
1612        self._didModify = True
1613
1614        self.fp.write(zinfo.FileHeader(zip64))
1615
1616        self._writing = True
1617        return _ZipWriteFile(self, zinfo, zip64)
1618
1619    def extract(self, member, path=None, pwd=None):
1620        """Extract a member from the archive to the current working directory,
1621           using its full name. Its file information is extracted as accurately
1622           as possible. `member' may be a filename or a ZipInfo object. You can
1623           specify a different directory using `path'.
1624        """
1625        if path is None:
1626            path = os.getcwd()
1627        else:
1628            path = os.fspath(path)
1629
1630        return self._extract_member(member, path, pwd)
1631
1632    def extractall(self, path=None, members=None, pwd=None):
1633        """Extract all members from the archive to the current working
1634           directory. `path' specifies a different directory to extract to.
1635           `members' is optional and must be a subset of the list returned
1636           by namelist().
1637        """
1638        if members is None:
1639            members = self.namelist()
1640
1641        if path is None:
1642            path = os.getcwd()
1643        else:
1644            path = os.fspath(path)
1645
1646        for zipinfo in members:
1647            self._extract_member(zipinfo, path, pwd)
1648
1649    @classmethod
1650    def _sanitize_windows_name(cls, arcname, pathsep):
1651        """Replace bad characters and remove trailing dots from parts."""
1652        table = cls._windows_illegal_name_trans_table
1653        if not table:
1654            illegal = ':<>|"?*'
1655            table = str.maketrans(illegal, '_' * len(illegal))
1656            cls._windows_illegal_name_trans_table = table
1657        arcname = arcname.translate(table)
1658        # remove trailing dots
1659        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1660        # rejoin, removing empty parts.
1661        arcname = pathsep.join(x for x in arcname if x)
1662        return arcname
1663
1664    def _extract_member(self, member, targetpath, pwd):
1665        """Extract the ZipInfo object 'member' to a physical
1666           file on the path targetpath.
1667        """
1668        if not isinstance(member, ZipInfo):
1669            member = self.getinfo(member)
1670
1671        # build the destination pathname, replacing
1672        # forward slashes to platform specific separators.
1673        arcname = member.filename.replace('/', os.path.sep)
1674
1675        if os.path.altsep:
1676            arcname = arcname.replace(os.path.altsep, os.path.sep)
1677        # interpret absolute pathname as relative, remove drive letter or
1678        # UNC path, redundant separators, "." and ".." components.
1679        arcname = os.path.splitdrive(arcname)[1]
1680        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1681        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1682                                   if x not in invalid_path_parts)
1683        if os.path.sep == '\\':
1684            # filter illegal characters on Windows
1685            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1686
1687        targetpath = os.path.join(targetpath, arcname)
1688        targetpath = os.path.normpath(targetpath)
1689
1690        # Create all upper directories if necessary.
1691        upperdirs = os.path.dirname(targetpath)
1692        if upperdirs and not os.path.exists(upperdirs):
1693            os.makedirs(upperdirs)
1694
1695        if member.is_dir():
1696            if not os.path.isdir(targetpath):
1697                os.mkdir(targetpath)
1698            return targetpath
1699
1700        with self.open(member, pwd=pwd) as source, \
1701             open(targetpath, "wb") as target:
1702            shutil.copyfileobj(source, target)
1703
1704        return targetpath
1705
1706    def _writecheck(self, zinfo):
1707        """Check for errors before writing a file to the archive."""
1708        if zinfo.filename in self.NameToInfo:
1709            import warnings
1710            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1711        if self.mode not in ('w', 'x', 'a'):
1712            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1713        if not self.fp:
1714            raise ValueError(
1715                "Attempt to write ZIP archive that was already closed")
1716        _check_compression(zinfo.compress_type)
1717        if not self._allowZip64:
1718            requires_zip64 = None
1719            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1720                requires_zip64 = "Files count"
1721            elif zinfo.file_size > ZIP64_LIMIT:
1722                requires_zip64 = "Filesize"
1723            elif zinfo.header_offset > ZIP64_LIMIT:
1724                requires_zip64 = "Zipfile size"
1725            if requires_zip64:
1726                raise LargeZipFile(requires_zip64 +
1727                                   " would require ZIP64 extensions")
1728
1729    def write(self, filename, arcname=None,
1730              compress_type=None, compresslevel=None):
1731        """Put the bytes from filename into the archive under the name
1732        arcname."""
1733        if not self.fp:
1734            raise ValueError(
1735                "Attempt to write to ZIP archive that was already closed")
1736        if self._writing:
1737            raise ValueError(
1738                "Can't write to ZIP archive while an open writing handle exists"
1739            )
1740
1741        zinfo = ZipInfo.from_file(filename, arcname,
1742                                  strict_timestamps=self._strict_timestamps)
1743
1744        if zinfo.is_dir():
1745            zinfo.compress_size = 0
1746            zinfo.CRC = 0
1747        else:
1748            if compress_type is not None:
1749                zinfo.compress_type = compress_type
1750            else:
1751                zinfo.compress_type = self.compression
1752
1753            if compresslevel is not None:
1754                zinfo._compresslevel = compresslevel
1755            else:
1756                zinfo._compresslevel = self.compresslevel
1757
1758        if zinfo.is_dir():
1759            with self._lock:
1760                if self._seekable:
1761                    self.fp.seek(self.start_dir)
1762                zinfo.header_offset = self.fp.tell()  # Start of header bytes
1763                if zinfo.compress_type == ZIP_LZMA:
1764                # Compressed data includes an end-of-stream (EOS) marker
1765                    zinfo.flag_bits |= 0x02
1766
1767                self._writecheck(zinfo)
1768                self._didModify = True
1769
1770                self.filelist.append(zinfo)
1771                self.NameToInfo[zinfo.filename] = zinfo
1772                self.fp.write(zinfo.FileHeader(False))
1773                self.start_dir = self.fp.tell()
1774        else:
1775            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1776                shutil.copyfileobj(src, dest, 1024*8)
1777
1778    def writestr(self, zinfo_or_arcname, data,
1779                 compress_type=None, compresslevel=None):
1780        """Write a file into the archive.  The contents is 'data', which
1781        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1782        it is encoded as UTF-8 first.
1783        'zinfo_or_arcname' is either a ZipInfo instance or
1784        the name of the file in the archive."""
1785        if isinstance(data, str):
1786            data = data.encode("utf-8")
1787        if not isinstance(zinfo_or_arcname, ZipInfo):
1788            zinfo = ZipInfo(filename=zinfo_or_arcname,
1789                            date_time=time.localtime(time.time())[:6])
1790            zinfo.compress_type = self.compression
1791            zinfo._compresslevel = self.compresslevel
1792            if zinfo.filename[-1] == '/':
1793                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1794                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1795            else:
1796                zinfo.external_attr = 0o600 << 16     # ?rw-------
1797        else:
1798            zinfo = zinfo_or_arcname
1799
1800        if not self.fp:
1801            raise ValueError(
1802                "Attempt to write to ZIP archive that was already closed")
1803        if self._writing:
1804            raise ValueError(
1805                "Can't write to ZIP archive while an open writing handle exists."
1806            )
1807
1808        if compress_type is not None:
1809            zinfo.compress_type = compress_type
1810
1811        if compresslevel is not None:
1812            zinfo._compresslevel = compresslevel
1813
1814        zinfo.file_size = len(data)            # Uncompressed size
1815        with self._lock:
1816            with self.open(zinfo, mode='w') as dest:
1817                dest.write(data)
1818
1819    def __del__(self):
1820        """Call the "close()" method in case the user forgot."""
1821        self.close()
1822
1823    def close(self):
1824        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1825        records."""
1826        if self.fp is None:
1827            return
1828
1829        if self._writing:
1830            raise ValueError("Can't close the ZIP file while there is "
1831                             "an open writing handle on it. "
1832                             "Close the writing handle before closing the zip.")
1833
1834        try:
1835            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1836                with self._lock:
1837                    if self._seekable:
1838                        self.fp.seek(self.start_dir)
1839                    self._write_end_record()
1840        finally:
1841            fp = self.fp
1842            self.fp = None
1843            self._fpclose(fp)
1844
1845    def _write_end_record(self):
1846        for zinfo in self.filelist:         # write central directory
1847            dt = zinfo.date_time
1848            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1849            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1850            extra = []
1851            if zinfo.file_size > ZIP64_LIMIT \
1852               or zinfo.compress_size > ZIP64_LIMIT:
1853                extra.append(zinfo.file_size)
1854                extra.append(zinfo.compress_size)
1855                file_size = 0xffffffff
1856                compress_size = 0xffffffff
1857            else:
1858                file_size = zinfo.file_size
1859                compress_size = zinfo.compress_size
1860
1861            if zinfo.header_offset > ZIP64_LIMIT:
1862                extra.append(zinfo.header_offset)
1863                header_offset = 0xffffffff
1864            else:
1865                header_offset = zinfo.header_offset
1866
1867            extra_data = zinfo.extra
1868            min_version = 0
1869            if extra:
1870                # Append a ZIP64 field to the extra's
1871                extra_data = _strip_extra(extra_data, (1,))
1872                extra_data = struct.pack(
1873                    '<HH' + 'Q'*len(extra),
1874                    1, 8*len(extra), *extra) + extra_data
1875
1876                min_version = ZIP64_VERSION
1877
1878            if zinfo.compress_type == ZIP_BZIP2:
1879                min_version = max(BZIP2_VERSION, min_version)
1880            elif zinfo.compress_type == ZIP_LZMA:
1881                min_version = max(LZMA_VERSION, min_version)
1882
1883            extract_version = max(min_version, zinfo.extract_version)
1884            create_version = max(min_version, zinfo.create_version)
1885            try:
1886                filename, flag_bits = zinfo._encodeFilenameFlags()
1887                centdir = struct.pack(structCentralDir,
1888                                      stringCentralDir, create_version,
1889                                      zinfo.create_system, extract_version, zinfo.reserved,
1890                                      flag_bits, zinfo.compress_type, dostime, dosdate,
1891                                      zinfo.CRC, compress_size, file_size,
1892                                      len(filename), len(extra_data), len(zinfo.comment),
1893                                      0, zinfo.internal_attr, zinfo.external_attr,
1894                                      header_offset)
1895            except DeprecationWarning:
1896                print((structCentralDir, stringCentralDir, create_version,
1897                       zinfo.create_system, extract_version, zinfo.reserved,
1898                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1899                       zinfo.CRC, compress_size, file_size,
1900                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
1901                       0, zinfo.internal_attr, zinfo.external_attr,
1902                       header_offset), file=sys.stderr)
1903                raise
1904            self.fp.write(centdir)
1905            self.fp.write(filename)
1906            self.fp.write(extra_data)
1907            self.fp.write(zinfo.comment)
1908
1909        pos2 = self.fp.tell()
1910        # Write end-of-zip-archive record
1911        centDirCount = len(self.filelist)
1912        centDirSize = pos2 - self.start_dir
1913        centDirOffset = self.start_dir
1914        requires_zip64 = None
1915        if centDirCount > ZIP_FILECOUNT_LIMIT:
1916            requires_zip64 = "Files count"
1917        elif centDirOffset > ZIP64_LIMIT:
1918            requires_zip64 = "Central directory offset"
1919        elif centDirSize > ZIP64_LIMIT:
1920            requires_zip64 = "Central directory size"
1921        if requires_zip64:
1922            # Need to write the ZIP64 end-of-archive records
1923            if not self._allowZip64:
1924                raise LargeZipFile(requires_zip64 +
1925                                   " would require ZIP64 extensions")
1926            zip64endrec = struct.pack(
1927                structEndArchive64, stringEndArchive64,
1928                44, 45, 45, 0, 0, centDirCount, centDirCount,
1929                centDirSize, centDirOffset)
1930            self.fp.write(zip64endrec)
1931
1932            zip64locrec = struct.pack(
1933                structEndArchive64Locator,
1934                stringEndArchive64Locator, 0, pos2, 1)
1935            self.fp.write(zip64locrec)
1936            centDirCount = min(centDirCount, 0xFFFF)
1937            centDirSize = min(centDirSize, 0xFFFFFFFF)
1938            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1939
1940        endrec = struct.pack(structEndArchive, stringEndArchive,
1941                             0, 0, centDirCount, centDirCount,
1942                             centDirSize, centDirOffset, len(self._comment))
1943        self.fp.write(endrec)
1944        self.fp.write(self._comment)
1945        if self.mode == "a":
1946            self.fp.truncate()
1947        self.fp.flush()
1948
1949    def _fpclose(self, fp):
1950        assert self._fileRefCnt > 0
1951        self._fileRefCnt -= 1
1952        if not self._fileRefCnt and not self._filePassed:
1953            fp.close()
1954
1955
1956class PyZipFile(ZipFile):
1957    """Class to create ZIP archives with Python library files and packages."""
1958
1959    def __init__(self, file, mode="r", compression=ZIP_STORED,
1960                 allowZip64=True, optimize=-1):
1961        ZipFile.__init__(self, file, mode=mode, compression=compression,
1962                         allowZip64=allowZip64)
1963        self._optimize = optimize
1964
1965    def writepy(self, pathname, basename="", filterfunc=None):
1966        """Add all files from "pathname" to the ZIP archive.
1967
1968        If pathname is a package directory, search the directory and
1969        all package subdirectories recursively for all *.py and enter
1970        the modules into the archive.  If pathname is a plain
1971        directory, listdir *.py and enter all modules.  Else, pathname
1972        must be a Python *.py file and the module will be put into the
1973        archive.  Added modules are always module.pyc.
1974        This method will compile the module.py into module.pyc if
1975        necessary.
1976        If filterfunc(pathname) is given, it is called with every argument.
1977        When it is False, the file or directory is skipped.
1978        """
1979        pathname = os.fspath(pathname)
1980        if filterfunc and not filterfunc(pathname):
1981            if self.debug:
1982                label = 'path' if os.path.isdir(pathname) else 'file'
1983                print('%s %r skipped by filterfunc' % (label, pathname))
1984            return
1985        dir, name = os.path.split(pathname)
1986        if os.path.isdir(pathname):
1987            initname = os.path.join(pathname, "__init__.py")
1988            if os.path.isfile(initname):
1989                # This is a package directory, add it
1990                if basename:
1991                    basename = "%s/%s" % (basename, name)
1992                else:
1993                    basename = name
1994                if self.debug:
1995                    print("Adding package in", pathname, "as", basename)
1996                fname, arcname = self._get_codename(initname[0:-3], basename)
1997                if self.debug:
1998                    print("Adding", arcname)
1999                self.write(fname, arcname)
2000                dirlist = sorted(os.listdir(pathname))
2001                dirlist.remove("__init__.py")
2002                # Add all *.py files and package subdirectories
2003                for filename in dirlist:
2004                    path = os.path.join(pathname, filename)
2005                    root, ext = os.path.splitext(filename)
2006                    if os.path.isdir(path):
2007                        if os.path.isfile(os.path.join(path, "__init__.py")):
2008                            # This is a package directory, add it
2009                            self.writepy(path, basename,
2010                                         filterfunc=filterfunc)  # Recursive call
2011                    elif ext == ".py":
2012                        if filterfunc and not filterfunc(path):
2013                            if self.debug:
2014                                print('file %r skipped by filterfunc' % path)
2015                            continue
2016                        fname, arcname = self._get_codename(path[0:-3],
2017                                                            basename)
2018                        if self.debug:
2019                            print("Adding", arcname)
2020                        self.write(fname, arcname)
2021            else:
2022                # This is NOT a package directory, add its files at top level
2023                if self.debug:
2024                    print("Adding files from directory", pathname)
2025                for filename in sorted(os.listdir(pathname)):
2026                    path = os.path.join(pathname, filename)
2027                    root, ext = os.path.splitext(filename)
2028                    if ext == ".py":
2029                        if filterfunc and not filterfunc(path):
2030                            if self.debug:
2031                                print('file %r skipped by filterfunc' % path)
2032                            continue
2033                        fname, arcname = self._get_codename(path[0:-3],
2034                                                            basename)
2035                        if self.debug:
2036                            print("Adding", arcname)
2037                        self.write(fname, arcname)
2038        else:
2039            if pathname[-3:] != ".py":
2040                raise RuntimeError(
2041                    'Files added with writepy() must end with ".py"')
2042            fname, arcname = self._get_codename(pathname[0:-3], basename)
2043            if self.debug:
2044                print("Adding file", arcname)
2045            self.write(fname, arcname)
2046
2047    def _get_codename(self, pathname, basename):
2048        """Return (filename, archivename) for the path.
2049
2050        Given a module name path, return the correct file path and
2051        archive name, compiling if necessary.  For example, given
2052        /python/lib/string, return (/python/lib/string.pyc, string).
2053        """
2054        def _compile(file, optimize=-1):
2055            import py_compile
2056            if self.debug:
2057                print("Compiling", file)
2058            try:
2059                py_compile.compile(file, doraise=True, optimize=optimize)
2060            except py_compile.PyCompileError as err:
2061                print(err.msg)
2062                return False
2063            return True
2064
2065        file_py  = pathname + ".py"
2066        file_pyc = pathname + ".pyc"
2067        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2068        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2069        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2070        if self._optimize == -1:
2071            # legacy mode: use whatever file is present
2072            if (os.path.isfile(file_pyc) and
2073                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2074                # Use .pyc file.
2075                arcname = fname = file_pyc
2076            elif (os.path.isfile(pycache_opt0) and
2077                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2078                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2079                # file name in the archive.
2080                fname = pycache_opt0
2081                arcname = file_pyc
2082            elif (os.path.isfile(pycache_opt1) and
2083                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2084                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2085                # file name in the archive.
2086                fname = pycache_opt1
2087                arcname = file_pyc
2088            elif (os.path.isfile(pycache_opt2) and
2089                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2090                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2091                # file name in the archive.
2092                fname = pycache_opt2
2093                arcname = file_pyc
2094            else:
2095                # Compile py into PEP 3147 pyc file.
2096                if _compile(file_py):
2097                    if sys.flags.optimize == 0:
2098                        fname = pycache_opt0
2099                    elif sys.flags.optimize == 1:
2100                        fname = pycache_opt1
2101                    else:
2102                        fname = pycache_opt2
2103                    arcname = file_pyc
2104                else:
2105                    fname = arcname = file_py
2106        else:
2107            # new mode: use given optimization level
2108            if self._optimize == 0:
2109                fname = pycache_opt0
2110                arcname = file_pyc
2111            else:
2112                arcname = file_pyc
2113                if self._optimize == 1:
2114                    fname = pycache_opt1
2115                elif self._optimize == 2:
2116                    fname = pycache_opt2
2117                else:
2118                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2119                    raise ValueError(msg)
2120            if not (os.path.isfile(fname) and
2121                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2122                if not _compile(file_py, optimize=self._optimize):
2123                    fname = arcname = file_py
2124        archivename = os.path.split(arcname)[1]
2125        if basename:
2126            archivename = "%s/%s" % (basename, archivename)
2127        return (fname, archivename)
2128
2129
2130def _parents(path):
2131    """
2132    Given a path with elements separated by
2133    posixpath.sep, generate all parents of that path.
2134
2135    >>> list(_parents('b/d'))
2136    ['b']
2137    >>> list(_parents('/b/d/'))
2138    ['/b']
2139    >>> list(_parents('b/d/f/'))
2140    ['b/d', 'b']
2141    >>> list(_parents('b'))
2142    []
2143    >>> list(_parents(''))
2144    []
2145    """
2146    return itertools.islice(_ancestry(path), 1, None)
2147
2148
2149def _ancestry(path):
2150    """
2151    Given a path with elements separated by
2152    posixpath.sep, generate all elements of that path
2153
2154    >>> list(_ancestry('b/d'))
2155    ['b/d', 'b']
2156    >>> list(_ancestry('/b/d/'))
2157    ['/b/d', '/b']
2158    >>> list(_ancestry('b/d/f/'))
2159    ['b/d/f', 'b/d', 'b']
2160    >>> list(_ancestry('b'))
2161    ['b']
2162    >>> list(_ancestry(''))
2163    []
2164    """
2165    path = path.rstrip(posixpath.sep)
2166    while path and path != posixpath.sep:
2167        yield path
2168        path, tail = posixpath.split(path)
2169
2170
2171_dedupe = dict.fromkeys
2172"""Deduplicate an iterable in original order"""
2173
2174
2175def _difference(minuend, subtrahend):
2176    """
2177    Return items in minuend not in subtrahend, retaining order
2178    with O(1) lookup.
2179    """
2180    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2181
2182
2183class CompleteDirs(ZipFile):
2184    """
2185    A ZipFile subclass that ensures that implied directories
2186    are always included in the namelist.
2187    """
2188
2189    @staticmethod
2190    def _implied_dirs(names):
2191        parents = itertools.chain.from_iterable(map(_parents, names))
2192        as_dirs = (p + posixpath.sep for p in parents)
2193        return _dedupe(_difference(as_dirs, names))
2194
2195    def namelist(self):
2196        names = super(CompleteDirs, self).namelist()
2197        return names + list(self._implied_dirs(names))
2198
2199    def _name_set(self):
2200        return set(self.namelist())
2201
2202    def resolve_dir(self, name):
2203        """
2204        If the name represents a directory, return that name
2205        as a directory (with the trailing slash).
2206        """
2207        names = self._name_set()
2208        dirname = name + '/'
2209        dir_match = name not in names and dirname in names
2210        return dirname if dir_match else name
2211
2212    @classmethod
2213    def make(cls, source):
2214        """
2215        Given a source (filename or zipfile), return an
2216        appropriate CompleteDirs subclass.
2217        """
2218        if isinstance(source, CompleteDirs):
2219            return source
2220
2221        if not isinstance(source, ZipFile):
2222            return cls(source)
2223
2224        # Only allow for FastPath when supplied zipfile is read-only
2225        if 'r' not in source.mode:
2226            cls = CompleteDirs
2227
2228        res = cls.__new__(cls)
2229        vars(res).update(vars(source))
2230        return res
2231
2232
2233class FastLookup(CompleteDirs):
2234    """
2235    ZipFile subclass to ensure implicit
2236    dirs exist and are resolved rapidly.
2237    """
2238    def namelist(self):
2239        with contextlib.suppress(AttributeError):
2240            return self.__names
2241        self.__names = super(FastLookup, self).namelist()
2242        return self.__names
2243
2244    def _name_set(self):
2245        with contextlib.suppress(AttributeError):
2246            return self.__lookup
2247        self.__lookup = super(FastLookup, self)._name_set()
2248        return self.__lookup
2249
2250
2251class Path:
2252    """
2253    A pathlib-compatible interface for zip files.
2254
2255    Consider a zip file with this structure::
2256
2257        .
2258        ├── a.txt
2259        └── b
2260            ├── c.txt
2261            └── d
2262                └── e.txt
2263
2264    >>> data = io.BytesIO()
2265    >>> zf = ZipFile(data, 'w')
2266    >>> zf.writestr('a.txt', 'content of a')
2267    >>> zf.writestr('b/c.txt', 'content of c')
2268    >>> zf.writestr('b/d/e.txt', 'content of e')
2269    >>> zf.filename = 'abcde.zip'
2270
2271    Path accepts the zipfile object itself or a filename
2272
2273    >>> root = Path(zf)
2274
2275    From there, several path operations are available.
2276
2277    Directory iteration (including the zip file itself):
2278
2279    >>> a, b = root.iterdir()
2280    >>> a
2281    Path('abcde.zip', 'a.txt')
2282    >>> b
2283    Path('abcde.zip', 'b/')
2284
2285    name property:
2286
2287    >>> b.name
2288    'b'
2289
2290    join with divide operator:
2291
2292    >>> c = b / 'c.txt'
2293    >>> c
2294    Path('abcde.zip', 'b/c.txt')
2295    >>> c.name
2296    'c.txt'
2297
2298    Read text:
2299
2300    >>> c.read_text()
2301    'content of c'
2302
2303    existence:
2304
2305    >>> c.exists()
2306    True
2307    >>> (b / 'missing.txt').exists()
2308    False
2309
2310    Coercion to string:
2311
2312    >>> str(c)
2313    'abcde.zip/b/c.txt'
2314    """
2315
2316    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2317
2318    def __init__(self, root, at=""):
2319        self.root = FastLookup.make(root)
2320        self.at = at
2321
2322    @property
2323    def open(self):
2324        return functools.partial(self.root.open, self.at)
2325
2326    @property
2327    def name(self):
2328        return posixpath.basename(self.at.rstrip("/"))
2329
2330    def read_text(self, *args, **kwargs):
2331        with self.open() as strm:
2332            return io.TextIOWrapper(strm, *args, **kwargs).read()
2333
2334    def read_bytes(self):
2335        with self.open() as strm:
2336            return strm.read()
2337
2338    def _is_child(self, path):
2339        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2340
2341    def _next(self, at):
2342        return Path(self.root, at)
2343
2344    def is_dir(self):
2345        return not self.at or self.at.endswith("/")
2346
2347    def is_file(self):
2348        return not self.is_dir()
2349
2350    def exists(self):
2351        return self.at in self.root._name_set()
2352
2353    def iterdir(self):
2354        if not self.is_dir():
2355            raise ValueError("Can't listdir a file")
2356        subs = map(self._next, self.root.namelist())
2357        return filter(self._is_child, subs)
2358
2359    def __str__(self):
2360        return posixpath.join(self.root.filename, self.at)
2361
2362    def __repr__(self):
2363        return self.__repr.format(self=self)
2364
2365    def joinpath(self, add):
2366        next = posixpath.join(self.at, add)
2367        return self._next(self.root.resolve_dir(next))
2368
2369    __truediv__ = joinpath
2370
2371    @property
2372    def parent(self):
2373        parent_at = posixpath.dirname(self.at.rstrip('/'))
2374        if parent_at:
2375            parent_at += '/'
2376        return self._next(parent_at)
2377
2378
2379def main(args=None):
2380    import argparse
2381
2382    description = 'A simple command-line interface for zipfile module.'
2383    parser = argparse.ArgumentParser(description=description)
2384    group = parser.add_mutually_exclusive_group(required=True)
2385    group.add_argument('-l', '--list', metavar='<zipfile>',
2386                       help='Show listing of a zipfile')
2387    group.add_argument('-e', '--extract', nargs=2,
2388                       metavar=('<zipfile>', '<output_dir>'),
2389                       help='Extract zipfile into target dir')
2390    group.add_argument('-c', '--create', nargs='+',
2391                       metavar=('<name>', '<file>'),
2392                       help='Create zipfile from sources')
2393    group.add_argument('-t', '--test', metavar='<zipfile>',
2394                       help='Test if a zipfile is valid')
2395    args = parser.parse_args(args)
2396
2397    if args.test is not None:
2398        src = args.test
2399        with ZipFile(src, 'r') as zf:
2400            badfile = zf.testzip()
2401        if badfile:
2402            print("The following enclosed file is corrupted: {!r}".format(badfile))
2403        print("Done testing")
2404
2405    elif args.list is not None:
2406        src = args.list
2407        with ZipFile(src, 'r') as zf:
2408            zf.printdir()
2409
2410    elif args.extract is not None:
2411        src, curdir = args.extract
2412        with ZipFile(src, 'r') as zf:
2413            zf.extractall(curdir)
2414
2415    elif args.create is not None:
2416        zip_name = args.create.pop(0)
2417        files = args.create
2418
2419        def addToZip(zf, path, zippath):
2420            if os.path.isfile(path):
2421                zf.write(path, zippath, ZIP_DEFLATED)
2422            elif os.path.isdir(path):
2423                if zippath:
2424                    zf.write(path, zippath)
2425                for nm in sorted(os.listdir(path)):
2426                    addToZip(zf,
2427                             os.path.join(path, nm), os.path.join(zippath, nm))
2428            # else: ignore
2429
2430        with ZipFile(zip_name, 'w') as zf:
2431            for path in files:
2432                zippath = os.path.basename(path)
2433                if not zippath:
2434                    zippath = os.path.basename(os.path.dirname(path))
2435                if zippath in ('', os.curdir, os.pardir):
2436                    zippath = ''
2437                addToZip(zf, path, zippath)
2438
2439
2440if __name__ == "__main__":
2441    main()
2442