1#!/usr/bin/python -u
2#
3# Python Bindings for LZMA
4#
5# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
6# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
7# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
8#
9# This library is free software; you can redistribute it and/or
10# modify it under the terms of the GNU Lesser General Public
11# License as published by the Free Software Foundation; either
12# version 2.1 of the License, or (at your option) any later version.
13#
14# This library is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17# Lesser General Public License for more details.
18#
19# You should have received a copy of the GNU Lesser General Public
20# License along with this library; if not, write to the Free Software
21# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22#
23# $Id$
24#
25"""Read 7zip format archives."""
26
27from array import array
28from binascii import unhexlify
29from datetime import datetime
30import pylzma
31from struct import pack, unpack
32from zlib import crc32
33import zlib
34import bz2
35import os
36import sys
37try:
38    from io import BytesIO
39except ImportError:
40    from cStringIO import StringIO as BytesIO
41try:
42    from functools import reduce
43except ImportError:
44    # reduce is available in functools starting with Python 2.6
45    pass
46
try:
    from pytz import UTC
except ImportError:
    # pytz is optional; fall back to a minimal UTC tzinfo based on the
    # reference implementation from the Python documentation.
    from datetime import timedelta, tzinfo

    ZERO = timedelta(0)

    class UTC(tzinfo):
        """Fixed time zone with a zero UTC offset and no DST."""

        def utcoffset(self, dt):
            """Return the offset from UTC, which is always zero."""
            return ZERO

        def tzname(self, dt):
            """Return the name of the time zone."""
            return "UTC"

        def dst(self, dt):
            """Return the DST adjustment, which is always zero."""
            return ZERO

        def __call__(self):
            """Return the instance itself.

            The name "UTC" is rebound to an instance below; making the
            instance callable keeps "UTC()" working for code that treats
            the name as a class.
            """
            return self

    # Rebind the name to a single shared instance so it can be passed
    # directly as the tz argument of datetime.fromtimestamp().
    UTC = UTC()
72
# Python 2/3 compatibility: each block below probes for a name with a bare
# expression, which raises NameError when the name does not exist.
try:
    unicode
except NameError:
    # Python 3.x
    # Stand-in for the missing name: returns "s" unchanged and ignores
    # "encoding".
    def unicode(s, encoding):
        return s
else:
    # "unicode" exists: shadow the builtin "bytes" with a two-argument
    # function that returns "s" unchanged, so module code can call
    # bytes(s, encoding) in either branch.
    def bytes(s, encoding):
        return s

# Alias "long" to "int" where the name "long" is not defined.
try:
    long
except NameError:
    # Python 3.x
    long = int

# Alias "xrange" to "range" where the name "xrange" is not defined.
try:
    xrange
except NameError:
    # Python 3.x
    xrange = range
94
# True when the interpreter's major version is 3.
IS_PYTHON3 = sys.version_info[0] == 3

# True when the host byte order is not little-endian; the Digests class
# byteswaps the CRC values it reads when this is set.
NEED_BYTESWAP = sys.byteorder != 'little'

# Select an array typecode whose items are exactly 4 bytes wide: 'L' is
# tried first and 'I' is used (and asserted to be 4 bytes) otherwise.
if array('L').itemsize == 4:
    ARRAY_TYPE_UINT32 = 'L'
else:
    assert array('I').itemsize == 4
    ARRAY_TYPE_UINT32 = 'I'
104
# Number of bytes read from the archive file per step while decompressing.
READ_BLOCKSIZE                   = 16384

# Signature bytes compared against the start of the archive file.
MAGIC_7Z                         = unhexlify('377abcaf271c')  # '7z\xbc\xaf\x27\x1c'

# One-byte property ids that the header parsers compare against the bytes
# they read to decide which section or property follows.
PROPERTY_END                     = unhexlify('00')  # '\x00'
PROPERTY_HEADER                  = unhexlify('01')  # '\x01'
PROPERTY_ARCHIVE_PROPERTIES      = unhexlify('02')  # '\x02'
PROPERTY_ADDITIONAL_STREAMS_INFO = unhexlify('03')  # '\x03'
PROPERTY_MAIN_STREAMS_INFO       = unhexlify('04')  # '\x04'
PROPERTY_FILES_INFO              = unhexlify('05')  # '\x05'
PROPERTY_PACK_INFO               = unhexlify('06')  # '\x06'
PROPERTY_UNPACK_INFO             = unhexlify('07')  # '\x07'
PROPERTY_SUBSTREAMS_INFO         = unhexlify('08')  # '\x08'
PROPERTY_SIZE                    = unhexlify('09')  # '\x09'
PROPERTY_CRC                     = unhexlify('0a')  # '\x0a'
PROPERTY_FOLDER                  = unhexlify('0b')  # '\x0b'
PROPERTY_CODERS_UNPACK_SIZE      = unhexlify('0c')  # '\x0c'
PROPERTY_NUM_UNPACK_STREAM       = unhexlify('0d')  # '\x0d'
PROPERTY_EMPTY_STREAM            = unhexlify('0e')  # '\x0e'
PROPERTY_EMPTY_FILE              = unhexlify('0f')  # '\x0f'
PROPERTY_ANTI                    = unhexlify('10')  # '\x10'
PROPERTY_NAME                    = unhexlify('11')  # '\x11'
PROPERTY_CREATION_TIME           = unhexlify('12')  # '\x12'
PROPERTY_LAST_ACCESS_TIME        = unhexlify('13')  # '\x13'
PROPERTY_LAST_WRITE_TIME         = unhexlify('14')  # '\x14'
PROPERTY_ATTRIBUTES              = unhexlify('15')  # '\x15'
PROPERTY_COMMENT                 = unhexlify('16')  # '\x16'
PROPERTY_ENCODED_HEADER          = unhexlify('17')  # '\x17'
PROPERTY_START_POS               = unhexlify('18')  # '\x18'
PROPERTY_DUMMY                   = unhexlify('19')  # '\x19'

# Coder method ids. ArchiveFile maps these to decoder methods and, when an
# id is not found, retries with the id shortened by one trailing byte.
COMPRESSION_METHOD_COPY          = unhexlify('00')  # '\x00'
COMPRESSION_METHOD_LZMA          = unhexlify('03')  # '\x03'
COMPRESSION_METHOD_CRYPTO        = unhexlify('06')  # '\x06'
COMPRESSION_METHOD_MISC          = unhexlify('04')  # '\x04'
COMPRESSION_METHOD_MISC_ZIP      = unhexlify('0401')  # '\x04\x01'
COMPRESSION_METHOD_MISC_BZIP     = unhexlify('0402')  # '\x04\x02'
COMPRESSION_METHOD_7Z_AES256_SHA256 = unhexlify('06f10701')  # '\x06\xf1\x07\x01'
COMPRESSION_METHOD_LZMA2         = unhexlify('21')  # '\x21'

# Bit flags for the per-file "attributes" value read from the files info.
# NOTE(review): only FILE_ATTRIBUTE_DIRECTORY is tested in the part of the
# file reviewed here; confirm the users of the other flags.
FILE_ATTRIBUTE_DIRECTORY = 0x10
FILE_ATTRIBUTE_READONLY = 0x01
FILE_ATTRIBUTE_HIDDEN = 0x02
FILE_ATTRIBUTE_SYSTEM = 0x04
FILE_ATTRIBUTE_ARCHIVE = 0x20
150
# number of seconds between 1601/01/01 and 1970/01/01 (UTC)
# used to adjust 7z FILETIME to Python timestamp
TIMESTAMP_ADJUST                 = -11644473600

def toTimestamp(filetime):
    """Translate a 7z FILETIME value into a Python timestamp.

    FILETIME counts 100-nanosecond intervals since 1601/01/01 (UTC); the
    result is a float number of seconds shifted by TIMESTAMP_ADJUST so that
    it is relative to 1970/01/01 (UTC).
    """
    seconds_since_1601 = filetime / 10000000.0
    return seconds_since_1601 + TIMESTAMP_ADJUST
159
def calculate_crc32(data, value=None, blocksize=1024*1024):
    """Return the CRC32 of ``data`` as an unsigned 32-bit integer.

    The input is fed to zlib's crc32 in slices of ``blocksize`` bytes so
    that data of arbitrary length can be processed.  A truthy ``value`` is
    used as the running checksum to continue from; otherwise the checksum
    starts fresh.
    """
    total = len(data)
    head = data[:blocksize]
    checksum = crc32(head, value) if value else crc32(head)

    offset = blocksize
    while offset < total:
        checksum = crc32(data[offset:offset + blocksize], checksum)
        offset += blocksize

    # crc32 may return a signed value; mask to get the unsigned form.
    return checksum & 0xffffffff
173
class ArchiveError(Exception):
    """Base class of all archive related errors defined in this module."""

class FormatError(ArchiveError):
    """Raised when the archive data does not have the expected structure."""

class EncryptedArchiveError(ArchiveError):
    """Error related to encrypted archives."""

class UnsupportedCompressionMethodError(ArchiveError):
    """Raised when no decoder is known for the method id of a coder."""

class DecryptionError(ArchiveError):
    """Base class of errors raised while decrypting data."""

class NoPasswordGivenError(DecryptionError):
    """Raised when encrypted data is found but no password was given."""

class WrongPasswordError(DecryptionError):
    """Raised when decoding encrypted data fails ('invalid password')."""

class DecompressionError(ArchiveError):
    """Raised when the compressed stream ends before all data is decoded."""
197
class ArchiveTimestamp(long):
    """Integer subclass holding a Windows FILETIME timestamp."""

    def __repr__(self):
        """Return ``ClassName(value)`` with the value printed as decimal."""
        class_name = type(self).__name__
        return '%s(%d)' % (class_name, self)

    def as_datetime(self):
        """Return the FILETIME as a timezone-aware datetime in UTC."""
        unix_seconds = toTimestamp(self)
        return datetime.fromtimestamp(unix_seconds, UTC)
207
class Base(object):
    """Base class providing the low-level read helpers used by all parsers."""

    def _readReal64Bit(self, file):
        """Read a fixed-size 64-bit little-endian unsigned integer.

        Returns a tuple ``(value, raw)`` where ``raw`` holds the eight bytes
        that were read, so callers can include them in a checksum.
        """
        res = file.read(8)
        lo, hi = unpack('<LL', res)
        return hi << 32 | lo, res

    def _read64Bit(self, file):
        """Read a variable-length encoded unsigned integer (up to 64 bit).

        The number of leading 1 bits in the first byte gives the number of
        bytes that follow (little-endian); the remaining low bits of the
        first byte form the most significant part of the value.
        """
        first = ord(file.read(1))
        mask = 0x80
        for extra in range(8):
            if first & mask == 0:
                value = 0
                # The extra bytes are little-endian, fold them from the
                # most significant one downwards.
                for byte in reversed(array('B', file.read(extra))):
                    value = value << 8 | byte
                highpart = first & (mask - 1)
                return value + (highpart << (extra * 8))

            mask >>= 1

        # First byte is 0xff: no value bits are left in it and the complete
        # 64-bit little-endian value follows.
        return unpack('<Q', file.read(8))[0]

    def _readBoolean(self, file, count, checkall=0):
        """Read ``count`` booleans stored as a bit vector (MSB first).

        With ``checkall`` set, a leading byte is read first; any value
        other than 0x00 means that all entries are True and no bit vector
        follows.
        """
        if checkall:
            alldefined = file.read(1)
            if alldefined != unhexlify('00'):
                return [True] * count

        result = []
        current = 0
        mask = 0
        while len(result) < count:
            if mask == 0:
                # All bits of the current byte are used up, fetch the next.
                current = ord(file.read(1))
                mask = 0x80
            result.append(current & mask != 0)
            mask >>= 1

        return result

    def checkcrc(self, crc, data):
        """Return True if the CRC32 of ``data`` equals ``crc``."""
        return crc == calculate_crc32(data)
250
251
class PackInfo(Base):
    """Information about the packed streams.

    Attributes set while parsing:
      packpos    -- position value read first
      numstreams -- number of packed streams
      packsizes  -- one size per packed stream (only if the size property
                    is present)
      crcs, digestsdefined -- CRC values and their "defined" flags (only
                    if the CRC property is present)
    """

    def __init__(self, file):
        """Parse the pack info record from ``file``.

        Raises FormatError if the record is not terminated by the end id.
        """
        self.packpos = self._read64Bit(file)
        self.numstreams = self._read64Bit(file)
        prop = file.read(1)
        if prop == PROPERTY_SIZE:
            self.packsizes = [self._read64Bit(file) for x in xrange(self.numstreams)]
            prop = file.read(1)

            if prop == PROPERTY_CRC:
                # The CRC property holds a digests record ("defined" flags
                # followed by 32-bit CRC values), not variable-length
                # numbers, so it must be parsed like all other digests.
                digests = Digests(file, self.numstreams)
                self.digestsdefined = digests.defined
                self.crcs = digests.crcs
                prop = file.read(1)

        if prop != PROPERTY_END:
            raise FormatError('end id expected but %s found' % repr(prop))
269
class Folder(Base):
    """A "Folder" represents a stream of compressed data.

    Attributes set while parsing:
      coders         -- list of dicts with the keys 'method',
                        'numinstreams', 'numoutstreams' and optionally
                        'properties'
      totalout       -- sum of the output streams of all coders
      bindpairs      -- list of (in index, out index) tuples
      packed_indexes -- indexes of the packed input streams
      digestdefined  -- False here; updated by UnpackInfo when a CRC is
                        stored for the folder

    ``unpacksizes`` is not set here; it is assigned by UnpackInfo.
    """

    solid = False

    def __init__(self, file):
        """Parse a folder record from ``file``."""
        numcoders = self._read64Bit(file)
        self.coders = []
        self.digestdefined = False
        totalin = 0
        self.totalout = 0
        for i in xrange(numcoders):
            while True:
                flags = ord(file.read(1))
                methodsize = flags & 0xf          # length of the method id
                issimple = flags & 0x10 == 0      # one input, one output
                noattributes = flags & 0x20 == 0  # no properties stored
                last_alternative = flags & 0x80 == 0
                coder = {}
                coder['method'] = file.read(methodsize)
                if not issimple:
                    coder['numinstreams'] = self._read64Bit(file)
                    coder['numoutstreams'] = self._read64Bit(file)
                else:
                    coder['numinstreams'] = 1
                    coder['numoutstreams'] = 1
                totalin += coder['numinstreams']
                self.totalout += coder['numoutstreams']
                if not noattributes:
                    coder['properties'] = file.read(self._read64Bit(file))
                self.coders.append(coder)
                if last_alternative:
                    break

        numbindpairs = self.totalout - 1
        self.bindpairs = []
        for i in xrange(numbindpairs):
            self.bindpairs.append((self._read64Bit(file), self._read64Bit(file), ))

        numpackedstreams = totalin - numbindpairs
        self.packed_indexes = []
        if numpackedstreams == 1:
            # The single packed stream is the input that is not bound to
            # the output of another coder.
            for i in xrange(totalin):
                if self.findInBindPair(i) < 0:
                    self.packed_indexes.append(i)
        elif numpackedstreams > 1:
            for i in xrange(numpackedstreams):
                self.packed_indexes.append(self._read64Bit(file))

    def getUnpackSize(self):
        """Return the size of the final output stream of the folder.

        This is the unpack size of the last output stream that is not
        consumed by another coder, i.e. that has no bind pair.  Returns 0
        if no unpack sizes are stored; raises TypeError if every output
        stream is bound.
        """
        if not self.unpacksizes:
            return 0

        for i in xrange(len(self.unpacksizes)-1, -1, -1):
            # findOutBindPair returns -1 for "not found"; testing its
            # truthiness would treat -1 as found and index 0 as missing.
            if self.findOutBindPair(i) < 0:
                return self.unpacksizes[i]

        raise TypeError('not found')

    def findInBindPair(self, index):
        """Return the position of the bind pair with in index ``index`` or -1."""
        for idx, (a, b) in enumerate(self.bindpairs):
            if a == index:
                return idx
        return -1

    def findOutBindPair(self, index):
        """Return the position of the bind pair with out index ``index`` or -1."""
        for idx, (a, b) in enumerate(self.bindpairs):
            if b == index:
                return idx
        return -1

    def isEncrypted(self):
        """Return True if one of the coders uses the 7z AES256/SHA256 method."""
        return COMPRESSION_METHOD_7Z_AES256_SHA256 in [x['method'] for x in self.coders]
343
class Digests(Base):
    """Holds a list of checksums.

    Attributes:
      defined -- list of ``count`` booleans, True where a CRC is stored
      crcs    -- array of ``count`` unsigned 32-bit values; entries without
                 a stored CRC are 0
    """

    def __init__(self, file, count):
        """Parse the digests of ``count`` streams from ``file``."""
        self.defined = self._readBoolean(file, count, checkall=1)
        # Only the CRCs of defined entries are present in the stream, so
        # reading a fixed 4*count bytes would consume data that belongs to
        # the following properties.
        numdefined = self.defined.count(True)
        stored = array(ARRAY_TYPE_UINT32, file.read(4*numdefined))
        if NEED_BYTESWAP:
            stored.byteswap()

        # Spread the stored values so that crcs[i] belongs to defined[i].
        crcs = array(ARRAY_TYPE_UINT32)
        pos = 0
        for isdefined in self.defined:
            if isdefined:
                crcs.append(stored[pos])
                pos += 1
            else:
                crcs.append(0)
        self.crcs = crcs

UnpackDigests = Digests
354
class UnpackInfo(Base):
    """Combines multiple folders.

    Reads the folder records, the unpack sizes of every folder and, if
    present, the CRC stored for each folder.
    """

    def __init__(self, file):
        """Parse the unpack info record from ``file``.

        Raises FormatError when an expected property id is missing.
        """
        marker = file.read(1)
        if marker != PROPERTY_FOLDER:
            raise FormatError('folder id expected but %s found' % repr(marker))

        self.numfolders = self._read64Bit(file)
        self.folders = []
        external = file.read(1)
        if external == unhexlify('01'):
            # Folder records are stored elsewhere; only the index is kept.
            self.datastreamidx = self._read64Bit(file)
        elif external == unhexlify('00'):
            parsed = []
            for _ in xrange(self.numfolders):
                parsed.append(Folder(file))
            self.folders = parsed
        else:
            raise FormatError('0x00 or 0x01 expected but %s found' % repr(external))

        marker = file.read(1)
        if marker != PROPERTY_CODERS_UNPACK_SIZE:
            raise FormatError('coders unpack size id expected but %s found' % repr(marker))

        # One unpack size per output stream of each folder.
        for folder in self.folders:
            sizes = []
            for _ in xrange(folder.totalout):
                sizes.append(self._read64Bit(file))
            folder.unpacksizes = sizes

        marker = file.read(1)
        if marker == PROPERTY_CRC:
            digests = UnpackDigests(file, self.numfolders)
            for position, folder in enumerate(self.folders):
                folder.digestdefined = digests.defined[position]
                folder.crc = digests.crcs[position]

            marker = file.read(1)

        if marker != PROPERTY_END:
            raise FormatError('end id expected but %s found' % repr(marker))
390
class SubstreamsInfo(Base):
    """Defines the substreams of a folder.

    Attributes set while parsing:
      numunpackstreams -- number of substreams per folder (1 for every
                          folder when the property is absent)
      unpacksizes      -- size of every substream (only if the size
                          property is present)
      digestsdefined, digests -- one flag and one CRC per substream
    """

    def __init__(self, file, numfolders, folders):
        """Parse the substreams info record from ``file``.

        ``folders`` is indexed by folder number and provides
        ``getUnpackSize()``, ``digestdefined`` and ``crc``.
        """
        self.digests = []
        self.digestsdefined = []

        marker = file.read(1)
        if marker != PROPERTY_NUM_UNPACK_STREAM:
            self.numunpackstreams = [1]*numfolders
        else:
            counts = []
            for _ in xrange(numfolders):
                counts.append(self._read64Bit(file))
            self.numunpackstreams = counts
            marker = file.read(1)

        if marker == PROPERTY_SIZE:
            self.unpacksizes = []
            for folderidx, streamcount in enumerate(self.numunpackstreams):
                consumed = 0
                # All sizes but the last one are stored explicitly ...
                for _ in xrange(1, streamcount):
                    size = self._read64Bit(file)
                    self.unpacksizes.append(size)
                    consumed += size
                # ... the last one is whatever remains of the folder.
                remainder = folders[folderidx].getUnpackSize() - consumed
                self.unpacksizes.append(remainder)

            marker = file.read(1)

        # Folders with exactly one substream and a folder CRC reuse that
        # CRC, so no separate digest is stored for them.
        numdigests = 0
        numdigeststotal = 0
        for folderidx in xrange(numfolders):
            streamcount = self.numunpackstreams[folderidx]
            if not (streamcount == 1 and folders[folderidx].digestdefined):
                numdigests += streamcount
            numdigeststotal += streamcount

        if marker == PROPERTY_CRC:
            digests = Digests(file, numdigests)
            didx = 0
            for folderidx in xrange(numfolders):
                folder = folders[folderidx]
                streamcount = self.numunpackstreams[folderidx]
                if streamcount == 1 and folder.digestdefined:
                    self.digestsdefined.append(True)
                    self.digests.append(folder.crc)
                    continue

                for _ in xrange(streamcount):
                    self.digestsdefined.append(digests.defined[didx])
                    self.digests.append(digests.crcs[didx])
                    didx += 1

            marker = file.read(1)

        if marker != PROPERTY_END:
            raise FormatError('end id expected but %r found' % marker)

        if not self.digestsdefined:
            # No digests were collected: mark every substream as undefined.
            self.digestsdefined = [False] * numdigeststotal
            self.digests = [0] * numdigeststotal
447
class StreamsInfo(Base):
    """Information about compressed streams.

    Depending on the property ids found, the attributes ``packinfo``,
    ``unpackinfo`` and ``substreamsinfo`` are set.
    """

    def __init__(self, file):
        """Parse the streams info record from ``file``.

        Raises FormatError if the record is not terminated by the end id.
        """
        marker = file.read(1)

        if marker == PROPERTY_PACK_INFO:
            self.packinfo = PackInfo(file)
            marker = file.read(1)

        if marker == PROPERTY_UNPACK_INFO:
            self.unpackinfo = UnpackInfo(file)
            marker = file.read(1)

        if marker == PROPERTY_SUBSTREAMS_INFO:
            unpackinfo = self.unpackinfo
            self.substreamsinfo = SubstreamsInfo(file, unpackinfo.numfolders, unpackinfo.folders)
            marker = file.read(1)

        if marker != PROPERTY_END:
            raise FormatError('end id expected but %s found' % repr(marker))
467
class FilesInfo(Base):
    """Holds the properties of the files stored in the archive.

    Attributes:
      numfiles -- number of file entries
      files    -- list of dicts, one per entry; always contains
                  'emptystream' and, depending on the properties found,
                  'filename', 'creationtime', 'lastaccesstime',
                  'lastwritetime' and 'attributes'
    """

    def _readTimes(self, file, files, name):
        """Read one timestamp per entry and store it under key ``name``.

        Entries without a stored timestamp get None.
        """
        defined = self._readBoolean(file, len(files), checkall=1)

        # NOTE: the "external" flag is currently ignored, should be 0x00
        external = file.read(1)
        for idx, entry in enumerate(files):
            if defined[idx]:
                entry[name] = ArchiveTimestamp(self._readReal64Bit(file)[0])
            else:
                entry[name] = None

    def _readNames(self, buffer):
        """Read the NUL-terminated UTF-16-LE name of every entry.

        Raises FormatError if the data ends in the middle of a name.
        """
        terminator = unhexlify('0000')
        for f in self.files:
            units = []
            while True:
                ch = buffer.read(2)
                if len(ch) < 2:
                    # Without this check a truncated buffer keeps
                    # returning empty reads and the loop never ends.
                    raise FormatError('unexpected end of data while reading filename')
                if ch == terminator:
                    break
                units.append(ch)

            # Decode the complete name at once: decoding single code units
            # fails for surrogate pairs, and the plain "utf-16" codec
            # interprets a leading U+FEFF as byte order mark.
            f['filename'] = bytes('', 'ascii').join(units).decode('utf-16-le')

    def __init__(self, file):
        """Parse the files info record from ``file``.

        Raises FormatError on invalid or unknown property types and
        NotImplementedError for externally stored names or attributes.
        """
        self.numfiles = self._read64Bit(file)
        self.files = [{'emptystream': False} for x in xrange(self.numfiles)]
        numemptystreams = 0
        while True:
            typ = self._read64Bit(file)
            if typ > 255:
                raise FormatError('invalid type, must be below 256, is %d' % typ)

            typ = pack('B', typ)
            if typ == PROPERTY_END:
                break

            size = self._read64Bit(file)
            if typ == PROPERTY_DUMMY:
                # Added by newer versions of 7z to adjust padding.
                file.seek(size, os.SEEK_CUR)
                continue

            # Parse every property from its own buffer so that a property
            # can never read beyond its declared size.
            buffer = BytesIO(file.read(size))
            if typ == PROPERTY_EMPTY_STREAM:
                isempty = self._readBoolean(buffer, self.numfiles)
                for entry, flag in zip(self.files, isempty):
                    entry['emptystream'] = flag
                numemptystreams += isempty.count(True)
                # The following two lists are parsed but not used by the
                # code in this class.
                emptyfiles = [False] * numemptystreams
                antifiles = [False] * numemptystreams
            elif typ == PROPERTY_EMPTY_FILE:
                emptyfiles = self._readBoolean(buffer, numemptystreams)
            elif typ == PROPERTY_ANTI:
                antifiles = self._readBoolean(buffer, numemptystreams)
            elif typ == PROPERTY_NAME:
                external = buffer.read(1)
                if external != unhexlify('00'):
                    self.dataindex = self._read64Bit(buffer)
                    # XXX: evaluate external
                    raise NotImplementedError

                self._readNames(buffer)
            elif typ == PROPERTY_CREATION_TIME:
                self._readTimes(buffer, self.files, 'creationtime')
            elif typ == PROPERTY_LAST_ACCESS_TIME:
                self._readTimes(buffer, self.files, 'lastaccesstime')
            elif typ == PROPERTY_LAST_WRITE_TIME:
                self._readTimes(buffer, self.files, 'lastwritetime')
            elif typ == PROPERTY_ATTRIBUTES:
                defined = self._readBoolean(buffer, self.numfiles, checkall=1)
                external = buffer.read(1)
                if external != unhexlify('00'):
                    self.dataindex = self._read64Bit(buffer)
                    # XXX: evaluate external
                    raise NotImplementedError

                for idx, f in enumerate(self.files):
                    if defined[idx]:
                        f['attributes'] = unpack('<L', buffer.read(4))[0]
                    else:
                        f['attributes'] = None
            else:
                raise FormatError('invalid type %r' % (typ))
549
class Header(Base):
    """The archive header.

    Depending on the property ids found, the attributes ``properties``,
    ``additional_streams``, ``main_streams`` and ``files`` are set.
    """

    def __init__(self, file):
        """Parse the header record from ``file``.

        Raises FormatError if the record is not terminated by the end id.
        """
        marker = file.read(1)

        if marker == PROPERTY_ARCHIVE_PROPERTIES:
            # NOTE(review): ArchiveProperties is not defined in the part of
            # the file reviewed here -- confirm that it exists.
            self.properties = ArchiveProperties(file)
            marker = file.read(1)

        if marker == PROPERTY_ADDITIONAL_STREAMS_INFO:
            self.additional_streams = StreamsInfo(file)
            marker = file.read(1)

        if marker == PROPERTY_MAIN_STREAMS_INFO:
            self.main_streams = StreamsInfo(file)
            marker = file.read(1)

        if marker == PROPERTY_FILES_INFO:
            self.files = FilesInfo(file)
            marker = file.read(1)

        if marker != PROPERTY_END:
            raise FormatError('end id expected but %s found' % (repr(marker)))
573
class ArchiveFile(Base):
    """Wrapper around a single file stored in the archive.

    Every key of the ``info`` dict becomes an instance attribute.  The
    methods below rely on ``compressed`` (packed size) and
    ``_uncompressed`` (one unpacked size per coder; the last entry is the
    size of the file).
    """

    def __init__(self, info, start, src_start, folder, archive, maxsize=None):
        """Create the wrapper.

        info      -- dict of attributes to set on the instance
        start     -- offset of the file inside the decoded folder data
        src_start -- position in the archive file where the packed data
                     starts
        folder    -- Folder describing the coders of the data
        archive   -- owning archive; provides ``_file`` and ``password``
        maxsize   -- stored as ``_maxsize``
        """
        self.digest = None
        self._archive = archive
        self._file = archive._file
        self._start = start
        self._src_start = src_start
        self._folder = folder
        # maxsize is only valid for solid archives
        self._maxsize = maxsize
        for k, v in info.items():
            setattr(self, k, v)
        self.size = self.uncompressed = self._uncompressed[-1]
        if not hasattr(self, 'filename'):
            # compressed file is stored without a name, generate one
            try:
                basefilename = self._file.name
            except AttributeError:
                # 7z archive file doesn't have a name
                self.filename = 'contents'
            else:
                self.filename = os.path.splitext(os.path.basename(basefilename))[0]
        self.reset()
        # Maps coder method ids to the name of the method decoding them.
        self._decoders = {
            COMPRESSION_METHOD_COPY: '_read_copy',
            COMPRESSION_METHOD_LZMA: '_read_lzma',
            COMPRESSION_METHOD_LZMA2: '_read_lzma2',
            COMPRESSION_METHOD_MISC_ZIP: '_read_zip',
            COMPRESSION_METHOD_MISC_BZIP: '_read_bzip',
            COMPRESSION_METHOD_7Z_AES256_SHA256: '_read_7z_aes256_sha256',
        }

    def _is_encrypted(self):
        """Return True if the folder of this file uses encryption."""
        return self._folder.isEncrypted()

    def reset(self):
        """Set the ``pos`` attribute back to 0."""
        self.pos = 0

    def read(self):
        """Return the decoded content of the file as a byte string.

        The data is passed through all coders of the folder in order; the
        output of one coder is the input of the next one.

        Raises TypeError if the folder has no coders and
        UnsupportedCompressionMethodError if a coder method is unknown.
        """
        if not self.size:
            # Return bytes (not text) so that the result has the same type
            # as for non-empty files and can be passed to crc32().
            return bytes('', 'ascii')
        elif not self._folder.coders:
            raise TypeError("file has no coder informations")

        data = None
        num_coders = len(self._folder.coders)
        for level, coder in enumerate(self._folder.coders):
            method = coder['method']
            decoder = None
            # Method ids may carry extra trailing bytes; strip them one by
            # one until a known id is found.
            while method and decoder is None:
                decoder = self._decoders.get(method, None)
                method = method[:-1]

            if decoder is None:
                raise UnsupportedCompressionMethodError(repr(coder['method']))

            data = getattr(self, decoder)(coder, data, level, num_coders)

        return data

    def _read_copy(self, coder, input, level, num_coders):
        """Return the data of a stored (uncompressed) file."""
        size = self._uncompressed[level]
        end = self._start + size
        if not input:
            self._file.seek(self._src_start)
            # Read up to the end of this file: the slice below starts at
            # "_start", so reading only "size" bytes would return
            # truncated data whenever "_start" is not zero.
            input = self._file.read(end)
        return input[self._start:end]

    def _read_from_decompressor(self, coder, decompressor, input, level, num_coders, can_partial_decompress=True, with_cache=False):
        """Decode the data of this file with ``decompressor``.

        coder        -- coder dict; its 'properties' (if any) are fed to
                        the decompressor first
        decompressor -- object providing ``decompress(data[, maxlength])``
        input        -- output of the previous coder or None/empty to read
                        from the archive file
        level, num_coders -- position of the coder in the coder chain
        can_partial_decompress -- the decompressor accepts a maximum
                        output length
        with_cache   -- for solid folders, remember the decoded data, the
                        file position and the decompressor on the folder
                        so that the next file can continue from there

        Raises DecompressionError if the file ends before enough data was
        decoded.
        """
        size = self._uncompressed[level]
        properties = coder.get('properties', None)
        if properties:
            decompressor.decompress(properties)
        total = self.compressed
        is_last_coder = (level + 1) == num_coders
        if not input and is_last_coder:
            remaining = self._start+size
            out = BytesIO()
            cache = getattr(self._folder, '_decompress_cache', None)
            if cache is not None:
                # Continue where the previous file of the folder stopped.
                data, pos, decompressor = cache
                out.write(data)
                remaining -= len(data)
                self._file.seek(pos)
            else:
                self._file.seek(self._src_start)
            checkremaining = is_last_coder and not self._folder.solid and can_partial_decompress
            while remaining > 0:
                data = self._file.read(READ_BLOCKSIZE)
                if checkremaining or (with_cache and len(data) < READ_BLOCKSIZE):
                    tmp = decompressor.decompress(data, remaining)
                else:
                    tmp = decompressor.decompress(data)
                if not tmp and not data:
                    raise DecompressionError('end of stream while decompressing')
                out.write(tmp)
                remaining -= len(tmp)

            data = out.getvalue()
            if with_cache and self._folder.solid:
                # don't decompress start of solid archive for next file
                # TODO: limit size of cached data
                self._folder._decompress_cache = (data, self._file.tell(), decompressor)
        else:
            if not input:
                self._file.seek(self._src_start)
                input = self._file.read(total)
            if is_last_coder and can_partial_decompress:
                data = decompressor.decompress(input, self._start+size)
            else:
                data = decompressor.decompress(input)
                if can_partial_decompress and not is_last_coder:
                    # Intermediate result, passed on to the next coder.
                    return data

        return data[self._start:self._start+size]

    def _read_lzma_stream(self, coder, input, level, num_coders, **options):
        """Shared implementation of the LZMA and LZMA2 decoders.

        ``options`` are passed on to ``pylzma.decompressobj``.  A
        ValueError raised while decoding an encrypted folder is reported
        as WrongPasswordError.
        """
        size = self._uncompressed[level]
        is_last_coder = (level + 1) == num_coders
        if is_last_coder and not self._folder.solid:
            options['maxlength'] = self._start+size
        dec = pylzma.decompressobj(**options)
        try:
            return self._read_from_decompressor(coder, dec, input, level, num_coders, with_cache=True)
        except ValueError:
            if self._is_encrypted():
                raise WrongPasswordError('invalid password')

            raise

    def _read_lzma(self, coder, input, level, num_coders):
        """Decode LZMA compressed data."""
        return self._read_lzma_stream(coder, input, level, num_coders)

    def _read_lzma2(self, coder, input, level, num_coders):
        """Decode LZMA2 compressed data."""
        return self._read_lzma_stream(coder, input, level, num_coders, lzma2=True)

    def _read_zip(self, coder, input, level, num_coders):
        """Decode raw deflate compressed data."""
        dec = zlib.decompressobj(-15)
        return self._read_from_decompressor(coder, dec, input, level, num_coders)

    def _read_bzip(self, coder, input, level, num_coders):
        """Decode bzip2 compressed data."""
        dec = bz2.BZ2Decompressor()
        return self._read_from_decompressor(coder, dec, input, level, num_coders, can_partial_decompress=False)

    def _read_7z_aes256_sha256(self, coder, input, level, num_coders):
        """Decrypt AES256 encrypted data with the password of the archive.

        Raises NoPasswordGivenError if the archive has no password and
        FormatError if the coder properties are malformed.
        """
        if not self._archive.password:
            raise NoPasswordGivenError()

        # The properties come from the archive, so they are validated with
        # explicit checks instead of asserts (which are removed by -O).
        properties = coder.get('properties', None)
        if not properties:
            raise FormatError('AES coder properties are missing')

        firstbyte = properties[0]
        if not IS_PYTHON3:
            firstbyte = ord(firstbyte)
        numcyclespower = firstbyte & 0x3f
        if firstbyte & 0xc0 != 0:
            if len(properties) < 2:
                raise FormatError('AES coder properties are truncated')

            saltsize = (firstbyte >> 7) & 1
            ivsize = (firstbyte >> 6) & 1

            secondbyte = properties[1]
            if not IS_PYTHON3:
                secondbyte = ord(secondbyte)
            saltsize += (secondbyte >> 4)
            ivsize += (secondbyte & 0x0f)

            if len(properties) != 2+saltsize+ivsize:
                raise FormatError('AES coder properties have an invalid size')
            salt = properties[2:2+saltsize]
            iv = properties[2+saltsize:2+saltsize+ivsize]
            if numcyclespower > 24:
                raise FormatError('unsupported number of key derivation cycles')
            if ivsize < 16:
                # Pad the initialization vector with zero bytes.
                iv += bytes('\x00'*(16-ivsize), 'ascii')
        else:
            salt = iv = bytes('', 'ascii')

        password = self._archive.password.encode('utf-16-le')
        key = pylzma.calculate_key(password, numcyclespower, salt=salt)
        cipher = pylzma.AESDecrypt(key, iv=iv)
        if not input:
            self._file.seek(self._src_start)
            input = self._file.read(self.compressed)
        result = cipher.decrypt(input)
        return result

    def checkcrc(self):
        """Return True if the content matches the stored CRC.

        Files without a stored CRC are reported as valid.
        """
        if self.digest is None:
            return True

        self.reset()
        data = self.read()
        return super(ArchiveFile, self).checkcrc(self.digest, data)
778
779
780class Archive7z(Base):
781    """ the archive itself """
782
783    def __init__(self, file, password=None):
784        self._file = file
785        self.password = password
786        self.header = file.read(len(MAGIC_7Z))
787        if self.header != MAGIC_7Z:
788            raise FormatError('not a 7z file')
789        self.version = unpack('BB', file.read(2))
790
791        self.startheadercrc = unpack('<L', file.read(4))[0]
792        self.nextheaderofs, data = self._readReal64Bit(file)
793        crc = calculate_crc32(data)
794        self.nextheadersize, data = self._readReal64Bit(file)
795        crc = calculate_crc32(data, crc)
796        data = file.read(4)
797        self.nextheadercrc = unpack('<L', data)[0]
798        crc = calculate_crc32(data, crc)
799        if crc != self.startheadercrc:
800            raise FormatError('invalid header data')
801        self.afterheader = file.tell()
802
803        file.seek(self.nextheaderofs, 1)
804        buffer = BytesIO(file.read(self.nextheadersize))
805        if not self.checkcrc(self.nextheadercrc, buffer.getvalue()):
806            raise FormatError('invalid header data')
807
808        while True:
809            id = buffer.read(1)
810            if not id or id == PROPERTY_HEADER:
811                break
812
813            if id != PROPERTY_ENCODED_HEADER:
814                raise TypeError('Unknown field: %r' % (id))
815
816            streams = StreamsInfo(buffer)
817            file.seek(self.afterheader + 0)
818            data = bytes('', 'ascii')
819            src_start = self.afterheader
820            for folder in streams.unpackinfo.folders:
821                if folder.isEncrypted() and not password:
822                    raise NoPasswordGivenError()
823
824                src_start += streams.packinfo.packpos
825                uncompressed = folder.unpacksizes
826                if not isinstance(uncompressed, (list, tuple)):
827                    uncompressed = [uncompressed] * len(folder.coders)
828                info = {
829                    'compressed': streams.packinfo.packsizes[0],
830                    '_uncompressed': uncompressed,
831                }
832                tmp = ArchiveFile(info, 0, src_start, folder, self)
833                uncompressed_size = uncompressed[-1]
834                folderdata = tmp.read()[:uncompressed_size]
835                src_start += uncompressed_size
836
837                if folder.digestdefined:
838                    if not self.checkcrc(folder.crc, folderdata):
839                        raise FormatError('invalid block data')
840
841                data += folderdata
842
843            buffer = BytesIO(data)
844
845        self.files = []
846        self.files_map = {}
847        if not id:
848            # empty archive
849            self.solid = False
850            self.numfiles = 0
851            self.filenames = []
852            return
853
854        self.header = Header(buffer)
855        files = self.header.files
856        if hasattr(self.header, 'main_streams'):
857            folders = self.header.main_streams.unpackinfo.folders
858            packinfo = self.header.main_streams.packinfo
859            subinfo = self.header.main_streams.substreamsinfo
860            packsizes = packinfo.packsizes
861            self.solid = packinfo.numstreams == 1
862            if hasattr(subinfo, 'unpacksizes'):
863                unpacksizes = subinfo.unpacksizes
864            else:
865                unpacksizes = [x.unpacksizes for x in folders]
866        else:
867            # TODO(fancycode): is it necessary to provide empty values for folder, packinfo, etc?
868            self.solid = False
869
870        fidx = 0
871        obidx = 0
872        streamidx = 0
873        src_pos = self.afterheader
874        pos = 0
875        folder_pos = src_pos
876        maxsize = (self.solid and packinfo.packsizes[0]) or None
877        for info in files.files:
878            # Skip all directory entries.
879            attributes = info.get('attributes', None)
880            if attributes and attributes & FILE_ATTRIBUTE_DIRECTORY != 0:
881                continue
882
883            if not info['emptystream']:
884                folder = folders[fidx]
885                if streamidx == 0:
886                    folder.solid = subinfo.numunpackstreams[fidx] > 1
887
888                maxsize = (folder.solid and packinfo.packsizes[fidx]) or None
889                uncompressed = unpacksizes[obidx]
890                if not isinstance(uncompressed, (list, tuple)):
891                    uncompressed = [uncompressed] * len(folder.coders)
892                if pos > 0:
893                    # file is part of solid archive
894                    assert fidx < len(packsizes), 'Folder outside index for solid archive'
895                    info['compressed'] = packsizes[fidx]
896                elif fidx < len(packsizes):
897                    # file is compressed
898                    info['compressed'] = packsizes[fidx]
899                else:
900                    # file is not compressed
901                    info['compressed'] = uncompressed
902                info['_uncompressed'] = uncompressed
903            else:
904                info['compressed'] = 0
905                info['_uncompressed'] = [0]
906                folder = None
907                maxsize = 0
908
909            file = ArchiveFile(info, pos, src_pos, folder, self, maxsize=maxsize)
910            if folder is not None and subinfo.digestsdefined[obidx]:
911                file.digest = subinfo.digests[obidx]
912            self.files.append(file)
913            if folder is not None and folder.solid:
914                pos += unpacksizes[obidx]
915            else:
916                src_pos += info['compressed']
917            obidx += 1
918            streamidx += 1
919            if folder is not None and streamidx >= subinfo.numunpackstreams[fidx]:
920                pos = 0
921                folder_pos += packinfo.packsizes[fidx]
922                src_pos = folder_pos
923                fidx += 1
924                streamidx = 0
925
926        self.numfiles = len(self.files)
927        self.filenames = list(map(lambda x: x.filename, self.files))
928        self.files_map.update([(x.filename, x) for x in self.files])
929
930    # interface like TarFile
931
932    def getmember(self, name):
933        if isinstance(name, (int, long)):
934            try:
935                return self.files[name]
936            except IndexError:
937                return None
938
939        return self.files_map.get(name, None)
940
941    def getmembers(self):
942        return self.files
943
944    def getnames(self):
945        return self.filenames
946
947    def list(self, verbose=True, file=sys.stdout):
948        file.write('total %d files in %sarchive\n' % (self.numfiles, (self.solid and 'solid ') or ''))
949        if not verbose:
950            file.write('\n'.join(self.filenames) + '\n')
951            return
952
953        for f in self.files:
954            extra = (f.compressed and '%10d ' % (f.compressed)) or ' '
955            file.write('%10d%s%.8x %s\n' % (f.size, extra, f.digest, f.filename))
956
if __name__ == '__main__':
    # List the contents of the archive given as first command line
    # argument, "test.7z" in the current directory is used by default.
    if len(sys.argv) > 1:
        archive_name = sys.argv[1]
    else:
        archive_name = 'test.7z'

    fp = open(archive_name, 'rb')
    try:
        Archive7z(fp).list()
    finally:
        # release the file handle even if the archive could not be parsed
        fp.close()
961