# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
Tests for L{twisted.python.zipstream}
"""

import random
import zipfile
from hashlib import md5

from twisted.python import zipstream, filepath
from twisted.trial import unittest



class FileEntryMixin:
    """
    File entry classes should behave as file-like objects.

    This is a mixin: it expects to be combined with a
    L{unittest.TestCase} (for C{mktemp}, C{addCleanup} and the assertion
    methods) and expects the concrete class to define a C{compression}
    attribute holding the L{zipfile} compression constant to test with.
    """
    def getFileEntry(self, contents):
        """
        Return an appropriate zip file entry.

        A fresh archive holding a single member named C{'content'} is
        written with C{self.compression}, then re-opened with
        L{zipstream.ChunkingZipFile}.

        @param contents: the data to store in the single archive member.

        @return: the object returned by C{ChunkingZipFile.readfile} for that
            member.
        """
        filename = self.mktemp()
        archive = zipfile.ZipFile(filename, 'w', self.compression)
        try:
            archive.writestr('content', contents)
        finally:
            archive.close()
        chunker = zipstream.ChunkingZipFile(filename, 'r')
        # The returned entry is read by the caller after this method
        # returns, so the archive can only be closed once the test is over.
        self.addCleanup(chunker.close)
        return chunker.readfile('content')


    def test_isatty(self):
        """
        zip files should not be ttys, so isatty() should be false
        """
        self.assertEqual(self.getFileEntry('').isatty(), False)


    def test_closed(self):
        """
        The C{closed} attribute should reflect whether C{close()} has been
        called.
        """
        fileEntry = self.getFileEntry('')
        self.assertEqual(fileEntry.closed, False)
        fileEntry.close()
        self.assertEqual(fileEntry.closed, True)


    def test_readline(self):
        """
        C{readline()} should mirror L{file.readline} and return up to a single
        delimiter.
        """
        fileEntry = self.getFileEntry('hoho\nho')
        self.assertEqual(fileEntry.readline(), 'hoho\n')
        self.assertEqual(fileEntry.readline(), 'ho')
        self.assertEqual(fileEntry.readline(), '')


    def test_next(self):
        """
        Zip file entries should implement the iterator protocol as files do.
        """
        fileEntry = self.getFileEntry('ho\nhoho')
        self.assertEqual(fileEntry.next(), 'ho\n')
        self.assertEqual(fileEntry.next(), 'hoho')
        self.assertRaises(StopIteration, fileEntry.next)


    def test_readlines(self):
        """
        C{readlines()} should return a list of all the lines.
        """
        fileEntry = self.getFileEntry('ho\nho\nho')
        self.assertEqual(fileEntry.readlines(), ['ho\n', 'ho\n', 'ho'])


    def test_iteration(self):
        """
        C{__iter__()} and C{xreadlines()} should return C{self}.
        """
        fileEntry = self.getFileEntry('')
        self.assertIdentical(iter(fileEntry), fileEntry)
        self.assertIdentical(fileEntry.xreadlines(), fileEntry)


    def test_readWhole(self):
        """
        C{.read()} should read the entire file.
        """
        contents = "Hello, world!"
        entry = self.getFileEntry(contents)
        self.assertEqual(entry.read(), contents)


    def test_readPartial(self):
        """
        C{.read(num)} should read num bytes from the file.
        """
        contents = "0123456789"
        entry = self.getFileEntry(contents)
        one = entry.read(4)
        two = entry.read(200)
        self.assertEqual(one, "0123")
        self.assertEqual(two, "456789")


    def test_tell(self):
        """
        C{.tell()} should return the number of bytes that have been read so
        far.
        """
        contents = "x" * 100
        entry = self.getFileEntry(contents)
        entry.read(2)
        self.assertEqual(entry.tell(), 2)
        entry.read(4)
        self.assertEqual(entry.tell(), 6)



class DeflatedZipFileEntryTest(FileEntryMixin, unittest.TestCase):
    """
    DeflatedZipFileEntry should be file-like
    """
    compression = zipfile.ZIP_DEFLATED



class ZipFileEntryTest(FileEntryMixin, unittest.TestCase):
    """
    ZipFileEntry should be file-like
    """
    compression = zipfile.ZIP_STORED



class ZipstreamTest(unittest.TestCase):
    """
    Tests for twisted.python.zipstream
    """
    def setUp(self):
        """
        Create a test directory for any files that will be created, and an
        C{unzipped} directory inside it for extraction output.
        """
        self.testdir = filepath.FilePath(self.mktemp())
        self.testdir.makedirs()
        self.unzipdir = self.testdir.child('unzipped')
        self.unzipdir.makedirs()


    def makeZipFile(self, contents, directory=''):
        """
        Makes a zip file archive containing len(contents) files.  Contents
        should be a list of strings, each string being the content of one
        file.

        Members are named C{"0"}, C{"1"}, ... after their index in
        C{contents}.

        @param contents: a list of strings, one per archive member.

        @param directory: if non-empty, a prefix joined to each member name
            with C{"/"}.

        @return: the path of the archive, C{zipfile.zip} inside
            C{self.testdir}.
        """
        zpfilename = self.testdir.child('zipfile.zip').path
        zpfile = zipfile.ZipFile(zpfilename, 'w')
        try:
            for i, content in enumerate(contents):
                filename = str(i)
                if directory:
                    filename = directory + "/" + filename
                zpfile.writestr(filename, content)
        finally:
            zpfile.close()
        return zpfilename


    def _scribble(self, path, offset, data):
        """
        Overwrite part of an existing file in place, closing the file even if
        seeking or writing fails.

        @param path: the path of the file to modify.

        @param offset: the absolute position at which to start writing.

        @param data: the string to write at C{offset}.
        """
        scribble = open(path, "r+b")
        try:
            scribble.seek(offset, 0)
            scribble.write(data)
        finally:
            scribble.close()


    def test_invalidMode(self):
        """
        A ChunkingZipFile opened in write-mode should not allow .readfile(),
        and raise a RuntimeError instead.
        """
        czf = zipstream.ChunkingZipFile(self.mktemp(), "w")
        self.addCleanup(czf.close)
        self.assertRaises(RuntimeError, czf.readfile, "something")


    def test_closedArchive(self):
        """
        A closed ChunkingZipFile should raise a L{RuntimeError} when
        .readfile() is invoked.
        """
        czf = zipstream.ChunkingZipFile(self.makeZipFile(["something"]), "r")
        czf.close()
        self.assertRaises(RuntimeError, czf.readfile, "something")


    def test_invalidHeader(self):
        """
        A zipfile entry with the wrong magic number should raise BadZipfile for
        readfile(), but that should not affect other files in the archive.
        """
        fn = self.makeZipFile(["test contents", "more contents"])
        zf = zipfile.ZipFile(fn, "r")
        try:
            zeroOffset = zf.getinfo("0").header_offset
        finally:
            zf.close()
        # Zero out just the one header.
        self._scribble(fn, zeroOffset, chr(0) * 4)
        czf = zipstream.ChunkingZipFile(fn)
        self.addCleanup(czf.close)
        self.assertRaises(zipfile.BadZipfile, czf.readfile, "0")
        self.assertEqual(czf.readfile("1").read(), "more contents")


    def test_filenameMismatch(self):
        """
        A zipfile entry with a different filename than is found in the central
        directory should raise BadZipfile.
        """
        fn = self.makeZipFile(["test contents", "more contents"])
        zf = zipfile.ZipFile(fn, "r")
        try:
            info = zf.getinfo("0")
            info.filename = "not zero"
        finally:
            zf.close()
        # Rewrite only the local file header, so that it disagrees with the
        # (untouched) central directory.
        self._scribble(fn, info.header_offset, info.FileHeader())
        czf = zipstream.ChunkingZipFile(fn)
        self.addCleanup(czf.close)
        self.assertRaises(zipfile.BadZipfile, czf.readfile, "0")
        self.assertEqual(czf.readfile("1").read(), "more contents")


    def test_unsupportedCompression(self):
        """
        A zipfile which describes an unsupported compression mechanism should
        raise BadZipfile.
        """
        fn = self.mktemp()
        zf = zipfile.ZipFile(fn, "w")
        zi = zipfile.ZipInfo("0")
        zf.writestr(zi, "some data")
        # Mangle its compression type in the central directory; can't do this
        # before the writestr call or zipfile will (correctly) tell us not to
        # pass bad compression types :)
        zi.compress_type = 1234
        zf.close()
        czf = zipstream.ChunkingZipFile(fn)
        self.addCleanup(czf.close)
        self.assertRaises(zipfile.BadZipfile, czf.readfile, "0")


    def test_extraData(self):
        """
        readfile() should skip over 'extra' data present in the zip metadata.
        """
        fn = self.mktemp()
        zf = zipfile.ZipFile(fn, 'w')
        zi = zipfile.ZipInfo("0")
        zi.extra = "hello, extra"
        zf.writestr(zi, "the real data")
        zf.close()
        czf = zipstream.ChunkingZipFile(fn)
        self.addCleanup(czf.close)
        self.assertEqual(czf.readfile("0").read(), "the real data")


    def test_unzipIterChunky(self):
        """
        L{twisted.python.zipstream.unzipIterChunky} returns an iterator which
        must be exhausted to completely unzip the input archive.
        """
        numfiles = 10
        contents = ['This is test file %d!' % i for i in range(numfiles)]
        zpfilename = self.makeZipFile(contents)
        list(zipstream.unzipIterChunky(zpfilename, self.unzipdir.path))
        self.assertEqual(
            set(self.unzipdir.listdir()),
            set(map(str, range(numfiles))))

        for child in self.unzipdir.children():
            num = int(child.basename())
            self.assertEqual(child.getContent(), contents[num])


    def test_unzipIterChunkyDirectory(self):
        """
        The path to which a file is extracted by L{zipstream.unzipIterChunky}
        is determined by joining the C{directory} argument with the path
        within the archive of the file being extracted.
        """
        numfiles = 10
        contents = ['This is test file %d!' % i for i in range(numfiles)]
        zpfilename = self.makeZipFile(contents, 'foo')
        list(zipstream.unzipIterChunky(zpfilename, self.unzipdir.path))
        self.assertEqual(
            set(self.unzipdir.child('foo').listdir()),
            set(map(str, range(numfiles))))

        for child in self.unzipdir.child('foo').children():
            num = int(child.basename())
            self.assertEqual(child.getContent(), contents[num])


    # XXX these tests are kind of gross and old, but I think unzipIterChunky is
    # kind of a gross function anyway.  We should really write an abstract
    # copyTo/moveTo that operates on FilePath and make sure ZipPath can support
    # it, then just deprecate / remove this stuff.
    def _unzipIterChunkyTest(self, compression, chunksize, lower, upper):
        """
        unzipIterChunky should unzip the given number of bytes per iteration.

        @param compression: the L{zipfile} compression constant used to build
            the archive.

        @param chunksize: passed through as the C{chunksize} argument of
            L{zipstream.unzipIterChunky}.

        @param lower: exclusive lower bound on the first value produced by
            the iterator.

        @param upper: exclusive upper bound on the first value produced by
            the iterator.
        """
        junk = ' '.join([str(random.random()) for _ in range(1000)])
        junkmd5 = md5(junk).hexdigest()

        tempdir = filepath.FilePath(self.mktemp())
        tempdir.makedirs()
        zfpath = tempdir.child('bigfile.zip').path
        self._makebigfile(zfpath, compression, junk)
        uziter = zipstream.unzipIterChunky(zfpath, tempdir.path,
                                           chunksize=chunksize)
        r = uziter.next()
        # test that the number of chunks is in the right ballpark;
        # this could theoretically be any number but statistically it
        # should always be in this range
        self.assertTrue(
            lower < r < upper,
            "first value from unzipIterChunky was %r, expected it to be "
            "strictly between %r and %r" % (r, lower, upper))
        for r in uziter:
            pass
        # The last value produced once the iterator is exhausted must be 0.
        self.assertEqual(r, 0)
        # getContent() reads the extracted file and closes it again.
        newmd5 = md5(tempdir.child("zipstreamjunk").getContent()).hexdigest()
        self.assertEqual(newmd5, junkmd5)


    def test_unzipIterChunkyStored(self):
        """
        unzipIterChunky should unzip the given number of bytes per iteration on
        a stored archive.
        """
        self._unzipIterChunkyTest(zipfile.ZIP_STORED, 500, 35, 45)


    def test_chunkyDeflated(self):
        """
        unzipIterChunky should unzip the given number of bytes per iteration on
        a deflated archive.
        """
        self._unzipIterChunkyTest(zipfile.ZIP_DEFLATED, 972, 23, 27)


    def _makebigfile(self, filename, compression, junk):
        """
        Create a zip file with the given file name and compression scheme.

        The archive holds ten empty members named C{zipstream0} through
        C{zipstream9}, followed by one member named C{zipstreamjunk}.

        @param filename: the path of the archive to create.

        @param compression: the L{zipfile} compression constant to use.

        @param junk: the data stored in the C{zipstreamjunk} member.
        """
        zf = zipfile.ZipFile(filename, 'w', compression)
        try:
            for i in range(10):
                fn = 'zipstream%d' % i
                zf.writestr(fn, "")
            zf.writestr('zipstreamjunk', junk)
        finally:
            zf.close()