1{-# LANGUAGE CPP #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE ViewPatterns #-}
4------------------------------------------------------------------------
5-- |
6-- Module      : Codec.Archive.Zip
7-- Copyright   : John MacFarlane
8-- License     : BSD3
9--
10-- Maintainer  : John MacFarlane < jgm at berkeley dot edu >
11-- Stability   : unstable
12-- Portability : so far only tested on GHC
13--
14-- The zip-archive library provides functions for creating, modifying,
15-- and extracting files from zip archives.
16--
17-- Certain simplifying assumptions are made about the zip archives: in
18-- particular, there is no support for strong encryption, zip files that span
19-- multiple disks, ZIP64, OS-specific file attributes, or compression
20-- methods other than Deflate.  However, the library should be able to
21-- read the most common zip archives, and the archives it produces should
22-- be readable by all standard unzip programs.
23--
24-- As an example of the use of the library, a standalone zip archiver
25-- and extracter, Zip.hs, is provided in the source distribution.
26--
27-- For more information on the format of zip archives, consult
28-- <http://www.pkware.com/documents/casestudies/APPNOTE.TXT>
29------------------------------------------------------------------------
30
31module Codec.Archive.Zip
32       (
33
34       -- * Data structures
35         Archive (..)
36       , Entry (..)
37       , CompressionMethod (..)
38       , EncryptionMethod (..)
39       , ZipOption (..)
40       , ZipException (..)
41       , emptyArchive
42
43       -- * Pure functions for working with zip archives
44       , toArchive
45       , toArchiveOrFail
46       , fromArchive
47       , filesInArchive
48       , addEntryToArchive
49       , deleteEntryFromArchive
50       , findEntryByPath
51       , fromEntry
52       , fromEncryptedEntry
53       , isEncryptedEntry
54       , toEntry
55#ifndef _WINDOWS
56       , isEntrySymbolicLink
57       , symbolicLinkEntryTarget
58       , entryCMode
59#endif
60
61       -- * IO functions for working with zip archives
62       , readEntry
63       , writeEntry
64#ifndef _WINDOWS
65       , writeSymbolicLinkEntry
66#endif
67       , addFilesToArchive
68       , extractFilesFromArchive
69
70       ) where
71
72import Data.Time.Calendar ( toGregorian, fromGregorian )
73import Data.Time.Clock ( UTCTime(..) )
74import Data.Time.Clock.POSIX ( posixSecondsToUTCTime, utcTimeToPOSIXSeconds )
75import Data.Time.LocalTime ( TimeOfDay(..), timeToTimeOfDay )
76import Data.Bits ( shiftL, shiftR, (.&.), (.|.), xor, testBit )
77import Data.Binary
78import Data.Binary.Get
79import Data.Binary.Put
80import Data.List (nub, find, intercalate)
81import Data.Data (Data)
82import Data.Typeable (Typeable)
83import Text.Printf
84import System.FilePath
85import System.Directory
86       (doesDirectoryExist, getDirectoryContents,
87        createDirectoryIfMissing, getModificationTime)
88import Control.Monad ( when, unless, zipWithM_ )
89import qualified Control.Exception as E
90import System.IO ( stderr, hPutStrLn )
91import qualified Data.Digest.CRC32 as CRC32
92import qualified Data.Map as M
93#if MIN_VERSION_binary(0,6,0)
94import Control.Applicative
95#endif
96#ifndef _WINDOWS
97import System.Posix.Files ( setFileTimes, setFileMode, fileMode, getSymbolicLinkStatus, symbolicLinkMode, readSymbolicLink, isSymbolicLink, unionFileModes, createSymbolicLink )
98import System.Posix.Types ( CMode(..) )
99import Data.List (partition)
100import Data.Maybe (fromJust)
101#endif
102
103-- from bytestring
104import qualified Data.ByteString as S
105import qualified Data.ByteString.Lazy as B
106import qualified Data.ByteString.Lazy.Char8 as C
107
108-- text
109import qualified Data.Text.Lazy as TL
110import qualified Data.Text.Lazy.Encoding as TL
111
112-- from zlib
113import qualified Codec.Compression.Zlib.Raw as Zlib
114
115#if !MIN_VERSION_binary(0, 6, 0)
116manySig :: Word32 -> Get a -> Get [a]
117manySig sig p = do
118    sig' <- lookAhead getWord32le
119    if sig == sig'
120        then do
121            r <- p
122            rs <- manySig sig p
123            return $ r : rs
124        else return []
125#endif
126
127
128------------------------------------------------------------------------
129
130-- | Structured representation of a zip archive, including directory
131-- information and contents (in lazy bytestrings).
132data Archive = Archive
133                { zEntries                :: [Entry]              -- ^ Files in zip archive
134                , zSignature              :: Maybe B.ByteString   -- ^ Digital signature
135                , zComment                :: B.ByteString         -- ^ Comment for whole zip archive
136                } deriving (Read, Show)
137
138instance Binary Archive where
139  put = putArchive
140  get = getArchive
141
142-- | Representation of an archived file, including content and metadata.
143data Entry = Entry
144               { eRelativePath            :: FilePath            -- ^ Relative path, using '/' as separator
145               , eCompressionMethod       :: CompressionMethod   -- ^ Compression method
146               , eEncryptionMethod        :: EncryptionMethod    -- ^ Encryption method
147               , eLastModified            :: Integer             -- ^ Modification time (seconds since unix epoch)
148               , eCRC32                   :: Word32              -- ^ CRC32 checksum
149               , eCompressedSize          :: Word32              -- ^ Compressed size in bytes
150               , eUncompressedSize        :: Word32              -- ^ Uncompressed size in bytes
151               , eExtraField              :: B.ByteString        -- ^ Extra field - unused by this library
152               , eFileComment             :: B.ByteString        -- ^ File comment - unused by this library
153               , eVersionMadeBy           :: Word16              -- ^ Version made by field
154               , eInternalFileAttributes  :: Word16              -- ^ Internal file attributes - unused by this library
155               , eExternalFileAttributes  :: Word32              -- ^ External file attributes (system-dependent)
156               , eCompressedData          :: B.ByteString        -- ^ Compressed contents of file
157               } deriving (Read, Show, Eq)
158
159-- | Compression methods.
160data CompressionMethod = Deflate
161                       | NoCompression
162                       deriving (Read, Show, Eq)
163
164data EncryptionMethod = NoEncryption            -- ^ Entry is not encrypted
165                      | PKWAREEncryption Word8  -- ^ Entry is encrypted with the traditional PKWARE encryption
166                      deriving (Read, Show, Eq)
167
168-- | The way the password should be verified during entry decryption
169data PKWAREVerificationType = CheckTimeByte
170                            | CheckCRCByte
171                            deriving (Read, Show, Eq)
172
173-- | Options for 'addFilesToArchive' and 'extractFilesFromArchive'.
174data ZipOption = OptRecursive               -- ^ Recurse into directories when adding files
175               | OptVerbose                 -- ^ Print information to stderr
176               | OptDestination FilePath    -- ^ Directory in which to extract
177               | OptLocation FilePath Bool  -- ^ Where to place file when adding files and whether to append current path
178               | OptPreserveSymbolicLinks   -- ^ Preserve symbolic links as such. This option is ignored on Windows.
179               deriving (Read, Show, Eq)
180
181data ZipException =
182    CRC32Mismatch FilePath
183  | UnsafePath FilePath
184  | CannotWriteEncryptedEntry FilePath
185  deriving (Show, Typeable, Data, Eq)
186
187instance E.Exception ZipException
188
189-- | A zip archive with no contents.
190emptyArchive :: Archive
191emptyArchive = Archive
192                { zEntries                  = []
193                , zSignature              = Nothing
194                , zComment                = B.empty }
195
196-- | Reads an 'Archive' structure from a raw zip archive (in a lazy bytestring).
197toArchive :: B.ByteString -> Archive
198toArchive = decode
199
200-- | Like 'toArchive', but returns an 'Either' value instead of raising an
201-- error if the archive cannot be decoded.  NOTE:  This function only
202-- works properly when the library is compiled against binary >= 0.7.
203-- With earlier versions, it will always return a Right value,
204-- raising an error if parsing fails.
205toArchiveOrFail :: B.ByteString -> Either String Archive
206#if MIN_VERSION_binary(0,7,0)
207toArchiveOrFail bs = case decodeOrFail bs of
208                           Left (_,_,e)  -> Left e
209                           Right (_,_,x) -> Right x
210#else
211toArchiveOrFail bs = Right $ toArchive bs
212#endif
213
214-- | Writes an 'Archive' structure to a raw zip archive (in a lazy bytestring).
215fromArchive :: Archive -> B.ByteString
216fromArchive = encode
217
218-- | Returns a list of files in a zip archive.
219filesInArchive :: Archive -> [FilePath]
220filesInArchive = map eRelativePath . zEntries
221
222-- | Adds an entry to a zip archive, or updates an existing entry.
223addEntryToArchive :: Entry -> Archive -> Archive
224addEntryToArchive entry archive =
225  let archive'   = deleteEntryFromArchive (eRelativePath entry) archive
226      oldEntries = zEntries archive'
227  in  archive' { zEntries = entry : oldEntries }
228
229-- | Deletes an entry from a zip archive.
230deleteEntryFromArchive :: FilePath -> Archive -> Archive
231deleteEntryFromArchive path archive =
232  archive { zEntries = [e | e <- zEntries archive
233                       , not (eRelativePath e `matches` path)] }
234
235-- | Returns Just the zip entry with the specified path, or Nothing.
236findEntryByPath :: FilePath -> Archive -> Maybe Entry
237findEntryByPath path archive =
238  find (\e -> path `matches` eRelativePath e) (zEntries archive)
239
240-- | Returns uncompressed contents of zip entry.
241fromEntry :: Entry -> B.ByteString
242fromEntry entry =
243  decompressData (eCompressionMethod entry) (eCompressedData entry)
244
245-- | Returns decrypted and uncompressed contents of zip entry.
246fromEncryptedEntry :: String -> Entry -> Maybe B.ByteString
247fromEncryptedEntry password entry =
248  decompressData (eCompressionMethod entry) <$> decryptData password (eEncryptionMethod entry) (eCompressedData entry)
249
250-- | Check if an 'Entry' is encrypted
251isEncryptedEntry :: Entry -> Bool
252isEncryptedEntry entry =
253  case eEncryptionMethod entry of
254    (PKWAREEncryption _) -> True
255    _ -> False
256
257-- | Create an 'Entry' with specified file path, modification time, and contents.
258toEntry :: FilePath         -- ^ File path for entry
259        -> Integer          -- ^ Modification time for entry (seconds since unix epoch)
260        -> B.ByteString     -- ^ Contents of entry
261        -> Entry
262toEntry path modtime contents =
263  let uncompressedSize = B.length contents
264      compressedData = compressData Deflate contents
265      compressedSize = B.length compressedData
266      -- only use compression if it helps!
267      (compressionMethod, finalData, finalSize) =
268        if uncompressedSize <= compressedSize
269           then (NoCompression, contents, uncompressedSize)
270           else (Deflate, compressedData, compressedSize)
271      crc32 = CRC32.crc32 contents
272  in  Entry { eRelativePath            = normalizePath path
273            , eCompressionMethod       = compressionMethod
274            , eEncryptionMethod        = NoEncryption
275            , eLastModified            = modtime
276            , eCRC32                   = crc32
277            , eCompressedSize          = fromIntegral finalSize
278            , eUncompressedSize        = fromIntegral uncompressedSize
279            , eExtraField              = B.empty
280            , eFileComment             = B.empty
281            , eVersionMadeBy           = 0  -- FAT
282            , eInternalFileAttributes  = 0  -- potentially non-text
283            , eExternalFileAttributes  = 0  -- appropriate if from stdin
284            , eCompressedData          = finalData
285            }
286
287-- | Generates a 'Entry' from a file or directory.
288readEntry :: [ZipOption] -> FilePath -> IO Entry
289readEntry opts path = do
290  isDir <- doesDirectoryExist path
291#ifdef _WINDOWS
292  let isSymLink = False
293#else
294  fs <- getSymbolicLinkStatus path
295  let isSymLink = isSymbolicLink fs
296#endif
297 -- make sure directories end in / and deal with the OptLocation option
298  let path' = let p = path ++ (case reverse path of
299                                    ('/':_) -> ""
300                                    _ | isDir && not isSymLink -> "/"
301                                    _ | isDir && isSymLink -> ""
302                                      | otherwise -> "") in
303              (case [(l,a) | OptLocation l a <- opts] of
304                    ((l,a):_) -> if a then l </> p else l </> takeFileName p
305                    _         -> p)
306  contents <-
307#ifndef _WINDOWS
308              if isSymLink
309                 then do
310                   linkTarget <- readSymbolicLink path
311                   return $ C.pack linkTarget
312                 else
313#endif
314                   if isDir
315                      then
316                        return B.empty
317                      else
318                        B.fromStrict <$> S.readFile path
319  modEpochTime <- (floor . utcTimeToPOSIXSeconds) <$> getModificationTime path
320  let entry = toEntry path' modEpochTime contents
321
322  entryE <-
323#ifdef _WINDOWS
324        return $ entry { eVersionMadeBy = 0x0000 } -- FAT/VFAT/VFAT32 file attributes
325#else
326        do
327           let fm = if isSymLink
328                      then unionFileModes symbolicLinkMode (fileMode fs)
329                      else fileMode fs
330
331           let modes = fromIntegral $ shiftL (toInteger fm) 16
332           return $ entry { eExternalFileAttributes = modes,
333                            eVersionMadeBy = 0x0300 } -- UNIX file attributes
334#endif
335
336  when (OptVerbose `elem` opts) $ do
337    let compmethod = case eCompressionMethod entryE of
338                     Deflate       -> "deflated"
339                     NoCompression -> "stored"
340    hPutStrLn stderr $
341      printf "  adding: %s (%s %.f%%)" (eRelativePath entryE)
342      compmethod (100 - (100 * compressionRatio entryE))
343  return entryE
344
345-- | Writes contents of an 'Entry' to a file.  Throws a
346-- 'CRC32Mismatch' exception if the CRC32 checksum for the entry
347-- does not match the uncompressed data.
348writeEntry :: [ZipOption] -> Entry -> IO ()
349writeEntry opts entry = do
350  when (isEncryptedEntry entry) $
351    E.throwIO $ CannotWriteEncryptedEntry (eRelativePath entry)
352  let relpath = eRelativePath entry
353  let isUnsafePath = ".." `elem` splitDirectories relpath
354  when isUnsafePath $
355    E.throwIO $ UnsafePath relpath
356  path <- case [d | OptDestination d <- opts] of
357             (x:_) -> return (x </> relpath)
358             _ | isAbsolute relpath
359                   -> E.throwIO $ UnsafePath relpath
360               | otherwise
361                   -> return relpath
362  -- create directories if needed
363  let dir = takeDirectory path
364  exists <- doesDirectoryExist dir
365  unless exists $ do
366    createDirectoryIfMissing True dir
367    when (OptVerbose `elem` opts) $
368      hPutStrLn stderr $ "  creating: " ++ dir
369  if not (null path) && last path == '/' -- path is a directory
370     then return ()
371     else do
372       when (OptVerbose `elem` opts) $
373         hPutStrLn stderr $ case eCompressionMethod entry of
374                                 Deflate       -> " inflating: " ++ path
375                                 NoCompression -> "extracting: " ++ path
376       let uncompressedData = fromEntry entry
377       if eCRC32 entry == CRC32.crc32 uncompressedData
378          then B.writeFile path uncompressedData
379          else E.throwIO $ CRC32Mismatch path
380#ifndef _WINDOWS
381       let modes = fromIntegral $ shiftR (eExternalFileAttributes entry) 16
382       when (eVersionMadeBy entry .&. 0xFF00 == 0x0300 &&
383         modes /= 0) $ setFileMode path modes
384#endif
385  -- Note that last modified times are supported only for POSIX, not for
386  -- Windows.
387  setFileTimeStamp path (eLastModified entry)
388
389#ifndef _WINDOWS
390-- | Write an 'Entry' representing a symbolic link to a file.
391-- If the 'Entry' does not represent a symbolic link or
392-- the options do not contain 'OptPreserveSymbolicLinks`, this
393-- function behaves like `writeEntry`.
394writeSymbolicLinkEntry :: [ZipOption] -> Entry -> IO ()
395writeSymbolicLinkEntry opts entry =
396  if OptPreserveSymbolicLinks `notElem` opts
397     then writeEntry opts entry
398     else do
399        if isEntrySymbolicLink entry
400           then do
401             let prefixPath = case [d | OptDestination d <- opts] of
402                                   (x:_) -> x
403                                   _     -> ""
404             let targetPath = fromJust . symbolicLinkEntryTarget $ entry
405             let symlinkPath = prefixPath </> eRelativePath entry
406             when (OptVerbose `elem` opts) $ do
407               hPutStrLn stderr $ "linking " ++ symlinkPath ++ " to " ++ targetPath
408             createSymbolicLink targetPath symlinkPath
409           else writeEntry opts entry
410
411
412-- | Get the target of a 'Entry' representing a symbolic link. This might fail
413-- if the 'Entry' does not represent a symbolic link
414symbolicLinkEntryTarget :: Entry -> Maybe FilePath
415symbolicLinkEntryTarget entry | isEntrySymbolicLink entry = Just . C.unpack $ fromEntry entry
416                              | otherwise = Nothing
417
418-- | Check if an 'Entry' represents a symbolic link
419isEntrySymbolicLink :: Entry -> Bool
420isEntrySymbolicLink entry = entryCMode entry .&. symbolicLinkMode == symbolicLinkMode
421
422-- | Get the 'eExternalFileAttributes' of an 'Entry' as a 'CMode' a.k.a. 'FileMode'
423entryCMode :: Entry -> CMode
424entryCMode entry = CMode (fromIntegral $ shiftR (eExternalFileAttributes entry) 16)
425#endif
426
427-- | Add the specified files to an 'Archive'.  If 'OptRecursive' is specified,
428-- recursively add files contained in directories. if 'OptPreserveSymbolicLinks'
429-- is specified, don't recurse into it. If 'OptVerbose' is specified,
430-- print messages to stderr.
431addFilesToArchive :: [ZipOption] -> Archive -> [FilePath] -> IO Archive
432addFilesToArchive opts archive files = do
433  filesAndChildren <- if OptRecursive `elem` opts
434#ifdef _WINDOWS
435                         then mapM getDirectoryContentsRecursive files >>= return . nub . concat
436#else
437                         then nub . concat <$> mapM (getDirectoryContentsRecursive' opts) files
438#endif
439                         else return files
440  entries <- mapM (readEntry opts) filesAndChildren
441  return $ foldr addEntryToArchive archive entries
442
443-- | Extract all files from an 'Archive', creating directories
444-- as needed.  If 'OptVerbose' is specified, print messages to stderr.
445-- Note that the last-modified time is set correctly only in POSIX,
446-- not in Windows.
447-- This function fails if encrypted entries are present
448extractFilesFromArchive :: [ZipOption] -> Archive -> IO ()
449extractFilesFromArchive opts archive = do
450  let entries = zEntries archive
451  if OptPreserveSymbolicLinks `elem` opts
452    then do
453#ifdef _WINDOWS
454      mapM_ (writeEntry opts) entries
455#else
456      let (symbolicLinkEntries, nonSymbolicLinkEntries) = partition isEntrySymbolicLink entries
457      mapM_ (writeEntry opts) nonSymbolicLinkEntries
458      mapM_ (writeSymbolicLinkEntry opts) symbolicLinkEntries
459#endif
460    else mapM_ (writeEntry opts) entries
461
462--------------------------------------------------------------------------------
463-- Internal functions for reading and writing zip binary format.
464
465-- Note that even on Windows, zip files use "/" internally as path separator.
466normalizePath :: FilePath -> String
467normalizePath path =
468  let dir   = takeDirectory path
469      fn    = takeFileName path
470      (_drive, dir') = splitDrive dir
471      -- note: some versions of filepath return ["."] if no dir
472      dirParts = filter (/=".") $ splitDirectories dir'
473  in  intercalate "/" (dirParts ++ [fn])
474
475-- Equality modulo normalization.  So, "./foo" `matches` "foo".
476matches :: FilePath -> FilePath -> Bool
477matches fp1 fp2 = normalizePath fp1 == normalizePath fp2
478
479-- | Uncompress a lazy bytestring.
480compressData :: CompressionMethod -> B.ByteString -> B.ByteString
481compressData Deflate       = Zlib.compress
482compressData NoCompression = id
483
484-- | Compress a lazy bytestring.
485decompressData :: CompressionMethod -> B.ByteString -> B.ByteString
486decompressData Deflate       = Zlib.decompress
487decompressData NoCompression = id
488
489-- | Decrypt a lazy bytestring
490-- Returns Nothing if password is incorrect
491decryptData :: String -> EncryptionMethod -> B.ByteString -> Maybe B.ByteString
492decryptData _ NoEncryption s = Just s
493decryptData password (PKWAREEncryption controlByte) s =
494  let headerlen = 12
495      initKeys = (305419896, 591751049, 878082192)
496      startKeys = B.foldl pkwareUpdateKeys initKeys (C.pack password)
497      (header, content) = B.splitAt headerlen $ snd $ B.mapAccumL pkwareDecryptByte startKeys s
498  in if B.last header == controlByte
499        then Just content
500        else Nothing
501
502-- | PKWARE decryption context
503type DecryptionCtx = (Word32, Word32, Word32)
504
505-- | An interation of the PKWARE decryption algorithm
506pkwareDecryptByte :: DecryptionCtx -> Word8 -> (DecryptionCtx, Word8)
507pkwareDecryptByte keys@(_, _, key2) inB =
508  let tmp = key2 .|. 2
509      tmp' = fromIntegral ((tmp * (tmp `xor` 1)) `shiftR` 8) :: Word8
510      outB = inB `xor` tmp'
511  in (pkwareUpdateKeys keys outB, outB)
512
513-- | Update decryption keys after a decrypted byte
514pkwareUpdateKeys :: DecryptionCtx -> Word8 -> DecryptionCtx
515pkwareUpdateKeys (key0, key1, key2) inB =
516  let key0' = CRC32.crc32Update (key0 `xor` 0xffffffff) [inB] `xor` 0xffffffff
517      key1' = (key1 + (key0' .&. 0xff)) * 134775813 + 1
518      key1Byte = fromIntegral (key1' `shiftR` 24) :: Word8
519      key2' = CRC32.crc32Update (key2 `xor` 0xffffffff) [key1Byte] `xor` 0xffffffff
520  in (key0', key1', key2')
521
522-- | Calculate compression ratio for an entry (for verbose output).
523compressionRatio :: Entry -> Float
524compressionRatio entry =
525  if eUncompressedSize entry == 0
526     then 1
527     else fromIntegral (eCompressedSize entry) / fromIntegral (eUncompressedSize entry)
528
529-- | MSDOS datetime: a pair of Word16s (date, time) with the following structure:
530--
531-- > DATE bit     0 - 4           5 - 8           9 - 15
532-- >      value   day (1 - 31)    month (1 - 12)  years from 1980
533-- > TIME bit     0 - 4           5 - 10          11 - 15
534-- >      value   seconds*        minute          hour
535-- >              *stored in two-second increments
536--
537data MSDOSDateTime = MSDOSDateTime { msDOSDate :: Word16
538                                   , msDOSTime :: Word16
539                                   } deriving (Read, Show, Eq)
540
541-- | Epoch time corresponding to the minimum DOS DateTime (Jan 1 1980 00:00:00).
542minMSDOSDateTime :: Integer
543minMSDOSDateTime = 315532800
544
545-- | Convert a clock time to a MSDOS datetime.  The MSDOS time will be relative to UTC.
546epochTimeToMSDOSDateTime :: Integer -> MSDOSDateTime
547epochTimeToMSDOSDateTime epochtime | epochtime < minMSDOSDateTime =
548  epochTimeToMSDOSDateTime minMSDOSDateTime
549  -- if time is earlier than minimum DOS datetime, return minimum
550epochTimeToMSDOSDateTime epochtime =
551  let
552    UTCTime
553      (toGregorian -> (fromInteger -> year, month, day))
554      (timeToTimeOfDay -> (TimeOfDay hour minutes (floor -> sec)))
555      = posixSecondsToUTCTime (fromIntegral epochtime)
556
557    dosTime = toEnum $ (sec `div` 2) + shiftL minutes 5 + shiftL hour 11
558    dosDate = toEnum $ day + shiftL month 5 + shiftL (year - 1980) 9
559  in  MSDOSDateTime { msDOSDate = dosDate, msDOSTime = dosTime }
560
561-- | Convert a MSDOS datetime to a 'ClockTime'.
562msDOSDateTimeToEpochTime :: MSDOSDateTime -> Integer
563msDOSDateTimeToEpochTime MSDOSDateTime {msDOSDate = dosDate, msDOSTime = dosTime} =
564  let seconds = fromIntegral $ 2 * (dosTime .&. 0O37)
565      minutes = fromIntegral $ shiftR dosTime 5 .&. 0O77
566      hour    = fromIntegral $ shiftR dosTime 11
567      day     = fromIntegral $ dosDate .&. 0O37
568      month   = fromIntegral ((shiftR dosDate 5) .&. 0O17)
569      year    = fromIntegral $ shiftR dosDate 9
570      utc = UTCTime (fromGregorian (1980 + year) month day) (3600 * hour + 60 * minutes + seconds)
571  in floor (utcTimeToPOSIXSeconds utc)
572
573#ifndef _WINDOWS
574getDirectoryContentsRecursive' :: [ZipOption] -> FilePath -> IO [FilePath]
575getDirectoryContentsRecursive' opts path =
576  if OptPreserveSymbolicLinks `elem` opts
577     then do
578       isDir <- doesDirectoryExist path
579       if isDir
580          then do
581            isSymLink <- fmap isSymbolicLink $ getSymbolicLinkStatus path
582            if isSymLink
583               then return [path]
584               else getDirectoryContentsRecursivelyBy (getDirectoryContentsRecursive' opts) path
585          else return [path]
586     else getDirectoryContentsRecursive path
587#endif
588
589getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
590getDirectoryContentsRecursive path = do
591  isDir <- doesDirectoryExist path
592  if isDir
593     then getDirectoryContentsRecursivelyBy getDirectoryContentsRecursive path
594     else return [path]
595
596getDirectoryContentsRecursivelyBy :: (FilePath -> IO [FilePath]) -> FilePath -> IO [FilePath]
597getDirectoryContentsRecursivelyBy exploreMethod path = do
598       contents <- getDirectoryContents path
599       let contents' = map (path </>) $ filter (`notElem` ["..","."]) contents
600       children <- mapM exploreMethod contents'
601       if path == "."
602          then return (concat children)
603          else return (path : concat children)
604
605
606setFileTimeStamp :: FilePath -> Integer -> IO ()
607#ifdef _WINDOWS
608setFileTimeStamp _ _ = return () -- TODO: figure out how to set the timestamp on Windows
609#else
610setFileTimeStamp file epochtime = do
611  let epochtime' = fromInteger epochtime
612  setFileTimes file epochtime' epochtime'
613#endif
614
615-- A zip file has the following format (*'d items are not supported in this implementation):
616--
617-- >   [local file header 1]
618-- >   [file data 1]
619-- >   [data descriptor 1*]
620-- >   .
621-- >   .
622-- >   .
623-- >   [local file header n]
624-- >   [file data n]
625-- >   [data descriptor n*]
626-- >   [archive decryption header*]
627-- >   [archive extra data record*]
628-- >   [central directory]
629-- >   [zip64 end of central directory record*]
630-- >   [zip64 end of central directory locator*]
631-- >   [end of central directory record]
632--
633-- Files stored in arbitrary order.  All values are stored in
634-- little-endian byte order unless otherwise specified.
635--
636--  Central directory structure:
637--
638-- >   [file header 1]
639-- >   .
640-- >   .
641-- >   .
642-- >   [file header n]
643-- >   [digital signature]
644--
645--  End of central directory record:
646--
647-- >   end of central dir signature    4 bytes  (0x06054b50)
648-- >   number of this disk             2 bytes
649-- >   number of the disk with the
650-- >   start of the central directory  2 bytes
651-- >   total number of entries in the
652-- >   central directory on this disk  2 bytes
653-- >   total number of entries in
654-- >   the central directory           2 bytes
655-- >   size of the central directory   4 bytes
656-- >   offset of start of central
657-- >   directory with respect to
658-- >   the starting disk number        4 bytes
659-- >   .ZIP file comment length        2 bytes
660-- >   .ZIP file comment       (variable size)
661
662getArchive :: Get Archive
663getArchive = do
664#if MIN_VERSION_binary(0,6,0)
665  locals <- many getLocalFile
666  files <- many (getFileHeader (M.fromList locals))
667  digSig <- Just `fmap` getDigitalSignature <|> return Nothing
668#else
669  locals <- manySig 0x04034b50 getLocalFile
670  files <- manySig 0x02014b50 (getFileHeader (M.fromList locals))
671  digSig <- lookAheadM getDigitalSignature
672#endif
673  endSig <- getWord32le
674  unless (endSig == 0x06054b50)
675    $ fail "Did not find end of central directory signature"
676  skip 2 -- disk number
677  skip 2 -- disk number of central directory
678  skip 2 -- num entries on this disk
679  skip 2 -- num entries in central directory
680  skip 4 -- central directory size
681  skip 4 -- offset of central directory
682  commentLength <- getWord16le
683  zipComment <- getLazyByteString (toEnum $ fromEnum commentLength)
684  return Archive
685           { zEntries                = files
686           , zSignature              = digSig
687           , zComment                = zipComment
688           }
689
690putArchive :: Archive -> Put
691putArchive archive = do
692  mapM_ putLocalFile $ zEntries archive
693  let localFileSizes = map localFileSize $ zEntries archive
694  let offsets = scanl (+) 0 localFileSizes
695  let cdOffset = last offsets
696  _ <- zipWithM_ putFileHeader offsets (zEntries archive)
697  putDigitalSignature $ zSignature archive
698  putWord32le 0x06054b50
699  putWord16le 0 -- disk number
700  putWord16le 0 -- disk number of central directory
701  putWord16le $ fromIntegral $ length $ zEntries archive -- number of entries this disk
702  putWord16le $ fromIntegral $ length $ zEntries archive -- number of entries
703  putWord32le $ sum $ map fileHeaderSize $ zEntries archive  -- size of central directory
704  putWord32le $ fromIntegral cdOffset                    -- offset of central dir
705  putWord16le $ fromIntegral $ B.length $ zComment archive
706  putLazyByteString $ zComment archive
707
708
709fileHeaderSize :: Entry -> Word32
710fileHeaderSize f =
711  fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 +
712    fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
713    B.length (eExtraField f) + B.length (eFileComment f)
714
715localFileSize :: Entry -> Word32
716localFileSize f =
717  fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 +
718    fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) +
719    B.length (eExtraField f) + B.length (eCompressedData f)
720
721-- Local file header:
722--
723-- >    local file header signature     4 bytes  (0x04034b50)
724-- >    version needed to extract       2 bytes
725-- >    general purpose bit flag        2 bytes
726-- >    compression method              2 bytes
727-- >    last mod file time              2 bytes
728-- >    last mod file date              2 bytes
729-- >    crc-32                          4 bytes
730-- >    compressed size                 4 bytes
731-- >    uncompressed size               4 bytes
732-- >    file name length                2 bytes
733-- >    extra field length              2 bytes
734--
735-- >    file name (variable size)
736-- >    extra field (variable size)
737--
738-- Note that if bit 3 of the general purpose bit flag is set, then the
739-- compressed size will be 0 and the size will be stored instead in a
740-- data descriptor record AFTER the file contents. The record normally
741-- begins with the signature 0x08074b50, then 4 bytes crc-32, 4 bytes
742-- compressed size, 4 bytes uncompressed size.
743
744getLocalFile :: Get (Word32, B.ByteString)
745getLocalFile = do
746  offset <- bytesRead
747  getWord32le >>= ensure (== 0x04034b50)
748  skip 2  -- version
749  bitflag <- getWord16le
750  skip 2  -- compressionMethod
751  skip 2  -- last mod file time
752  skip 2  -- last mod file date
753  skip 4  -- crc32
754  compressedSize <- getWord32le
755  when (compressedSize == 0xFFFFFFFF) $
756    fail "Can't read ZIP64 archive."
757  skip 4  -- uncompressedsize
758  fileNameLength <- getWord16le
759  extraFieldLength <- getWord16le
760  skip (fromIntegral fileNameLength)  -- filename
761  skip (fromIntegral extraFieldLength) -- extra field
762  compressedData <- if bitflag .&. 0O10 == 0
763      then getLazyByteString (fromIntegral compressedSize)
764      else -- If bit 3 of general purpose bit flag is set,
765           -- then we need to read until we get to the
766           -- data descriptor record.  We assume that the
767           -- record has signature 0x08074b50; this is not required
768           -- by the specification but is common.
769           do raw <- getWordsTilSig 0x08074b50
770              skip 4 -- crc32
771              cs <- getWord32le  -- compressed size
772              skip 4 -- uncompressed size
773              if fromIntegral cs == B.length raw
774                 then return raw
775                 else fail "Content size mismatch in data descriptor record"
776  return (fromIntegral offset, compressedData)
777
778getWordsTilSig :: Word32 -> Get B.ByteString
779#if MIN_VERSION_binary(0, 6, 0)
780getWordsTilSig sig = (B.fromChunks . reverse) `fmap` go Nothing []
781  where
782    sig' = S.pack [fromIntegral $ sig .&. 0xFF,
783                   fromIntegral $ sig `shiftR`  8 .&. 0xFF,
784                   fromIntegral $ sig `shiftR` 16 .&. 0xFF,
785                   fromIntegral $ sig `shiftR` 24 .&. 0xFF]
786    chunkSize = 16384
787    --chunkSize = 4 -- for testing prefix match
788    checkChunk chunk = do -- find in content
789          let (prefix, start) = S.breakSubstring sig' chunk
790          if S.null start
791            then return $ Right chunk
792            else return $ Left $ S.length prefix
793    go :: Maybe (Word8, Word8, Word8) -> [S.ByteString] -> Get [S.ByteString]
794    go prefixes acc = do
795      -- note: lookAheadE will rewind if the result is Left
796      eitherChunkOrIndex <- lookAheadE $ do
797          chunk <- getByteString chunkSize <|> B.toStrict `fmap` getRemainingLazyByteString
798          case prefixes of
799            Just (byte3,byte2,byte1) ->
800              let len = S.length chunk in
801                if len >= 1 &&
802                   S.pack [byte3,byte2,byte1,S.index chunk 0] == sig'
803                then return $ Left $ -3
804                else if len >= 2 &&
805                   S.pack [byte2,byte1,S.index chunk 0,S.index chunk 1] == sig'
806                then return $ Left $ -2
807                else if len >= 3 &&
808                   S.pack [byte1,S.index chunk 0,S.index chunk 1,S.index chunk 2] == sig'
809                then return $ Left $ -1
810                else checkChunk chunk
811            Nothing -> checkChunk chunk
812      case eitherChunkOrIndex of
813        Left index -> if index < 0
814            then do -- prefix match
815                skip (4 + index) -- skip over partial match in next chunk
816                return $ (S.take (S.length (head acc) + index) (head acc)) : (tail acc)
817            else do -- match inside this chunk
818                lastchunk <- getByteString index -- must read again
819                skip 4
820                return (lastchunk:acc)
821        Right chunk -> if len == chunkSize
822            then go prefixes' (chunk:acc)
823            else fail $ "getWordsTilSig: signature not found before EOF"
824          where
825            len = S.length chunk
826            prefixes' = Just $ (S.index chunk (len - 3), S.index chunk (len - 2), S.index chunk (len - 1))
827#else
828getWordsTilSig sig = B.pack `fmap` go []
829  where
830    go acc = do
831      sig' <- lookAhead getWord32le
832      if sig == sig'
833          then skip 4 >> return (reverse acc)
834          else do
835              w <- getWord8
836              go (w:acc)
837#endif
838
839putLocalFile :: Entry -> Put
840putLocalFile f = do
841  putWord32le 0x04034b50
842  putWord16le 20 -- version needed to extract (>=2.0)
843  putWord16le 0x802  -- general purpose bit flag (bit 1 = max compression, bit 11 = UTF-8)
844  putWord16le $ case eCompressionMethod f of
845                     NoCompression -> 0
846                     Deflate       -> 8
847  let modTime = epochTimeToMSDOSDateTime $ eLastModified f
848  putWord16le $ msDOSTime modTime
849  putWord16le $ msDOSDate modTime
850  putWord32le $ eCRC32 f
851  putWord32le $ eCompressedSize f
852  putWord32le $ eUncompressedSize f
853  putWord16le $ fromIntegral $ B.length $ fromString
854              $ normalizePath $ eRelativePath f
855  putWord16le $ fromIntegral $ B.length $ eExtraField f
856  putLazyByteString $ fromString $ normalizePath $ eRelativePath f
857  putLazyByteString $ eExtraField f
858  putLazyByteString $ eCompressedData f
859
860-- File header structure:
861--
862-- >    central file header signature   4 bytes  (0x02014b50)
863-- >    version made by                 2 bytes
864-- >    version needed to extract       2 bytes
865-- >    general purpose bit flag        2 bytes
866-- >    compression method              2 bytes
867-- >    last mod file time              2 bytes
868-- >    last mod file date              2 bytes
869-- >    crc-32                          4 bytes
870-- >    compressed size                 4 bytes
871-- >    uncompressed size               4 bytes
872-- >    file name length                2 bytes
873-- >    extra field length              2 bytes
874-- >    file comment length             2 bytes
875-- >    disk number start               2 bytes
876-- >    internal file attributes        2 bytes
877-- >    external file attributes        4 bytes
878-- >    relative offset of local header 4 bytes
879--
880-- >    file name (variable size)
881-- >    extra field (variable size)
882-- >    file comment (variable size)
883
884getFileHeader :: M.Map Word32 B.ByteString -- ^ map of (offset, content) pairs returned by getLocalFile
885              -> Get Entry
886getFileHeader locals = do
887  getWord32le >>= ensure (== 0x02014b50)
888  vmb <- getWord16le  -- version made by
889  versionNeededToExtract <- getWord8
890  skip 1 -- upper byte indicates OS part of "version needed to extract"
891  unless (versionNeededToExtract <= 20) $
892    fail "This archive requires zip >= 2.0 to extract."
893  bitflag <- getWord16le
894  rawCompressionMethod <- getWord16le
895  compressionMethod <- case rawCompressionMethod of
896                        0 -> return NoCompression
897                        8 -> return Deflate
898                        _ -> fail $ "Unknown compression method " ++ show rawCompressionMethod
899  lastModFileTime <- getWord16le
900  lastModFileDate <- getWord16le
901  crc32 <- getWord32le
902  encryptionMethod <- case (testBit bitflag 0, testBit bitflag 3, testBit bitflag 6) of
903                        (False, _, _) -> return NoEncryption
904                        (True, False, False) -> return $ PKWAREEncryption (fromIntegral (crc32 `shiftR` 24))
905                        (True, True, False) -> return $ PKWAREEncryption (fromIntegral (lastModFileTime `shiftR` 8))
906                        (True, _, True) -> fail "Strong encryption is not supported"
907
908  compressedSize <- getWord32le
909  uncompressedSize <- getWord32le
910  fileNameLength <- getWord16le
911  extraFieldLength <- getWord16le
912  fileCommentLength <- getWord16le
913  skip 2 -- disk number start
914  internalFileAttributes <- getWord16le
915  externalFileAttributes <- getWord32le
916  relativeOffset <- getWord32le
917  fileName <- getLazyByteString (toEnum $ fromEnum fileNameLength)
918  extraField <- getLazyByteString (toEnum $ fromEnum extraFieldLength)
919  fileComment <- getLazyByteString (toEnum $ fromEnum fileCommentLength)
920  compressedData <- case M.lookup relativeOffset locals of
921                    Just x  -> return x
922                    Nothing -> fail $ "Unable to find data at offset " ++
923                                        show relativeOffset
924  return Entry
925            { eRelativePath            = toString fileName
926            , eCompressionMethod       = compressionMethod
927            , eEncryptionMethod        = encryptionMethod
928            , eLastModified            = msDOSDateTimeToEpochTime $
929                                         MSDOSDateTime { msDOSDate = lastModFileDate,
930                                                         msDOSTime = lastModFileTime }
931            , eCRC32                   = crc32
932            , eCompressedSize          = compressedSize
933            , eUncompressedSize        = uncompressedSize
934            , eExtraField              = extraField
935            , eFileComment             = fileComment
936            , eVersionMadeBy           = vmb
937            , eInternalFileAttributes  = internalFileAttributes
938            , eExternalFileAttributes  = externalFileAttributes
939            , eCompressedData          = compressedData
940            }
941
942putFileHeader :: Word32        -- ^ offset
943              -> Entry
944              -> Put
945putFileHeader offset local = do
946  putWord32le 0x02014b50
947  putWord16le $ eVersionMadeBy local
948  putWord16le 20 -- version needed to extract (>= 2.0)
949  putWord16le 0x802  -- general purpose bit flag (bit 1 = max compression, bit 11 = UTF-8)
950  putWord16le $ case eCompressionMethod local of
951                     NoCompression -> 0
952                     Deflate       -> 8
953  let modTime = epochTimeToMSDOSDateTime $ eLastModified local
954  putWord16le $ msDOSTime modTime
955  putWord16le $ msDOSDate modTime
956  putWord32le $ eCRC32 local
957  putWord32le $ eCompressedSize local
958  putWord32le $ eUncompressedSize local
959  putWord16le $ fromIntegral $ B.length $ fromString
960              $ normalizePath $ eRelativePath local
961  putWord16le $ fromIntegral $ B.length $ eExtraField local
962  putWord16le $ fromIntegral $ B.length $ eFileComment local
963  putWord16le 0  -- disk number start
964  putWord16le $ eInternalFileAttributes local
965  putWord32le $ eExternalFileAttributes local
966  putWord32le offset
967  putLazyByteString $ fromString $ normalizePath $ eRelativePath local
968  putLazyByteString $ eExtraField local
969  putLazyByteString $ eFileComment local
970
971--  Digital signature:
972--
973-- >     header signature                4 bytes  (0x05054b50)
974-- >     size of data                    2 bytes
975-- >     signature data (variable size)
976
977#if MIN_VERSION_binary(0,6,0)
978getDigitalSignature :: Get B.ByteString
979getDigitalSignature = do
980  getWord32le >>= ensure (== 0x05054b50)
981  sigSize <- getWord16le
982  getLazyByteString (toEnum $ fromEnum sigSize)
983#else
984getDigitalSignature :: Get (Maybe B.ByteString)
985getDigitalSignature = do
986  hdrSig <- getWord32le
987  if hdrSig /= 0x05054b50
988     then return Nothing
989     else do
990        sigSize <- getWord16le
991        getLazyByteString (toEnum $ fromEnum sigSize) >>= return . Just
992#endif
993
994putDigitalSignature :: Maybe B.ByteString -> Put
995putDigitalSignature Nothing = return ()
996putDigitalSignature (Just sig) = do
997  putWord32le 0x05054b50
998  putWord16le $ fromIntegral $ B.length sig
999  putLazyByteString sig
1000
1001ensure :: (a -> Bool) -> a -> Get ()
1002ensure p val =
1003  if p val
1004     then return ()
1005     else fail "ensure not satisfied"
1006
1007toString :: B.ByteString -> String
1008toString = TL.unpack . TL.decodeUtf8
1009
1010fromString :: String -> B.ByteString
1011fromString = TL.encodeUtf8 . TL.pack
1012