1{-# LANGUAGE CPP #-} 2{-# LANGUAGE DeriveDataTypeable #-} 3{-# LANGUAGE ViewPatterns #-} 4------------------------------------------------------------------------ 5-- | 6-- Module : Codec.Archive.Zip 7-- Copyright : John MacFarlane 8-- License : BSD3 9-- 10-- Maintainer : John MacFarlane < jgm at berkeley dot edu > 11-- Stability : unstable 12-- Portability : so far only tested on GHC 13-- 14-- The zip-archive library provides functions for creating, modifying, 15-- and extracting files from zip archives. 16-- 17-- Certain simplifying assumptions are made about the zip archives: in 18-- particular, there is no support for strong encryption, zip files that span 19-- multiple disks, ZIP64, OS-specific file attributes, or compression 20-- methods other than Deflate. However, the library should be able to 21-- read the most common zip archives, and the archives it produces should 22-- be readable by all standard unzip programs. 23-- 24-- As an example of the use of the library, a standalone zip archiver 25-- and extracter, Zip.hs, is provided in the source distribution. 26-- 27-- For more information on the format of zip archives, consult 28-- <http://www.pkware.com/documents/casestudies/APPNOTE.TXT> 29------------------------------------------------------------------------ 30 31module Codec.Archive.Zip 32 ( 33 34 -- * Data structures 35 Archive (..) 36 , Entry (..) 37 , CompressionMethod (..) 38 , EncryptionMethod (..) 39 , ZipOption (..) 40 , ZipException (..) 41 , emptyArchive 42 43 -- * Pure functions for working with zip archives 44 , toArchive 45 , toArchiveOrFail 46 , fromArchive 47 , filesInArchive 48 , addEntryToArchive 49 , deleteEntryFromArchive 50 , findEntryByPath 51 , fromEntry 52 , fromEncryptedEntry 53 , isEncryptedEntry 54 , toEntry 55#ifndef _WINDOWS 56 , isEntrySymbolicLink 57 , symbolicLinkEntryTarget 58 , entryCMode 59#endif 60 61 -- * IO functions for working with zip archives 62 , readEntry 63 , writeEntry 64#ifndef _WINDOWS 65 , writeSymbolicLinkEntry 66#endif 67 , addFilesToArchive 68 , extractFilesFromArchive 69 70 ) where 71 72import Data.Time.Calendar ( toGregorian, fromGregorian ) 73import Data.Time.Clock ( UTCTime(..) ) 74import Data.Time.Clock.POSIX ( posixSecondsToUTCTime, utcTimeToPOSIXSeconds ) 75import Data.Time.LocalTime ( TimeOfDay(..), timeToTimeOfDay ) 76import Data.Bits ( shiftL, shiftR, (.&.), (.|.), xor, testBit ) 77import Data.Binary 78import Data.Binary.Get 79import Data.Binary.Put 80import Data.List (nub, find, intercalate) 81import Data.Data (Data) 82import Data.Typeable (Typeable) 83import Text.Printf 84import System.FilePath 85import System.Directory 86 (doesDirectoryExist, getDirectoryContents, 87 createDirectoryIfMissing, getModificationTime) 88import Control.Monad ( when, unless, zipWithM_ ) 89import qualified Control.Exception as E 90import System.IO ( stderr, hPutStrLn ) 91import qualified Data.Digest.CRC32 as CRC32 92import qualified Data.Map as M 93#if MIN_VERSION_binary(0,6,0) 94import Control.Applicative 95#endif 96#ifndef _WINDOWS 97import System.Posix.Files ( setFileTimes, setFileMode, fileMode, getSymbolicLinkStatus, symbolicLinkMode, readSymbolicLink, isSymbolicLink, unionFileModes, createSymbolicLink ) 98import System.Posix.Types ( CMode(..) ) 99import Data.List (partition) 100import Data.Maybe (fromJust) 101#endif 102 103-- from bytestring 104import qualified Data.ByteString as S 105import qualified Data.ByteString.Lazy as B 106import qualified Data.ByteString.Lazy.Char8 as C 107 108-- text 109import qualified Data.Text.Lazy as TL 110import qualified Data.Text.Lazy.Encoding as TL 111 112-- from zlib 113import qualified Codec.Compression.Zlib.Raw as Zlib 114 115#if !MIN_VERSION_binary(0, 6, 0) 116manySig :: Word32 -> Get a -> Get [a] 117manySig sig p = do 118 sig' <- lookAhead getWord32le 119 if sig == sig' 120 then do 121 r <- p 122 rs <- manySig sig p 123 return $ r : rs 124 else return [] 125#endif 126 127 128------------------------------------------------------------------------ 129 130-- | Structured representation of a zip archive, including directory 131-- information and contents (in lazy bytestrings). 132data Archive = Archive 133 { zEntries :: [Entry] -- ^ Files in zip archive 134 , zSignature :: Maybe B.ByteString -- ^ Digital signature 135 , zComment :: B.ByteString -- ^ Comment for whole zip archive 136 } deriving (Read, Show) 137 138instance Binary Archive where 139 put = putArchive 140 get = getArchive 141 142-- | Representation of an archived file, including content and metadata. 143data Entry = Entry 144 { eRelativePath :: FilePath -- ^ Relative path, using '/' as separator 145 , eCompressionMethod :: CompressionMethod -- ^ Compression method 146 , eEncryptionMethod :: EncryptionMethod -- ^ Encryption method 147 , eLastModified :: Integer -- ^ Modification time (seconds since unix epoch) 148 , eCRC32 :: Word32 -- ^ CRC32 checksum 149 , eCompressedSize :: Word32 -- ^ Compressed size in bytes 150 , eUncompressedSize :: Word32 -- ^ Uncompressed size in bytes 151 , eExtraField :: B.ByteString -- ^ Extra field - unused by this library 152 , eFileComment :: B.ByteString -- ^ File comment - unused by this library 153 , eVersionMadeBy :: Word16 -- ^ Version made by field 154 , eInternalFileAttributes :: Word16 -- ^ Internal file attributes - unused by this library 155 , eExternalFileAttributes :: Word32 -- ^ External file attributes (system-dependent) 156 , eCompressedData :: B.ByteString -- ^ Compressed contents of file 157 } deriving (Read, Show, Eq) 158 159-- | Compression methods. 160data CompressionMethod = Deflate 161 | NoCompression 162 deriving (Read, Show, Eq) 163 164data EncryptionMethod = NoEncryption -- ^ Entry is not encrypted 165 | PKWAREEncryption Word8 -- ^ Entry is encrypted with the traditional PKWARE encryption 166 deriving (Read, Show, Eq) 167 168-- | The way the password should be verified during entry decryption 169data PKWAREVerificationType = CheckTimeByte 170 | CheckCRCByte 171 deriving (Read, Show, Eq) 172 173-- | Options for 'addFilesToArchive' and 'extractFilesFromArchive'. 174data ZipOption = OptRecursive -- ^ Recurse into directories when adding files 175 | OptVerbose -- ^ Print information to stderr 176 | OptDestination FilePath -- ^ Directory in which to extract 177 | OptLocation FilePath Bool -- ^ Where to place file when adding files and whether to append current path 178 | OptPreserveSymbolicLinks -- ^ Preserve symbolic links as such. This option is ignored on Windows. 179 deriving (Read, Show, Eq) 180 181data ZipException = 182 CRC32Mismatch FilePath 183 | UnsafePath FilePath 184 | CannotWriteEncryptedEntry FilePath 185 deriving (Show, Typeable, Data, Eq) 186 187instance E.Exception ZipException 188 189-- | A zip archive with no contents. 190emptyArchive :: Archive 191emptyArchive = Archive 192 { zEntries = [] 193 , zSignature = Nothing 194 , zComment = B.empty } 195 196-- | Reads an 'Archive' structure from a raw zip archive (in a lazy bytestring). 197toArchive :: B.ByteString -> Archive 198toArchive = decode 199 200-- | Like 'toArchive', but returns an 'Either' value instead of raising an 201-- error if the archive cannot be decoded. NOTE: This function only 202-- works properly when the library is compiled against binary >= 0.7. 203-- With earlier versions, it will always return a Right value, 204-- raising an error if parsing fails. 205toArchiveOrFail :: B.ByteString -> Either String Archive 206#if MIN_VERSION_binary(0,7,0) 207toArchiveOrFail bs = case decodeOrFail bs of 208 Left (_,_,e) -> Left e 209 Right (_,_,x) -> Right x 210#else 211toArchiveOrFail bs = Right $ toArchive bs 212#endif 213 214-- | Writes an 'Archive' structure to a raw zip archive (in a lazy bytestring). 215fromArchive :: Archive -> B.ByteString 216fromArchive = encode 217 218-- | Returns a list of files in a zip archive. 219filesInArchive :: Archive -> [FilePath] 220filesInArchive = map eRelativePath . zEntries 221 222-- | Adds an entry to a zip archive, or updates an existing entry. 223addEntryToArchive :: Entry -> Archive -> Archive 224addEntryToArchive entry archive = 225 let archive' = deleteEntryFromArchive (eRelativePath entry) archive 226 oldEntries = zEntries archive' 227 in archive' { zEntries = entry : oldEntries } 228 229-- | Deletes an entry from a zip archive. 230deleteEntryFromArchive :: FilePath -> Archive -> Archive 231deleteEntryFromArchive path archive = 232 archive { zEntries = [e | e <- zEntries archive 233 , not (eRelativePath e `matches` path)] } 234 235-- | Returns Just the zip entry with the specified path, or Nothing. 236findEntryByPath :: FilePath -> Archive -> Maybe Entry 237findEntryByPath path archive = 238 find (\e -> path `matches` eRelativePath e) (zEntries archive) 239 240-- | Returns uncompressed contents of zip entry. 241fromEntry :: Entry -> B.ByteString 242fromEntry entry = 243 decompressData (eCompressionMethod entry) (eCompressedData entry) 244 245-- | Returns decrypted and uncompressed contents of zip entry. 246fromEncryptedEntry :: String -> Entry -> Maybe B.ByteString 247fromEncryptedEntry password entry = 248 decompressData (eCompressionMethod entry) <$> decryptData password (eEncryptionMethod entry) (eCompressedData entry) 249 250-- | Check if an 'Entry' is encrypted 251isEncryptedEntry :: Entry -> Bool 252isEncryptedEntry entry = 253 case eEncryptionMethod entry of 254 (PKWAREEncryption _) -> True 255 _ -> False 256 257-- | Create an 'Entry' with specified file path, modification time, and contents. 258toEntry :: FilePath -- ^ File path for entry 259 -> Integer -- ^ Modification time for entry (seconds since unix epoch) 260 -> B.ByteString -- ^ Contents of entry 261 -> Entry 262toEntry path modtime contents = 263 let uncompressedSize = B.length contents 264 compressedData = compressData Deflate contents 265 compressedSize = B.length compressedData 266 -- only use compression if it helps! 267 (compressionMethod, finalData, finalSize) = 268 if uncompressedSize <= compressedSize 269 then (NoCompression, contents, uncompressedSize) 270 else (Deflate, compressedData, compressedSize) 271 crc32 = CRC32.crc32 contents 272 in Entry { eRelativePath = normalizePath path 273 , eCompressionMethod = compressionMethod 274 , eEncryptionMethod = NoEncryption 275 , eLastModified = modtime 276 , eCRC32 = crc32 277 , eCompressedSize = fromIntegral finalSize 278 , eUncompressedSize = fromIntegral uncompressedSize 279 , eExtraField = B.empty 280 , eFileComment = B.empty 281 , eVersionMadeBy = 0 -- FAT 282 , eInternalFileAttributes = 0 -- potentially non-text 283 , eExternalFileAttributes = 0 -- appropriate if from stdin 284 , eCompressedData = finalData 285 } 286 287-- | Generates a 'Entry' from a file or directory. 288readEntry :: [ZipOption] -> FilePath -> IO Entry 289readEntry opts path = do 290 isDir <- doesDirectoryExist path 291#ifdef _WINDOWS 292 let isSymLink = False 293#else 294 fs <- getSymbolicLinkStatus path 295 let isSymLink = isSymbolicLink fs 296#endif 297 -- make sure directories end in / and deal with the OptLocation option 298 let path' = let p = path ++ (case reverse path of 299 ('/':_) -> "" 300 _ | isDir && not isSymLink -> "/" 301 _ | isDir && isSymLink -> "" 302 | otherwise -> "") in 303 (case [(l,a) | OptLocation l a <- opts] of 304 ((l,a):_) -> if a then l </> p else l </> takeFileName p 305 _ -> p) 306 contents <- 307#ifndef _WINDOWS 308 if isSymLink 309 then do 310 linkTarget <- readSymbolicLink path 311 return $ C.pack linkTarget 312 else 313#endif 314 if isDir 315 then 316 return B.empty 317 else 318 B.fromStrict <$> S.readFile path 319 modEpochTime <- (floor . utcTimeToPOSIXSeconds) <$> getModificationTime path 320 let entry = toEntry path' modEpochTime contents 321 322 entryE <- 323#ifdef _WINDOWS 324 return $ entry { eVersionMadeBy = 0x0000 } -- FAT/VFAT/VFAT32 file attributes 325#else 326 do 327 let fm = if isSymLink 328 then unionFileModes symbolicLinkMode (fileMode fs) 329 else fileMode fs 330 331 let modes = fromIntegral $ shiftL (toInteger fm) 16 332 return $ entry { eExternalFileAttributes = modes, 333 eVersionMadeBy = 0x0300 } -- UNIX file attributes 334#endif 335 336 when (OptVerbose `elem` opts) $ do 337 let compmethod = case eCompressionMethod entryE of 338 Deflate -> "deflated" 339 NoCompression -> "stored" 340 hPutStrLn stderr $ 341 printf " adding: %s (%s %.f%%)" (eRelativePath entryE) 342 compmethod (100 - (100 * compressionRatio entryE)) 343 return entryE 344 345-- | Writes contents of an 'Entry' to a file. Throws a 346-- 'CRC32Mismatch' exception if the CRC32 checksum for the entry 347-- does not match the uncompressed data. 348writeEntry :: [ZipOption] -> Entry -> IO () 349writeEntry opts entry = do 350 when (isEncryptedEntry entry) $ 351 E.throwIO $ CannotWriteEncryptedEntry (eRelativePath entry) 352 let relpath = eRelativePath entry 353 let isUnsafePath = ".." `elem` splitDirectories relpath 354 when isUnsafePath $ 355 E.throwIO $ UnsafePath relpath 356 path <- case [d | OptDestination d <- opts] of 357 (x:_) -> return (x </> relpath) 358 _ | isAbsolute relpath 359 -> E.throwIO $ UnsafePath relpath 360 | otherwise 361 -> return relpath 362 -- create directories if needed 363 let dir = takeDirectory path 364 exists <- doesDirectoryExist dir 365 unless exists $ do 366 createDirectoryIfMissing True dir 367 when (OptVerbose `elem` opts) $ 368 hPutStrLn stderr $ " creating: " ++ dir 369 if not (null path) && last path == '/' -- path is a directory 370 then return () 371 else do 372 when (OptVerbose `elem` opts) $ 373 hPutStrLn stderr $ case eCompressionMethod entry of 374 Deflate -> " inflating: " ++ path 375 NoCompression -> "extracting: " ++ path 376 let uncompressedData = fromEntry entry 377 if eCRC32 entry == CRC32.crc32 uncompressedData 378 then B.writeFile path uncompressedData 379 else E.throwIO $ CRC32Mismatch path 380#ifndef _WINDOWS 381 let modes = fromIntegral $ shiftR (eExternalFileAttributes entry) 16 382 when (eVersionMadeBy entry .&. 0xFF00 == 0x0300 && 383 modes /= 0) $ setFileMode path modes 384#endif 385 -- Note that last modified times are supported only for POSIX, not for 386 -- Windows. 387 setFileTimeStamp path (eLastModified entry) 388 389#ifndef _WINDOWS 390-- | Write an 'Entry' representing a symbolic link to a file. 391-- If the 'Entry' does not represent a symbolic link or 392-- the options do not contain 'OptPreserveSymbolicLinks`, this 393-- function behaves like `writeEntry`. 394writeSymbolicLinkEntry :: [ZipOption] -> Entry -> IO () 395writeSymbolicLinkEntry opts entry = 396 if OptPreserveSymbolicLinks `notElem` opts 397 then writeEntry opts entry 398 else do 399 if isEntrySymbolicLink entry 400 then do 401 let prefixPath = case [d | OptDestination d <- opts] of 402 (x:_) -> x 403 _ -> "" 404 let targetPath = fromJust . symbolicLinkEntryTarget $ entry 405 let symlinkPath = prefixPath </> eRelativePath entry 406 when (OptVerbose `elem` opts) $ do 407 hPutStrLn stderr $ "linking " ++ symlinkPath ++ " to " ++ targetPath 408 createSymbolicLink targetPath symlinkPath 409 else writeEntry opts entry 410 411 412-- | Get the target of a 'Entry' representing a symbolic link. This might fail 413-- if the 'Entry' does not represent a symbolic link 414symbolicLinkEntryTarget :: Entry -> Maybe FilePath 415symbolicLinkEntryTarget entry | isEntrySymbolicLink entry = Just . C.unpack $ fromEntry entry 416 | otherwise = Nothing 417 418-- | Check if an 'Entry' represents a symbolic link 419isEntrySymbolicLink :: Entry -> Bool 420isEntrySymbolicLink entry = entryCMode entry .&. symbolicLinkMode == symbolicLinkMode 421 422-- | Get the 'eExternalFileAttributes' of an 'Entry' as a 'CMode' a.k.a. 'FileMode' 423entryCMode :: Entry -> CMode 424entryCMode entry = CMode (fromIntegral $ shiftR (eExternalFileAttributes entry) 16) 425#endif 426 427-- | Add the specified files to an 'Archive'. If 'OptRecursive' is specified, 428-- recursively add files contained in directories. if 'OptPreserveSymbolicLinks' 429-- is specified, don't recurse into it. If 'OptVerbose' is specified, 430-- print messages to stderr. 431addFilesToArchive :: [ZipOption] -> Archive -> [FilePath] -> IO Archive 432addFilesToArchive opts archive files = do 433 filesAndChildren <- if OptRecursive `elem` opts 434#ifdef _WINDOWS 435 then mapM getDirectoryContentsRecursive files >>= return . nub . concat 436#else 437 then nub . concat <$> mapM (getDirectoryContentsRecursive' opts) files 438#endif 439 else return files 440 entries <- mapM (readEntry opts) filesAndChildren 441 return $ foldr addEntryToArchive archive entries 442 443-- | Extract all files from an 'Archive', creating directories 444-- as needed. If 'OptVerbose' is specified, print messages to stderr. 445-- Note that the last-modified time is set correctly only in POSIX, 446-- not in Windows. 447-- This function fails if encrypted entries are present 448extractFilesFromArchive :: [ZipOption] -> Archive -> IO () 449extractFilesFromArchive opts archive = do 450 let entries = zEntries archive 451 if OptPreserveSymbolicLinks `elem` opts 452 then do 453#ifdef _WINDOWS 454 mapM_ (writeEntry opts) entries 455#else 456 let (symbolicLinkEntries, nonSymbolicLinkEntries) = partition isEntrySymbolicLink entries 457 mapM_ (writeEntry opts) nonSymbolicLinkEntries 458 mapM_ (writeSymbolicLinkEntry opts) symbolicLinkEntries 459#endif 460 else mapM_ (writeEntry opts) entries 461 462-------------------------------------------------------------------------------- 463-- Internal functions for reading and writing zip binary format. 464 465-- Note that even on Windows, zip files use "/" internally as path separator. 466normalizePath :: FilePath -> String 467normalizePath path = 468 let dir = takeDirectory path 469 fn = takeFileName path 470 (_drive, dir') = splitDrive dir 471 -- note: some versions of filepath return ["."] if no dir 472 dirParts = filter (/=".") $ splitDirectories dir' 473 in intercalate "/" (dirParts ++ [fn]) 474 475-- Equality modulo normalization. So, "./foo" `matches` "foo". 476matches :: FilePath -> FilePath -> Bool 477matches fp1 fp2 = normalizePath fp1 == normalizePath fp2 478 479-- | Uncompress a lazy bytestring. 480compressData :: CompressionMethod -> B.ByteString -> B.ByteString 481compressData Deflate = Zlib.compress 482compressData NoCompression = id 483 484-- | Compress a lazy bytestring. 485decompressData :: CompressionMethod -> B.ByteString -> B.ByteString 486decompressData Deflate = Zlib.decompress 487decompressData NoCompression = id 488 489-- | Decrypt a lazy bytestring 490-- Returns Nothing if password is incorrect 491decryptData :: String -> EncryptionMethod -> B.ByteString -> Maybe B.ByteString 492decryptData _ NoEncryption s = Just s 493decryptData password (PKWAREEncryption controlByte) s = 494 let headerlen = 12 495 initKeys = (305419896, 591751049, 878082192) 496 startKeys = B.foldl pkwareUpdateKeys initKeys (C.pack password) 497 (header, content) = B.splitAt headerlen $ snd $ B.mapAccumL pkwareDecryptByte startKeys s 498 in if B.last header == controlByte 499 then Just content 500 else Nothing 501 502-- | PKWARE decryption context 503type DecryptionCtx = (Word32, Word32, Word32) 504 505-- | An interation of the PKWARE decryption algorithm 506pkwareDecryptByte :: DecryptionCtx -> Word8 -> (DecryptionCtx, Word8) 507pkwareDecryptByte keys@(_, _, key2) inB = 508 let tmp = key2 .|. 2 509 tmp' = fromIntegral ((tmp * (tmp `xor` 1)) `shiftR` 8) :: Word8 510 outB = inB `xor` tmp' 511 in (pkwareUpdateKeys keys outB, outB) 512 513-- | Update decryption keys after a decrypted byte 514pkwareUpdateKeys :: DecryptionCtx -> Word8 -> DecryptionCtx 515pkwareUpdateKeys (key0, key1, key2) inB = 516 let key0' = CRC32.crc32Update (key0 `xor` 0xffffffff) [inB] `xor` 0xffffffff 517 key1' = (key1 + (key0' .&. 0xff)) * 134775813 + 1 518 key1Byte = fromIntegral (key1' `shiftR` 24) :: Word8 519 key2' = CRC32.crc32Update (key2 `xor` 0xffffffff) [key1Byte] `xor` 0xffffffff 520 in (key0', key1', key2') 521 522-- | Calculate compression ratio for an entry (for verbose output). 523compressionRatio :: Entry -> Float 524compressionRatio entry = 525 if eUncompressedSize entry == 0 526 then 1 527 else fromIntegral (eCompressedSize entry) / fromIntegral (eUncompressedSize entry) 528 529-- | MSDOS datetime: a pair of Word16s (date, time) with the following structure: 530-- 531-- > DATE bit 0 - 4 5 - 8 9 - 15 532-- > value day (1 - 31) month (1 - 12) years from 1980 533-- > TIME bit 0 - 4 5 - 10 11 - 15 534-- > value seconds* minute hour 535-- > *stored in two-second increments 536-- 537data MSDOSDateTime = MSDOSDateTime { msDOSDate :: Word16 538 , msDOSTime :: Word16 539 } deriving (Read, Show, Eq) 540 541-- | Epoch time corresponding to the minimum DOS DateTime (Jan 1 1980 00:00:00). 542minMSDOSDateTime :: Integer 543minMSDOSDateTime = 315532800 544 545-- | Convert a clock time to a MSDOS datetime. The MSDOS time will be relative to UTC. 546epochTimeToMSDOSDateTime :: Integer -> MSDOSDateTime 547epochTimeToMSDOSDateTime epochtime | epochtime < minMSDOSDateTime = 548 epochTimeToMSDOSDateTime minMSDOSDateTime 549 -- if time is earlier than minimum DOS datetime, return minimum 550epochTimeToMSDOSDateTime epochtime = 551 let 552 UTCTime 553 (toGregorian -> (fromInteger -> year, month, day)) 554 (timeToTimeOfDay -> (TimeOfDay hour minutes (floor -> sec))) 555 = posixSecondsToUTCTime (fromIntegral epochtime) 556 557 dosTime = toEnum $ (sec `div` 2) + shiftL minutes 5 + shiftL hour 11 558 dosDate = toEnum $ day + shiftL month 5 + shiftL (year - 1980) 9 559 in MSDOSDateTime { msDOSDate = dosDate, msDOSTime = dosTime } 560 561-- | Convert a MSDOS datetime to a 'ClockTime'. 562msDOSDateTimeToEpochTime :: MSDOSDateTime -> Integer 563msDOSDateTimeToEpochTime MSDOSDateTime {msDOSDate = dosDate, msDOSTime = dosTime} = 564 let seconds = fromIntegral $ 2 * (dosTime .&. 0O37) 565 minutes = fromIntegral $ shiftR dosTime 5 .&. 0O77 566 hour = fromIntegral $ shiftR dosTime 11 567 day = fromIntegral $ dosDate .&. 0O37 568 month = fromIntegral ((shiftR dosDate 5) .&. 0O17) 569 year = fromIntegral $ shiftR dosDate 9 570 utc = UTCTime (fromGregorian (1980 + year) month day) (3600 * hour + 60 * minutes + seconds) 571 in floor (utcTimeToPOSIXSeconds utc) 572 573#ifndef _WINDOWS 574getDirectoryContentsRecursive' :: [ZipOption] -> FilePath -> IO [FilePath] 575getDirectoryContentsRecursive' opts path = 576 if OptPreserveSymbolicLinks `elem` opts 577 then do 578 isDir <- doesDirectoryExist path 579 if isDir 580 then do 581 isSymLink <- fmap isSymbolicLink $ getSymbolicLinkStatus path 582 if isSymLink 583 then return [path] 584 else getDirectoryContentsRecursivelyBy (getDirectoryContentsRecursive' opts) path 585 else return [path] 586 else getDirectoryContentsRecursive path 587#endif 588 589getDirectoryContentsRecursive :: FilePath -> IO [FilePath] 590getDirectoryContentsRecursive path = do 591 isDir <- doesDirectoryExist path 592 if isDir 593 then getDirectoryContentsRecursivelyBy getDirectoryContentsRecursive path 594 else return [path] 595 596getDirectoryContentsRecursivelyBy :: (FilePath -> IO [FilePath]) -> FilePath -> IO [FilePath] 597getDirectoryContentsRecursivelyBy exploreMethod path = do 598 contents <- getDirectoryContents path 599 let contents' = map (path </>) $ filter (`notElem` ["..","."]) contents 600 children <- mapM exploreMethod contents' 601 if path == "." 602 then return (concat children) 603 else return (path : concat children) 604 605 606setFileTimeStamp :: FilePath -> Integer -> IO () 607#ifdef _WINDOWS 608setFileTimeStamp _ _ = return () -- TODO: figure out how to set the timestamp on Windows 609#else 610setFileTimeStamp file epochtime = do 611 let epochtime' = fromInteger epochtime 612 setFileTimes file epochtime' epochtime' 613#endif 614 615-- A zip file has the following format (*'d items are not supported in this implementation): 616-- 617-- > [local file header 1] 618-- > [file data 1] 619-- > [data descriptor 1*] 620-- > . 621-- > . 622-- > . 623-- > [local file header n] 624-- > [file data n] 625-- > [data descriptor n*] 626-- > [archive decryption header*] 627-- > [archive extra data record*] 628-- > [central directory] 629-- > [zip64 end of central directory record*] 630-- > [zip64 end of central directory locator*] 631-- > [end of central directory record] 632-- 633-- Files stored in arbitrary order. All values are stored in 634-- little-endian byte order unless otherwise specified. 635-- 636-- Central directory structure: 637-- 638-- > [file header 1] 639-- > . 640-- > . 641-- > . 642-- > [file header n] 643-- > [digital signature] 644-- 645-- End of central directory record: 646-- 647-- > end of central dir signature 4 bytes (0x06054b50) 648-- > number of this disk 2 bytes 649-- > number of the disk with the 650-- > start of the central directory 2 bytes 651-- > total number of entries in the 652-- > central directory on this disk 2 bytes 653-- > total number of entries in 654-- > the central directory 2 bytes 655-- > size of the central directory 4 bytes 656-- > offset of start of central 657-- > directory with respect to 658-- > the starting disk number 4 bytes 659-- > .ZIP file comment length 2 bytes 660-- > .ZIP file comment (variable size) 661 662getArchive :: Get Archive 663getArchive = do 664#if MIN_VERSION_binary(0,6,0) 665 locals <- many getLocalFile 666 files <- many (getFileHeader (M.fromList locals)) 667 digSig <- Just `fmap` getDigitalSignature <|> return Nothing 668#else 669 locals <- manySig 0x04034b50 getLocalFile 670 files <- manySig 0x02014b50 (getFileHeader (M.fromList locals)) 671 digSig <- lookAheadM getDigitalSignature 672#endif 673 endSig <- getWord32le 674 unless (endSig == 0x06054b50) 675 $ fail "Did not find end of central directory signature" 676 skip 2 -- disk number 677 skip 2 -- disk number of central directory 678 skip 2 -- num entries on this disk 679 skip 2 -- num entries in central directory 680 skip 4 -- central directory size 681 skip 4 -- offset of central directory 682 commentLength <- getWord16le 683 zipComment <- getLazyByteString (toEnum $ fromEnum commentLength) 684 return Archive 685 { zEntries = files 686 , zSignature = digSig 687 , zComment = zipComment 688 } 689 690putArchive :: Archive -> Put 691putArchive archive = do 692 mapM_ putLocalFile $ zEntries archive 693 let localFileSizes = map localFileSize $ zEntries archive 694 let offsets = scanl (+) 0 localFileSizes 695 let cdOffset = last offsets 696 _ <- zipWithM_ putFileHeader offsets (zEntries archive) 697 putDigitalSignature $ zSignature archive 698 putWord32le 0x06054b50 699 putWord16le 0 -- disk number 700 putWord16le 0 -- disk number of central directory 701 putWord16le $ fromIntegral $ length $ zEntries archive -- number of entries this disk 702 putWord16le $ fromIntegral $ length $ zEntries archive -- number of entries 703 putWord32le $ sum $ map fileHeaderSize $ zEntries archive -- size of central directory 704 putWord32le $ fromIntegral cdOffset -- offset of central dir 705 putWord16le $ fromIntegral $ B.length $ zComment archive 706 putLazyByteString $ zComment archive 707 708 709fileHeaderSize :: Entry -> Word32 710fileHeaderSize f = 711 fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 712 fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) + 713 B.length (eExtraField f) + B.length (eFileComment f) 714 715localFileSize :: Entry -> Word32 716localFileSize f = 717 fromIntegral $ 4 + 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4 + 2 + 2 + 718 fromIntegral (B.length $ fromString $ normalizePath $ eRelativePath f) + 719 B.length (eExtraField f) + B.length (eCompressedData f) 720 721-- Local file header: 722-- 723-- > local file header signature 4 bytes (0x04034b50) 724-- > version needed to extract 2 bytes 725-- > general purpose bit flag 2 bytes 726-- > compression method 2 bytes 727-- > last mod file time 2 bytes 728-- > last mod file date 2 bytes 729-- > crc-32 4 bytes 730-- > compressed size 4 bytes 731-- > uncompressed size 4 bytes 732-- > file name length 2 bytes 733-- > extra field length 2 bytes 734-- 735-- > file name (variable size) 736-- > extra field (variable size) 737-- 738-- Note that if bit 3 of the general purpose bit flag is set, then the 739-- compressed size will be 0 and the size will be stored instead in a 740-- data descriptor record AFTER the file contents. The record normally 741-- begins with the signature 0x08074b50, then 4 bytes crc-32, 4 bytes 742-- compressed size, 4 bytes uncompressed size. 743 744getLocalFile :: Get (Word32, B.ByteString) 745getLocalFile = do 746 offset <- bytesRead 747 getWord32le >>= ensure (== 0x04034b50) 748 skip 2 -- version 749 bitflag <- getWord16le 750 skip 2 -- compressionMethod 751 skip 2 -- last mod file time 752 skip 2 -- last mod file date 753 skip 4 -- crc32 754 compressedSize <- getWord32le 755 when (compressedSize == 0xFFFFFFFF) $ 756 fail "Can't read ZIP64 archive." 757 skip 4 -- uncompressedsize 758 fileNameLength <- getWord16le 759 extraFieldLength <- getWord16le 760 skip (fromIntegral fileNameLength) -- filename 761 skip (fromIntegral extraFieldLength) -- extra field 762 compressedData <- if bitflag .&. 0O10 == 0 763 then getLazyByteString (fromIntegral compressedSize) 764 else -- If bit 3 of general purpose bit flag is set, 765 -- then we need to read until we get to the 766 -- data descriptor record. We assume that the 767 -- record has signature 0x08074b50; this is not required 768 -- by the specification but is common. 769 do raw <- getWordsTilSig 0x08074b50 770 skip 4 -- crc32 771 cs <- getWord32le -- compressed size 772 skip 4 -- uncompressed size 773 if fromIntegral cs == B.length raw 774 then return raw 775 else fail "Content size mismatch in data descriptor record" 776 return (fromIntegral offset, compressedData) 777 778getWordsTilSig :: Word32 -> Get B.ByteString 779#if MIN_VERSION_binary(0, 6, 0) 780getWordsTilSig sig = (B.fromChunks . reverse) `fmap` go Nothing [] 781 where 782 sig' = S.pack [fromIntegral $ sig .&. 0xFF, 783 fromIntegral $ sig `shiftR` 8 .&. 0xFF, 784 fromIntegral $ sig `shiftR` 16 .&. 0xFF, 785 fromIntegral $ sig `shiftR` 24 .&. 0xFF] 786 chunkSize = 16384 787 --chunkSize = 4 -- for testing prefix match 788 checkChunk chunk = do -- find in content 789 let (prefix, start) = S.breakSubstring sig' chunk 790 if S.null start 791 then return $ Right chunk 792 else return $ Left $ S.length prefix 793 go :: Maybe (Word8, Word8, Word8) -> [S.ByteString] -> Get [S.ByteString] 794 go prefixes acc = do 795 -- note: lookAheadE will rewind if the result is Left 796 eitherChunkOrIndex <- lookAheadE $ do 797 chunk <- getByteString chunkSize <|> B.toStrict `fmap` getRemainingLazyByteString 798 case prefixes of 799 Just (byte3,byte2,byte1) -> 800 let len = S.length chunk in 801 if len >= 1 && 802 S.pack [byte3,byte2,byte1,S.index chunk 0] == sig' 803 then return $ Left $ -3 804 else if len >= 2 && 805 S.pack [byte2,byte1,S.index chunk 0,S.index chunk 1] == sig' 806 then return $ Left $ -2 807 else if len >= 3 && 808 S.pack [byte1,S.index chunk 0,S.index chunk 1,S.index chunk 2] == sig' 809 then return $ Left $ -1 810 else checkChunk chunk 811 Nothing -> checkChunk chunk 812 case eitherChunkOrIndex of 813 Left index -> if index < 0 814 then do -- prefix match 815 skip (4 + index) -- skip over partial match in next chunk 816 return $ (S.take (S.length (head acc) + index) (head acc)) : (tail acc) 817 else do -- match inside this chunk 818 lastchunk <- getByteString index -- must read again 819 skip 4 820 return (lastchunk:acc) 821 Right chunk -> if len == chunkSize 822 then go prefixes' (chunk:acc) 823 else fail $ "getWordsTilSig: signature not found before EOF" 824 where 825 len = S.length chunk 826 prefixes' = Just $ (S.index chunk (len - 3), S.index chunk (len - 2), S.index chunk (len - 1)) 827#else 828getWordsTilSig sig = B.pack `fmap` go [] 829 where 830 go acc = do 831 sig' <- lookAhead getWord32le 832 if sig == sig' 833 then skip 4 >> return (reverse acc) 834 else do 835 w <- getWord8 836 go (w:acc) 837#endif 838 839putLocalFile :: Entry -> Put 840putLocalFile f = do 841 putWord32le 0x04034b50 842 putWord16le 20 -- version needed to extract (>=2.0) 843 putWord16le 0x802 -- general purpose bit flag (bit 1 = max compression, bit 11 = UTF-8) 844 putWord16le $ case eCompressionMethod f of 845 NoCompression -> 0 846 Deflate -> 8 847 let modTime = epochTimeToMSDOSDateTime $ eLastModified f 848 putWord16le $ msDOSTime modTime 849 putWord16le $ msDOSDate modTime 850 putWord32le $ eCRC32 f 851 putWord32le $ eCompressedSize f 852 putWord32le $ eUncompressedSize f 853 putWord16le $ fromIntegral $ B.length $ fromString 854 $ normalizePath $ eRelativePath f 855 putWord16le $ fromIntegral $ B.length $ eExtraField f 856 putLazyByteString $ fromString $ normalizePath $ eRelativePath f 857 putLazyByteString $ eExtraField f 858 putLazyByteString $ eCompressedData f 859 860-- File header structure: 861-- 862-- > central file header signature 4 bytes (0x02014b50) 863-- > version made by 2 bytes 864-- > version needed to extract 2 bytes 865-- > general purpose bit flag 2 bytes 866-- > compression method 2 bytes 867-- > last mod file time 2 bytes 868-- > last mod file date 2 bytes 869-- > crc-32 4 bytes 870-- > compressed size 4 bytes 871-- > uncompressed size 4 bytes 872-- > file name length 2 bytes 873-- > extra field length 2 bytes 874-- > file comment length 2 bytes 875-- > disk number start 2 bytes 876-- > internal file attributes 2 bytes 877-- > external file attributes 4 bytes 878-- > relative offset of local header 4 bytes 879-- 880-- > file name (variable size) 881-- > extra field (variable size) 882-- > file comment (variable size) 883 884getFileHeader :: M.Map Word32 B.ByteString -- ^ map of (offset, content) pairs returned by getLocalFile 885 -> Get Entry 886getFileHeader locals = do 887 getWord32le >>= ensure (== 0x02014b50) 888 vmb <- getWord16le -- version made by 889 versionNeededToExtract <- getWord8 890 skip 1 -- upper byte indicates OS part of "version needed to extract" 891 unless (versionNeededToExtract <= 20) $ 892 fail "This archive requires zip >= 2.0 to extract." 893 bitflag <- getWord16le 894 rawCompressionMethod <- getWord16le 895 compressionMethod <- case rawCompressionMethod of 896 0 -> return NoCompression 897 8 -> return Deflate 898 _ -> fail $ "Unknown compression method " ++ show rawCompressionMethod 899 lastModFileTime <- getWord16le 900 lastModFileDate <- getWord16le 901 crc32 <- getWord32le 902 encryptionMethod <- case (testBit bitflag 0, testBit bitflag 3, testBit bitflag 6) of 903 (False, _, _) -> return NoEncryption 904 (True, False, False) -> return $ PKWAREEncryption (fromIntegral (crc32 `shiftR` 24)) 905 (True, True, False) -> return $ PKWAREEncryption (fromIntegral (lastModFileTime `shiftR` 8)) 906 (True, _, True) -> fail "Strong encryption is not supported" 907 908 compressedSize <- getWord32le 909 uncompressedSize <- getWord32le 910 fileNameLength <- getWord16le 911 extraFieldLength <- getWord16le 912 fileCommentLength <- getWord16le 913 skip 2 -- disk number start 914 internalFileAttributes <- getWord16le 915 externalFileAttributes <- getWord32le 916 relativeOffset <- getWord32le 917 fileName <- getLazyByteString (toEnum $ fromEnum fileNameLength) 918 extraField <- getLazyByteString (toEnum $ fromEnum extraFieldLength) 919 fileComment <- getLazyByteString (toEnum $ fromEnum fileCommentLength) 920 compressedData <- case M.lookup relativeOffset locals of 921 Just x -> return x 922 Nothing -> fail $ "Unable to find data at offset " ++ 923 show relativeOffset 924 return Entry 925 { eRelativePath = toString fileName 926 , eCompressionMethod = compressionMethod 927 , eEncryptionMethod = encryptionMethod 928 , eLastModified = msDOSDateTimeToEpochTime $ 929 MSDOSDateTime { msDOSDate = lastModFileDate, 930 msDOSTime = lastModFileTime } 931 , eCRC32 = crc32 932 , eCompressedSize = compressedSize 933 , eUncompressedSize = uncompressedSize 934 , eExtraField = extraField 935 , eFileComment = fileComment 936 , eVersionMadeBy = vmb 937 , eInternalFileAttributes = internalFileAttributes 938 , eExternalFileAttributes = externalFileAttributes 939 , eCompressedData = compressedData 940 } 941 942putFileHeader :: Word32 -- ^ offset 943 -> Entry 944 -> Put 945putFileHeader offset local = do 946 putWord32le 0x02014b50 947 putWord16le $ eVersionMadeBy local 948 putWord16le 20 -- version needed to extract (>= 2.0) 949 putWord16le 0x802 -- general purpose bit flag (bit 1 = max compression, bit 11 = UTF-8) 950 putWord16le $ case eCompressionMethod local of 951 NoCompression -> 0 952 Deflate -> 8 953 let modTime = epochTimeToMSDOSDateTime $ eLastModified local 954 putWord16le $ msDOSTime modTime 955 putWord16le $ msDOSDate modTime 956 putWord32le $ eCRC32 local 957 putWord32le $ eCompressedSize local 958 putWord32le $ eUncompressedSize local 959 putWord16le $ fromIntegral $ B.length $ fromString 960 $ normalizePath $ eRelativePath local 961 putWord16le $ fromIntegral $ B.length $ eExtraField local 962 putWord16le $ fromIntegral $ B.length $ eFileComment local 963 putWord16le 0 -- disk number start 964 putWord16le $ eInternalFileAttributes local 965 putWord32le $ eExternalFileAttributes local 966 putWord32le offset 967 putLazyByteString $ fromString $ normalizePath $ eRelativePath local 968 putLazyByteString $ eExtraField local 969 putLazyByteString $ eFileComment local 970 971-- Digital signature: 972-- 973-- > header signature 4 bytes (0x05054b50) 974-- > size of data 2 bytes 975-- > signature data (variable size) 976 977#if MIN_VERSION_binary(0,6,0) 978getDigitalSignature :: Get B.ByteString 979getDigitalSignature = do 980 getWord32le >>= ensure (== 0x05054b50) 981 sigSize <- getWord16le 982 getLazyByteString (toEnum $ fromEnum sigSize) 983#else 984getDigitalSignature :: Get (Maybe B.ByteString) 985getDigitalSignature = do 986 hdrSig <- getWord32le 987 if hdrSig /= 0x05054b50 988 then return Nothing 989 else do 990 sigSize <- getWord16le 991 getLazyByteString (toEnum $ fromEnum sigSize) >>= return . Just 992#endif 993 994putDigitalSignature :: Maybe B.ByteString -> Put 995putDigitalSignature Nothing = return () 996putDigitalSignature (Just sig) = do 997 putWord32le 0x05054b50 998 putWord16le $ fromIntegral $ B.length sig 999 putLazyByteString sig 1000 1001ensure :: (a -> Bool) -> a -> Get () 1002ensure p val = 1003 if p val 1004 then return () 1005 else fail "ensure not satisfied" 1006 1007toString :: B.ByteString -> String 1008toString = TL.unpack . TL.decodeUtf8 1009 1010fromString :: String -> B.ByteString 1011fromString = TL.encodeUtf8 . TL.pack 1012