1 /* Copyright (C) 2017 Wildfire Games.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * archive backend for Zip files.
25  */
26 
27 #include "precompiled.h"
28 #include "lib/file/archive/archive_zip.h"
29 
30 #include <time.h>
31 #include <limits>
32 
33 #include "lib/utf8.h"
34 #include "lib/bits.h"
35 #include "lib/byte_order.h"
36 #include "lib/allocators/pool.h"
37 #include "lib/sysdep/filesystem.h"
38 #include "lib/file/archive/archive.h"
39 #include "lib/file/archive/codec_zlib.h"
40 #include "lib/file/archive/stream.h"
41 #include "lib/file/file.h"
42 #include "lib/file/io/io.h"
43 
44 //-----------------------------------------------------------------------------
45 // timestamp conversion: DOS FAT <-> Unix time_t
46 //-----------------------------------------------------------------------------
47 
time_t_from_FAT(u32 fat_timedate)48 static time_t time_t_from_FAT(u32 fat_timedate)
49 {
50 	const u32 fat_time = bits(fat_timedate, 0, 15);
51 	const u32 fat_date = bits(fat_timedate, 16, 31);
52 
53 	struct tm t;							// struct tm format:
54 	t.tm_sec   = bits(fat_time, 0,4) * 2;	// [0,59]
55 	t.tm_min   = bits(fat_time, 5,10);		// [0,59]
56 	t.tm_hour  = bits(fat_time, 11,15);		// [0,23]
57 	t.tm_mday  = bits(fat_date, 0,4);		// [1,31]
58 	t.tm_mon   = bits(fat_date, 5,8) - 1;	// [0,11]
59 	t.tm_year  = bits(fat_date, 9,15) + 80;	// since 1900
60 	t.tm_isdst = -1;	// unknown - let libc determine
61 
62 	// otherwise: totally bogus, and at the limit of 32-bit time_t
63 	ENSURE(t.tm_year < 138);
64 
65 	time_t ret = mktime(&t);
66 	ENSURE(ret != (time_t)-1);	// mktime shouldn't fail
67 	return ret;
68 }
69 
70 
FAT_from_time_t(time_t time)71 static u32 FAT_from_time_t(time_t time)
72 {
73 	// (values are adjusted for DST)
74 	struct tm* t = localtime(&time);
75 
76 	const u16 fat_time = u16(
77 		(t->tm_sec/2) |		    // 5
78 		(u16(t->tm_min) << 5) | // 6
79 		(u16(t->tm_hour) << 11)	// 5
80 		);
81 
82 	const u16 fat_date = u16(
83 		(t->tm_mday) |            // 5
84 		(u16(t->tm_mon+1) << 5) | // 4
85 		(u16(t->tm_year-80) << 9) // 7
86 		);
87 
88 	u32 fat_timedate = u32_from_u16(fat_date, fat_time);
89 	return fat_timedate;
90 }
91 
92 
93 //-----------------------------------------------------------------------------
94 // Zip archive definitions
95 //-----------------------------------------------------------------------------
96 
97 static const u32 cdfh_magic = FOURCC_LE('P','K','\1','\2');
98 static const u32  lfh_magic = FOURCC_LE('P','K','\3','\4');
99 static const u32 ecdr_magic = FOURCC_LE('P','K','\5','\6');
100 
// compression method identifiers as stored in the 16-bit 'method'
// field of the LFH and CDFH records.
enum ZipMethod
{
	// data is stored as-is (handled by the "ZLibNone" codec)
	ZIP_METHOD_NONE    = 0,

	// data is compressed with Deflate (handled by the ZLibDeflate codecs)
	ZIP_METHOD_DEFLATE = 8
};
106 
107 #pragma pack(push, 1)
108 
109 class LFH
110 {
111 public:
Init(const CFileInfo & fileInfo,off_t csize,ZipMethod method,u32 checksum,const Path & pathname)112 	void Init(const CFileInfo& fileInfo, off_t csize, ZipMethod method, u32 checksum, const Path& pathname)
113 	{
114 		const std::string pathnameUTF8 = utf8_from_wstring(pathname.string());
115 		const size_t pathnameSize = pathnameUTF8.length();
116 
117 		m_magic     = lfh_magic;
118 		m_x1        = to_le16(0);
119 		m_flags     = to_le16(0);
120 		m_method    = to_le16(u16_from_larger(method));
121 		m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime()));
122 		m_crc       = to_le32(checksum);
123 		m_csize     = to_le32(u32_from_larger(csize));
124 		m_usize     = to_le32(u32_from_larger(fileInfo.Size()));
125 		m_fn_len    = to_le16(u16_from_larger(pathnameSize));
126 		m_e_len     = to_le16(0);
127 
128 		memcpy((char*)this + sizeof(LFH), pathnameUTF8.c_str(), pathnameSize);
129 	}
130 
Size() const131 	size_t Size() const
132 	{
133 		ENSURE(m_magic == lfh_magic);
134 		size_t size = sizeof(LFH);
135 		size += read_le16(&m_fn_len);
136 		size += read_le16(&m_e_len);
137 		// note: LFH doesn't have a comment field!
138 		return size;
139 	}
140 
141 private:
142 	u32 m_magic;
143 	u16 m_x1;			// version needed
144 	u16 m_flags;
145 	u16 m_method;
146 	u32 m_fat_mtime;	// last modified time (DOS FAT format)
147 	u32 m_crc;
148 	u32 m_csize;
149 	u32 m_usize;
150 	u16 m_fn_len;
151 	u16 m_e_len;
152 };
153 
154 cassert(sizeof(LFH) == 30);
155 
156 
157 class CDFH
158 {
159 public:
Init(const CFileInfo & fileInfo,off_t ofs,off_t csize,ZipMethod method,u32 checksum,const Path & pathname,size_t slack)160 	void Init(const CFileInfo& fileInfo, off_t ofs, off_t csize, ZipMethod method, u32 checksum, const Path& pathname, size_t slack)
161 	{
162 		const std::string pathnameUTF8 = utf8_from_wstring(pathname.string());
163 		const size_t pathnameLength = pathnameUTF8.length();
164 
165 		m_magic     = cdfh_magic;
166 		m_x1        = to_le32(0);
167 		m_flags     = to_le16(0);
168 		m_method    = to_le16(u16_from_larger(method));
169 		m_fat_mtime = to_le32(FAT_from_time_t(fileInfo.MTime()));
170 		m_crc       = to_le32(checksum);
171 		m_csize     = to_le32(u32_from_larger(csize));
172 		m_usize     = to_le32(u32_from_larger(fileInfo.Size()));
173 		m_fn_len    = to_le16(u16_from_larger(pathnameLength));
174 		m_e_len     = to_le16(0);
175 		m_c_len     = to_le16(u16_from_larger((size_t)slack));
176 		m_x2        = to_le32(0);
177 		m_x3        = to_le32(0);
178 		m_lfh_ofs   = to_le32(u32_from_larger(ofs));
179 
180 		memcpy((char*)this + sizeof(CDFH), pathnameUTF8.c_str(), pathnameLength);
181 	}
182 
Pathname() const183 	Path Pathname() const
184 	{
185 		const size_t length = (size_t)read_le16(&m_fn_len);
186 		const char* pathname = (const char*)this + sizeof(CDFH); // not 0-terminated!
187 		return Path(std::string(pathname, length));
188 	}
189 
HeaderOffset() const190 	off_t HeaderOffset() const
191 	{
192 		return read_le32(&m_lfh_ofs);
193 	}
194 
USize() const195 	off_t USize() const
196 	{
197 		return (off_t)read_le32(&m_usize);
198 	}
199 
CSize() const200 	off_t CSize() const
201 	{
202 		return (off_t)read_le32(&m_csize);
203 	}
204 
Method() const205 	ZipMethod Method() const
206 	{
207 		return (ZipMethod)read_le16(&m_method);
208 	}
209 
Checksum() const210 	u32 Checksum() const
211 	{
212 		return read_le32(&m_crc);
213 	}
214 
MTime() const215 	time_t MTime() const
216 	{
217 		const u32 fat_mtime = read_le32(&m_fat_mtime);
218 		return time_t_from_FAT(fat_mtime);
219 	}
220 
Size() const221 	size_t Size() const
222 	{
223 		size_t size = sizeof(CDFH);
224 		size += read_le16(&m_fn_len);
225 		size += read_le16(&m_e_len);
226 		size += read_le16(&m_c_len);
227 		return size;
228 	}
229 
230 private:
231 	u32 m_magic;
232 	u32 m_x1;			// versions
233 	u16 m_flags;
234 	u16 m_method;
235 	u32 m_fat_mtime;	// last modified time (DOS FAT format)
236 	u32 m_crc;
237 	u32 m_csize;
238 	u32 m_usize;
239 	u16 m_fn_len;
240 	u16 m_e_len;
241 	u16 m_c_len;
242 	u32 m_x2;			// spanning
243 	u32 m_x3;			// attributes
244 	u32 m_lfh_ofs;
245 };
246 
247 cassert(sizeof(CDFH) == 46);
248 
249 
250 class ECDR
251 {
252 public:
Init(size_t cd_numEntries,off_t cd_ofs,size_t cd_size)253 	void Init(size_t cd_numEntries, off_t cd_ofs, size_t cd_size)
254 	{
255 		m_magic         = ecdr_magic;
256 		m_diskNum       = to_le16(0);
257 		m_cd_diskNum    = to_le16(0);
258 		m_cd_numEntriesOnDisk = to_le16(u16_from_larger(cd_numEntries));
259 		m_cd_numEntries = m_cd_numEntriesOnDisk;
260 		m_cd_size       = to_le32(u32_from_larger(cd_size));
261 		m_cd_ofs        = to_le32(u32_from_larger(cd_ofs));
262 		m_comment_len   = to_le16(0);
263 	}
264 
Decompose(size_t & cd_numEntries,off_t & cd_ofs,size_t & cd_size) const265 	void Decompose(size_t& cd_numEntries, off_t& cd_ofs, size_t& cd_size) const
266 	{
267 		cd_numEntries = (size_t)read_le16(&m_cd_numEntries);
268 		cd_ofs       = (off_t)read_le32(&m_cd_ofs);
269 		cd_size      = (size_t)read_le32(&m_cd_size);
270 	}
271 
272 private:
273 	u32 m_magic;
274 	u16 m_diskNum;
275 	u16 m_cd_diskNum;
276 	u16 m_cd_numEntriesOnDisk;
277 	u16 m_cd_numEntries;
278 	u32 m_cd_size;
279 	u32 m_cd_ofs;
280 	u16 m_comment_len;
281 };
282 
283 cassert(sizeof(ECDR) == 22);
284 
285 #pragma pack(pop)
286 
287 
288 //-----------------------------------------------------------------------------
289 // ArchiveFile_Zip
290 //-----------------------------------------------------------------------------
291 
292 class ArchiveFile_Zip : public IArchiveFile
293 {
294 public:
ArchiveFile_Zip(const PFile & file,off_t ofs,off_t csize,u32 checksum,ZipMethod method)295 	ArchiveFile_Zip(const PFile& file, off_t ofs, off_t csize, u32 checksum, ZipMethod method)
296 		: m_file(file), m_ofs(ofs)
297 		, m_csize(csize), m_checksum(checksum), m_method((u16)method)
298 		, m_flags(NeedsFixup)
299 	{
300 	}
301 
Precedence() const302 	virtual size_t Precedence() const
303 	{
304 		return 2u;
305 	}
306 
LocationCode() const307 	virtual wchar_t LocationCode() const
308 	{
309 		return 'A';
310 	}
311 
Path() const312 	virtual OsPath Path() const
313 	{
314 		return m_file->Pathname();
315 	}
316 
Load(const OsPath & UNUSED (name),const shared_ptr<u8> & buf,size_t size) const317 	virtual Status Load(const OsPath& UNUSED(name), const shared_ptr<u8>& buf, size_t size) const
318 	{
319 		AdjustOffset();
320 
321 		PICodec codec;
322 		switch(m_method)
323 		{
324 		case ZIP_METHOD_NONE:
325 			codec = CreateCodec_ZLibNone();
326 			break;
327 		case ZIP_METHOD_DEFLATE:
328 			codec = CreateDecompressor_ZLibDeflate();
329 			break;
330 		default:
331 			WARN_RETURN(ERR::ARCHIVE_UNKNOWN_METHOD);
332 		}
333 
334 		Stream stream(codec);
335 		stream.SetOutputBuffer(buf.get(), size);
336 		io::Operation op(*m_file.get(), 0, m_csize, m_ofs);
337 		StreamFeeder streamFeeder(stream);
338 		RETURN_STATUS_IF_ERR(io::Run(op, io::Parameters(), streamFeeder));
339 		RETURN_STATUS_IF_ERR(stream.Finish());
340 #if CODEC_COMPUTE_CHECKSUM
341 		ENSURE(m_checksum == stream.Checksum());
342 #endif
343 
344 		return INFO::OK;
345 	}
346 
347 private:
348 	enum Flags
349 	{
350 		// indicates m_ofs points to a "local file header" instead of
351 		// the file data. a fixup routine is called when reading the file;
352 		// it skips past the LFH and clears this flag.
353 		// this is somewhat of a hack, but vital to archive open performance.
354 		// without it, we'd have to scan through the entire archive file,
355 		// which can take *seconds*.
356 		// (we cannot use the information in CDFH, because its 'extra' field
357 		// has been observed to differ from that of the LFH)
358 		// since we read the LFH right before the rest of the file, the block
359 		// cache will absorb the IO cost.
360 		NeedsFixup = 1
361 	};
362 
363 	struct LFH_Copier
364 	{
LFH_CopierArchiveFile_Zip::LFH_Copier365 		LFH_Copier(u8* lfh_dst, size_t lfh_bytes_remaining)
366 			: lfh_dst(lfh_dst), lfh_bytes_remaining(lfh_bytes_remaining)
367 		{
368 		}
369 
370 		// this code grabs an LFH struct from file block(s) that are
371 		// passed to the callback. usually, one call copies the whole thing,
372 		// but the LFH may straddle a block boundary.
373 		//
374 		// rationale: this allows using temp buffers for zip_fixup_lfh,
375 		// which avoids involving the file buffer manager and thus
376 		// avoids cluttering the trace and cache contents.
operator ()ArchiveFile_Zip::LFH_Copier377 		Status operator()(const u8* block, size_t size) const
378 		{
379 			ENSURE(size <= lfh_bytes_remaining);
380 			memcpy(lfh_dst, block, size);
381 			lfh_dst += size;
382 			lfh_bytes_remaining -= size;
383 
384 			return INFO::OK;
385 		}
386 
387 		mutable u8* lfh_dst;
388 		mutable size_t lfh_bytes_remaining;
389 	};
390 
391 	/**
392 	 * fix up m_ofs (adjust it to point to cdata instead of the LFH).
393 	 *
394 	 * note: we cannot use CDFH filename and extra field lengths to skip
395 	 * past LFH since that may not mirror CDFH (has happened).
396 	 *
397 	 * this is called at file-open time instead of while mounting to
398 	 * reduce seeks: since reading the file will typically follow, the
399 	 * block cache entirely absorbs the IO cost.
400 	 **/
AdjustOffset() const401 	void AdjustOffset() const
402 	{
403 		if(!(m_flags & NeedsFixup))
404 			return;
405 		m_flags &= ~NeedsFixup;
406 
407 		// performance note: this ends up reading one file block, which is
408 		// only in the block cache if the file starts in the same block as a
409 		// previously read file (i.e. both are small).
410 		LFH lfh;
411 		io::Operation op(*m_file.get(), 0, sizeof(LFH), m_ofs);
412 		if(io::Run(op, io::Parameters(), LFH_Copier((u8*)&lfh, sizeof(LFH))) == INFO::OK)
413 			m_ofs += (off_t)lfh.Size();
414 	}
415 
416 	PFile m_file;
417 
418 	// all relevant LFH/CDFH fields not covered by CFileInfo
419 	mutable off_t m_ofs;
420 	off_t m_csize;
421 	u32 m_checksum;
422 	u16 m_method;
423 	mutable u16 m_flags;
424 };
425 
426 
427 //-----------------------------------------------------------------------------
428 // ArchiveReader_Zip
429 //-----------------------------------------------------------------------------
430 
431 class ArchiveReader_Zip : public IArchiveReader
432 {
433 public:
ArchiveReader_Zip(const OsPath & pathname)434 	ArchiveReader_Zip(const OsPath& pathname)
435 		: m_file(new File(pathname, O_RDONLY))
436 	{
437 		CFileInfo fileInfo;
438 		GetFileInfo(pathname, &fileInfo);
439 		m_fileSize = fileInfo.Size();
440 		const size_t minFileSize = sizeof(LFH)+sizeof(CDFH)+sizeof(ECDR);
441 		ENSURE(m_fileSize >= off_t(minFileSize));
442 	}
443 
ReadEntries(ArchiveEntryCallback cb,uintptr_t cbData)444 	virtual Status ReadEntries(ArchiveEntryCallback cb, uintptr_t cbData)
445 	{
446 		// locate and read Central Directory
447 		off_t cd_ofs = 0;
448 		size_t cd_numEntries = 0;
449 		size_t cd_size = 0;
450 		RETURN_STATUS_IF_ERR(LocateCentralDirectory(m_file, m_fileSize, cd_ofs, cd_numEntries, cd_size));
451 		UniqueRange buf(io::Allocate(cd_size));
452 
453 		io::Operation op(*m_file.get(), buf.get(), cd_size, cd_ofs);
454 		RETURN_STATUS_IF_ERR(io::Run(op));
455 
456 		// iterate over Central Directory
457 		const u8* pos = (const u8*)buf.get();
458 		for(size_t i = 0; i < cd_numEntries; i++)
459 		{
460 			// scan for next CDFH
461 			CDFH* cdfh = (CDFH*)FindRecord((const u8*)buf.get(), cd_size, pos, cdfh_magic, sizeof(CDFH));
462 			if(!cdfh)
463 				WARN_RETURN(ERR::CORRUPTED);
464 
465 			const Path relativePathname(cdfh->Pathname());
466 			if(!relativePathname.IsDirectory())
467 			{
468 				const OsPath name = relativePathname.Filename();
469 				CFileInfo fileInfo(name, cdfh->USize(), cdfh->MTime());
470 				shared_ptr<ArchiveFile_Zip> archiveFile(new ArchiveFile_Zip(m_file, cdfh->HeaderOffset(), cdfh->CSize(), cdfh->Checksum(), cdfh->Method()));
471 				cb(relativePathname, fileInfo, archiveFile, cbData);
472 			}
473 
474 			pos += cdfh->Size();
475 		}
476 
477 		return INFO::OK;
478 	}
479 
480 private:
481 	/**
482 	 * Scan buffer for a Zip file record.
483 	 *
484 	 * @param buf
485 	 * @param size
486 	 * @param start position within buffer
487 	 * @param magic signature of record
488 	 * @param recordSize size of record (including signature)
489 	 * @return pointer to record within buffer or 0 if not found.
490 	 **/
FindRecord(const u8 * buf,size_t size,const u8 * start,u32 magic,size_t recordSize)491 	static const u8* FindRecord(const u8* buf, size_t size, const u8* start, u32 magic, size_t recordSize)
492 	{
493 		// (don't use <start> as the counter - otherwise we can't tell if
494 		// scanning within the buffer was necessary.)
495 		for(const u8* p = start; p <= buf+size-recordSize; p++)
496 		{
497 			// found it
498 			if(*(u32*)p == magic)
499 			{
500 				ENSURE(p == start);	// otherwise, the archive is a bit broken
501 				return p;
502 			}
503 		}
504 
505 		// passed EOF, didn't find it.
506 		// note: do not warn - this happens in the initial ECDR search at
507 		// EOF if the archive contains a comment field.
508 		return 0;
509 	}
510 
511 	// search for ECDR in the last <maxScanSize> bytes of the file.
512 	// if found, fill <dst_ecdr> with a copy of the (little-endian) ECDR and
513 	// return INFO::OK, otherwise IO error or ERR::CORRUPTED.
ScanForEcdr(const PFile & file,off_t fileSize,u8 * buf,size_t maxScanSize,size_t & cd_numEntries,off_t & cd_ofs,size_t & cd_size)514 	static Status ScanForEcdr(const PFile& file, off_t fileSize, u8* buf, size_t maxScanSize, size_t& cd_numEntries, off_t& cd_ofs, size_t& cd_size)
515 	{
516 		// don't scan more than the entire file
517 		const size_t scanSize = std::min(maxScanSize, size_t(fileSize));
518 
519 		// read desired chunk of file into memory
520 		const off_t ofs = fileSize - off_t(scanSize);
521 		io::Operation op(*file.get(), buf, scanSize, ofs);
522 		RETURN_STATUS_IF_ERR(io::Run(op));
523 
524 		// look for ECDR in buffer
525 		const ECDR* ecdr = (const ECDR*)FindRecord(buf, scanSize, buf, ecdr_magic, sizeof(ECDR));
526 		if(!ecdr)
527 			return INFO::CANNOT_HANDLE;
528 
529 		ecdr->Decompose(cd_numEntries, cd_ofs, cd_size);
530 		return INFO::OK;
531 	}
532 
LocateCentralDirectory(const PFile & file,off_t fileSize,off_t & cd_ofs,size_t & cd_numEntries,size_t & cd_size)533 	static Status LocateCentralDirectory(const PFile& file, off_t fileSize, off_t& cd_ofs, size_t& cd_numEntries, size_t& cd_size)
534 	{
535 		const size_t maxScanSize = 66000u;	// see below
536 		UniqueRange buf(io::Allocate(maxScanSize));
537 
538 		// expected case: ECDR at EOF; no file comment
539 		Status ret = ScanForEcdr(file, fileSize, (u8*)buf.get(), sizeof(ECDR), cd_numEntries, cd_ofs, cd_size);
540 		if(ret == INFO::OK)
541 			return INFO::OK;
542 		// worst case: ECDR precedes 64 KiB of file comment
543 		ret = ScanForEcdr(file, fileSize, (u8*)buf.get(), maxScanSize, cd_numEntries, cd_ofs, cd_size);
544 		if(ret == INFO::OK)
545 			return INFO::OK;
546 
547 		// both ECDR scans failed - this is not a valid Zip file.
548 		io::Operation op(*file.get(), buf.get(), sizeof(LFH));
549 		RETURN_STATUS_IF_ERR(io::Run(op));
550 		// the Zip file has an LFH but lacks an ECDR. this can happen if
551 		// the user hard-exits while an archive is being written.
552 		// notes:
553 		// - return ERR::CORRUPTED so VFS will not include this file.
554 		// - we could work around this by scanning all LFHs, but won't bother
555 		//   because it'd be slow.
556 		// - do not warn - the corrupt archive will be deleted on next
557 		//   successful archive builder run anyway.
558 		if(FindRecord((const u8*)buf.get(), sizeof(LFH), (const u8*)buf.get(), lfh_magic, sizeof(LFH)))
559 			return ERR::CORRUPTED;	// NOWARN
560 		// totally bogus
561 		else
562 			WARN_RETURN(ERR::ARCHIVE_UNKNOWN_FORMAT);
563 	}
564 
565 	PFile m_file;
566 	off_t m_fileSize;
567 };
568 
CreateArchiveReader_Zip(const OsPath & archivePathname)569 PIArchiveReader CreateArchiveReader_Zip(const OsPath& archivePathname)
570 {
571 	try
572 	{
573 		return PIArchiveReader(new ArchiveReader_Zip(archivePathname));
574 	}
575 	catch(Status)
576 	{
577 		return PIArchiveReader();
578 	}
579 }
580 
581 
582 //-----------------------------------------------------------------------------
583 // ArchiveWriter_Zip
584 //-----------------------------------------------------------------------------
585 
586 class ArchiveWriter_Zip : public IArchiveWriter
587 {
588 public:
ArchiveWriter_Zip(const OsPath & archivePathname,bool noDeflate)589 	ArchiveWriter_Zip(const OsPath& archivePathname, bool noDeflate)
590 		: m_file(new File(archivePathname, O_WRONLY)), m_fileSize(0)
591 		, m_numEntries(0), m_noDeflate(noDeflate)
592 	{
593 		THROW_STATUS_IF_ERR(pool_create(&m_cdfhPool, 10*MiB, 0));
594 	}
595 
~ArchiveWriter_Zip()596 	~ArchiveWriter_Zip()
597 	{
598 		// append an ECDR to the CDFH list (this allows us to
599 		// write out both to the archive file in one burst)
600 		const size_t cd_size = m_cdfhPool.da.pos;
601 		ECDR* ecdr = (ECDR*)pool_alloc(&m_cdfhPool, sizeof(ECDR));
602 		if(!ecdr)
603 			std::terminate();
604 		const off_t cd_ofs = m_fileSize;
605 		ecdr->Init(m_numEntries, cd_ofs, cd_size);
606 
607 		if(write(m_file->Descriptor(), m_cdfhPool.da.base, cd_size+sizeof(ECDR)) < 0)
608 			DEBUG_WARN_ERR(ERR::IO);	// no way to return error code
609 
610 		(void)pool_destroy(&m_cdfhPool);
611 	}
612 
AddFile(const OsPath & pathname,const OsPath & pathnameInArchive)613 	Status AddFile(const OsPath& pathname, const OsPath& pathnameInArchive)
614 	{
615 		CFileInfo fileInfo;
616 		RETURN_STATUS_IF_ERR(GetFileInfo(pathname, &fileInfo));
617 
618 		PFile file(new File);
619 		RETURN_STATUS_IF_ERR(file->Open(pathname, O_RDONLY));
620 
621 		return AddFileOrMemory(fileInfo, pathnameInArchive, file, NULL);
622 	}
623 
AddMemory(const u8 * data,size_t size,time_t mtime,const OsPath & pathnameInArchive)624 	Status AddMemory(const u8* data, size_t size, time_t mtime, const OsPath& pathnameInArchive)
625 	{
626 		CFileInfo fileInfo(pathnameInArchive, size, mtime);
627 
628 		return AddFileOrMemory(fileInfo, pathnameInArchive, PFile(), data);
629 	}
630 
AddFileOrMemory(const CFileInfo & fileInfo,const OsPath & pathnameInArchive,const PFile & file,const u8 * data)631 	Status AddFileOrMemory(const CFileInfo& fileInfo, const OsPath& pathnameInArchive, const PFile& file, const u8* data)
632 	{
633 		ENSURE((file && !data) || (data && !file));
634 
635 		const off_t usize = fileInfo.Size();
636 		// skip 0-length files.
637 		// rationale: zip.cpp needs to determine whether a CDFH entry is
638 		// a file or directory (the latter are written by some programs but
639 		// not needed - they'd only pollute the file table).
640 		// it looks like checking for usize=csize=0 is the safest way -
641 		// relying on file attributes (which are system-dependent!) is
642 		// even less safe.
643 		// we thus skip 0-length files to avoid confusing them with directories.
644 		if(!usize)
645 			return INFO::SKIPPED;
646 
647 		const size_t pathnameLength = pathnameInArchive.string().length();
648 
649 		// choose method and the corresponding codec
650 		ZipMethod method;
651 		PICodec codec;
652 		if(m_noDeflate || IsFileTypeIncompressible(pathnameInArchive))
653 		{
654 			method = ZIP_METHOD_NONE;
655 			codec = CreateCodec_ZLibNone();
656 		}
657 		else
658 		{
659 			method = ZIP_METHOD_DEFLATE;
660 			codec = CreateCompressor_ZLibDeflate();
661 		}
662 
663 		// allocate memory
664 		const size_t csizeMax = codec->MaxOutputSize(size_t(usize));
665 		UniqueRange buf(io::Allocate(sizeof(LFH) + pathnameLength + csizeMax));
666 
667 		// read and compress file contents
668 		size_t csize; u32 checksum;
669 		{
670 			u8* cdata = (u8*)buf.get() + sizeof(LFH) + pathnameLength;
671 			Stream stream(codec);
672 			stream.SetOutputBuffer(cdata, csizeMax);
673 			StreamFeeder streamFeeder(stream);
674 			if(file)
675 			{
676 				io::Operation op(*file.get(), 0, usize);
677 				RETURN_STATUS_IF_ERR(io::Run(op, io::Parameters(), streamFeeder));
678 			}
679 			else
680 			{
681 				RETURN_STATUS_IF_ERR(streamFeeder(data, usize));
682 			}
683 			RETURN_STATUS_IF_ERR(stream.Finish());
684 			csize = stream.OutSize();
685 			checksum = stream.Checksum();
686 		}
687 
688 		// build LFH
689 		{
690 			LFH* lfh = (LFH*)buf.get();
691 			lfh->Init(fileInfo, (off_t)csize, method, checksum, pathnameInArchive);
692 		}
693 
694 		// append a CDFH to the central directory (in memory)
695 		const off_t ofs = m_fileSize;
696 		const size_t prev_pos = m_cdfhPool.da.pos;	// (required to determine padding size)
697 		const size_t cdfhSize = sizeof(CDFH) + pathnameLength;
698 		CDFH* cdfh = (CDFH*)pool_alloc(&m_cdfhPool, cdfhSize);
699 		if(!cdfh)
700 			WARN_RETURN(ERR::NO_MEM);
701 		const size_t slack = m_cdfhPool.da.pos - prev_pos - cdfhSize;
702 		cdfh->Init(fileInfo, ofs, (off_t)csize, method, checksum, pathnameInArchive, slack);
703 		m_numEntries++;
704 
705 		// write LFH, pathname and cdata to file
706 		const size_t packageSize = sizeof(LFH) + pathnameLength + csize;
707 		if(write(m_file->Descriptor(), buf.get(), packageSize) < 0)
708 			WARN_RETURN(ERR::IO);
709 		m_fileSize += (off_t)packageSize;
710 
711 		return INFO::OK;
712 	}
713 
714 private:
IsFileTypeIncompressible(const OsPath & pathname)715 	static bool IsFileTypeIncompressible(const OsPath& pathname)
716 	{
717 		const OsPath extension = pathname.Extension();
718 
719 		// file extensions that we don't want to compress
720 		static const wchar_t* incompressibleExtensions[] =
721 		{
722 			L".zip", L".rar",
723 			L".jpg", L".jpeg", L".png",
724 			L".ogg", L".mp3"
725 		};
726 
727 		for(size_t i = 0; i < ARRAY_SIZE(incompressibleExtensions); i++)
728 		{
729 			if(extension == incompressibleExtensions[i])
730 				return true;
731 		}
732 
733 		return false;
734 	}
735 
736 	PFile m_file;
737 	off_t m_fileSize;
738 
739 	Pool m_cdfhPool;
740 	size_t m_numEntries;
741 
742 	bool m_noDeflate;
743 };
744 
CreateArchiveWriter_Zip(const OsPath & archivePathname,bool noDeflate)745 PIArchiveWriter CreateArchiveWriter_Zip(const OsPath& archivePathname, bool noDeflate)
746 {
747 	try
748 	{
749 		return PIArchiveWriter(new ArchiveWriter_Zip(archivePathname, noDeflate));
750 	}
751 	catch(Status)
752 	{
753 		return PIArchiveWriter();
754 	}
755 }
756