1 /* $Id: archive_zip.cpp 621323 2020-12-09 19:22:21Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Vladimir Ivanov
27 *
28 * File Description:
29 * Compression archive API - ZIP file support.
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <util/error_codes.hpp>
35 #include <util/compress/zlib.hpp>
36 #include "archive_zip.hpp"
37
38 #define NCBI_USE_ERRCODE_X Util_Compress
39
40
41 BEGIN_NCBI_SCOPE
42
43
44 // Directly include miniz library
45
46 // disable zlib emulation, we have it separately anyway
47 #define MINIZ_NO_ZLIB_APIS
48
49 // Disable miniz warning about using large files on BSD and Cygwin64
50 #if defined(NCBI_OS_BSD) || defined(NCBI_OS_CYGWIN)
51 # define fopen64 fopen
52 # define ftello64 ftello
53 # define fseeko64 fseeko
54 # define stat64 stat
55 # define freopen64 freopen
56 # define _LARGEFILE64_SOURCE
57 #endif
58
59 #include "miniz/miniz.c"
60 #include "miniz/miniz_zip.c"
61 #include "miniz/miniz_tdef.c"
62 #include "miniz/miniz_tinfl.c"
63
64
65 /////////////////////////////////////////////////////////////////////////
66 //
67 // Constants / macros / typedefs
68 //
69
70 /// ZIP archive handle type definition.
71 struct SZipHandle {
SZipHandleSZipHandle72 SZipHandle() {
73 Reset();
74 }
ResetSZipHandle75 void Reset(void) {
76 memset(&zip, 0, sizeof(zip));
77 }
78 mz_zip_archive zip;
79 };
80
81
// Macros to work with zip-archive handle.
// All of them expect a 'SZipHandle* m_Handle' to be in scope.

// Pointer to the miniz archive descriptor stored in the handle.
// Fully parenthesized, so it stays a single operand in any expression.
#define ZIP_HANDLE (&(m_Handle->zip))

// Debug-only check that the handle has been allocated.
#define ZIP_CHECK _ASSERT(m_Handle != NULL)

// Allocate a new handle; the previous one must be already released.
// Wrapped into do/while(0) to behave as a single statement, so that
// 'if (...) ZIP_NEW; else ...' compiles as expected.
#define ZIP_NEW \
    do { \
        _ASSERT(m_Handle == NULL); \
        m_Handle = new SZipHandle(); \
        _ASSERT(m_Handle != NULL); \
    } while (0)

// Release the handle and reset the pointer.
#define ZIP_DELETE \
    do { \
        _ASSERT(m_Handle != NULL); \
        delete m_Handle; \
        m_Handle = NULL; \
    } while (0)

// Throw exception
#define ZIP_THROW(errcode, message) \
    NCBI_THROW(CArchiveException, errcode, message)
102
103
104
105 /////////////////////////////////////////////////////////////////////////
106 //
// CArchiveZip
108 //
109
~CArchiveZip(void)110 CArchiveZip::~CArchiveZip(void)
111 {
112 try {
113 if ( m_Handle ) {
114 Close();
115 delete m_Handle;
116 }
117 }
118 COMPRESS_HANDLE_EXCEPTIONS(94, "CArchiveZip::~CArchiveZip");
119 }
120
121
CreateFile(const string & filename)122 void CArchiveZip::CreateFile(const string& filename)
123 {
124 ZIP_NEW;
125 m_Mode = eWrite;
126 m_Location = eFile;
127 mz_bool status = mz_zip_writer_init_file(ZIP_HANDLE, filename.c_str(), 0);
128 if (!status) {
129 m_Handle = NULL;
130 ZIP_THROW(eCreate, "Cannot create archive file '" + filename + "'");
131 }
132 return;
133 }
134
135
CreateMemory(size_t initial_allocation_size)136 void CArchiveZip::CreateMemory(size_t initial_allocation_size)
137 {
138 ZIP_NEW;
139 m_Mode = eWrite;
140 m_Location = eMemory;
141 mz_bool status = mz_zip_writer_init_heap(ZIP_HANDLE, 0, initial_allocation_size);
142 if (!status) {
143 m_Handle = NULL;
144 ZIP_THROW(eCreate, "Cannot create archive in memory");
145 }
146 return;
147 }
148
149
OpenFile(const string & filename)150 void CArchiveZip::OpenFile(const string& filename)
151 {
152 ZIP_NEW;
153 m_Mode = eRead;
154 m_Location = eFile;
155 mz_bool status = mz_zip_reader_init_file(ZIP_HANDLE, filename.c_str(), 0);
156 if (!status) {
157 ZIP_DELETE;
158 ZIP_THROW(eOpen, "Cannot open archive file '" + filename + "'");
159 }
160 return;
161 }
162
163
OpenMemory(const void * buf,size_t size)164 void CArchiveZip::OpenMemory(const void* buf, size_t size)
165 {
166 ZIP_NEW;
167 m_Mode = eRead;
168 m_Location = eMemory;
169 mz_bool status = mz_zip_reader_init_mem(ZIP_HANDLE, buf, size, 0);
170 if (!status) {
171 ZIP_DELETE;
172 ZIP_THROW(eOpen, "Cannot open archive in memory");
173 }
174 return;
175 }
176
177
FinalizeMemory(void ** buf,size_t * size)178 void CArchiveZip::FinalizeMemory(void** buf, size_t* size)
179 {
180 _ASSERT(m_Location == eMemory);
181 _ASSERT(m_Mode == eWrite);
182 _ASSERT(buf);
183 _ASSERT(size);
184 ZIP_CHECK;
185
186 *buf = NULL;
187 *size = 0;
188 mz_bool status = mz_zip_writer_finalize_heap_archive(ZIP_HANDLE, buf, size);
189 if (!status) {
190 // Deallocate memory buffer to avoid memory leak
191 if (*buf) {
192 free(*buf);
193 *buf = NULL;
194 *size = 0;
195 }
196 ZIP_THROW(eMemory, "Cannot finalize archive in memory");
197 }
198 return;
199 }
200
201
Close(void)202 void CArchiveZip::Close(void)
203 {
204 _ASSERT(m_Mode == eRead || m_Mode == eWrite);
205 ZIP_CHECK;
206
207 mz_bool status = true;
208 switch(m_Mode) {
209 case eRead:
210 status = mz_zip_reader_end(ZIP_HANDLE);
211 break;
212 case eWrite:
213 // Automatically finalize file archive only.
214 // The archive located in memory will be lost
215 // on this step, unless FinalizeMemory() was
216 // not called before.
217 if (m_Location == eFile) {
218 status = mz_zip_writer_finalize_archive(ZIP_HANDLE);
219 }
220 if ( !mz_zip_writer_end(ZIP_HANDLE) ) {
221 status = false;
222 }
223 break;
224 default:
225 break;
226 }
227 if (!status) {
228 ZIP_THROW(eClose, "Error closing archive");
229 }
230 ZIP_DELETE;
231 return;
232 }
233
234
GetNumEntries(void)235 size_t CArchiveZip::GetNumEntries(void)
236 {
237 _ASSERT(m_Mode == eRead);
238 ZIP_CHECK;
239 mz_uint n = mz_zip_reader_get_num_files(ZIP_HANDLE);
240 return n;
241 }
242
243
GetEntryInfo(size_t index,CArchiveEntryInfo * info)244 void CArchiveZip::GetEntryInfo(size_t index, CArchiveEntryInfo* info)
245 {
246 _ASSERT(m_Mode == eRead);
247 _ASSERT(info);
248 ZIP_CHECK;
249
250 // Check index to fit 'unsigned int' which used internally in miniz
251 if (index > (size_t)kMax_UInt) {
252 NCBI_THROW(CCoreException, eInvalidArg, "Bad index value");
253 }
254 // Get file informations
255 mz_zip_archive_file_stat fs;
256 mz_bool status = mz_zip_reader_file_stat(ZIP_HANDLE, (mz_uint)index, &fs);
257 if (!status) {
258 ZIP_THROW(eList, "Cannot get entry information by index " +
259 NStr::SizetToString(index));
260 }
261 // Copy known data into CArchiveEntryInfo
262 info->m_Index = index;
263 info->m_CompressedSize = fs.m_comp_size;
264 info->m_Stat.st_size = fs.m_uncomp_size;
265 info->m_Stat.st_atime = fs.m_time;
266 info->m_Stat.st_ctime = fs.m_time;
267 info->m_Stat.st_mtime = fs.m_time;
268 info->m_Name.assign(fs.m_filename);
269 info->m_Comment.assign(fs.m_comment, fs.m_comment_size);
270
271 // Rough check on a directory (using MS-DOS type compatible attribute)?
272 status = mz_zip_reader_is_file_a_directory(ZIP_HANDLE, (mz_uint)index);
273 info->m_Type = status ? CDirEntry::eDir : CDirEntry::eFile;
274
275 // miniz don't work with entry attributes, because it
276 // is very OS- and creation software dependent.
277 // Try to analyze some common cases for Unix-type attributes:
278
279 char ver = (char)(fs.m_version_made_by >> 8);
280 mode_t mode = (fs.m_external_attr >> 16) & 0xFFFF;
281
282 switch (ver) {
283 // Unix
284 case 1: // Amiga
285 case 2: // VAX VMS
286 case 3: // Unix
287 case 4: // VM/CMS
288 case 5: // Atari ST
289 case 7: // Macintosh
290 case 8: // Z-System
291 case 9: // CP/M
292 {{
293 info->m_Stat.st_mode = mode;
294 info->m_Type = CDirEntry::GetType(info->m_Stat);
295 if (info->m_Type == CDirEntry::eUnknown) {
296 // Reset attributes value, we cannot be sure that
297 // it hold correct value
298 info->m_Stat.st_mode = 0;
299 }
300 }}
301 break;
302 // Dos
303 case 0: // MS-DOS or OS/2 FAT
304 case 6: // OS/2 HPFS
305 // Unknown
306 default:
307 break;
308 }
309 return;
310 }
311
312
HaveSupport_Type(CDirEntry::EType type)313 bool CArchiveZip::HaveSupport_Type(CDirEntry::EType type)
314 {
315 switch (type) {
316 // supported
317 case CDirEntry::eFile:
318 case CDirEntry::eDir:
319 return true;
320 // unsupported
321 case CDirEntry::eLink:
322 case CDirEntry::eBlockSpecial:
323 case CDirEntry::eCharSpecial:
324 case CDirEntry::ePipe:
325 case CDirEntry::eDoor:
326 case CDirEntry::eSocket:
327 case CDirEntry::eUnknown:
328 default:
329 break;
330 }
331 return false;
332 }
333
334
ExtractEntryToFileSystem(const CArchiveEntryInfo & info,const string & dst_path)335 void CArchiveZip::ExtractEntryToFileSystem(const CArchiveEntryInfo& info,
336 const string& dst_path)
337 {
338 _ASSERT(m_Mode == eRead);
339 ZIP_CHECK;
340
341 // If this is a directory entry, we should create it.
342 if (info.GetType() == CDirEntry::eDir) {
343 if (!CDir(dst_path).CreatePath()) {
344 ZIP_THROW(eExtract, "Cannot create directory '" + dst_path + "'");
345 }
346 return;
347 }
348 // The code below extract files only.
349 mz_bool status;
350 MZ_FILE *pFile = MZ_FOPEN(dst_path.c_str(), "wb");
351 if (!pFile) {
352 ZIP_THROW(eExtract, "Cannot create target file '" + dst_path + "'");
353 }
354 status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
355 mz_zip_file_write_callback, pFile, 0);
356 if (MZ_FCLOSE(pFile) == EOF) {
357 ZIP_THROW(eExtract, "Error close file '" + dst_path + "'");
358 }
359 if (!status) {
360 ZIP_THROW(eExtract, "Error extracting entry with index '" +
361 NStr::SizetToString(info.m_Index) + " to file '" + dst_path + "'");
362 }
363 return;
364 }
365
366
ExtractEntryToMemory(const CArchiveEntryInfo & info,void * buf,size_t size)367 void CArchiveZip::ExtractEntryToMemory(const CArchiveEntryInfo& info, void* buf, size_t size)
368 {
369 _ASSERT(m_Mode == eRead);
370 _ASSERT(buf);
371 _ASSERT(size);
372 ZIP_CHECK;
373
374 // If this is a directory entry, skip it
375 if (info.GetType() == CDirEntry::eDir) {
376 return;
377 }
378 // The code below extract files only.
379 mz_bool status;
380 status = mz_zip_reader_extract_to_mem(ZIP_HANDLE, (mz_uint)info.m_Index, buf, size, 0);
381 if (!status) {
382 ZIP_THROW(eExtract, "Error extracting entry with index " +
383 NStr::SizetToString(info.m_Index) + " to memory");
384 }
385 return;
386 }
387
388
389 // Structure to pass all necessary data to write callback
390 struct SWriteCallbackData {
391 IArchive::Callback_Write callback;
392 const CArchiveEntryInfo* info;
393 };
394
395 // Callback for extracting data, call user-defined callback to do a real job.
396 extern "C"
397 {
s_ZipExtractCallback(void * params,mz_uint64,const void * buf,size_t n)398 static size_t s_ZipExtractCallback(void* params, mz_uint64 /*ofs*/, const void* buf, size_t n)
399 {
400 _ASSERT(params);
401 SWriteCallbackData& data = *(SWriteCallbackData*)(params);
402 // Call user callback
403 size_t processed = data.callback(*data.info, buf, n);
404 return processed;
405 }
406 }
407
ExtractEntryToCallback(const CArchiveEntryInfo & info,Callback_Write callback)408 void CArchiveZip::ExtractEntryToCallback(const CArchiveEntryInfo& info, Callback_Write callback)
409 {
410 _ASSERT(m_Mode == eRead);
411 ZIP_CHECK;
412
413 // If this is a directory entry, skip it
414 if (info.GetType() == CDirEntry::eDir) {
415 return;
416 }
417 // The code below extract files only.
418 SWriteCallbackData data;
419 data.callback = callback;
420 data.info = &info;
421 mz_bool status;
422 status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
423 s_ZipExtractCallback, &data, 0);
424 if (!status) {
425 ZIP_THROW(eExtract, "Error extracting entry with index " +
426 NStr::SizetToString(info.m_Index) + " to callback");
427 }
428 return;
429 }
430
431
432 // Dummy callback to test an entry extraction
433 extern "C"
434 {
s_ZipTestCallback(void *,mz_uint64,const void *,size_t n)435 static size_t s_ZipTestCallback(void* /*pOpaque*/, mz_uint64 /*ofs*/,
436 const void* /*pBuf*/, size_t n)
437 {
438 // Just return number of extracted bytes
439 return n;
440 }
441 }
442
TestEntry(const CArchiveEntryInfo & info)443 void CArchiveZip::TestEntry(const CArchiveEntryInfo& info)
444 {
445 _ASSERT(m_Mode == eRead);
446 ZIP_CHECK;
447
448 // If this is a directory entry, skip it
449 if (info.GetType() == CDirEntry::eDir) {
450 return;
451 }
452 // The code below test files only.
453 mz_bool status;
454 status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
455 s_ZipTestCallback, 0, 0);
456 if (!status) {
457 ZIP_THROW(eExtract, "Test entry with index " +
458 NStr::SizetToString(info.m_Index) + " failed");
459 }
460 return;
461 }
462
463
AddEntryFromFileSystem(const CArchiveEntryInfo & info,const string & src_path,ELevel level)464 void CArchiveZip::AddEntryFromFileSystem(const CArchiveEntryInfo& info,
465 const string& src_path, ELevel level)
466 {
467 const string& comment = info.m_Comment;
468 mz_uint16 comment_size = (mz_uint16)comment.size();
469 mz_bool status;
470 if (info.m_Type == CDirEntry::eDir) {
471 status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
472 NULL, 0, /* empty buffer */
473 comment.c_str(), comment_size, (mz_uint)level, 0, 0);
474 } else {
475 // Files only
476 _ASSERT(info.m_Type == CDirEntry::eFile);
477 status = mz_zip_writer_add_file (ZIP_HANDLE,
478 info.GetName().c_str(), src_path.c_str(),
479 comment.c_str(), comment_size, (mz_uint)level);
480 }
481 if (!status) {
482 ZIP_THROW(eAppend, "Error appending entry '" + src_path + "' to archive");
483 }
484 return;
485 }
486
487
AddEntryFromMemory(const CArchiveEntryInfo & info,void * buf,size_t size,ELevel level)488 void CArchiveZip::AddEntryFromMemory(const CArchiveEntryInfo& info,
489 void* buf, size_t size, ELevel level)
490 {
491 const string& comment = info.m_Comment;
492 mz_uint16 comment_size = (mz_uint16)comment.size();
493 mz_bool status;
494 status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
495 buf, size, comment.c_str(), comment_size, (mz_uint)level, 0, 0);
496 if (!status) {
497 ZIP_THROW(eAppend, "Error appending entry with name '" +
498 info.GetName() + "' from memory to archive");
499 }
500 return;
501 }
502
503
504 END_NCBI_SCOPE
505