1 /* $Id: archive_zip.cpp 621323 2020-12-09 19:22:21Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Vladimir Ivanov
27  *
28  * File Description:
29  *   Compression archive API - ZIP file support.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <util/error_codes.hpp>
35 #include <util/compress/zlib.hpp>
36 #include "archive_zip.hpp"
37 
38 #define NCBI_USE_ERRCODE_X  Util_Compress
39 
40 
41 BEGIN_NCBI_SCOPE
42 
43 
44 // Directly include miniz library
45 
46 // disable zlib emulation, we have it separately anyway
47 #define MINIZ_NO_ZLIB_APIS
48 
49 // Disable miniz warning about using large files on BSD and Cygwin64
50 #if defined(NCBI_OS_BSD)  ||  defined(NCBI_OS_CYGWIN)
51 #  define fopen64   fopen
52 #  define ftello64  ftello
53 #  define fseeko64  fseeko
54 #  define stat64    stat
55 #  define freopen64 freopen
56 #  define _LARGEFILE64_SOURCE
57 #endif
58 
59 #include "miniz/miniz.c"
60 #include "miniz/miniz_zip.c"
61 #include "miniz/miniz_tdef.c"
62 #include "miniz/miniz_tinfl.c"
63 
64 
65 /////////////////////////////////////////////////////////////////////////
66 //
67 // Constants / macros / typedefs
68 //
69 
70 /// ZIP archive handle type definition.
71 struct SZipHandle {
SZipHandleSZipHandle72     SZipHandle() {
73         Reset();
74     }
ResetSZipHandle75     void Reset(void) {
76         memset(&zip, 0, sizeof(zip));
77     }
78     mz_zip_archive zip;
79 };
80 
81 
// Macros to work with zip-archive handle.
// All of them operate on a 'SZipHandle* m_Handle' visible at the point of use.

// Pointer to the miniz archive structure kept inside the handle.
// Fully parenthesized so it can be used safely inside larger expressions.
#define ZIP_HANDLE  (&(m_Handle->zip))

// Debug check that the handle is allocated.
#define ZIP_CHECK   _ASSERT(m_Handle != NULL)

// Allocate a new handle; it must not be allocated yet.
// Wrapped into do/while(0) so that 'ZIP_NEW;' is exactly one statement
// and stays correct inside an unbraced if/else.
#define ZIP_NEW \
    do { \
        _ASSERT(m_Handle == NULL);   \
        m_Handle = new SZipHandle(); \
        _ASSERT(m_Handle != NULL);   \
    } while (0)

// Deallocate the handle and reset the pointer to NULL.
// Same do/while(0) wrapping as ZIP_NEW.
#define ZIP_DELETE \
    do { \
        _ASSERT(m_Handle != NULL);  \
        delete m_Handle; \
        m_Handle = NULL; \
    } while (0)

// Throw exception
#define ZIP_THROW(errcode, message) \
    NCBI_THROW(CArchiveException, errcode, message)
102 
103 
104 
105 /////////////////////////////////////////////////////////////////////////
106 //
// CArchiveZip
108 //
109 
~CArchiveZip(void)110 CArchiveZip::~CArchiveZip(void)
111 {
112     try {
113         if ( m_Handle ) {
114             Close();
115             delete m_Handle;
116         }
117     }
118     COMPRESS_HANDLE_EXCEPTIONS(94, "CArchiveZip::~CArchiveZip");
119 }
120 
121 
CreateFile(const string & filename)122 void CArchiveZip::CreateFile(const string& filename)
123 {
124     ZIP_NEW;
125     m_Mode = eWrite;
126     m_Location = eFile;
127     mz_bool status = mz_zip_writer_init_file(ZIP_HANDLE, filename.c_str(), 0);
128     if (!status) {
129         m_Handle = NULL;
130         ZIP_THROW(eCreate, "Cannot create archive file '" + filename + "'");
131     }
132     return;
133 }
134 
135 
CreateMemory(size_t initial_allocation_size)136 void CArchiveZip::CreateMemory(size_t initial_allocation_size)
137 {
138     ZIP_NEW;
139     m_Mode = eWrite;
140     m_Location = eMemory;
141     mz_bool status = mz_zip_writer_init_heap(ZIP_HANDLE, 0, initial_allocation_size);
142     if (!status) {
143         m_Handle = NULL;
144         ZIP_THROW(eCreate, "Cannot create archive in memory");
145     }
146     return;
147 }
148 
149 
OpenFile(const string & filename)150 void CArchiveZip::OpenFile(const string& filename)
151 {
152     ZIP_NEW;
153     m_Mode = eRead;
154     m_Location = eFile;
155     mz_bool status = mz_zip_reader_init_file(ZIP_HANDLE, filename.c_str(), 0);
156     if (!status) {
157         ZIP_DELETE;
158         ZIP_THROW(eOpen, "Cannot open archive file '" + filename + "'");
159     }
160     return;
161 }
162 
163 
OpenMemory(const void * buf,size_t size)164 void CArchiveZip::OpenMemory(const void* buf, size_t size)
165 {
166     ZIP_NEW;
167     m_Mode = eRead;
168     m_Location = eMemory;
169     mz_bool status = mz_zip_reader_init_mem(ZIP_HANDLE, buf, size, 0);
170     if (!status) {
171         ZIP_DELETE;
172         ZIP_THROW(eOpen, "Cannot open archive in memory");
173     }
174     return;
175 }
176 
177 
FinalizeMemory(void ** buf,size_t * size)178 void CArchiveZip::FinalizeMemory(void** buf, size_t* size)
179 {
180     _ASSERT(m_Location == eMemory);
181     _ASSERT(m_Mode == eWrite);
182     _ASSERT(buf);
183     _ASSERT(size);
184     ZIP_CHECK;
185 
186     *buf = NULL;
187     *size = 0;
188     mz_bool status = mz_zip_writer_finalize_heap_archive(ZIP_HANDLE, buf, size);
189     if (!status) {
190         // Deallocate memory buffer to avoid memory leak
191         if (*buf) {
192             free(*buf);
193             *buf = NULL;
194             *size = 0;
195         }
196         ZIP_THROW(eMemory, "Cannot finalize archive in memory");
197     }
198     return;
199 }
200 
201 
Close(void)202 void CArchiveZip::Close(void)
203 {
204     _ASSERT(m_Mode == eRead || m_Mode == eWrite);
205     ZIP_CHECK;
206 
207     mz_bool status = true;
208     switch(m_Mode) {
209     case eRead:
210         status = mz_zip_reader_end(ZIP_HANDLE);
211         break;
212     case eWrite:
213         // Automatically finalize file archive only.
214         // The archive located in memory will be lost
215         // on this step, unless FinalizeMemory() was
216         // not called before.
217         if (m_Location == eFile) {
218             status = mz_zip_writer_finalize_archive(ZIP_HANDLE);
219         }
220         if ( !mz_zip_writer_end(ZIP_HANDLE) ) {
221             status = false;
222         }
223         break;
224     default:
225         break;
226     }
227     if (!status) {
228         ZIP_THROW(eClose, "Error closing archive");
229     }
230     ZIP_DELETE;
231     return;
232 }
233 
234 
GetNumEntries(void)235 size_t CArchiveZip::GetNumEntries(void)
236 {
237     _ASSERT(m_Mode == eRead);
238     ZIP_CHECK;
239     mz_uint n = mz_zip_reader_get_num_files(ZIP_HANDLE);
240     return n;
241 }
242 
243 
GetEntryInfo(size_t index,CArchiveEntryInfo * info)244 void CArchiveZip::GetEntryInfo(size_t index, CArchiveEntryInfo* info)
245 {
246     _ASSERT(m_Mode == eRead);
247     _ASSERT(info);
248     ZIP_CHECK;
249 
250     // Check index to fit 'unsigned int' which used internally in miniz
251     if (index > (size_t)kMax_UInt) {
252         NCBI_THROW(CCoreException, eInvalidArg, "Bad index value");
253     }
254     // Get file informations
255     mz_zip_archive_file_stat fs;
256     mz_bool status = mz_zip_reader_file_stat(ZIP_HANDLE, (mz_uint)index, &fs);
257     if (!status) {
258         ZIP_THROW(eList, "Cannot get entry information by index " +
259             NStr::SizetToString(index));
260     }
261     // Copy known data into CArchiveEntryInfo
262     info->m_Index            = index;
263     info->m_CompressedSize   = fs.m_comp_size;
264     info->m_Stat.st_size     = fs.m_uncomp_size;
265     info->m_Stat.st_atime    = fs.m_time;
266     info->m_Stat.st_ctime    = fs.m_time;
267     info->m_Stat.st_mtime    = fs.m_time;
268     info->m_Name.assign(fs.m_filename);
269     info->m_Comment.assign(fs.m_comment, fs.m_comment_size);
270 
271     // Rough check on a directory (using MS-DOS type compatible attribute)?
272     status = mz_zip_reader_is_file_a_directory(ZIP_HANDLE, (mz_uint)index);
273     info->m_Type = status ? CDirEntry::eDir : CDirEntry::eFile;
274 
275     // miniz don't work with entry attributes, because it
276     // is very OS- and creation software dependent.
277     // Try to analyze some common cases for Unix-type attributes:
278 
279     char ver = (char)(fs.m_version_made_by >> 8);
280     mode_t mode = (fs.m_external_attr >> 16) & 0xFFFF;
281 
282     switch (ver) {
283     // Unix
284     case 1:  // Amiga
285     case 2:  // VAX VMS
286     case 3:  // Unix
287     case 4:  // VM/CMS
288     case 5:  // Atari ST
289     case 7:  // Macintosh
290     case 8:  // Z-System
291     case 9:  // CP/M
292         {{
293             info->m_Stat.st_mode = mode;
294             info->m_Type = CDirEntry::GetType(info->m_Stat);
295             if (info->m_Type == CDirEntry::eUnknown) {
296                 // Reset attributes value, we cannot be sure that
297                 // it hold correct value
298                 info->m_Stat.st_mode = 0;
299             }
300         }}
301         break;
302     // Dos
303     case 0:  // MS-DOS or OS/2 FAT
304     case 6:  // OS/2 HPFS
305     // Unknown
306     default:
307         break;
308     }
309     return;
310 }
311 
312 
HaveSupport_Type(CDirEntry::EType type)313 bool CArchiveZip::HaveSupport_Type(CDirEntry::EType type)
314 {
315     switch (type) {
316     // supported
317     case CDirEntry::eFile:
318     case CDirEntry::eDir:
319         return true;
320     // unsupported
321     case CDirEntry::eLink:
322     case CDirEntry::eBlockSpecial:
323     case CDirEntry::eCharSpecial:
324     case CDirEntry::ePipe:
325     case CDirEntry::eDoor:
326     case CDirEntry::eSocket:
327     case CDirEntry::eUnknown:
328     default:
329         break;
330     }
331     return false;
332 }
333 
334 
ExtractEntryToFileSystem(const CArchiveEntryInfo & info,const string & dst_path)335 void CArchiveZip::ExtractEntryToFileSystem(const CArchiveEntryInfo& info,
336                                            const string& dst_path)
337 {
338     _ASSERT(m_Mode == eRead);
339     ZIP_CHECK;
340 
341     // If this is a directory entry, we should create it.
342     if (info.GetType() == CDirEntry::eDir) {
343         if (!CDir(dst_path).CreatePath()) {
344             ZIP_THROW(eExtract, "Cannot create directory '" + dst_path + "'");
345         }
346         return;
347     }
348     // The code below extract files only.
349     mz_bool status;
350     MZ_FILE *pFile = MZ_FOPEN(dst_path.c_str(), "wb");
351     if (!pFile) {
352         ZIP_THROW(eExtract, "Cannot create target file '" + dst_path + "'");
353     }
354     status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
355                                                mz_zip_file_write_callback, pFile, 0);
356     if (MZ_FCLOSE(pFile) == EOF) {
357         ZIP_THROW(eExtract, "Error close file '" + dst_path + "'");
358     }
359     if (!status) {
360         ZIP_THROW(eExtract, "Error extracting entry with index '" +
361             NStr::SizetToString(info.m_Index) + " to file '" + dst_path + "'");
362     }
363     return;
364 }
365 
366 
ExtractEntryToMemory(const CArchiveEntryInfo & info,void * buf,size_t size)367 void CArchiveZip::ExtractEntryToMemory(const CArchiveEntryInfo& info, void* buf, size_t size)
368 {
369     _ASSERT(m_Mode == eRead);
370     _ASSERT(buf);
371     _ASSERT(size);
372     ZIP_CHECK;
373 
374     // If this is a directory entry, skip it
375     if (info.GetType() == CDirEntry::eDir) {
376         return;
377     }
378     // The code below extract files only.
379     mz_bool status;
380     status = mz_zip_reader_extract_to_mem(ZIP_HANDLE, (mz_uint)info.m_Index, buf, size, 0);
381     if (!status) {
382         ZIP_THROW(eExtract, "Error extracting entry with index " +
383             NStr::SizetToString(info.m_Index) + " to memory");
384     }
385     return;
386 }
387 
388 
389 // Structure to pass all necessary data to write callback
390 struct SWriteCallbackData {
391     IArchive::Callback_Write callback;
392     const CArchiveEntryInfo* info;
393 };
394 
395 // Callback for extracting data, call user-defined callback to do a real job.
396 extern "C"
397 {
s_ZipExtractCallback(void * params,mz_uint64,const void * buf,size_t n)398     static size_t s_ZipExtractCallback(void* params, mz_uint64 /*ofs*/, const void* buf, size_t n)
399     {
400         _ASSERT(params);
401         SWriteCallbackData& data = *(SWriteCallbackData*)(params);
402         // Call user callback
403         size_t processed = data.callback(*data.info, buf, n);
404         return processed;
405     }
406 }
407 
ExtractEntryToCallback(const CArchiveEntryInfo & info,Callback_Write callback)408 void CArchiveZip::ExtractEntryToCallback(const CArchiveEntryInfo& info, Callback_Write callback)
409 {
410     _ASSERT(m_Mode == eRead);
411     ZIP_CHECK;
412 
413     // If this is a directory entry, skip it
414     if (info.GetType() == CDirEntry::eDir) {
415         return;
416     }
417     // The code below extract files only.
418     SWriteCallbackData data;
419     data.callback = callback;
420     data.info     = &info;
421     mz_bool status;
422     status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
423                                                s_ZipExtractCallback, &data, 0);
424     if (!status) {
425         ZIP_THROW(eExtract, "Error extracting entry with index " +
426             NStr::SizetToString(info.m_Index) + " to callback");
427     }
428     return;
429 }
430 
431 
432 // Dummy callback to test an entry extraction
433 extern "C"
434 {
s_ZipTestCallback(void *,mz_uint64,const void *,size_t n)435     static size_t s_ZipTestCallback(void* /*pOpaque*/, mz_uint64 /*ofs*/,
436                                    const void* /*pBuf*/, size_t n)
437     {
438         // Just return number of extracted bytes
439         return n;
440     }
441 }
442 
TestEntry(const CArchiveEntryInfo & info)443 void CArchiveZip::TestEntry(const CArchiveEntryInfo& info)
444 {
445     _ASSERT(m_Mode == eRead);
446     ZIP_CHECK;
447 
448     // If this is a directory entry, skip it
449     if (info.GetType() == CDirEntry::eDir) {
450         return;
451     }
452     // The code below test files only.
453     mz_bool status;
454     status = mz_zip_reader_extract_to_callback(ZIP_HANDLE, (mz_uint)info.m_Index,
455                                                s_ZipTestCallback, 0, 0);
456     if (!status) {
457         ZIP_THROW(eExtract, "Test entry with index " +
458             NStr::SizetToString(info.m_Index) + " failed");
459     }
460     return;
461 }
462 
463 
AddEntryFromFileSystem(const CArchiveEntryInfo & info,const string & src_path,ELevel level)464 void CArchiveZip::AddEntryFromFileSystem(const CArchiveEntryInfo& info,
465                                          const string& src_path, ELevel level)
466 {
467     const string& comment = info.m_Comment;
468     mz_uint16 comment_size = (mz_uint16)comment.size();
469     mz_bool status;
470     if (info.m_Type == CDirEntry::eDir) {
471         status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
472                                           NULL, 0, /* empty buffer */
473                                           comment.c_str(), comment_size, (mz_uint)level, 0, 0);
474     } else {
475         // Files only
476         _ASSERT(info.m_Type == CDirEntry::eFile);
477         status = mz_zip_writer_add_file  (ZIP_HANDLE,
478                                           info.GetName().c_str(), src_path.c_str(),
479                                           comment.c_str(), comment_size, (mz_uint)level);
480     }
481     if (!status) {
482         ZIP_THROW(eAppend, "Error appending entry '" + src_path + "' to archive");
483     }
484     return;
485 }
486 
487 
AddEntryFromMemory(const CArchiveEntryInfo & info,void * buf,size_t size,ELevel level)488 void CArchiveZip::AddEntryFromMemory(const CArchiveEntryInfo& info,
489                                      void* buf, size_t size, ELevel level)
490 {
491     const string& comment = info.m_Comment;
492     mz_uint16 comment_size = (mz_uint16)comment.size();
493     mz_bool status;
494     status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
495                                       buf, size, comment.c_str(), comment_size, (mz_uint)level, 0, 0);
496     if (!status) {
497         ZIP_THROW(eAppend, "Error appending entry with name '" +
498             info.GetName() + "' from memory to archive");
499     }
500     return;
501 }
502 
503 
504 END_NCBI_SCOPE
505