1 /* $Id: archive.cpp 534861 2017-05-03 12:50:28Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Vladimir Ivanov,  Anton Lavrentiev
27  *
28  * File Description:
29  *   Compression archive API.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistr.hpp>
35 #include <util/compress/archive.hpp>
36 #include <util/error_codes.hpp>
37 #include "archive_zip.hpp"
38 
39 #if !defined(NCBI_OS_MSWIN)  &&  !defined(NCBI_OS_UNIX)
40 #  error "Class CArchive can be defined on MS-Windows and UNIX platforms only!"
41 #endif
42 
43 #define NCBI_USE_ERRCODE_X  Util_Compress
44 
45 
46 BEGIN_NCBI_SCOPE
47 
48 
49 /////////////////////////////////////////////////////////////////////////////
50 //
51 // Helper routines
52 //
53 
s_FormatMessage(CArchiveException::TErrCode errcode,const string & message,const CArchiveEntryInfo & info)54 static string s_FormatMessage(CArchiveException::TErrCode errcode,
55                               const string& message,
56                               const CArchiveEntryInfo& info)
57 {
58     string msg;
59     switch (errcode) {
60     case CArchiveException::eUnsupportedEntryType:
61         if (message.empty()) {
62             msg = "Unsupported entry type for '" + info.GetName() + "'";
63             break;
64         }
65     case CArchiveException::eList:
66     case CArchiveException::eExtract:
67     case CArchiveException::eBackup:
68     case CArchiveException::eRestoreAttrs:
69         msg = message;
70         if (!info.GetName().empty()) {
71             msg += ", while in '" + info.GetName() + '\'';
72         }
73         break;
74     case CArchiveException::eMemory:
75     case CArchiveException::eCreate:
76     case CArchiveException::eBadName:
77     case CArchiveException::eAppend:
78     case CArchiveException::eOpen:
79     case CArchiveException::eClose:
80     default:
81         msg = message;
82         break;
83     }
84     return msg;
85 }
86 
87 
s_OSReason(int x_errno)88 static string s_OSReason(int x_errno)
89 {
90     const char* strerr = x_errno ? strerror(x_errno) : 0;
91     return strerr  &&  *strerr ? string(": ") + strerr : kEmptyStr;
92 }
93 
94 
95 //////////////////////////////////////////////////////////////////////////////
96 //
97 // Constants / macros / typedefs
98 //
99 
// Throw exception.
// All three macros throw CArchiveException with the given error code; the
// message text is composed by s_FormatMessage().
// ARCHIVE_THROW1 and ARCHIVE_THROW read the entry information from
// this->m_Current, so they can be used only inside member functions of a
// class that has an m_Current member.
//
// ARCHIVE_THROW1: no caller message (kEmptyStr is passed to s_FormatMessage)
#define ARCHIVE_THROW1(errcode) \
    NCBI_THROW(CArchiveException, errcode, s_FormatMessage(CArchiveException::errcode, kEmptyStr, this->m_Current))
// ARCHIVE_THROW: explicit message, entry information from this->m_Current
#define ARCHIVE_THROW(errcode, message) \
    NCBI_THROW(CArchiveException, errcode, s_FormatMessage(CArchiveException::errcode, message, this->m_Current))
// ARCHIVE_THROW_INFO: explicit message and explicit entry information
#define ARCHIVE_THROW_INFO(errcode, message, info) \
    NCBI_THROW(CArchiveException, errcode, s_FormatMessage(CArchiveException::errcode, message, info))

/*
// Post message
// (Disabled. NOTE(review): as written the macro has an unbalanced parenthesis
//  and calls s_FormatMessage() with two arguments, while the function defined
//  in this file takes three -- fix both before re-enabling.)
#define ARCHIVE_POST(subcode, severity, message) \
    ERR_POST_X(subcode, (severity) << s_FormatMessage(this->m_Current, message)
*/
// Get archive handle (raw pointer held by m_Archive; may be NULL)
#define ARCHIVE        m_Archive.get()
// Check archive handle (asserts that m_Archive holds a non-NULL pointer)
#define ARCHIVE_CHECK  _ASSERT(m_Archive.get() != NULL)

// Macro to check flags bits.
// True only when ALL bits of "mask" are set in m_Flags.
#define F_ISSET(mask) ((m_Flags & (mask)) == (mask))
120 
121 
122 
123 //////////////////////////////////////////////////////////////////////////////
124 //
125 // Auxiliary functions
126 //
127 
128 /* Create path from entry name and base directory.
129 */
s_ToFilesystemPath(const string & base_dir,const string & name)130 static string s_ToFilesystemPath(const string& base_dir, const string& name)
131 {
132     string path(CDirEntry::IsAbsolutePath(name)  ||  base_dir.empty()
133                 ? name : CDirEntry::ConcatPath(base_dir, name));
134     return CDirEntry::NormalizePath(path);
135 }
136 
137 
138 /* Create archive name from path.
139 */
s_ToArchiveName(const string & base_dir,const string & path,bool is_absolute_allowed)140 static string s_ToArchiveName(const string& base_dir, const string& path, bool is_absolute_allowed)
141 {
142     // NB: Path assumed to have been normalized
143     string retval = CDirEntry::AddTrailingPathSeparator(path);
144 
145 #ifdef NCBI_OS_MSWIN
146     // Convert to Unix format with forward slashes
147     NStr::ReplaceInPlace(retval, "\\", "/");
148     const NStr::ECase how = NStr::eNocase;
149 #else
150     const NStr::ECase how = NStr::eCase;
151 #endif //NCBI_OS_MSWIN
152 
153     bool absolute;
154     // Remove leading base dir from the path
155     if (!base_dir.empty()  &&  NStr::StartsWith(retval, base_dir, how)) {
156         if (retval.size() > base_dir.size()) {
157             retval.erase(0, base_dir.size()/*separator too*/);
158         } else {
159             retval.assign(1, '.');
160         }
161         absolute = false;
162     } else {
163         absolute = CDirEntry::IsAbsolutePath(retval);
164     }
165     SIZE_TYPE pos = 0;
166 
167 #ifdef NCBI_OS_MSWIN
168     // Remove a disk name if present
169     if (retval.size() > 1
170         &&  isalpha((unsigned char) retval[0])  &&  retval[1] == ':') {
171         pos = 2;
172     }
173 #endif //NCBI_OS_MSWIN
174 
175     // Remove any leading and trailing slashes
176     while (pos < retval.size()  &&  retval[pos] == '/') {
177         pos++;
178     }
179     if (pos) {
180         retval.erase(0, pos);
181     }
182     pos = retval.size();
183     while (pos > 0  &&  retval[pos - 1] == '/') {
184         --pos;
185     }
186     if (pos < retval.size()) {
187         retval.erase(pos);
188     }
189 
190     // Add leading slash back, if allowed
191     if (absolute  &&  is_absolute_allowed) {
192         retval.insert((SIZE_TYPE) 0, 1, '/');
193     }
194     return retval;
195 }
196 
197 
198 
199 //////////////////////////////////////////////////////////////////////////////
200 //
201 // CArchive
202 //
203 
CArchive(EFormat format)204 CArchive::CArchive(EFormat format)
205     : m_Format(format),
206       m_Flags(fDefault),
207       m_OpenMode(eNone),
208       m_Modified(false)
209 {
210     // Create a new archive object
211     switch (format) {
212         case eZip:
213             m_Archive.reset(new CArchiveZip());
214             break;
215         default:
216             _TROUBLE;
217     }
218     if ( !ARCHIVE ) {
219         ARCHIVE_THROW(eMemory, "Cannot create archive object");
220     }
221 }
222 
223 
~CArchive()224 CArchive::~CArchive()
225 {
226     try {
227         Close();
228         // Archive interface should be closed on this moment, just delete it.
229         if ( ARCHIVE ) {
230             m_Archive.reset();
231         }
232         // Delete owned masks
233         UnsetMask();
234     }
235     COMPRESS_HANDLE_EXCEPTIONS(93, "CArchive::~CArchive");
236 }
237 
238 
Close(void)239 void CArchive::Close(void)
240 {
241     if (m_OpenMode == eNone) {
242         return;
243     }
244     ARCHIVE_CHECK;
245     ARCHIVE->Close();
246     m_OpenMode = eNone;
247     m_Modified = false;
248 }
249 
250 
SetMask(CMask * mask,EOwnership own,EMaskType type,NStr::ECase acase)251 void CArchive::SetMask(CMask* mask, EOwnership  own, EMaskType type, NStr::ECase acase)
252 {
253     SMask* m = NULL;
254     switch (type) {
255         case eFullPathMask:
256             m = &m_MaskFullPath;
257             break;
258         case ePatternMask:
259             m = &m_MaskPattern;
260             break;
261         default:
262             _TROUBLE;
263     }
264     if (m->owned) {
265         delete m->mask;
266     }
267     m->mask  = mask;
268     m->acase = acase;
269     m->owned = mask ? own : eNoOwnership;
270 }
271 
272 
UnsetMask(EMaskType type)273 void CArchive::UnsetMask(EMaskType type)
274 {
275     SetMask(NULL, eNoOwnership, type);
276 }
277 
278 
UnsetMask(void)279 void CArchive::UnsetMask(void)
280 {
281     SetMask(NULL, eNoOwnership, eFullPathMask);
282     SetMask(NULL, eNoOwnership, ePatternMask);
283 }
284 
285 
SetBaseDir(const string & dirname)286 void CArchive::SetBaseDir(const string& dirname)
287 {
288     string s = CDirEntry::AddTrailingPathSeparator(dirname);
289 #ifdef NCBI_OS_MSWIN
290     // Always use forward slashes internally
291     NStr::ReplaceInPlace(s, "\\", "/");
292 #endif
293     s.swap(m_BaseDir);
294 }
295 
296 
HaveSupport(ESupport feature,int param)297 bool CArchive::HaveSupport(ESupport feature, int param)
298 {
299     ARCHIVE_CHECK;
300     switch (feature) {
301     case eType:
302         return ARCHIVE->HaveSupport_Type((CDirEntry::EType)param);
303     case eAbsolutePath:
304         return ARCHIVE->HaveSupport_AbsolutePath();
305     }
306     return false;
307 }
308 
309 
Create(void)310 void CArchive::Create(void)
311 {
312     ARCHIVE_CHECK;
313     x_Open(eCreate);
314 }
315 
316 
List(void)317 unique_ptr<CArchive::TEntries> CArchive::List(void)
318 {
319     ARCHIVE_CHECK;
320     x_Open(eList);
321     return x_ReadAndProcess(eList);
322 }
323 
324 
Test(void)325 unique_ptr<CArchive::TEntries> CArchive::Test(void)
326 {
327     ARCHIVE_CHECK;
328     x_Open(eTest);
329     return x_ReadAndProcess(eTest);
330 }
331 
332 
Extract(void)333 unique_ptr<CArchive::TEntries> CArchive::Extract(void)
334 {
335     ARCHIVE_CHECK;
336     x_Open(eExtract);
337     unique_ptr<TEntries> entries = x_ReadAndProcess(eExtract);
338     // Restore attributes of "postponed" directory entries
339     if (F_ISSET(fPreserveAll)) {
340         ITERATE(TEntries, e, *entries) {
341             if (e->GetType() == CDirEntry::eDir) {
342                 x_RestoreAttrs(*e);
343             }
344         }
345     }
346     return entries;
347 }
348 
349 
ExtractFileToMemory(const CArchiveEntryInfo & info,void * buf,size_t buf_size,size_t * out_size)350 void CArchive::ExtractFileToMemory(const CArchiveEntryInfo& info, void* buf, size_t buf_size, size_t* out_size)
351 {
352     ARCHIVE_CHECK;
353     if (!buf  || !buf_size) {
354         NCBI_THROW(CCoreException, eInvalidArg, "Bad memory buffer");
355     }
356     if (out_size) {
357         *out_size = 0;
358     }
359     CDirEntry::EType type = info.GetType();
360     if (type == CDirEntry::eUnknown  &&  !F_ISSET(fSkipUnsupported)) {
361         // Conform to POSIX-mandated behavior to extract as files
362         type = CDirEntry::eFile;
363     }
364     if (type != CDirEntry::eFile) {
365         ARCHIVE_THROW_INFO(eUnsupportedEntryType, kEmptyStr, info);
366     }
367     x_Open(eExtract);
368     ARCHIVE->ExtractEntryToMemory(info, buf, buf_size);
369     if (out_size) {
370         *out_size = (size_t)info.GetSize();
371     }
372     return;
373 }
374 
375 
ExtractFileToHeap(const CArchiveEntryInfo & info,void ** buf_ptr,size_t * buf_size_ptr)376 void CArchive::ExtractFileToHeap(const CArchiveEntryInfo& info, void** buf_ptr, size_t* buf_size_ptr)
377 {
378     ARCHIVE_CHECK;
379     if (!buf_ptr  || !buf_size_ptr) {
380         NCBI_THROW(CCoreException, eInvalidArg, "Bad pointers to memory buffer");
381     }
382     *buf_ptr = NULL;
383     *buf_size_ptr = 0;
384 
385     CDirEntry::EType type = info.GetType();
386     if (type == CDirEntry::eUnknown  &&  !F_ISSET(fSkipUnsupported)) {
387         // Conform to POSIX-mandated behavior to extract as files
388         type = CDirEntry::eFile;
389     }
390     if (type != CDirEntry::eFile) {
391         ARCHIVE_THROW_INFO(eUnsupportedEntryType, kEmptyStr, info);
392     }
393     // Get size of buffer for memory allocation
394     Uint8 uncompressed_size = info.GetSize();
395     if (!uncompressed_size) {
396         // File is empty, do nothing
397         return;
398     }
399     if ( uncompressed_size > get_limits(*buf_size_ptr).max() ) {
400         ARCHIVE_THROW(eMemory, "File is too big to extract to memory, its size is " +
401                                 NStr::Int8ToString(uncompressed_size));
402     }
403     // Allocate memory
404     size_t x_uncompressed_size = (size_t)uncompressed_size;
405     void* ptr = malloc(x_uncompressed_size);
406     if (!ptr) {
407         ARCHIVE_THROW(eMemory, "Cannot allocate " +
408                                NStr::Int8ToString(uncompressed_size) +
409                                " bytes on heap");
410     }
411     try {
412         // Extract file
413         ExtractFileToMemory(info, ptr, x_uncompressed_size, NULL);
414     } catch(...) {
415         free(ptr);
416         throw;
417     }
418     // Return result
419     *buf_ptr = ptr;
420     *buf_size_ptr = x_uncompressed_size;
421     return;
422 }
423 
424 
ExtractFileToCallback(const CArchiveEntryInfo & info,IArchive::Callback_Write callback)425 void CArchive::ExtractFileToCallback(const CArchiveEntryInfo& info,
426                                      IArchive::Callback_Write callback)
427 {
428     CDirEntry::EType type = info.GetType();
429     if (type == CDirEntry::eUnknown  &&  !F_ISSET(fSkipUnsupported)) {
430         // Conform to POSIX-mandated behavior to extract as files
431         type = CDirEntry::eFile;
432     }
433     if (type != CDirEntry::eFile) {
434         ARCHIVE_THROW_INFO(eUnsupportedEntryType, kEmptyStr, info);
435     }
436     x_Open(eExtract);
437     ARCHIVE->ExtractEntryToCallback(info, callback);
438     return;
439 }
440 
441 
Append(const string & path,ELevel level,const string & comment)442 unique_ptr<CArchive::TEntries> CArchive::Append(const string& path, ELevel level,
443                                               const string& comment)
444 {
445     ARCHIVE_CHECK;
446     x_Open(eAppend);
447     return x_Append(path, level, comment);
448 }
449 
450 
451 unique_ptr<CArchive::TEntries>
AppendFileFromMemory(const string & name_in_archive,void * buf,size_t buf_size,ELevel level,const string & comment)452 CArchive::AppendFileFromMemory(const string& name_in_archive, void* buf, size_t buf_size,
453                                ELevel level, const string& comment)
454 {
455     ARCHIVE_CHECK;
456     if (!buf  || !buf_size) {
457         NCBI_THROW(CCoreException, eInvalidArg, "Bad memory buffer");
458     }
459     x_Open(eAppend);
460     unique_ptr<TEntries> entries(new TEntries);
461 
462     // Clear the entry info
463     m_Current = CArchiveEntryInfo();
464 
465     // Get name of the current entry in archive
466     string temp = s_ToArchiveName(kEmptyStr, name_in_archive, HaveSupport(eAbsolutePath));
467     if (temp.empty()) {
468         ARCHIVE_THROW(eBadName, "Empty entry name is not allowed");
469     }
470 
471     // Fill out entry information
472     m_Current.m_Name.swap(temp);
473     m_Current.m_Type    = CDirEntry::eFile;
474     m_Current.m_Comment = comment;
475     entries->push_back(m_Current);
476 
477 #if 0
478     if (m_Format == eZip) {
479 //???
480     } else {
481         _TROUBLE;
482     }
483 #endif
484     ARCHIVE->AddEntryFromMemory(m_Current, buf, buf_size, level);
485     return entries;
486 }
487 
488 
SkipEntry(void)489 void CArchive::SkipEntry(void)
490 {
491     ARCHIVE->SkipEntry(m_Current);
492     return;
493 }
494 
495 
TestEntry(void)496 void CArchive::TestEntry(void)
497 {
498     CDirEntry::EType type = m_Current.GetType();
499     if (type == CDirEntry::eUnknown  &&  !F_ISSET(fSkipUnsupported)) {
500         // Conform to POSIX-mandated behavior to extract as files
501         type = CDirEntry::eFile;
502     }
503     switch (type) {
504     case CDirEntry::eFile:
505         ARCHIVE->TestEntry(m_Current);
506         break;
507     case CDirEntry::eDir:
508         break;
509     case CDirEntry::eLink:
510     case CDirEntry::ePipe:
511     case CDirEntry::eCharSpecial:
512     case CDirEntry::eBlockSpecial:
513         // Cannot be tested, do nothing
514         break;
515 
516     default:
517         ARCHIVE_THROW1(eUnsupportedEntryType);
518     }
519     return;
520 }
521 
522 
ExtractEntry(const CDirEntry & dst)523 void CArchive::ExtractEntry(const CDirEntry& dst)
524 {
525     CDirEntry::EType type = m_Current.GetType();
526     switch (type) {
527     case CDirEntry::eFile:
528         ARCHIVE->ExtractEntryToFileSystem(m_Current, dst.GetPath());
529         break;
530 
531     case CDirEntry::eDir:
532         // Directory should be already created in x_ExtractEntry().
533         // Attributes for a directory will be set only when all
534         // its files have been already extracted.
535         break;
536 
537     case CDirEntry::eLink:
538     case CDirEntry::ePipe:
539     case CDirEntry::eCharSpecial:
540     case CDirEntry::eBlockSpecial:
541     default:
542         ARCHIVE_THROW1(eUnsupportedEntryType);
543     }
544     return;
545 }
546 
547 
AppendEntry(const string & path,ELevel level)548 void CArchive::AppendEntry(const string& path, ELevel level)
549 {
550     ARCHIVE->AddEntryFromFileSystem(m_Current, path, level);
551     return;
552 }
553 
554 
x_Open(EAction action)555 void CArchive::x_Open(EAction action)
556 {
557     EOpenMode new_open_mode = EOpenMode(int(action) & eRW);
558 
559     if (m_OpenMode != eWO  &&  action == eAppend) {
560         // Appending to an existing archive is not implemented yet
561         _TROUBLE;
562     }
563     if (new_open_mode != m_OpenMode) {
564         Close();
565         Open(action);
566         m_OpenMode = new_open_mode;
567     }
568 #if 0
569 /*
570 //    bool isread = (action & eRO) > 0;
571 ???
572     if (!m_Modified) {
573         // Check if Create() is followed by Append()
574         if (m_OpenMode != eWO  &&  action == eAppend) {
575             toend = true;
576         }
577     } else if (action != eAppend) {
578         // Previous action shouldn't be eCreate
579         _ASSERT(m_OpenMode != eWO);
580         if (m_Modified) {
581             m_Modified = false;
582         }
583     }
584 */
585 #endif
586     return;
587 }
588 
589 
x_ReadAndProcess(EAction action)590 unique_ptr<CArchive::TEntries> CArchive::x_ReadAndProcess(EAction action)
591 {
592     _ASSERT(action);
593     unique_ptr<TEntries> entries(new TEntries);
594 
595     // Get number of files in archive
596     size_t n = ARCHIVE->GetNumEntries();
597 
598     // Process all entries
599     for (size_t i = 0;  i < n;  i++) {
600         m_Current.Reset();
601         // Get next entry
602         ARCHIVE->GetEntryInfo(i, &m_Current);
603         if ( m_Current.m_Name.empty() ) {
604             ARCHIVE_THROW(eBadName, "Empty entry name in archive");
605         }
606 
607         // Match file name with the set of masks
608 
609         bool match = true;
610         // Replace backslashes with forward slashes
611         string path = m_Current.m_Name;
612         if ( m_MaskFullPath.mask ) {
613             match = m_MaskFullPath.mask->Match(path, m_MaskFullPath.acase);
614         }
615         if ( match  &&  m_MaskPattern.mask ) {
616             match = false;
617             list<CTempString> elems;
618             NStr::Split(path, "/", elems, NStr::fSplit_MergeDelimiters);
619             ITERATE(list<CTempString>, it, elems) {
620                 if (m_MaskPattern.mask->Match(*it, m_MaskPattern.acase)) {
621                     match = true;
622                     break;
623                 }
624             }
625         }
626         if ( !match ) {
627             continue;
628         }
629 
630         // User callback
631         if (!Checkpoint(m_Current, action)) {
632             // Skip current entry
633             SkipEntry();
634             continue;
635         }
636 
637         // Process current entry
638         switch (action) {
639             case eList:
640                 SkipEntry();
641                 break;
642             case eExtract:
643                 // It calls ExtractEntry() inside
644                 x_ExtractEntry(entries.get());
645                 break;
646             case eTest:
647                 TestEntry();
648                 break;
649             default:
650                 // Undefined action
651                _TROUBLE;
652         }
653         // Add entry into the list of processed entries
654         entries->push_back(m_Current);
655     }
656     return entries;
657 }
658 
659 
660 // Deleter for temporary file for safe extraction
661 struct CTmpEntryDeleter {
DeleteCTmpEntryDeleter662     static void Delete(CDirEntry* entry) {
663         if ( entry->GetPath().empty() ) {
664             return;
665         }
666         entry->Remove();
667     }
668 };
669 
x_ExtractEntry(const TEntries * prev_entries)670 void CArchive::x_ExtractEntry(const TEntries* prev_entries)
671 {
672     CDirEntry::EType type = m_Current.GetType();
673 
674     // Destination for extraction
675     unique_ptr<CDirEntry> dst(
676         CDirEntry::CreateObject(type,
677             CDirEntry::NormalizePath(CDirEntry::ConcatPath(m_BaseDir, m_Current.GetName()))));
678     // Dereference link if requested
679     if (type == CDirEntry::eLink  &&  F_ISSET(fFollowLinks)) {
680         dst->DereferenceLink();
681     }
682     // Actual type in file system (if exists)
683     CDirEntry::EType dst_type = dst->GetType();
684 
685     // Look if extraction is allowed (when the destination exists)
686     bool found = false;
687 
688     if (dst_type != CDirEntry::eUnknown) {
689         // Check if destination entry is ours (previous revision of the same file)
690         if (prev_entries) {
691             ITERATE(TEntries, e, *prev_entries) {
692                 if (e->GetName() == m_Current.GetName()  &&
693                     e->GetType() == m_Current.GetType()) {
694                     found = true;
695                     break;
696                 }
697             }
698         }
699         // Not ours
700         if (!found) {
701             // Can overwrite it?
702             if (!F_ISSET(fOverwrite)) {
703                 // File already exists, and cannot be changed
704                 return;
705             } else {
706                 // Can update?
707                 // Note, that we update directories always, because the archive
708                 // can contain other subtree of this existing directory.
709                 if (F_ISSET(fUpdate)  &&  type != CDirEntry::eDir) {
710                     // Make sure that destination entry is not older than current entry
711                     time_t dst_time;
712                     if (dst->GetTimeT(&dst_time)
713                         &&  m_Current.GetModificationTime() <= dst_time) {
714                         return;
715                     }
716                 }
717                 // Have equal types?
718                 if (F_ISSET(fEqualTypes)  &&  type != dst_type) {
719                     ARCHIVE_THROW(eExtract, "Cannot overwrite '" + dst->GetPath() +
720                         "' with an archive entry of different type");
721                 }
722             }
723             if (F_ISSET(fBackup)) {
724                 // Need to backup the existing destination
725                 CDirEntry backup(*dst);
726                 if (!backup.Backup(kEmptyStr, CDirEntry::eBackup_Rename)) {
727                     int x_errno = errno;
728                     ARCHIVE_THROW(eBackup, "Failed to backup '" + dst->GetPath() + '\'' + s_OSReason(x_errno));
729                 }
730             }
731         }
732         // Entry with the same name exists and can be overwritten
733         found = true;
734     }
735 
736     // Extraction
737 
738     CDirEntry tmp;
739 #  ifdef NCBI_OS_UNIX
740     // Set private settings for newly created files,
741     // only current user can read or modify it.
742     mode_t u = umask(0);
743     umask(u & 077);
744     try {
745 #  endif
746 
747     // Create directory
748     string dirname = dst->GetDir();
749     if (!dirname.empty()) {
750         if (!CDir(dirname).CreatePath()) {
751             int x_errno = errno;
752             ARCHIVE_THROW(eExtract, "Cannot create directory '" + dirname + '\'' + s_OSReason(x_errno));
753         }
754     }
755     if (type == CDirEntry::eFile) {
756         // Always use temporary file for safe file extraction
757         AutoPtr<CDirEntry, CTmpEntryDeleter> tmp_deleter;
758         tmp.Reset(CDirEntry::GetTmpNameEx(dst->GetDir(), ".tmp_ncbiarch_", CDirEntry::eTmpFileCreate));
759         tmp_deleter.reset(&tmp);
760         // Extract file
761         ExtractEntry(tmp);
762         // Rename it to destination name
763         if (!tmp.Rename(dst->GetPath(), found ? CDirEntry::fRF_Overwrite : CDirEntry::fRF_Default)) {
764             int x_errno = errno;
765             ARCHIVE_THROW(eExtract, "Cannot rename temporary file to '" +
766                 dst->GetPath() + "' back in place" + s_OSReason(x_errno));
767         }
768         // Restore attributes after renaming
769         x_RestoreAttrs(m_Current, &(*dst));
770         // Reset temporary object to prevent file deletion after its successful renaming
771         tmp.Reset(kEmptyStr);
772 
773     } else if (type == CDirEntry::eDir) {
774         // Do nothing
775     } else {
776         //???
777         ARCHIVE_THROW1(eUnsupportedEntryType);
778     }
779 
780 #  ifdef NCBI_OS_UNIX
781     } catch (...) {
782         umask(u);
783         throw;
784     }
785     umask(u);
786 #  endif
787 }
788 
789 
x_RestoreAttrs(const CArchiveEntryInfo & info,const CDirEntry * dst) const790 void CArchive::x_RestoreAttrs(const CArchiveEntryInfo& info,
791                               const CDirEntry*         dst) const
792 {
793     unique_ptr<CDirEntry> path_ptr;  // deleter
794     if (!dst) {
795         path_ptr.reset(CDirEntry::CreateObject(CDirEntry::EType(info.GetType()),
796                        CDirEntry::NormalizePath(CDirEntry::ConcatPath(m_BaseDir, info.GetName()))));
797         dst = path_ptr.get();
798     }
799 
800     // Date/time.
801     // Set the time before permissions because on some platforms
802     // this setting can also affect file permissions.
803     if (F_ISSET(fPreserveTime)) {
804         time_t mtime(info.GetModificationTime());
805         time_t atime(info.GetLastAccessTime());
806         time_t ctime(info.GetCreationTime());
807         if (!dst->SetTimeT(&mtime, &atime, &ctime)) {
808             int x_errno = errno;
809             ARCHIVE_THROW(eRestoreAttrs, "Cannot restore date/time for '" +
810                 dst->GetPath() + '\'' + s_OSReason(x_errno));
811         }
812     }
813 
814     // Owner.
815     // This must precede changing permissions because on some
816     // systems chown() clears the set[ug]id bits for non-superusers
817     // thus resulting in incorrect permissions.
818     if (F_ISSET(fPreserveOwner)) {
819         unsigned int uid, gid;
820         // 2-tier trial:  first using the names, then using numeric IDs.
821         // Note that it is often impossible to restore the original owner
822         // without the super-user rights so no error checking is done here.
823         if (!dst->SetOwner(info.GetUserName(), info.GetGroupName(),
824                            eIgnoreLinks, &uid, &gid)  &&
825             !dst->SetOwner(kEmptyStr, info.GetGroupName(), eIgnoreLinks)) {
826 
827             if (uid != info.GetUserId()  ||  gid != info.GetGroupId()) {
828                 string user  = NStr::UIntToString(info.GetUserId());
829                 string group = NStr::UIntToString(info.GetGroupId());
830                 if (!dst->SetOwner(user, group, eIgnoreLinks)) {
831                      dst->SetOwner(kEmptyStr, group, eIgnoreLinks);
832                 }
833             }
834         }
835     }
836 
837     // Mode.
838     // Set them last.
839     if ((F_ISSET(fPreserveMode))  &&
840         info.GetType() != CDirEntry::ePipe  &&
841         info.GetType() != CDirEntry::eCharSpecial &&
842         info.GetType() != CDirEntry::eBlockSpecial)
843     {
844         bool failed = false;
845         int x_errno;
846 #ifdef NCBI_OS_UNIX
847         // We cannot change permissions for sym.links because lchmod()
848         // is not portable and is not implemented on majority of platforms.
849         if (info.GetType() != CDirEntry::eLink) {
850             // Use raw mode here to restore most of the bits
851             mode_t mode = info.m_Stat.st_mode;
852             if (mode  &&  chmod(dst->GetPath().c_str(), mode) != 0) {
853                 // May fail due to setuid/setgid bits -- strip'em and try again
854                 if (mode &   (S_ISUID | S_ISGID)) {
855                     mode &= ~(S_ISUID | S_ISGID);
856                     failed = chmod(dst->GetPath().c_str(), mode) != 0;
857                 } else {
858                     failed = true;
859                 }
860                 x_errno = errno;
861             }
862         }
863 #else
864         mode_t mode = info.GetMode();
865         // Do not try to set zero permissions, it is just not defined
866         if ( mode != 0 ) {
867             CDirEntry::TMode user, group, other;
868             CDirEntry::TSpecialModeBits special_bits;
869             CDirEntry::ModeFromModeT(mode, &user, &group, &other, &special_bits);
870             failed = !dst->SetMode(user, group, other, special_bits);
871             x_errno = errno;
872         }
873 #endif
874         if (failed) {
875             ARCHIVE_THROW(eRestoreAttrs, "Cannot change mode for '" + dst->GetPath() + '\'' + s_OSReason(x_errno));
876         }
877     }
878 }
879 
880 
x_Append(const string & src_path,ELevel level,const string & comment,const TEntries * toc)881 unique_ptr<CArchive::TEntries> CArchive::x_Append(const string&   src_path,
882                                                 ELevel          level,
883                                                 const string&   comment,
884                                                 const TEntries* toc)
885 {
886     unique_ptr<TEntries> entries(new TEntries);
887 
888     const EFollowLinks follow_links = (m_Flags & fFollowLinks) ? eFollowLinks : eIgnoreLinks;
889     bool update = true;
890 
891     // Clear the entry info
892     m_Current = CArchiveEntryInfo();
893     // Compose entry name for relative names
894     string path = s_ToFilesystemPath(m_BaseDir, src_path);
895 
896     // Get dir entry information
897     CDirEntry entry(path);
898     CDirEntry::SStat st;
899     if (!entry.Stat(&st, follow_links)) {
900         int x_errno = errno;
901         ARCHIVE_THROW(eOpen, "Cannot get status of '" + path + '\''+ s_OSReason(x_errno));
902     }
903     CDirEntry::EType type = CDirEntry::GetType(st.orig);
904 
905     // Get name of the current entry in archive
906     string temp = s_ToArchiveName(m_BaseDir, path, HaveSupport(eAbsolutePath));
907     if (temp.empty()) {
908         ARCHIVE_THROW(eBadName, "Empty entry name in archive");
909     }
910 
911     // Match masks
912 
913     bool match = true;
914     if ( m_MaskFullPath.mask ) {
915         match = m_MaskFullPath.mask->Match(temp, m_MaskFullPath.acase);
916     }
917     if ( match  &&  m_MaskPattern.mask ) {
918         list<CTempString> elems;
919         NStr::Split(temp, "/", elems, NStr::fSplit_MergeDelimiters);
920         ITERATE(list<CTempString>, it, elems) {
921             if (*it == "..") {
922                 ARCHIVE_THROW(eBadName, "Name '" + temp + "' embeds parent directory ('..')");
923             }
924             if (m_MaskPattern.mask->Match(*it, m_MaskPattern.acase)) {
925                 match = true;
926                 break;
927             }
928         }
929     }
930     if ( !match ) {
931         goto out;
932     }
933 
934     // Check support for this entry type by current archive format
935     if (type == CDirEntry::eUnknown  ||  !ARCHIVE->HaveSupport_Type(type)) {
936         if (F_ISSET(fSkipUnsupported)) {
937             goto out;
938         }
939         ARCHIVE_THROW(eUnsupportedEntryType, "Cannot append to archive, unsupported entry type for '" + path + "', ");
940     }
941 
942     if (type == CDirEntry::eDir  &&  temp != "/"  &&  temp != ".") {
943         temp += '/';
944     }
945 
946     // Fill out entry information
947     m_Current.m_Name.swap(temp);
948     m_Current.m_Type    = type;
949     m_Current.m_Comment = comment;
950 
951 #if 0
952     if (m_Format == eZip) {
953 
954     } else {
955         _TROUBLE;
956     }
957 #endif
958 #if 0
959     if (type == CDirEntry::eLink) {
960         _ASSERT(!follow_links);
961         m_Current.m_LinkName = entry.LookupLink();
962         if (m_Current.m_LinkName.empty()) {
963             ARCHIVE_THROW(eBadName, "Empty link name is not allowed");
964         }
965     }
966     unsigned int uid = 0, gid = 0;
967     entry.GetOwner(&m_Current.m_UserName, &m_Current.m_GroupName, follow_links, &uid, &gid);
968 #ifdef NCBI_OS_UNIX
969     if (NStr::UIntToString(uid) == m_Current.GetUserName()) {
970         m_Current.m_UserName.erase();
971     }
972     if (NStr::UIntToString(gid) == m_Current.GetGroupName()) {
973         m_Current.m_GroupName.erase();
974     }
975 #endif //NCBI_OS_UNIX
976 #ifdef NCBI_OS_MSWIN
977     // These are fake but we don't want to leave plain 0 (root) in there
978     st.orig.st_uid = (uid_t) uid;
979     st.orig.st_gid = (gid_t) gid;
980 #endif //NCBI_OS_MSWIN
981 
982     m_Current.m_Stat = st.orig;
983     // Fixup for mode bits
984     m_Current.m_Stat.st_mode = (mode_t) s_ModeToTar(st.orig.st_mode);
985 #endif
986 
987     // Check if we need to update this entry in the archive
988 #if 0
989     if (toc) {
990         bool found = false;
991 
992         if (type != CDirEntry::eUnknown) {
993             // Start searching from the end of the list, to find
994             // the most recent entry (if any) first
995             _ASSERT(temp.empty());
996             REVERSE_ITERATE(TEntries, e, *toc) {
997                 if (!temp.empty()) {
998                     if (e->GetType() == CTarEntryInfo::eHardLink  ||
999                         temp != s_ToFilesystemPath(m_BaseDir, e->GetName())) {
1000                         continue;
1001                     }
1002                 } else if (path == s_ToFilesystemPath(m_BaseDir,e->GetName())){
1003                     found = true;
1004                     if (e->GetType() == CTarEntryInfo::eHardLink) {
1005                         temp = s_ToFilesystemPath(m_BaseDir, e->GetLinkName());
1006                         continue;
1007                     }
1008                 } else {
1009                     continue;
1010                 }
1011                 if (m_Current.GetType() != e->GetType()) {
1012                     if (m_Flags & fEqualTypes) {
1013                         goto out;
1014                     }
1015                 } else if (m_Current.GetType() == CTarEntryInfo::eLink
1016                            &&  m_Current.GetLinkName() == e->GetLinkName()) {
1017                     goto out;
1018                 }
1019                 if (m_Current.GetModificationTime() <=
1020                     e->GetModificationTime()) {
1021                     update = false;  // same(or older), no update
1022                 }
1023                 break;
1024             }
1025         }
1026 
1027         if (!update  ||  (!found  &&  (m_Flags & (fUpdate & ~fOverwrite)))) {
1028             if (type != CDirEntry::eDir  &&  type != CDirEntry::eUnknown) {
1029                 goto out;
1030             }
1031             // Directories always get recursive treatment later
1032             update = false;
1033         }
1034     }
1035 #endif
1036 
1037     // Append the entry
1038 
1039     switch (type) {
1040     case CDirEntry::eFile:
1041         _ASSERT(update);
1042         if (x_AppendEntry(path, level)) {
1043             m_Modified = true;
1044             entries->push_back(m_Current);
1045         }
1046         break;
1047 
1048     case CDirEntry::eLink:
1049     case CDirEntry::eBlockSpecial:
1050     case CDirEntry::eCharSpecial:
1051     case CDirEntry::ePipe:
1052     case CDirEntry::eDoor:
1053     case CDirEntry::eSocket:
1054         _ASSERT(update);
1055         m_Current.m_Stat.st_size = 0;
1056         if (x_AppendEntry(path)) {
1057             entries->push_back(m_Current);
1058         }
1059         break;
1060 
1061     case CDirEntry::eDir:
1062         if (update  &&  m_Current.m_Name != ".") {
1063             // Add information about directory itself
1064             m_Current.m_Stat.st_size = 0;
1065             if (x_AppendEntry(path)) {
1066                 entries->push_back(m_Current);
1067             }
1068         }
1069         if (type == CDirEntry::eDir) {
1070             // Append/update all files from that directory
1071             CDir::TEntries dir = CDir(path).GetEntries("*", CDir::eIgnoreRecursive);
1072             ITERATE(CDir::TEntries, e, dir) {
1073                 unique_ptr<TEntries> add = x_Append((*e)->GetPath(), level, kEmptyStr, toc);
1074                 entries->splice(entries->end(), *add);
1075             }
1076         }
1077         break;
1078 
1079     default:
1080         _TROUBLE;
1081     }
1082  out:
1083     return entries;
1084 }
1085 
1086 
x_AppendEntry(const string & path,ELevel level)1087 bool CArchive::x_AppendEntry(const string& path, ELevel level)
1088 {
1089     // User callback
1090     if (!Checkpoint(m_Current, eAppend)) {
1091         return false;
1092     }
1093     AppendEntry(path, level);
1094     m_Modified = true;
1095     return true;
1096 }
1097 
1098 
1099 
1100 //////////////////////////////////////////////////////////////////////////////
1101 //
1102 // CArchiveFile
1103 //
1104 
CArchiveFile(EFormat format,const string & filename)1105 CArchiveFile::CArchiveFile(EFormat format, const string& filename)
1106     : CArchive(format)
1107 {
1108     // CArchive
1109     m_Location = IArchive::eFile;
1110     m_Flags    = fDefault;
1111     // CArchiveFile
1112     m_FileName = filename;
1113     return;
1114 }
1115 
1116 
Open(EAction action)1117 void CArchiveFile::Open(EAction action)
1118 {
1119     bool isread = (action & eRO) > 0;
1120     if (isread) {
1121         ARCHIVE->OpenFile(m_FileName);
1122     } else {
1123         ARCHIVE->CreateFile(m_FileName);
1124     }
1125     return;
1126 }
1127 
1128 
1129 
1130 //////////////////////////////////////////////////////////////////////////////
1131 //
1132 // CArchiveMemory
1133 //
1134 
CArchiveMemory(EFormat format,const void * ptr,size_t size)1135 CArchiveMemory::CArchiveMemory(EFormat format, const void* ptr, size_t size)
1136     : CArchive(format)
1137 {
1138     // CArchive
1139     m_Location = IArchive::eMemory;
1140     m_Flags    = fDefault;
1141     // CArchiveMemory
1142     m_Buf      = ptr;
1143     m_BufSize  = size;
1144     m_InitialAllocationSize = 0;
1145     return;
1146 }
1147 
1148 
Create(void)1149 void CArchiveMemory::Create(void)
1150 {
1151     Create(0);
1152 }
1153 
1154 
Create(size_t initial_allocation_size)1155 void CArchiveMemory::Create(size_t initial_allocation_size)
1156 {
1157     ARCHIVE_CHECK;
1158     m_InitialAllocationSize = initial_allocation_size;
1159     m_Buf = NULL;
1160     m_OwnBuf.reset();
1161     x_Open(eCreate);
1162     return;
1163 }
1164 
1165 
Open(EAction action)1166 void CArchiveMemory::Open(EAction action)
1167 {
1168     bool isread = (action & eRO) > 0;
1169     if (isread) {
1170         ARCHIVE->OpenMemory(m_Buf, m_BufSize);
1171     } else {
1172         ARCHIVE->CreateMemory(m_InitialAllocationSize);
1173     }
1174     return;
1175 }
1176 
1177 
Finalize(void ** buf_ptr,size_t * buf_size_ptr)1178 void CArchiveMemory::Finalize(void** buf_ptr, size_t* buf_size_ptr)
1179 {
1180     if (!buf_ptr  || !buf_size_ptr) {
1181         NCBI_THROW(CCoreException, eInvalidArg, "Bad memory buffer");
1182     }
1183     ARCHIVE_CHECK;
1184     ARCHIVE->FinalizeMemory(buf_ptr, buf_size_ptr);
1185     m_Buf     = *buf_ptr;
1186     m_BufSize = *buf_size_ptr;
1187     return;
1188 }
1189 
1190 
Save(const string & filename)1191 void CArchiveMemory::Save(const string& filename)
1192 {
1193     ARCHIVE_CHECK;
1194     if (!m_Buf || !m_BufSize) {
1195         NCBI_THROW(CCoreException, eInvalidArg, "Bad memory buffer");
1196     }
1197     CFileIO fio;
1198     fio.Open(filename, CFileIO::eCreate, CFileIO::eReadWrite);
1199     size_t n_written = fio.Write(m_Buf, m_BufSize);
1200     if (n_written != m_BufSize) {
1201         ARCHIVE_THROW(eWrite, "Failed to write archive to file");
1202     }
1203     fio.Close();
1204 }
1205 
1206 
Load(const string & filename)1207 void CArchiveMemory::Load(const string& filename)
1208 {
1209     // Close current archive, if any
1210     Close();
1211 
1212     // Get file size and allocate memory to load it
1213     CFile f(filename);
1214     Int8 filesize = f.GetLength();
1215     if (filesize < 0) {
1216         int x_errno = errno;
1217         ARCHIVE_THROW(eOpen, "Cannot get status of '" + filename + '\''+ s_OSReason(x_errno));
1218     }
1219     if (!filesize) {
1220         ARCHIVE_THROW(eOpen, "Cannot load empty file '" + filename + "' to memory");
1221     }
1222     AutoArray<char> tmp((size_t)filesize);
1223 
1224     // Read file into temporary buffer
1225     CFileIO fio;
1226     fio.Open(filename, CFileIO::eOpen, CFileIO::eRead);
1227     size_t n_read = fio.Read(tmp.get(), (size_t)filesize);
1228     if (n_read != (size_t)filesize) {
1229         ARCHIVE_THROW(eWrite, "Failed to load archive to memory");
1230     }
1231     fio.Close();
1232 
1233     // Set new buffer
1234     m_OwnBuf  = tmp;
1235     m_Buf     = m_OwnBuf.get();
1236     m_BufSize = n_read;
1237 }
1238 
1239 
1240 END_NCBI_SCOPE
1241