1 #ifndef UTIL_COMPRESS__ARCHIVE__HPP
2 #define UTIL_COMPRESS__ARCHIVE__HPP
3 
4 /* $Id: archive.hpp 534859 2017-05-03 12:47:35Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Vladimir Ivanov
30  *
31  * File Description:
32  *   Compression archive API.
33  *
34  */
35 
36 ///  @file archive.hpp
37 ///  Archive API.
38 
39 #include <util/compress/archive_.hpp>
40 
41 
42 /** @addtogroup Compression
43  *
44  * @{
45  */
46 
47 BEGIN_NCBI_SCOPE
48 
49 
// Forward declaration (CArchiveZip is not defined in this header)
class CArchiveZip;
52 
53 
54 //////////////////////////////////////////////////////////////////////////////
55 ///
56 /// CArchive - base class for file- or memory-based archives.
57 ///
58 /// Do not use it directly, use CArchiveFile or CArchiveMemory instead.
59 /// Throws exceptions on errors.
60 
61 class NCBI_XUTIL_EXPORT CArchive
62 {
63 public:
64     // Type definitions
65     typedef CCompression::ELevel ELevel;
66 
67     /// Archive formats
68     enum EFormat {
69         eZip
70     };
71 
72     /// General flags
73     enum EFlags {
74         // --- Extract --- (fUpdate also applies to Update)
75         /// Allow to overwrite destinations with entries from the archive
76         fOverwrite          = (1<<3),
77         /// Only update entries that are older than those already existing
78         fUpdate             = (1<<4) | fOverwrite,
79         /// Backup destinations if they exist (all entries including dirs)
80         fBackup             = (1<<5) | fOverwrite,
81         /// If destination entry exists, it must have the same type as source
82         fEqualTypes         = (1<<6),
83         /// Create extracted files with the original ownership
84         fPreserveOwner      = (1<<7),
85         /// Create extracted files with the original permissions
86         fPreserveMode       = (1<<8),
87         /// Preserve date/times for extracted files
88         /// Note, that some formats, as zip for example, store modification
89         /// time only, so creation and last access time will be the same as
90         /// modification time. And even it can be a little bit off due a
91         /// rounding errors.
92         fPreserveTime       = (1<<9),
93         /// Preserve all file attributes
94         fPreserveAll        = fPreserveOwner | fPreserveMode | fPreserveTime,
95         // --- Extract/Append/Update ---
96         /// Follow symbolic links (instead of storing/extracting them)
97         fFollowLinks        = (1<<2),
98         // --- Extract/List/Append/Update ---
99         /// Skip unsupported entries rather than making files out of them
100         /// when extracting (the latter is the default POSIX requirement).
101         /// On adding entry of unsupported type to the archive it will be
102         /// skipping also.
103         fSkipUnsupported    = (1<<15),
104         // --- Miscellaneous ---
105         /// Default flags
106         fDefault            = fOverwrite | fPreserveAll
107     };
108     typedef unsigned int TFlags;  ///< Bitwise OR of EFlags
109 
110     /// Define a list of entries.
111     typedef list<CArchiveEntryInfo> TEntries;
112 
113 
114     //------------------------------------------------------------------------
115     // Constructors
116     //------------------------------------------------------------------------
117 
118 protected:
119     /// Construct an archive object of specified format.
120     ///
121     /// Declared as protected to avoid direct usage.
122     /// Use derived classes CArchiveFile or CArchiveMemory instead.
123     /// @param format
124     ///   Archive format
125     /// @sa
126     ///   CArchiveFile, CArchiveMemory
127     CArchive(EFormat format);
128 
129 public:
130     /// Destructor
131     ///
132     /// Close the archive if currently open.
133     /// @sa
134     ///   Close
135     virtual ~CArchive(void);
136 
137     //------------------------------------------------------------------------
138     // Main functions
139     //------------------------------------------------------------------------
140 
141     /// Create a new empty archive.
142     ///
143     /// @sa
144     ///   Append
145     virtual void Create(void);
146 
147     /// Close the archive making sure all pending output is flushed.
148     ///
149     /// @sa
150     ///   ~CArchive
151     virtual void Close(void);
152 
153     /// Get information about archive entries.
154     ///
155     /// @return
156     ///   An array containing information on those archive entries, whose
157     ///   names match the preset mask if any, or all entries otherwise.
158     /// @sa
159     ///   SetMask
160     virtual unique_ptr<TEntries> List(void);
161 
162     /// Verify archive integrity.
163     ///
164     /// Read through the archive without actually extracting anything from it.
165     /// Test all archive entries, whose names match the preset mask.
166     /// @return
167     ///   An array containing information on those archive entries, whose
168     ///   names match the preset mask if any, or all entries otherwise.
169     /// @sa
170     ///   SetMask
171     virtual unique_ptr<TEntries> Test(void);
172 
173     /// Extract the entire archive.
174     ///
175     /// Extract all archive entries, whose names match the preset mask.
176     /// Entries will be extracted into either current directory or
177     /// a directory otherwise specified by SetBaseDir().
178     /// @return
179     ///   A list of entries that have been actually extracted.
180     /// @sa
181     ///   SetMask, SetBaseDir
182     virtual unique_ptr<TEntries> Extract(void);
183 
184     /// Extract single file entry to a memory buffer.
185     ///
186     /// @param info
187     ///   [in]  Entry to extract.
188     /// @param buf
189     ///   [in]  Memory buffer for extracted data.
190     /// @param buf_size
191     ///   [in]  Size of memory buffer.
192     /// @param out_size
193     ///   [out] Size of extracted data in the buffer.
194     /// @note
195     ///   The buffer size should be big enough to fit whole extracted file.
196     /// @sa
197     ///   ExtractFileToHeap, EctractFileToCallback, CArchiveEntryInfo::GetSize, List
198     virtual void ExtractFileToMemory(const CArchiveEntryInfo& info,
199                                      void* buf, size_t buf_size,
200                                      size_t* /*out*/ out_size);
201 
202     /// Extract single file entry to a dynamically allocated memory buffer.
203     ///
204     /// @param info
205     ///   [in]  Entry to extract.
206     /// @param buf_ptr
207     ///   [out] Pointer to an allocated memory buffer.
208     /// @param buf_size_ptr
209     ///   [out] Size of allocated memory buffer, it is equal to the size of extracted data.
210     /// @note
211     ///   Do not forget to deallocate memory buffer after usage.
212     ///   Use free() or AutoPtr<char, CDeleter<char>>.
213     /// @sa
214     ///   ExtractFileToMemory, EctractFileToCallback, CArchiveEntryInfo::GetSize, List
215     virtual void ExtractFileToHeap(const CArchiveEntryInfo& info,
216                                    void** buf_ptr, size_t* buf_size_ptr);
217 
218     /// Extract single file entry using user-defined callback.
219     ///
220     /// @param info
221     ///   [in] Entry to extract.
222     /// @param callback
223     ///   [in] User callback for processing extracted data on the fly.
224     /// @sa
225     ///   ExtractFileToMemory, EctractFileToHeap, CArchiveEntryInfo::GetSize, List
226     virtual void ExtractFileToCallback(const CArchiveEntryInfo& info,
227                                        IArchive::Callback_Write callback);
228 
229     /// Append an entry to the archive.
230     ///
231     /// Appended entry can be either a file, directory, symbolic link or etc.
232     /// Each archive format have its own list of supported entry types.
233     /// The name is taken with respect to the base directory, if any set.
234     /// Adding a directory results in all its files and subdirectories to
235     /// get added (examine the return value to find out what has been added).
236     /// The names of all appended entries will be converted to Unix format
237     /// (only forward slashes in the paths, and drive letter, if any on
238     /// MS-Windows, stripped).
239     /// @param path
240     ///   Path to appended entry.
241     /// @param level
242     ///   Compression level (if selected format support it, or default).
243     /// @param comment
244     ///   Optional comment for the appended entry (if selected format support it).
245     ///   For directories the comment will be added to upper directory only.
246     /// @return
247     ///   A list of entries appended.
248     /// @note
249     ///   On the current moment you can use this method to add files to newly
250     ///   created archive only, modifying existed archive is not allowed.
251     /// @sa
252     ///   Create, AppendFileFromMemory, SetBaseDir, HaveSupport, Update
253     virtual unique_ptr<TEntries> Append(const string& path,
254                                         ELevel level = CCompression::eLevel_Default,
255                                         const string& comment = kEmptyStr);
256 
257     /// Append a single file entry to the created archive using data from memory buffer.
258     ///
259     /// These function assign the current local time to added entry.
260     /// @param name_in_archive
261     ///   Name of the file entry in the archive. You can use any symbols,
262     ///   that allowed for file names. Also, you can use relative path here,
263     ///   if you want to create some structure in the archive and put the data
264     ///   to a file in the subdirectory.
265     /// @param buf
266     ///   Buffer with data to add.
267     /// @param buf_size
268     ///   Size of data in the buffer.
269     /// @param level
270     ///   Compression level (if selected format support it, or default).
271     /// @param comment
272     ///   Optional comment for the appended entry (if selected format support it).
273     /// @return
274     ///   An information about added entry.
275     /// @note
276     ///   On the current moment you can use this method to add files to newly
277     ///   created archive only, modification existing archive is not allowed.
278     /// @sa
279     ///   Create, Append
280     virtual unique_ptr<CArchive::TEntries>
281         AppendFileFromMemory(const string& name_in_archive,
282                              void* buf, size_t buf_size,
283                              ELevel level = CCompression::eLevel_Default,
284                              const string& comment = kEmptyStr);
285 
286     //------------------------------------------------------------------------
287     // Utility functions
288     //------------------------------------------------------------------------
289 
290     /// Get flags.
GetFlags(void) const291     virtual TFlags GetFlags(void) const { return m_Flags; }
292     /// Set flags.
SetFlags(TFlags flags)293     virtual void   SetFlags(TFlags flags) { m_Flags = flags; }
294 
295     /// Get base directory to use for files while extracting from/adding to
296     /// the archive, and in the latter case used only for relative paths.
297     /// @sa
298     ///   SetBaseDir
GetBaseDir(void) const299     virtual const string& GetBaseDir(void) const { return m_BaseDir; }
300 
301     /// Set base directory to use for files while extracting from/adding to
302     /// the archive, and in the latter case used only for relative paths.
303     /// @sa
304     ///   GetBaseDir
305     virtual void SetBaseDir(const string& dirname);
306 
307     /// Mask type enumerator.
308     /// @enum eFullPathMask
309     ///   CMask can select both inclusions and exclusions (in this order) of
310     ///   fully-qualified archive entries. Whole entry name will be matched.
311     ///   It always use Unix format for path matching, so please use forward
312     ///   slash "/" as directory delimiter in the masks.
313     /// @enum ePatternMask
314     ///   CMask can select both inclusions and exclusions (in this order) of
315     ///   patterns of archive entries. If eFullMask use the full path
316     ///   to match mask, that ePatternMask allow to match each path component,
317     ///   including names for each subdirectory and/or file name. This type
318     ///   of mask is used only if eFullMask matches or not specified.
319     enum EMaskType {
320         eFullPathMask,
321         ePatternMask
322     };
323 
324     /// Set name mask for processing.
325     ///
326     /// The set of masks is used to process existing entries in the archive,
327     /// and apply to list, extract and append operations.
328     /// If masks are not defined then all archive entries will be processed.
329     /// Each "mask" is a set of inclusion and exclusion patterns, each of them
330     /// can be a wildcard file mask or regular expression.
331     /// @param mask
332     ///   Set of masks (NULL unset the current set without setting a new one).
333     /// @param own
334     ///   Whether to take ownership on the mask (delete upon CArchive destruction).
335     /// @param type
336     ///   Type of the mask. You can set two types of masks at the same time.
337     ///   The mask with type eFullPathMask applies to whole path name.
338     ///   The mask with type ePatternMask applies to each path component, to all
339     ///   subdirectories or file name, and if one of them matches, the entry
340     ///   will be processed. If masks for both types are set, the entry will
341     ///   be processed if it matches for each of them.
342     /// @sa
343     ///    UnsetMask, CMaskFileName, CMaskRegexp
344     /// @note
345     ///   Unset mask means wildcard processing (all entries match).
346     void SetMask(CMask*      mask,
347                  EOwnership  own   = eNoOwnership,
348                  EMaskType   type  = eFullPathMask,
349                  NStr::ECase acase = NStr::eNocase);
350 
351     /// Unset name mask for processing.
352     ///
353     /// @sa
354     ///    SetMask
355     /// @note
356     ///   Unset mask means wildcard processing (all entries match).
357     void UnsetMask(EMaskType type);
358     void UnsetMask(void);
359 
360     /// Support check enumerator.
361     ///
362     /// Use HaveSupport() to check that current archive format have support for
363     /// specific feature.
364     /// @enum eType
365     ///    Check that archive can store entries with specific directory entry type.
366     /// @enum eAbsolutePath
367     ///    Archive can store full absolute path entries. Otherwise they will
368     ///    be converted to relative path from root directory.
369     /// @sa HaveSupport
370     enum ESupport {
371         eType,
372         eAbsolutePath
373     };
374 
375     /// Check that current archive format have support for specific features.
376     ///
377     /// @param feature
378     ///   Name of the feature to check.
379     /// @param param
380     ///   Additional parameter (for eType only).
381     /// @sa ESupport
382     bool HaveSupport(ESupport feature, int param = 0);
383 
384 protected:
385     /// Archive open mode
386     enum EOpenMode {
387         eNone = 0,
388         eRO   = 1,
389         eWO   = 2,
390         eRW   = eRO | eWO
391     };
392 
393     /// Action, performed on the archive
394     enum EAction {
395         eUndefined =  eNone,
396         eCreate    = (1 <<  8) | eWO,
397         eAppend    = (1 <<  9) | eWO,
398         eList      = (1 << 10) | eRO,
399         eUpdate    = eList | eAppend,
400         eExtract   = (1 << 11) | eRO,
401         eTest      = eList | eExtract
402     };
403 
404     /// Mask storage
405     struct SMask {
406         CMask*       mask;
407         NStr::ECase  acase;
408         EOwnership   owned;
SMaskCArchive::SMask409         SMask(void)
410             : mask(0), acase(NStr::eNocase), owned(eNoOwnership)
411         {}
412     };
413 
414 protected:
415     //------------------------------------------------------------------------
416     // User-redefinable callback
417     //------------------------------------------------------------------------
418 
419     /// Return false to skip the current entry when processing.
420     ///
421     /// Note that the callback can encounter multiple entries of the same file
422     /// in case the archive has been updated (so only the last occurrence is
423     /// the actual copy of the file when extracted).
Checkpoint(const CArchiveEntryInfo &,EAction)424     virtual bool Checkpoint(const CArchiveEntryInfo& /*current*/, EAction /*action*/)
425         { return true; }
426 
427 protected:
428     //------------------------------------------------------------------------
429     // Redefinable methods for inherited classes
430     //------------------------------------------------------------------------
431 
432     /// Open archive.
433     virtual void Open(EAction action) = 0;
434     /// Process current entry (List/Test/Extract/Append)
435     virtual void SkipEntry   (void);
436     virtual void TestEntry   (void);
437     virtual void ExtractEntry(const CDirEntry& dst);
438     virtual void AppendEntry (const string& path, ELevel level);
439 
440 protected:
441     //------------------------------------------------------------------------
442     // Internal processing methods
443     //------------------------------------------------------------------------
444 
445     // Open archive.
446     // Wrapper around Open() that perform all necessary checks and processing.
447     void x_Open(EAction action);
448 
449     // Read the archive and do the requested "action" on current entry.
450     unique_ptr<TEntries> x_ReadAndProcess(EAction action);
451 
452     // Append an entry from the file system to the archive.
453     unique_ptr<TEntries> x_Append(const string&   path,
454                                   ELevel          level,
455                                   const string&   comment,
456                                   const TEntries* toc = NULL);
457     // Append a single entry from the file system to the archive.
458     // Wrapper around AppendEntry().
459     // Return FALSE if entry should be skipped (via user Checkpoint()).
460     bool x_AppendEntry(const string& path, ELevel level = CCompression::eLevel_Default);
461 
462     // Extract current entry.
463     // Wrapper around ExtractEntry() that perform all necessary checks and
464     // flags processing.
465     void x_ExtractEntry(const TEntries* prev_entries);
466 
467     // Restore attributes of an entry in the file system.
468     // If "dst" is not specified, then the destination path will be
469     // constructed from "info", and the base directory (if any).  Otherwise,
470     // "dst" will be used "as is", assuming it corresponds to "info".
471     void x_RestoreAttrs(const CArchiveEntryInfo& info,
472                         const CDirEntry*         dst = NULL) const;
473 
474 protected:
475     unique_ptr<IArchive>  m_Archive;       ///< Pointer to interface to EFormat-specific archive support
476     EFormat               m_Format;        ///< Archive format
477     IArchive::ELocation   m_Location;      ///< Archive location (file/memory)
478     TFlags                m_Flags;         ///< Bitwise OR of flags
479     string                m_BaseDir;       ///< Base directory for relative paths
480     CArchiveEntryInfo     m_Current;       ///< Information about current entry being processed
481     SMask                 m_MaskFullPath;  ///< Set of masks for operations (full path)
482     SMask                 m_MaskPattern;   ///< Set of masks for operations (path components)
483     EOpenMode             m_OpenMode;      ///< What was it opened for
484     bool                  m_Modified;      ///< True after at least one write
485 
486 protected:
487     // Prohibit assignment and copy
488     CArchive& operator=(const CArchive&);
489     CArchive(const CArchive&);
490 };
491 
492 
493 
494 //////////////////////////////////////////////////////////////////////////////
495 ///
496 /// CArchiveFile -- file-based archive.
497 ///
498 /// Throws exceptions on errors.
499 
500 class NCBI_XUTIL_EXPORT CArchiveFile : public CArchive
501 {
502 public:
503     /// Constructor for file-based archive.
504     ///
505     /// @param format
506     ///   Archive format.
507     /// @param filename
508     ///   Path to archive file name.
509     ///   Note, that directory in that archive file will be create should exists.
510     /// @sa
511     ///   Create, Extract, List, Test, Append
512     CArchiveFile(EFormat format, const string& filename);
513 
514 protected:
515     /// Open the archive for specified action.
516     virtual void Open(EAction action);
517 
518 protected:
519     string m_FileName;   ///< Archive file name
520 
521 private:
522     // Prohibit assignment and copy
523     CArchiveFile& operator=(const CArchiveFile&);
524     CArchiveFile(const CArchiveFile&);
525 };
526 
527 
528 
529 //////////////////////////////////////////////////////////////////////////////
530 ///
531 /// CArchiveMemory -- memory-based archive.
532 ///
533 /// Throws exceptions on errors.
534 
535 class NCBI_XUTIL_EXPORT CArchiveMemory : public CArchive
536 {
537 public:
538     /// Constructor for memory-based archive.
539     ///
540     /// @param format
541     ///   Archive format.
542     /// @param buf
543     ///   Pointer to an archive located in memory. Used only to open already
544     ///   existed archive for reading. Never used if you would like to create
545     ///   new archive, see Create().
546     /// @param buf_size
547     ///   Size of the archive.
548     /// @sa
549     ///   Create, Extract, List, Test, Append
550     CArchiveMemory(EFormat format, const void* buf = NULL, size_t buf_size = 0);
551 
552     /// Create a new empty archive in memory.
553     ///
554     /// @param initial_allocation_size
555     ///   Estimated size of the archive, if known.
556     ///   Bigger size allow to avoid extra memory reallocations.
557     /// @sa
558     ///   Append, Finalize
559     virtual void Create(size_t initial_allocation_size);
560     virtual void Create(void);
561 
562     /// Save current opened/created archive to file.
563     ///
564     /// @param filename
565     ///   Path to the archive file name. The directory in that archive
566     ///   file will be create should exists. If destination file
567     ///   already exists, it will be overwritten.
568     /// @note
569     ///   Newly created archive should be finalized first.
570     /// @sa
571     ///   Create, Finalize, Load
572     void Save(const string& filename);
573 
574     /// Load existing archive from file system to memory.
575     ///
576     /// @param filename
577     ///   Path to the existing archive.
578     /// @note
579     ///   If you have opened or created archive, it will be automatically closed.
580     /// @sa
581     ///   Open, Save
582     void Load(const string& filename);
583 
584     /// Finalize the archive created in memory.
585     ///
586     /// Return pointer to a buffer with created archive and its size.
587     /// After this call you cannot write to archive anymore, but you can
588     /// read from it. Returning pointer to buffer and its size also
589     /// will be saved internally and used for opening archive for reading
590     /// (see constructor).
591     /// @param buf_ptr
592     ///   Pointer to an archive located in memory.
593     /// @param buf_size_ptr
594     ///   Size of the newly created archive.
595     /// @note
596     ///   Do not forget to deallocate memory buffer after usage.
597     ///   Use free() or AutoPtr<char, CDeleter<char>>.
598     /// @sa
599     ///   Create, Close
600     virtual void Finalize(void** buf_ptr, size_t* buf_size_ptr);
601 
602 protected:
603     /// Open the archive for specified action.
604     virtual void Open(EAction action);
605 
606 protected:
607     // Open
608     const void* m_Buf;         ///< Buffer where the opening archive is located
609     size_t      m_BufSize;     ///< Size of m_Buf
610     /// Holder for the pointer to memory buffer that will be automatically
611     /// deallocated if we own it (used for Load() only).
612     /// m_Buf will have the same pointer value.
613     AutoArray<char> m_OwnBuf;
614     // Create
615     ///< Initial allocation size for created archive
616     size_t m_InitialAllocationSize;
617 
618 private:
619     // Prohibit assignment and copy
620     CArchiveMemory& operator=(const CArchiveMemory&);
621     CArchiveMemory(const CArchiveMemory&);
622 };
623 
624 
625 END_NCBI_SCOPE
626 
627 
628 /* @} */
629 
630 
631 #endif  /* UTIL_COMPRESS__ARCHIVE__HPP */
632