1 #ifndef UTIL_COMPRESS__TAR__HPP
2 #define UTIL_COMPRESS__TAR__HPP
3 
4 /* $Id: tar.hpp 587245 2019-05-31 23:39:51Z lavr $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Vladimir Ivanov
30  *           Anton Lavrentiev
31  *
32  * File Description:
33  *   Tar archive API
34  */
35 
36 ///  @file
37 ///  Tar archive API.
38 ///
39 ///  Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
40 ///  GNU (POSIX 1003.1), and V7 formats (all partially but reasonably).  New
41 ///  archives are created using POSIX (genuine ustar) format, using GNU
42 ///  extensions for long names/links only when unavoidable.  It cannot,
43 ///  however, handle all the exotics like sparse files (except for GNU/1.0
44 ///  sparse PAX extension) and contiguous files (yet still can work around both
45 ///  of them gracefully, if needed), multivolume / incremental archives, etc.
46 ///  but just regular files, devices (character or block), FIFOs, directories,
47 ///  and limited links:  can extract both hard- and symlinks, but can store
48 ///  symlinks only.  Also, this implementation is only minimally PAX(Portable
49 ///  Archive eXchange)-aware for file extractions (and does not yet use any PAX
50 ///  extensions to store the files).
51 ///
52 
53 #include <corelib/ncbifile.hpp>
54 #include <utility>
55 
56 
57 /** @addtogroup Compression
58  *
59  * @{
60  */
61 
62 
63 BEGIN_NCBI_SCOPE
64 
65 
66 /////////////////////////////////////////////////////////////////////////////
67 ///
68 /// TTarMode --
69 ///
70 /// Permission bits as defined in tar
71 ///
72 
73 enum ETarModeBits {
74     // Special mode bits
75     fTarSetUID   = 04000,       ///< set UID on execution
76     fTarSetGID   = 02000,       ///< set GID on execution
77     fTarSticky   = 01000,       ///< reserved (sticky bit)
78     // File permissions
79     fTarURead    = 00400,       ///< read by owner
80     fTarUWrite   = 00200,       ///< write by owner
81     fTarUExecute = 00100,       ///< execute/search by owner
82     fTarGRead    = 00040,       ///< read by group
83     fTarGWrite   = 00020,       ///< write by group
84     fTarGExecute = 00010,       ///< execute/search by group
85     fTarORead    = 00004,       ///< read by other
86     fTarOWrite   = 00002,       ///< write by other
87     fTarOExecute = 00001        ///< execute/search by other
88 };
89 typedef unsigned int TTarMode;  ///< Bitwise OR of ETarModeBits
90 
91 
92 /////////////////////////////////////////////////////////////////////////////
93 ///
94 /// CTarException --
95 ///
96 /// Define exceptions generated by the API.
97 /// Exception text may include detailed dump of a tar header (when appropriate)
98 /// if fDumpEntryHeaders is set in the archive flags.
99 ///
100 /// CTarException inherits its basic functionality from CCoreException
101 /// and defines additional error codes for tar archive operations.
102 ///
103 /// @sa
104 ///   CTar::SetFlags
105 
106 class NCBI_XUTIL_EXPORT CTarException : public CCoreException
107 {
108 public:
109     /// Error types that file operations can generate.
110     enum EErrCode {
111         eUnsupportedTarFormat,
112         eUnsupportedEntryType,
113         eUnsupportedSource,
114         eNameTooLong,
115         eChecksum,
116         eBadName,
117         eCreate,
118         eOpen,
119         eRead,
120         eWrite,
121         eBackup,
122         eMemory,
123         eRestoreAttrs
124     };
125 
126     /// Translate from an error code value to its string representation.
GetErrCodeString(void) const127     virtual const char* GetErrCodeString(void) const override
128     {
129         switch (GetErrCode()) {
130         case eUnsupportedTarFormat: return "eUnsupportedTarFormat";
131         case eUnsupportedEntryType: return "eUnsupportedEntryType";
132         case eUnsupportedSource:    return "eUnsupportedSource";
133         case eNameTooLong:          return "eNameTooLong";
134         case eChecksum:             return "eChecksum";
135         case eBadName:              return "eBadName";
136         case eCreate:               return "eCreate";
137         case eOpen:                 return "eOpen";
138         case eRead:                 return "eRead";
139         case eWrite:                return "eWrite";
140         case eBackup:               return "eBackup";
141         case eMemory:               return "eMemory";
142         case eRestoreAttrs:         return "eRestoreAttrs";
143         default:                    return CException::GetErrCodeString();
144         }
145     }
146 
147     // Standard exception boilerplate code.
148     NCBI_EXCEPTION_DEFAULT(CTarException, CCoreException);
149 };
150 
151 
152 //////////////////////////////////////////////////////////////////////////////
153 ///
154 /// CTarEntryInfo class
155 ///
156 /// Information about a tar archive entry.
157 
158 class NCBI_XUTIL_EXPORT CTarEntryInfo
159 {
160 public:
161     /// Archive entry type.
162     enum EType {
163         eFile        = CDirEntry::eFile,         ///< Regular file
164         eDir         = CDirEntry::eDir,          ///< Directory
165         eSymLink     = CDirEntry::eSymLink,      ///< Symbolic link
166         ePipe        = CDirEntry::ePipe,         ///< Pipe (FIFO)
167         eCharDev     = CDirEntry::eCharSpecial,  ///< Character device
168         eBlockDev    = CDirEntry::eBlockSpecial, ///< Block device
169         eUnknown     = CDirEntry::eUnknown,      ///< Unknown type
170         eHardLink,                               ///< Hard link
171         eVolHeader,                              ///< Volume header
172         ePAXHeader,                              ///< PAX extended header
173         eSparseFile,                             ///< GNU/STAR sparse file
174         eGNULongName,                            ///< GNU long name
175         eGNULongLink                             ///< GNU long link
176     };
177 
178     /// Position type.
179     enum EPos {
180         ePos_Header,
181         ePos_Data
182     };
183 
184     // No setters -- they are not needed for access by the user, and thus are
185     // done directly from CTar for the sake of performance and code clarity.
186 
187     // Getters only!
GetType(void) const188     EType         GetType(void)              const { return m_Type;      }
GetName(void) const189     const string& GetName(void)              const { return m_Name;      }
GetLinkName(void) const190     const string& GetLinkName(void)          const { return m_LinkName;  }
GetUserName(void) const191     const string& GetUserName(void)          const { return m_UserName;  }
GetGroupName(void) const192     const string& GetGroupName(void)         const { return m_GroupName; }
GetModificationTime(void) const193     time_t        GetModificationTime(void)  const
194     { return m_Stat.orig.st_mtime; }
GetModificationCTime(void) const195     CTime         GetModificationCTime(void) const
196     { CTime mtime(m_Stat.orig.st_mtime);
197       mtime.SetNanoSecond(m_Stat.mtime_nsec);
198       return mtime;                }
GetLastAccessTime(void) const199     time_t        GetLastAccessTime(void)    const
200     { return m_Stat.orig.st_atime; }
GetLastAccessCTime(void) const201     CTime         GetLastAccessCTime(void)   const
202     { CTime atime(m_Stat.orig.st_atime);
203       atime.SetNanoSecond(m_Stat.atime_nsec);
204       return atime;                }
GetCreationTime(void) const205     time_t        GetCreationTime(void)      const
206     { return m_Stat.orig.st_ctime; }
GetCreationCTime(void) const207     CTime         GetCreationCTime(void)     const
208     { CTime ctime(m_Stat.orig.st_ctime);
209       ctime.SetNanoSecond(m_Stat.ctime_nsec);
210       return ctime;                }
GetSize(void) const211     Uint8         GetSize(void)              const
212     { return m_Stat.orig.st_size;  }
213     TTarMode      GetMode(void)              const;// Raw mode as stored in tar
214     void          GetMode(CDirEntry::TMode*            user_mode,
215                           CDirEntry::TMode*            group_mode   = 0,
216                           CDirEntry::TMode*            other_mode   = 0,
217                           CDirEntry::TSpecialModeBits* special_bits = 0) const;
218     unsigned int  GetMajor(void)             const;
219     unsigned int  GetMinor(void)             const;
GetUserId(void) const220     unsigned int  GetUserId(void)            const
221     { return m_Stat.orig.st_uid;   }
GetGroupId(void) const222     unsigned int  GetGroupId(void)           const
223     { return m_Stat.orig.st_gid;   }
GetPosition(EPos which) const224     Uint8         GetPosition(EPos which)    const
225     { return which == ePos_Header ? m_Pos : m_Pos + m_HeaderSize; }
226 
227     // Comparison operator.
operator ==(const CTarEntryInfo & info) const228     bool operator == (const CTarEntryInfo& info) const
229     { return (m_Type       == info.m_Type                        &&
230               m_Name       == info.m_Name                        &&
231               m_LinkName   == info.m_LinkName                    &&
232               m_UserName   == info.m_UserName                    &&
233               m_GroupName  == info.m_GroupName                   &&
234               m_HeaderSize == info.m_HeaderSize                  &&
235               memcmp(&m_Stat,&info.m_Stat, sizeof(m_Stat)) == 0  &&
236               m_Pos        == info.m_Pos ? true : false);         }
237 
238 protected:
239     // Constructor.
CTarEntryInfo(Uint8 pos=0)240     CTarEntryInfo(Uint8 pos = 0)
241         : m_Type(eUnknown), m_HeaderSize(0), m_Pos(pos)
242     { memset(&m_Stat, 0, sizeof(m_Stat));                         }
243 
244     EType            m_Type;       ///< Type
245     string           m_Name;       ///< Entry name
246     string           m_LinkName;   ///< Link name if type is e{Sym|Hard}Link
247     string           m_UserName;   ///< User name
248     string           m_GroupName;  ///< Group name
249     streamsize       m_HeaderSize; ///< Total size of all headers for the entry
250     CDirEntry::SStat m_Stat;       ///< Direntry-compatible info
251     Uint8            m_Pos;        ///< Entry (not data!) position in archive
252 
253     friend class CTar;             // Setter
254 };
255 
256 
257 /// User-creatable info for streaming into a tar.
258 /// Since the entry info is built largerly incomplete, all getters have been
259 /// disabled;  should some be needed they could be brought back by subclassing
260 /// and redeclaring the necessary one(s) in the public part of the new class.
261 class CTarUserEntryInfo : protected CTarEntryInfo
262 {
263 public:
CTarUserEntryInfo(const string & name,Uint8 size)264     CTarUserEntryInfo(const string& name, Uint8 size)
265     {
266         m_Name              = name;
267         m_Stat.orig.st_size = size;
268     }
269 
270     friend class CTar;             // Accessor
271 };
272 
273 
274 /// Nice TOC(table of contents) printout.
275 NCBI_XUTIL_EXPORT ostream& operator << (ostream&, const CTarEntryInfo&);
276 
277 
278 /// Forward declaration of a tar header used internally.
279 struct STarHeader;
280 
281 
282 //////////////////////////////////////////////////////////////////////////////
283 ///
284 /// CTar class
285 ///
286 /// (Throws exceptions on most errors.)
287 /// Note that if stream constructor is used, then CTar can only perform one
288 /// pass over the archive.  This means that only one full action will succeed
289 /// (and if the action was to update -- e.g. append -- the archive, it has to
290 /// be explicitly followed by Close() when no more appends are expected).
291 /// Before the next read/update action, the stream position has to be reset
292 /// explicitly to the beginning of the archive, or it may also remain at the
293 /// end of the archive for a series of successive append operations.
294 
295 class NCBI_XUTIL_EXPORT CTar
296 {
297 public:
298     /// General flags
299     enum EFlags {
300         // --- Extract/List/Test ---
301         /// Ignore blocks of zeros in archive.
302         //  Generally, 2 or more consecutive zero blocks indicate EOT.
303         fIgnoreZeroBlocks   = (1<<1),
304 
305         // --- Extract/Append/Update ---
306         /// Follow symbolic links (instead of storing/extracting them)
307         fFollowLinks        = (1<<2),
308 
309         // --- Extract --- (NB: fUpdate also applies to Update)
310         /// Allow to overwrite destinations with entries from the archive
311         fOverwrite          = (1<<3),
312         /// Only update entries that are older than those already existing
313         fUpdate             = (1<<4) | fOverwrite,
314         /// Backup destinations if they exist (all entries including dirs)
315         fBackup             = (1<<5) | fOverwrite,
316         /// If destination entry exists, it must have the same type as source
317         fEqualTypes         = (1<<6),
318         /// Create extracted files with the original ownership
319         fPreserveOwner      = (1<<7),
320         /// Create extracted files with the original permissions
321         fPreserveMode       = (1<<8),
322         /// Preserve date/times for extracted files
323         fPreserveTime       = (1<<9),
324         /// Preserve all file attributes
325         fPreserveAll        = fPreserveOwner | fPreserveMode | fPreserveTime,
326         /// Preserve absolute path instead of stripping the leadind slash('/')
327         fKeepAbsolutePath   = (1<<12),
328         /// Do not extract PAX GNU/1.0 sparse files (treat 'em as unsupported)
329         fSparseUnsupported  = (1<<13),
330 
331         // --- Extract/List ---
332         /// Skip unsupported entries rather than make files out of them when
333         /// extracting (the latter is the default behavior required by POSIX)
334         fSkipUnsupported    = (1<<15),
335 
336         // --- Append ---
337         /// Ignore unreadable files/dirs (still warn them, but don't stop)
338         fIgnoreUnreadable   = (1<<17),
339         /// Always use OldGNU headers for long names (default:only when needed)
340         fLongNameSupplement = (1<<18),
341 
342         // --- Debugging ---
343         fDumpEntryHeaders   = (1<<20),
344         fSlowSkipWithRead   = (1<<21),
345 
346         // --- Miscellaneous ---
347         /// Stream tar data through
348         fStreamPipeThrough  = (1<<24),
349         /// Do not trim tar file size after append/update
350         fTarfileNoTruncate  = (1<<26),
351         /// Suppress NCBI signatures in entry headers
352         fStandardHeaderOnly = (1<<28),
353 
354         /// Default flags
355         fDefault            = fOverwrite | fPreserveAll
356     };
357     typedef unsigned int TFlags;  ///< Bitwise OR of EFlags
358 
359     /// Mask type enumerator.
360     /// @enum eExtractMask
361     ///   CMask can select both inclusions and exclusions (in this order) of
362     ///   fully-qualified archive entries for listing or extraction, so that
363     ///   e.g. ".svn" does not match an entry like "a/.svn" for processing.
364     /// @enum eExcludeMask
365     ///   CMask can select both exclusions and inclusions (in this order) of
366     ///   patterns of the archive entries for all operations (excepting eTest),
367     ///   and so that ".svn" matches "a/b/c/.svn".
368     enum EMaskType {
369         eExtractMask = 0,  ///< exact for list or extract
370         eExcludeMask       ///< pattern for all but test
371     };
372 
373     /// Constructors
374     CTar(const string& filename, size_t blocking_factor = 20);
375     /// Stream version does not at all use stream positioning and so is safe on
376     /// non-positionable streams, like pipes/sockets (or magnetic tapes :-I).
377     CTar(CNcbiIos& stream, size_t blocking_factor = 20);
378 
379     /// Destructor (finalize the archive if currently open).
380     /// @sa
381     ///   Close
382     virtual ~CTar();
383 
384 
385     /// Define a list of entries.
386     typedef list<CTarEntryInfo> TEntries;
387 
388     /// Define a list of files with sizes (directories and specials, such as
389     /// devices, must be given with sizes of 0;  symlinks -- with the sizes
390     /// of the names they are linking to).
391     typedef pair<string, Uint8> TFile;
392     typedef list<TFile>         TFiles;
393 
394 
395     //------------------------------------------------------------------------
396     // Main functions
397     //------------------------------------------------------------------------
398 
399     /// Create a new empty archive.
400     ///
401     /// If a file with such a name already exists it will be overwritten.
402     /// @sa
403     ///   Append
404     void Create(void);
405 
406     /// Close the archive making sure all pending output is flushed.
407     ///
408     /// Normally, direct call of this method need _not_ intersperse successive
409     /// archive manipulations by other methods, as they open and close the
410     /// archive automagically as needed.  Rather, this call is to make sure the
411     /// archive is complete earlier than it otherwise usually be done
412     /// automatically in the destructor of the CTar object.
413     /// @sa
414     ///   ~CTar
415     void Close(void);
416 
417     /// Append an entry at the end of the archive that already exists.
418     ///
419     /// Appended entry can be either a file, a directory, a symbolic link,
420     /// a device special file (block or character), or a FIFO special file,
421     /// subject to any exclusions as set by SetMask() with eExcludeMask.
422     /// The name is taken with respect to the base directory, if any set.
423     ///
424     /// Adding a directory results in all its files and subdirectories (subject
425     //  for the exclusion mask) to get added: examine the return value to find
426     /// out what has been added.
427     ///
428     /// Note that the final name of an entry may not contain embedded '..'.
429     /// Leading slash in the absolute paths will be retained.  The names of
430     /// all appended entries will be converted to Unix format (that is, to
431     /// have only forward slashes in the paths, and drive letter, if any on
432     /// MS-Windows, stripped).  All entries will be added at the logical end
433     /// (not always EOF) of the archive, when appending to a non-empty one.
434     ///
435     /// @note Adding to a stream archive does not seek to the logical end of
436     /// the archive but begins at the current position right away.
437     ///
438     /// @return
439     ///   A list of entries appended.
440     /// @sa
441     ///   Create, Update, SetBaseDir, SetMask
442     unique_ptr<TEntries> Append(const string& name);
443 
444     /// Append an entry from a stream (exactly entry.GetSize() bytes).
445     /// @note
446     ///   Name masks (if any set with SetMask()) are all ignored.
447     /// @return
448     ///   A list (containing this one entry) with full archive info filled in
449     /// @sa
450     ///   Append
451     unique_ptr<TEntries> Append(const CTarUserEntryInfo& entry,
452                                 CNcbiIstream& is);
453 
454     /// Look whether more recent copies of the archive members are available in
455     /// the file system, and if so, append them to the archive:
456     ///
457     /// - if fUpdate is set in processing flags, only the existing archive
458     /// entries (including directories) will be updated;  that is, Update(".")
459     /// won't recursively add "." if "." is not an archive member;  it will,
460     /// however, do the recursive update should "." be found in the archive;
461     ///
462     /// - if fUpdate is unset, the existing entries will be updated (if their
463     /// file system counterparts are newer), and nonexistent entries will be
464     /// added to the archive;  that is, Update(".") will recursively scan "."
465     /// to update both existing entries (if newer files found), and also add
466     /// new entries for any files/directories, which are currently not in.
467     ///
468     /// @note Updating stream archive may (and most certainly will) cause
469     /// zero-filled gaps in the archive (can be read with "ignore zeroes").
470     ///
471     /// @return
472     ///   A list of entries that have been updated.
473     /// @sa
474     ///   Append, SetBaseDir, SetMask, SetFlags
475     unique_ptr<TEntries> Update(const string& name);
476 
477     /// Extract the entire archive (into either current directory or a
478     /// directory otherwise specified by SetBaseDir()).
479     ///
480     /// If the same-named files exist, they will be replaced (subject to
481     /// fOverwrite) or backed up (fBackup), unless fUpdate is set, which would
482     /// cause the replacement / backup only if the files are older than the
483     /// archive entries.  Note that if fOverwrite is stripped, no matching
484     /// files will be updated / backed up / overwritten, but skipped.
485     ///
486     /// Extract all archive entries, whose names match the pre-set mask.
487     /// @note
488     ///   Unlike Append(), extracting a matching directory does *not*
489     ///   automatically extract all files within:  for them to be extracted,
490     ///   they still must match the mask.  So if there is a directory "dir/"
491     ///   stored in the archive, the extract mask can be "dir/*" for the
492     ///   entire subtree to be extracted.  Note that "dir/" will only extract
493     ///   the directory itself, and "dir" won't cause that directory to be
494     ///   extracted at all (mismatch due to the trailing slash '/' missing).
495     /// @return
496     ///   A list of entries that have been actually extracted.
497     /// @sa
498     ///   SetMask, SetBaseDir, SetFlags
499     unique_ptr<TEntries> Extract(void);
500 
501     /// Get information about all matching archive entries.
502     ///
503     /// @return
504     ///   An array containing information on those archive entries, whose
505     ///   names match the pre-set mask.
506     /// @sa
507     ///   SetMask
508     unique_ptr<TEntries> List(void);
509 
510     /// Verify archive integrity.
511     ///
512     /// Read through the archive without actually extracting anything from it.
513     /// Flag fDumpEntryHeaders causes most of archive headers to be dumped to
514     /// the log (with eDiag_Info) as the Test() advances through the archive.
515     /// @sa
516     ///   SetFlags
517     void Test(void);
518 
519 
520     //------------------------------------------------------------------------
521     // Utility functions
522     //------------------------------------------------------------------------
523 
524     /// Get processing flags.
525     TFlags GetFlags(void) const;
526 
527     /// Set processing flags.
528     void   SetFlags(TFlags flags);
529 
530     /// Get current stream position.
531     Uint8  GetCurrentPosition(void) const;
532 
533     /// Set name mask.
534     ///
535     /// The set of masks is used to process existing entries in the archive:
536     /// both the extract and exclude masks apply to the list and extract
537     /// operations, and only the exclude mask apply to the named append.
538     /// If masks are not defined then all archive entries will be processed.
539     ///
540     /// @note Unset mask means wildcard processing (all entries match).
541     ///
542     /// @param mask
543     ///   Set of masks (0 to unset the current set without setting a new one).
544     /// @param own
545     ///   Whether to take ownership on the mask (delete upon CTar destruction).
546     /// @sa
547     //    SetFlags
548     void SetMask(CMask*      mask,
549                  EOwnership  own   = eNoOwnership,
550                  EMaskType   type  = eExtractMask,
551                  NStr::ECase acase = NStr::eCase);
552 
553     /// Get base directory to use for files while extracting from/adding to
554     /// the archive, and in the latter case used only for relative paths.
555     /// @sa
556     ///   SetBaseDir
557     const string& GetBaseDir(void) const;
558 
559     /// Set base directory to use for files while extracting from/adding to
560     /// the archive, and in the latter case used only for relative paths.
561     /// @sa
562     ///   GetBaseDir
563     void          SetBaseDir(const string& dirname);
564 
565     /// Return archive size as if all specified input entries were put in it.
566     /// Note that the return value is not the exact but the upper bound of
567     /// what the archive size can be expected.  This call does not recurse
568     /// into any subdirectories but relies solely upon the information as
569     /// passed via the parameter.
570     ///
571     /// The returned size includes all necessary alignments and padding.
572     /// @return
573     ///   An upper estimate of archive size given that all specified files
574     ///   were stored in it (the actual size may turn out to be smaller).
575     static Uint8 EstimateArchiveSize(const TFiles& files,
576                                      size_t blocking_factor = 20,
577                                      const string& base_dir = kEmptyStr);
578 
579 
580     //------------------------------------------------------------------------
581     // Streaming
582     //------------------------------------------------------------------------
583 
584     /// Iterate over the archive forward and return first (or next) entry.
585     ///
586     /// When using this method (possibly along with GetNextEntryData()), the
587     /// archive stream (if any) must not be accessed outside the CTar API,
588     /// because otherwise inconsistency in data may result.
589     /// An application may call GetNextEntryData() to stream some or all of the
590     /// data out of this entry, or it may call GetNextEntryInfo() again to skip
591     /// to the next archive entry, etc.
592     /// Note that the archive can contain multiple versions of the same entry
593     /// (in case if an update was done on it), all of which but the last one
594     /// are to be ignored.  This call traverses through all those entry
595     /// versions, and sequentially exposes them to the application level.
596     /// See test suite (in test/test_tar.cpp) for a usage example.
597     /// @return
598     ///   Pointer to next entry info in the archive or 0 if EOF encountered.
599     /// @sa
600     ///   CTarEntryInfo, GetNextEntryData
601     const CTarEntryInfo* GetNextEntryInfo(void);
602 
603     /// Create and return an IReader, which can extract the current archive
604     /// entry that has been previously returned via GetNextEntryInfo.
605     ///
606     /// The returned pointer is non-zero only if the current entry is a file
607     /// (even of size 0).  The ownership of the pointer is passed to the caller
608     /// (so it has to be explicitly deleted when no longer needed).
609     /// The IReader may be used to read all or part of data out of the entry
610     /// without affecting GetNextEntryInfo()'s ability to find any following
611     /// entry in the archive.
612     /// See test suite (in test/test_tar.cpp) for a usage example.
613     /// @return
614     ///   Pointer to IReader, or 0 if the current entry is not a file.
615     /// @sa
616     ///   GetNextEntryData, IReader, CRStream
617     IReader*             GetNextEntryData(void);
618 
619     /// Create and return an IReader, which can extract contents of one named
620     /// file (which can be requested by a name mask in the "name" parameter).
621     ///
622     /// The tar archive is deemed to be in the specified stream "is", properly
623     /// positioned (either at the beginning of the archive, or at any
624     /// CTarEntryInfo::GetPosition(ePos_Header)'s result possibly off-set
625     /// with some fixed archive base position, e.g. if there is any preamble).
626     /// The extraction is done at the first matching entry only, then stops.
627     /// @note fStreamPipeThrough will be ignored if passed in flags.
628     /// See test suite (in test/test_tar.cpp) for a usage example.
629     /// @return
630     ///   IReader interface to read the file contents with;  0 on error.
631     /// @sa
632     ///   CTarEntryInfo::GetPosition, Extract, SetMask, SetFlags,
633     ///   GetNextEntryInfo, GetNextEntryData, IReader, CRStream
634     static IReader* Extract(CNcbiIstream& is, const string& name,
635                             TFlags flags = fSkipUnsupported);
636 
637 protected:
638     //------------------------------------------------------------------------
639     // User-redefinable callback
640     //------------------------------------------------------------------------
641 
642     /// Return false to skip the current entry when reading;
643     /// the return code gets ignored when writing.
644     ///
645     /// Note that the callback can encounter multiple entries of the same file
646     /// in case the archive has been updated (so only the last occurrence is
647     /// the actual copy of the file when extracted).
Checkpoint(const CTarEntryInfo &,bool)648     virtual bool Checkpoint(const CTarEntryInfo& /*current*/,
649                             bool /*ifwrite: write==true, read==false*/)
650     { return true; }
651 
652 private:
653     /// Archive open mode and action
654     enum EOpenMode {
655         eNone = 0,
656         eWO   = 1,
657         eRO   = 2,
658         eRW   = eRO | eWO
659     };
660     enum EAction {
661         eUndefined =  eNone,
662         eList      = (1 << 2) | eRO,
663         eAppend    = (1 << 3) | eRW,
664         eUpdate    = eList | eAppend,
665         eExtract   = (1 << 4) | eRO,
666         eTest      = eList | eExtract,
667         eCreate    = (1 << 5) | eWO,
668         eInternal  = (1 << 6) | eRO
669     };
670     /// I/O completion code
671     enum EStatus {
672         eFailure = -1,
673         eSuccess =  0,
674         eContinue,
675         eZeroBlock,
676         eEOF
677     };
678     /// Mask storage
679     struct SMask {
680         CMask*      mask;
681         NStr::ECase acase;
682         EOwnership  owned;
683 
SMaskCTar::SMask684         SMask(void)
685             : mask(0), acase(NStr::eNocase), owned(eNoOwnership)
686         { }
687     };
688 
689     // Common part of initialization.
690     void x_Init(void);
691 
692     // Open/close the archive.
693     void x_Open(EAction action);
694     void x_Close(bool truncate);  // NB: "truncate" effects file archives only
695 
696     // Flush the archive (w/EOT);  return "true" if it is okay to truncate
697     bool x_Flush(bool nothrow = false);
698 
699     // Backspace and fast-forward the archive.
700     void x_Backspace(EAction action);  // NB: m_ZeroBlockCount blocks back
701     void x_Skip(Uint8 blocks);         // NB: Can do by either skip or read
702 
703     // Parse in extended entry information (PAX) for the current entry.
704     EStatus x_ParsePAXData(const string& data);
705 
706     // Read information about current entry in the archive.
707     EStatus x_ReadEntryInfo(bool dump, bool pax);
708 
709     // Pack current name or linkname into archive entry header.
710     bool x_PackCurrentName(STarHeader* header, bool link);
711 
712     // Write information for current entry into the archive.
713     void x_WriteEntryInfo(const string& name);
714 
715     // Read the archive and do the requested "action" on current entry.
716     unique_ptr<TEntries> x_ReadAndProcess(EAction action);
717 
718     // Process current entry from the archive (the actual size passed in).
719     // If action != eExtract, then just skip the entry without any processing.
720     // Return true iff the entry was successfully extracted (ie with eExtract).
721     bool x_ProcessEntry(EAction action, Uint8 size, const TEntries* done);
722 
723     // Extract current entry (archived size passed in) from the archive into
724     // the file system, and update the size still remaining in the archive, if
725     // any.  Return true if the extraction succeeded, false otherwise.
726     bool x_ExtractEntry(Uint8& size, const CDirEntry* dst,
727                         const CDirEntry* src);
728 
729     // Extract file data from the archive.
730     void x_ExtractPlainFile (Uint8& size, const CDirEntry* dst);
731     bool x_ExtractSparseFile(Uint8& size, const CDirEntry* dst,
732                              bool dump = false);
733 
734     // Restore attributes of an entry in the file system.
735     // If "path" is not specified, then the destination path will be
736     // constructed from "info", and the base directory (if any).  Otherwise,
737     // "path" will be used "as is", assuming it corresponds to "info".
738     void x_RestoreAttrs(const CTarEntryInfo& info,
739                         TFlags               what,
740                         const CDirEntry*     path = 0,
741                         TTarMode             perm = 0/*override*/) const;
742 
743     // Read a text string terminated with '\n'.
744     string x_ReadLine(Uint8& size, const char*& data, size_t& nread);
745 
746     // Read/write specified number of bytes from/to the archive.
747     const char* x_ReadArchive (size_t& n);
748     void        x_WriteArchive(size_t  n, const char* buffer = 0);
749 
750     // Append an entry from the file system to the archive.
751     unique_ptr<TEntries> x_Append(const string& name, const TEntries* toc = 0);
752 
753     // Append an entry from an istream to the archive.
754     unique_ptr<TEntries> x_Append(const CTarUserEntryInfo& entry,
755                                   CNcbiIstream& is);
756 
757     // Append data from an istream to the archive.
758     void x_AppendStream(const string& name, CNcbiIstream& is);
759 
760     // Append a regular file to the archive.
761     bool x_AppendFile(const string& file);
762 
763 private:
764     string        m_FileName;       ///< Tar archive file name (only if file)
765     CNcbiFstream* m_FileStream;     ///< File stream of the archive (if file)
766     CNcbiIos&     m_Stream;         ///< Archive stream (used for all I/O)
767     size_t        m_ZeroBlockCount; ///< Zero blocks seen in between entries
768     const size_t  m_BufferSize;     ///< Buffer(record) size for I/O operations
769     size_t        m_BufferPos;      ///< Position within the record
770     Uint8         m_StreamPos;      ///< Position in stream (0-based)
771     char*         m_BufPtr;         ///< Page-unaligned buffer pointer
772     char*         m_Buffer;         ///< I/O buffer (page-aligned)
773     SMask         m_Mask[2];        ///< Entry masks for operations
774     EOpenMode     m_OpenMode;       ///< What was it opened for
775     bool          m_Modified;       ///< True after at least one write
776     bool          m_Bad;            ///< True if a fatal output error occurred
777     TFlags        m_Flags;          ///< Bitwise OR of flags
778     string        m_BaseDir;        ///< Base directory for relative paths
779     CTarEntryInfo m_Current;        ///< Current entry being processed
780 
781 private:
782     // Prohibit assignment and copy
783     CTar& operator=(const CTar&);
784     CTar(const CTar&);
785 
786     friend class CTarReader;
787 };
788 
789 
790 //////////////////////////////////////////////////////////////////////////////
791 //
792 // Inline methods
793 //
794 
795 inline
Create(void)796 void CTar::Create(void)
797 {
798     x_Open(eCreate);
799 }
800 
801 inline
Close(void)802 void CTar::Close(void)
803 {
804     x_Close(x_Flush());
805 }
806 
807 inline
Append(const string & name)808 unique_ptr<CTar::TEntries> CTar::Append(const string& name)
809 {
810     x_Open(eAppend);
811     return x_Append(name);
812 }
813 
814 inline
Append(const CTarUserEntryInfo & entry,CNcbiIstream & is)815 unique_ptr<CTar::TEntries> CTar::Append(const CTarUserEntryInfo& entry,
816                                         CNcbiIstream& is)
817 {
818     x_Open(eAppend);
819     return x_Append(entry, is);
820 }
821 
822 inline
Update(const string & name)823 unique_ptr<CTar::TEntries> CTar::Update(const string& name)
824 {
825     x_Open(eUpdate);
826     return x_Append(name, x_ReadAndProcess(eUpdate).get());
827 }
828 
829 inline
List(void)830 unique_ptr<CTar::TEntries> CTar::List(void)
831 {
832     x_Open(eList);
833     return x_ReadAndProcess(eList);
834 }
835 
836 inline
Test(void)837 void CTar::Test(void)
838 {
839     x_Open(eTest);
840     x_ReadAndProcess(eTest);
841 }
842 
843 inline
GetFlags(void) const844 CTar::TFlags CTar::GetFlags(void) const
845 {
846     return m_Flags;
847 }
848 
849 inline
SetFlags(TFlags flags)850 void CTar::SetFlags(TFlags flags)
851 {
852     m_Flags = flags;
853 }
854 
GetCurrentPosition(void) const855 inline Uint8 CTar::GetCurrentPosition(void) const
856 {
857     return m_StreamPos;
858 }
859 
860 inline
GetBaseDir(void) const861 const string& CTar::GetBaseDir(void) const
862 {
863     return m_BaseDir;
864 }
865 
866 
867 END_NCBI_SCOPE
868 
869 
870 /* @} */
871 
872 
873 #endif  /* UTIL_COMPRESS__TAR__HPP */
874