1 #ifndef UTIL_COMPRESS__TAR__HPP
2 #define UTIL_COMPRESS__TAR__HPP
3
4 /* $Id: tar.hpp 587245 2019-05-31 23:39:51Z lavr $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Vladimir Ivanov
30 * Anton Lavrentiev
31 *
32 * File Description:
33 * Tar archive API
34 */
35
36 /// @file
37 /// Tar archive API.
38 ///
39 /// Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
40 /// GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
41 /// archives are created using POSIX (genuine ustar) format, using GNU
42 /// extensions for long names/links only when unavoidable. It cannot,
43 /// however, handle all the exotics like sparse files (except for GNU/1.0
44 /// sparse PAX extension) and contiguous files (yet still can work around both
45 /// of them gracefully, if needed), multivolume / incremental archives, etc.
46 /// but just regular files, devices (character or block), FIFOs, directories,
47 /// and limited links: can extract both hard- and symlinks, but can store
48 /// symlinks only. Also, this implementation is only minimally PAX(Portable
49 /// Archive eXchange)-aware for file extractions (and does not yet use any PAX
50 /// extensions to store the files).
51 ///
52
53 #include <corelib/ncbifile.hpp>
54 #include <utility>
55
56
57 /** @addtogroup Compression
58 *
59 * @{
60 */
61
62
63 BEGIN_NCBI_SCOPE
64
65
/////////////////////////////////////////////////////////////////////////////
///
/// TTarMode --
///
/// Permission bits as defined in tar
///
/// The enumerators are octal bit masks; combine them with bitwise OR and
/// store the result in a TTarMode.
///

enum ETarModeBits {
    // Special mode bits
    fTarSetUID   = 04000,       ///< set UID on execution
    fTarSetGID   = 02000,       ///< set GID on execution
    fTarSticky   = 01000,       ///< reserved (sticky bit)
    // File permissions
    fTarURead    = 00400,       ///< read by owner
    fTarUWrite   = 00200,       ///< write by owner
    fTarUExecute = 00100,       ///< execute/search by owner
    fTarGRead    = 00040,       ///< read by group
    fTarGWrite   = 00020,       ///< write by group
    fTarGExecute = 00010,       ///< execute/search by group
    fTarORead    = 00004,       ///< read by other
    fTarOWrite   = 00002,       ///< write by other
    fTarOExecute = 00001        ///< execute/search by other
};
typedef unsigned int TTarMode;  ///< Bitwise OR of ETarModeBits
90
91
92 /////////////////////////////////////////////////////////////////////////////
93 ///
94 /// CTarException --
95 ///
96 /// Define exceptions generated by the API.
97 /// Exception text may include detailed dump of a tar header (when appropriate)
98 /// if fDumpEntryHeaders is set in the archive flags.
99 ///
100 /// CTarException inherits its basic functionality from CCoreException
101 /// and defines additional error codes for tar archive operations.
102 ///
103 /// @sa
104 /// CTar::SetFlags
105
106 class NCBI_XUTIL_EXPORT CTarException : public CCoreException
107 {
108 public:
109 /// Error types that file operations can generate.
110 enum EErrCode {
111 eUnsupportedTarFormat,
112 eUnsupportedEntryType,
113 eUnsupportedSource,
114 eNameTooLong,
115 eChecksum,
116 eBadName,
117 eCreate,
118 eOpen,
119 eRead,
120 eWrite,
121 eBackup,
122 eMemory,
123 eRestoreAttrs
124 };
125
126 /// Translate from an error code value to its string representation.
GetErrCodeString(void) const127 virtual const char* GetErrCodeString(void) const override
128 {
129 switch (GetErrCode()) {
130 case eUnsupportedTarFormat: return "eUnsupportedTarFormat";
131 case eUnsupportedEntryType: return "eUnsupportedEntryType";
132 case eUnsupportedSource: return "eUnsupportedSource";
133 case eNameTooLong: return "eNameTooLong";
134 case eChecksum: return "eChecksum";
135 case eBadName: return "eBadName";
136 case eCreate: return "eCreate";
137 case eOpen: return "eOpen";
138 case eRead: return "eRead";
139 case eWrite: return "eWrite";
140 case eBackup: return "eBackup";
141 case eMemory: return "eMemory";
142 case eRestoreAttrs: return "eRestoreAttrs";
143 default: return CException::GetErrCodeString();
144 }
145 }
146
147 // Standard exception boilerplate code.
148 NCBI_EXCEPTION_DEFAULT(CTarException, CCoreException);
149 };
150
151
152 //////////////////////////////////////////////////////////////////////////////
153 ///
154 /// CTarEntryInfo class
155 ///
156 /// Information about a tar archive entry.
157
158 class NCBI_XUTIL_EXPORT CTarEntryInfo
159 {
160 public:
161 /// Archive entry type.
162 enum EType {
163 eFile = CDirEntry::eFile, ///< Regular file
164 eDir = CDirEntry::eDir, ///< Directory
165 eSymLink = CDirEntry::eSymLink, ///< Symbolic link
166 ePipe = CDirEntry::ePipe, ///< Pipe (FIFO)
167 eCharDev = CDirEntry::eCharSpecial, ///< Character device
168 eBlockDev = CDirEntry::eBlockSpecial, ///< Block device
169 eUnknown = CDirEntry::eUnknown, ///< Unknown type
170 eHardLink, ///< Hard link
171 eVolHeader, ///< Volume header
172 ePAXHeader, ///< PAX extended header
173 eSparseFile, ///< GNU/STAR sparse file
174 eGNULongName, ///< GNU long name
175 eGNULongLink ///< GNU long link
176 };
177
178 /// Position type.
179 enum EPos {
180 ePos_Header,
181 ePos_Data
182 };
183
184 // No setters -- they are not needed for access by the user, and thus are
185 // done directly from CTar for the sake of performance and code clarity.
186
187 // Getters only!
GetType(void) const188 EType GetType(void) const { return m_Type; }
GetName(void) const189 const string& GetName(void) const { return m_Name; }
GetLinkName(void) const190 const string& GetLinkName(void) const { return m_LinkName; }
GetUserName(void) const191 const string& GetUserName(void) const { return m_UserName; }
GetGroupName(void) const192 const string& GetGroupName(void) const { return m_GroupName; }
GetModificationTime(void) const193 time_t GetModificationTime(void) const
194 { return m_Stat.orig.st_mtime; }
GetModificationCTime(void) const195 CTime GetModificationCTime(void) const
196 { CTime mtime(m_Stat.orig.st_mtime);
197 mtime.SetNanoSecond(m_Stat.mtime_nsec);
198 return mtime; }
GetLastAccessTime(void) const199 time_t GetLastAccessTime(void) const
200 { return m_Stat.orig.st_atime; }
GetLastAccessCTime(void) const201 CTime GetLastAccessCTime(void) const
202 { CTime atime(m_Stat.orig.st_atime);
203 atime.SetNanoSecond(m_Stat.atime_nsec);
204 return atime; }
GetCreationTime(void) const205 time_t GetCreationTime(void) const
206 { return m_Stat.orig.st_ctime; }
GetCreationCTime(void) const207 CTime GetCreationCTime(void) const
208 { CTime ctime(m_Stat.orig.st_ctime);
209 ctime.SetNanoSecond(m_Stat.ctime_nsec);
210 return ctime; }
GetSize(void) const211 Uint8 GetSize(void) const
212 { return m_Stat.orig.st_size; }
213 TTarMode GetMode(void) const;// Raw mode as stored in tar
214 void GetMode(CDirEntry::TMode* user_mode,
215 CDirEntry::TMode* group_mode = 0,
216 CDirEntry::TMode* other_mode = 0,
217 CDirEntry::TSpecialModeBits* special_bits = 0) const;
218 unsigned int GetMajor(void) const;
219 unsigned int GetMinor(void) const;
GetUserId(void) const220 unsigned int GetUserId(void) const
221 { return m_Stat.orig.st_uid; }
GetGroupId(void) const222 unsigned int GetGroupId(void) const
223 { return m_Stat.orig.st_gid; }
GetPosition(EPos which) const224 Uint8 GetPosition(EPos which) const
225 { return which == ePos_Header ? m_Pos : m_Pos + m_HeaderSize; }
226
227 // Comparison operator.
operator ==(const CTarEntryInfo & info) const228 bool operator == (const CTarEntryInfo& info) const
229 { return (m_Type == info.m_Type &&
230 m_Name == info.m_Name &&
231 m_LinkName == info.m_LinkName &&
232 m_UserName == info.m_UserName &&
233 m_GroupName == info.m_GroupName &&
234 m_HeaderSize == info.m_HeaderSize &&
235 memcmp(&m_Stat,&info.m_Stat, sizeof(m_Stat)) == 0 &&
236 m_Pos == info.m_Pos ? true : false); }
237
238 protected:
239 // Constructor.
CTarEntryInfo(Uint8 pos=0)240 CTarEntryInfo(Uint8 pos = 0)
241 : m_Type(eUnknown), m_HeaderSize(0), m_Pos(pos)
242 { memset(&m_Stat, 0, sizeof(m_Stat)); }
243
244 EType m_Type; ///< Type
245 string m_Name; ///< Entry name
246 string m_LinkName; ///< Link name if type is e{Sym|Hard}Link
247 string m_UserName; ///< User name
248 string m_GroupName; ///< Group name
249 streamsize m_HeaderSize; ///< Total size of all headers for the entry
250 CDirEntry::SStat m_Stat; ///< Direntry-compatible info
251 Uint8 m_Pos; ///< Entry (not data!) position in archive
252
253 friend class CTar; // Setter
254 };
255
256
257 /// User-creatable info for streaming into a tar.
258 /// Since the entry info is built largerly incomplete, all getters have been
259 /// disabled; should some be needed they could be brought back by subclassing
260 /// and redeclaring the necessary one(s) in the public part of the new class.
261 class CTarUserEntryInfo : protected CTarEntryInfo
262 {
263 public:
CTarUserEntryInfo(const string & name,Uint8 size)264 CTarUserEntryInfo(const string& name, Uint8 size)
265 {
266 m_Name = name;
267 m_Stat.orig.st_size = size;
268 }
269
270 friend class CTar; // Accessor
271 };
272
273
274 /// Nice TOC(table of contents) printout.
275 NCBI_XUTIL_EXPORT ostream& operator << (ostream&, const CTarEntryInfo&);
276
277
/// Forward declaration of a tar header used internally.
struct STarHeader;
280
281
282 //////////////////////////////////////////////////////////////////////////////
283 ///
284 /// CTar class
285 ///
286 /// (Throws exceptions on most errors.)
287 /// Note that if stream constructor is used, then CTar can only perform one
288 /// pass over the archive. This means that only one full action will succeed
289 /// (and if the action was to update -- e.g. append -- the archive, it has to
290 /// be explicitly followed by Close() when no more appends are expected).
291 /// Before the next read/update action, the stream position has to be reset
292 /// explicitly to the beginning of the archive, or it may also remain at the
293 /// end of the archive for a series of successive append operations.
294
295 class NCBI_XUTIL_EXPORT CTar
296 {
297 public:
298 /// General flags
299 enum EFlags {
300 // --- Extract/List/Test ---
301 /// Ignore blocks of zeros in archive.
302 // Generally, 2 or more consecutive zero blocks indicate EOT.
303 fIgnoreZeroBlocks = (1<<1),
304
305 // --- Extract/Append/Update ---
306 /// Follow symbolic links (instead of storing/extracting them)
307 fFollowLinks = (1<<2),
308
309 // --- Extract --- (NB: fUpdate also applies to Update)
310 /// Allow to overwrite destinations with entries from the archive
311 fOverwrite = (1<<3),
312 /// Only update entries that are older than those already existing
313 fUpdate = (1<<4) | fOverwrite,
314 /// Backup destinations if they exist (all entries including dirs)
315 fBackup = (1<<5) | fOverwrite,
316 /// If destination entry exists, it must have the same type as source
317 fEqualTypes = (1<<6),
318 /// Create extracted files with the original ownership
319 fPreserveOwner = (1<<7),
320 /// Create extracted files with the original permissions
321 fPreserveMode = (1<<8),
322 /// Preserve date/times for extracted files
323 fPreserveTime = (1<<9),
324 /// Preserve all file attributes
325 fPreserveAll = fPreserveOwner | fPreserveMode | fPreserveTime,
326 /// Preserve absolute path instead of stripping the leadind slash('/')
327 fKeepAbsolutePath = (1<<12),
328 /// Do not extract PAX GNU/1.0 sparse files (treat 'em as unsupported)
329 fSparseUnsupported = (1<<13),
330
331 // --- Extract/List ---
332 /// Skip unsupported entries rather than make files out of them when
333 /// extracting (the latter is the default behavior required by POSIX)
334 fSkipUnsupported = (1<<15),
335
336 // --- Append ---
337 /// Ignore unreadable files/dirs (still warn them, but don't stop)
338 fIgnoreUnreadable = (1<<17),
339 /// Always use OldGNU headers for long names (default:only when needed)
340 fLongNameSupplement = (1<<18),
341
342 // --- Debugging ---
343 fDumpEntryHeaders = (1<<20),
344 fSlowSkipWithRead = (1<<21),
345
346 // --- Miscellaneous ---
347 /// Stream tar data through
348 fStreamPipeThrough = (1<<24),
349 /// Do not trim tar file size after append/update
350 fTarfileNoTruncate = (1<<26),
351 /// Suppress NCBI signatures in entry headers
352 fStandardHeaderOnly = (1<<28),
353
354 /// Default flags
355 fDefault = fOverwrite | fPreserveAll
356 };
357 typedef unsigned int TFlags; ///< Bitwise OR of EFlags
358
359 /// Mask type enumerator.
360 /// @enum eExtractMask
361 /// CMask can select both inclusions and exclusions (in this order) of
362 /// fully-qualified archive entries for listing or extraction, so that
363 /// e.g. ".svn" does not match an entry like "a/.svn" for processing.
364 /// @enum eExcludeMask
365 /// CMask can select both exclusions and inclusions (in this order) of
366 /// patterns of the archive entries for all operations (excepting eTest),
367 /// and so that ".svn" matches "a/b/c/.svn".
368 enum EMaskType {
369 eExtractMask = 0, ///< exact for list or extract
370 eExcludeMask ///< pattern for all but test
371 };
372
373 /// Constructors
374 CTar(const string& filename, size_t blocking_factor = 20);
375 /// Stream version does not at all use stream positioning and so is safe on
376 /// non-positionable streams, like pipes/sockets (or magnetic tapes :-I).
377 CTar(CNcbiIos& stream, size_t blocking_factor = 20);
378
379 /// Destructor (finalize the archive if currently open).
380 /// @sa
381 /// Close
382 virtual ~CTar();
383
384
385 /// Define a list of entries.
386 typedef list<CTarEntryInfo> TEntries;
387
388 /// Define a list of files with sizes (directories and specials, such as
389 /// devices, must be given with sizes of 0; symlinks -- with the sizes
390 /// of the names they are linking to).
391 typedef pair<string, Uint8> TFile;
392 typedef list<TFile> TFiles;
393
394
395 //------------------------------------------------------------------------
396 // Main functions
397 //------------------------------------------------------------------------
398
399 /// Create a new empty archive.
400 ///
401 /// If a file with such a name already exists it will be overwritten.
402 /// @sa
403 /// Append
404 void Create(void);
405
406 /// Close the archive making sure all pending output is flushed.
407 ///
408 /// Normally, direct call of this method need _not_ intersperse successive
409 /// archive manipulations by other methods, as they open and close the
410 /// archive automagically as needed. Rather, this call is to make sure the
411 /// archive is complete earlier than it otherwise usually be done
412 /// automatically in the destructor of the CTar object.
413 /// @sa
414 /// ~CTar
415 void Close(void);
416
417 /// Append an entry at the end of the archive that already exists.
418 ///
419 /// Appended entry can be either a file, a directory, a symbolic link,
420 /// a device special file (block or character), or a FIFO special file,
421 /// subject to any exclusions as set by SetMask() with eExcludeMask.
422 /// The name is taken with respect to the base directory, if any set.
423 ///
424 /// Adding a directory results in all its files and subdirectories (subject
425 // for the exclusion mask) to get added: examine the return value to find
426 /// out what has been added.
427 ///
428 /// Note that the final name of an entry may not contain embedded '..'.
429 /// Leading slash in the absolute paths will be retained. The names of
430 /// all appended entries will be converted to Unix format (that is, to
431 /// have only forward slashes in the paths, and drive letter, if any on
432 /// MS-Windows, stripped). All entries will be added at the logical end
433 /// (not always EOF) of the archive, when appending to a non-empty one.
434 ///
435 /// @note Adding to a stream archive does not seek to the logical end of
436 /// the archive but begins at the current position right away.
437 ///
438 /// @return
439 /// A list of entries appended.
440 /// @sa
441 /// Create, Update, SetBaseDir, SetMask
442 unique_ptr<TEntries> Append(const string& name);
443
444 /// Append an entry from a stream (exactly entry.GetSize() bytes).
445 /// @note
446 /// Name masks (if any set with SetMask()) are all ignored.
447 /// @return
448 /// A list (containing this one entry) with full archive info filled in
449 /// @sa
450 /// Append
451 unique_ptr<TEntries> Append(const CTarUserEntryInfo& entry,
452 CNcbiIstream& is);
453
454 /// Look whether more recent copies of the archive members are available in
455 /// the file system, and if so, append them to the archive:
456 ///
457 /// - if fUpdate is set in processing flags, only the existing archive
458 /// entries (including directories) will be updated; that is, Update(".")
459 /// won't recursively add "." if "." is not an archive member; it will,
460 /// however, do the recursive update should "." be found in the archive;
461 ///
462 /// - if fUpdate is unset, the existing entries will be updated (if their
463 /// file system counterparts are newer), and nonexistent entries will be
464 /// added to the archive; that is, Update(".") will recursively scan "."
465 /// to update both existing entries (if newer files found), and also add
466 /// new entries for any files/directories, which are currently not in.
467 ///
468 /// @note Updating stream archive may (and most certainly will) cause
469 /// zero-filled gaps in the archive (can be read with "ignore zeroes").
470 ///
471 /// @return
472 /// A list of entries that have been updated.
473 /// @sa
474 /// Append, SetBaseDir, SetMask, SetFlags
475 unique_ptr<TEntries> Update(const string& name);
476
477 /// Extract the entire archive (into either current directory or a
478 /// directory otherwise specified by SetBaseDir()).
479 ///
480 /// If the same-named files exist, they will be replaced (subject to
481 /// fOverwrite) or backed up (fBackup), unless fUpdate is set, which would
482 /// cause the replacement / backup only if the files are older than the
483 /// archive entries. Note that if fOverwrite is stripped, no matching
484 /// files will be updated / backed up / overwritten, but skipped.
485 ///
486 /// Extract all archive entries, whose names match the pre-set mask.
487 /// @note
488 /// Unlike Append(), extracting a matching directory does *not*
489 /// automatically extract all files within: for them to be extracted,
490 /// they still must match the mask. So if there is a directory "dir/"
491 /// stored in the archive, the extract mask can be "dir/*" for the
492 /// entire subtree to be extracted. Note that "dir/" will only extract
493 /// the directory itself, and "dir" won't cause that directory to be
494 /// extracted at all (mismatch due to the trailing slash '/' missing).
495 /// @return
496 /// A list of entries that have been actually extracted.
497 /// @sa
498 /// SetMask, SetBaseDir, SetFlags
499 unique_ptr<TEntries> Extract(void);
500
501 /// Get information about all matching archive entries.
502 ///
503 /// @return
504 /// An array containing information on those archive entries, whose
505 /// names match the pre-set mask.
506 /// @sa
507 /// SetMask
508 unique_ptr<TEntries> List(void);
509
510 /// Verify archive integrity.
511 ///
512 /// Read through the archive without actually extracting anything from it.
513 /// Flag fDumpEntryHeaders causes most of archive headers to be dumped to
514 /// the log (with eDiag_Info) as the Test() advances through the archive.
515 /// @sa
516 /// SetFlags
517 void Test(void);
518
519
520 //------------------------------------------------------------------------
521 // Utility functions
522 //------------------------------------------------------------------------
523
524 /// Get processing flags.
525 TFlags GetFlags(void) const;
526
527 /// Set processing flags.
528 void SetFlags(TFlags flags);
529
530 /// Get current stream position.
531 Uint8 GetCurrentPosition(void) const;
532
533 /// Set name mask.
534 ///
535 /// The set of masks is used to process existing entries in the archive:
536 /// both the extract and exclude masks apply to the list and extract
537 /// operations, and only the exclude mask apply to the named append.
538 /// If masks are not defined then all archive entries will be processed.
539 ///
540 /// @note Unset mask means wildcard processing (all entries match).
541 ///
542 /// @param mask
543 /// Set of masks (0 to unset the current set without setting a new one).
544 /// @param own
545 /// Whether to take ownership on the mask (delete upon CTar destruction).
546 /// @sa
547 // SetFlags
548 void SetMask(CMask* mask,
549 EOwnership own = eNoOwnership,
550 EMaskType type = eExtractMask,
551 NStr::ECase acase = NStr::eCase);
552
553 /// Get base directory to use for files while extracting from/adding to
554 /// the archive, and in the latter case used only for relative paths.
555 /// @sa
556 /// SetBaseDir
557 const string& GetBaseDir(void) const;
558
559 /// Set base directory to use for files while extracting from/adding to
560 /// the archive, and in the latter case used only for relative paths.
561 /// @sa
562 /// GetBaseDir
563 void SetBaseDir(const string& dirname);
564
565 /// Return archive size as if all specified input entries were put in it.
566 /// Note that the return value is not the exact but the upper bound of
567 /// what the archive size can be expected. This call does not recurse
568 /// into any subdirectories but relies solely upon the information as
569 /// passed via the parameter.
570 ///
571 /// The returned size includes all necessary alignments and padding.
572 /// @return
573 /// An upper estimate of archive size given that all specified files
574 /// were stored in it (the actual size may turn out to be smaller).
575 static Uint8 EstimateArchiveSize(const TFiles& files,
576 size_t blocking_factor = 20,
577 const string& base_dir = kEmptyStr);
578
579
580 //------------------------------------------------------------------------
581 // Streaming
582 //------------------------------------------------------------------------
583
584 /// Iterate over the archive forward and return first (or next) entry.
585 ///
586 /// When using this method (possibly along with GetNextEntryData()), the
587 /// archive stream (if any) must not be accessed outside the CTar API,
588 /// because otherwise inconsistency in data may result.
589 /// An application may call GetNextEntryData() to stream some or all of the
590 /// data out of this entry, or it may call GetNextEntryInfo() again to skip
591 /// to the next archive entry, etc.
592 /// Note that the archive can contain multiple versions of the same entry
593 /// (in case if an update was done on it), all of which but the last one
594 /// are to be ignored. This call traverses through all those entry
595 /// versions, and sequentially exposes them to the application level.
596 /// See test suite (in test/test_tar.cpp) for a usage example.
597 /// @return
598 /// Pointer to next entry info in the archive or 0 if EOF encountered.
599 /// @sa
600 /// CTarEntryInfo, GetNextEntryData
601 const CTarEntryInfo* GetNextEntryInfo(void);
602
603 /// Create and return an IReader, which can extract the current archive
604 /// entry that has been previously returned via GetNextEntryInfo.
605 ///
606 /// The returned pointer is non-zero only if the current entry is a file
607 /// (even of size 0). The ownership of the pointer is passed to the caller
608 /// (so it has to be explicitly deleted when no longer needed).
609 /// The IReader may be used to read all or part of data out of the entry
610 /// without affecting GetNextEntryInfo()'s ability to find any following
611 /// entry in the archive.
612 /// See test suite (in test/test_tar.cpp) for a usage example.
613 /// @return
614 /// Pointer to IReader, or 0 if the current entry is not a file.
615 /// @sa
616 /// GetNextEntryData, IReader, CRStream
617 IReader* GetNextEntryData(void);
618
619 /// Create and return an IReader, which can extract contents of one named
620 /// file (which can be requested by a name mask in the "name" parameter).
621 ///
622 /// The tar archive is deemed to be in the specified stream "is", properly
623 /// positioned (either at the beginning of the archive, or at any
624 /// CTarEntryInfo::GetPosition(ePos_Header)'s result possibly off-set
625 /// with some fixed archive base position, e.g. if there is any preamble).
626 /// The extraction is done at the first matching entry only, then stops.
627 /// @note fStreamPipeThrough will be ignored if passed in flags.
628 /// See test suite (in test/test_tar.cpp) for a usage example.
629 /// @return
630 /// IReader interface to read the file contents with; 0 on error.
631 /// @sa
632 /// CTarEntryInfo::GetPosition, Extract, SetMask, SetFlags,
633 /// GetNextEntryInfo, GetNextEntryData, IReader, CRStream
634 static IReader* Extract(CNcbiIstream& is, const string& name,
635 TFlags flags = fSkipUnsupported);
636
637 protected:
638 //------------------------------------------------------------------------
639 // User-redefinable callback
640 //------------------------------------------------------------------------
641
642 /// Return false to skip the current entry when reading;
643 /// the return code gets ignored when writing.
644 ///
645 /// Note that the callback can encounter multiple entries of the same file
646 /// in case the archive has been updated (so only the last occurrence is
647 /// the actual copy of the file when extracted).
Checkpoint(const CTarEntryInfo &,bool)648 virtual bool Checkpoint(const CTarEntryInfo& /*current*/,
649 bool /*ifwrite: write==true, read==false*/)
650 { return true; }
651
652 private:
653 /// Archive open mode and action
654 enum EOpenMode {
655 eNone = 0,
656 eWO = 1,
657 eRO = 2,
658 eRW = eRO | eWO
659 };
660 enum EAction {
661 eUndefined = eNone,
662 eList = (1 << 2) | eRO,
663 eAppend = (1 << 3) | eRW,
664 eUpdate = eList | eAppend,
665 eExtract = (1 << 4) | eRO,
666 eTest = eList | eExtract,
667 eCreate = (1 << 5) | eWO,
668 eInternal = (1 << 6) | eRO
669 };
670 /// I/O completion code
671 enum EStatus {
672 eFailure = -1,
673 eSuccess = 0,
674 eContinue,
675 eZeroBlock,
676 eEOF
677 };
678 /// Mask storage
679 struct SMask {
680 CMask* mask;
681 NStr::ECase acase;
682 EOwnership owned;
683
SMaskCTar::SMask684 SMask(void)
685 : mask(0), acase(NStr::eNocase), owned(eNoOwnership)
686 { }
687 };
688
689 // Common part of initialization.
690 void x_Init(void);
691
692 // Open/close the archive.
693 void x_Open(EAction action);
694 void x_Close(bool truncate); // NB: "truncate" effects file archives only
695
696 // Flush the archive (w/EOT); return "true" if it is okay to truncate
697 bool x_Flush(bool nothrow = false);
698
699 // Backspace and fast-forward the archive.
700 void x_Backspace(EAction action); // NB: m_ZeroBlockCount blocks back
701 void x_Skip(Uint8 blocks); // NB: Can do by either skip or read
702
703 // Parse in extended entry information (PAX) for the current entry.
704 EStatus x_ParsePAXData(const string& data);
705
706 // Read information about current entry in the archive.
707 EStatus x_ReadEntryInfo(bool dump, bool pax);
708
709 // Pack current name or linkname into archive entry header.
710 bool x_PackCurrentName(STarHeader* header, bool link);
711
712 // Write information for current entry into the archive.
713 void x_WriteEntryInfo(const string& name);
714
715 // Read the archive and do the requested "action" on current entry.
716 unique_ptr<TEntries> x_ReadAndProcess(EAction action);
717
718 // Process current entry from the archive (the actual size passed in).
719 // If action != eExtract, then just skip the entry without any processing.
720 // Return true iff the entry was successfully extracted (ie with eExtract).
721 bool x_ProcessEntry(EAction action, Uint8 size, const TEntries* done);
722
723 // Extract current entry (archived size passed in) from the archive into
724 // the file system, and update the size still remaining in the archive, if
725 // any. Return true if the extraction succeeded, false otherwise.
726 bool x_ExtractEntry(Uint8& size, const CDirEntry* dst,
727 const CDirEntry* src);
728
729 // Extract file data from the archive.
730 void x_ExtractPlainFile (Uint8& size, const CDirEntry* dst);
731 bool x_ExtractSparseFile(Uint8& size, const CDirEntry* dst,
732 bool dump = false);
733
734 // Restore attributes of an entry in the file system.
735 // If "path" is not specified, then the destination path will be
736 // constructed from "info", and the base directory (if any). Otherwise,
737 // "path" will be used "as is", assuming it corresponds to "info".
738 void x_RestoreAttrs(const CTarEntryInfo& info,
739 TFlags what,
740 const CDirEntry* path = 0,
741 TTarMode perm = 0/*override*/) const;
742
743 // Read a text string terminated with '\n'.
744 string x_ReadLine(Uint8& size, const char*& data, size_t& nread);
745
746 // Read/write specified number of bytes from/to the archive.
747 const char* x_ReadArchive (size_t& n);
748 void x_WriteArchive(size_t n, const char* buffer = 0);
749
750 // Append an entry from the file system to the archive.
751 unique_ptr<TEntries> x_Append(const string& name, const TEntries* toc = 0);
752
753 // Append an entry from an istream to the archive.
754 unique_ptr<TEntries> x_Append(const CTarUserEntryInfo& entry,
755 CNcbiIstream& is);
756
757 // Append data from an istream to the archive.
758 void x_AppendStream(const string& name, CNcbiIstream& is);
759
760 // Append a regular file to the archive.
761 bool x_AppendFile(const string& file);
762
763 private:
764 string m_FileName; ///< Tar archive file name (only if file)
765 CNcbiFstream* m_FileStream; ///< File stream of the archive (if file)
766 CNcbiIos& m_Stream; ///< Archive stream (used for all I/O)
767 size_t m_ZeroBlockCount; ///< Zero blocks seen in between entries
768 const size_t m_BufferSize; ///< Buffer(record) size for I/O operations
769 size_t m_BufferPos; ///< Position within the record
770 Uint8 m_StreamPos; ///< Position in stream (0-based)
771 char* m_BufPtr; ///< Page-unaligned buffer pointer
772 char* m_Buffer; ///< I/O buffer (page-aligned)
773 SMask m_Mask[2]; ///< Entry masks for operations
774 EOpenMode m_OpenMode; ///< What was it opened for
775 bool m_Modified; ///< True after at least one write
776 bool m_Bad; ///< True if a fatal output error occurred
777 TFlags m_Flags; ///< Bitwise OR of flags
778 string m_BaseDir; ///< Base directory for relative paths
779 CTarEntryInfo m_Current; ///< Current entry being processed
780
781 private:
782 // Prohibit assignment and copy
783 CTar& operator=(const CTar&);
784 CTar(const CTar&);
785
786 friend class CTarReader;
787 };
788
789
790 //////////////////////////////////////////////////////////////////////////////
791 //
792 // Inline methods
793 //
794
795 inline
Create(void)796 void CTar::Create(void)
797 {
798 x_Open(eCreate);
799 }
800
801 inline
Close(void)802 void CTar::Close(void)
803 {
804 x_Close(x_Flush());
805 }
806
807 inline
Append(const string & name)808 unique_ptr<CTar::TEntries> CTar::Append(const string& name)
809 {
810 x_Open(eAppend);
811 return x_Append(name);
812 }
813
814 inline
Append(const CTarUserEntryInfo & entry,CNcbiIstream & is)815 unique_ptr<CTar::TEntries> CTar::Append(const CTarUserEntryInfo& entry,
816 CNcbiIstream& is)
817 {
818 x_Open(eAppend);
819 return x_Append(entry, is);
820 }
821
822 inline
Update(const string & name)823 unique_ptr<CTar::TEntries> CTar::Update(const string& name)
824 {
825 x_Open(eUpdate);
826 return x_Append(name, x_ReadAndProcess(eUpdate).get());
827 }
828
829 inline
List(void)830 unique_ptr<CTar::TEntries> CTar::List(void)
831 {
832 x_Open(eList);
833 return x_ReadAndProcess(eList);
834 }
835
836 inline
Test(void)837 void CTar::Test(void)
838 {
839 x_Open(eTest);
840 x_ReadAndProcess(eTest);
841 }
842
843 inline
GetFlags(void) const844 CTar::TFlags CTar::GetFlags(void) const
845 {
846 return m_Flags;
847 }
848
849 inline
SetFlags(TFlags flags)850 void CTar::SetFlags(TFlags flags)
851 {
852 m_Flags = flags;
853 }
854
GetCurrentPosition(void) const855 inline Uint8 CTar::GetCurrentPosition(void) const
856 {
857 return m_StreamPos;
858 }
859
860 inline
GetBaseDir(void) const861 const string& CTar::GetBaseDir(void) const
862 {
863 return m_BaseDir;
864 }
865
866
867 END_NCBI_SCOPE
868
869
870 /* @} */
871
872
873 #endif /* UTIL_COMPRESS__TAR__HPP */
874