1 #ifndef UTIL_COMPRESS__ARCHIVE__HPP 2 #define UTIL_COMPRESS__ARCHIVE__HPP 3 4 /* $Id: archive.hpp 534859 2017-05-03 12:47:35Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Vladimir Ivanov 30 * 31 * File Description: 32 * Compression archive API. 33 * 34 */ 35 36 /// @file archive.hpp 37 /// Archive API. 38 39 #include <util/compress/archive_.hpp> 40 41 42 /** @addtogroup Compression 43 * 44 * @{ 45 */ 46 47 BEGIN_NCBI_SCOPE 48 49 50 // Forward declarations 51 class CArchiveZip; 52 53 54 ////////////////////////////////////////////////////////////////////////////// 55 /// 56 /// CArchive - base class for file- or memory-based archives. 57 /// 58 /// Do not use it directly, use CArchiveFile or CArchiveMemory instead. 59 /// Throws exceptions on errors. 60 61 class NCBI_XUTIL_EXPORT CArchive 62 { 63 public: 64 // Type definitions 65 typedef CCompression::ELevel ELevel; 66 67 /// Archive formats 68 enum EFormat { 69 eZip 70 }; 71 72 /// General flags 73 enum EFlags { 74 // --- Extract --- (fUpdate also applies to Update) 75 /// Allow to overwrite destinations with entries from the archive 76 fOverwrite = (1<<3), 77 /// Only update entries that are older than those already existing 78 fUpdate = (1<<4) | fOverwrite, 79 /// Backup destinations if they exist (all entries including dirs) 80 fBackup = (1<<5) | fOverwrite, 81 /// If destination entry exists, it must have the same type as source 82 fEqualTypes = (1<<6), 83 /// Create extracted files with the original ownership 84 fPreserveOwner = (1<<7), 85 /// Create extracted files with the original permissions 86 fPreserveMode = (1<<8), 87 /// Preserve date/times for extracted files 88 /// Note, that some formats, as zip for example, store modification 89 /// time only, so creation and last access time will be the same as 90 /// modification time. And even it can be a little bit off due a 91 /// rounding errors. 92 fPreserveTime = (1<<9), 93 /// Preserve all file attributes 94 fPreserveAll = fPreserveOwner | fPreserveMode | fPreserveTime, 95 // --- Extract/Append/Update --- 96 /// Follow symbolic links (instead of storing/extracting them) 97 fFollowLinks = (1<<2), 98 // --- Extract/List/Append/Update --- 99 /// Skip unsupported entries rather than making files out of them 100 /// when extracting (the latter is the default POSIX requirement). 101 /// On adding entry of unsupported type to the archive it will be 102 /// skipping also. 103 fSkipUnsupported = (1<<15), 104 // --- Miscellaneous --- 105 /// Default flags 106 fDefault = fOverwrite | fPreserveAll 107 }; 108 typedef unsigned int TFlags; ///< Bitwise OR of EFlags 109 110 /// Define a list of entries. 111 typedef list<CArchiveEntryInfo> TEntries; 112 113 114 //------------------------------------------------------------------------ 115 // Constructors 116 //------------------------------------------------------------------------ 117 118 protected: 119 /// Construct an archive object of specified format. 120 /// 121 /// Declared as protected to avoid direct usage. 122 /// Use derived classes CArchiveFile or CArchiveMemory instead. 123 /// @param format 124 /// Archive format 125 /// @sa 126 /// CArchiveFile, CArchiveMemory 127 CArchive(EFormat format); 128 129 public: 130 /// Destructor 131 /// 132 /// Close the archive if currently open. 133 /// @sa 134 /// Close 135 virtual ~CArchive(void); 136 137 //------------------------------------------------------------------------ 138 // Main functions 139 //------------------------------------------------------------------------ 140 141 /// Create a new empty archive. 142 /// 143 /// @sa 144 /// Append 145 virtual void Create(void); 146 147 /// Close the archive making sure all pending output is flushed. 148 /// 149 /// @sa 150 /// ~CArchive 151 virtual void Close(void); 152 153 /// Get information about archive entries. 154 /// 155 /// @return 156 /// An array containing information on those archive entries, whose 157 /// names match the preset mask if any, or all entries otherwise. 158 /// @sa 159 /// SetMask 160 virtual unique_ptr<TEntries> List(void); 161 162 /// Verify archive integrity. 163 /// 164 /// Read through the archive without actually extracting anything from it. 165 /// Test all archive entries, whose names match the preset mask. 166 /// @return 167 /// An array containing information on those archive entries, whose 168 /// names match the preset mask if any, or all entries otherwise. 169 /// @sa 170 /// SetMask 171 virtual unique_ptr<TEntries> Test(void); 172 173 /// Extract the entire archive. 174 /// 175 /// Extract all archive entries, whose names match the preset mask. 176 /// Entries will be extracted into either current directory or 177 /// a directory otherwise specified by SetBaseDir(). 178 /// @return 179 /// A list of entries that have been actually extracted. 180 /// @sa 181 /// SetMask, SetBaseDir 182 virtual unique_ptr<TEntries> Extract(void); 183 184 /// Extract single file entry to a memory buffer. 185 /// 186 /// @param info 187 /// [in] Entry to extract. 188 /// @param buf 189 /// [in] Memory buffer for extracted data. 190 /// @param buf_size 191 /// [in] Size of memory buffer. 192 /// @param out_size 193 /// [out] Size of extracted data in the buffer. 194 /// @note 195 /// The buffer size should be big enough to fit whole extracted file. 196 /// @sa 197 /// ExtractFileToHeap, EctractFileToCallback, CArchiveEntryInfo::GetSize, List 198 virtual void ExtractFileToMemory(const CArchiveEntryInfo& info, 199 void* buf, size_t buf_size, 200 size_t* /*out*/ out_size); 201 202 /// Extract single file entry to a dynamically allocated memory buffer. 203 /// 204 /// @param info 205 /// [in] Entry to extract. 206 /// @param buf_ptr 207 /// [out] Pointer to an allocated memory buffer. 208 /// @param buf_size_ptr 209 /// [out] Size of allocated memory buffer, it is equal to the size of extracted data. 210 /// @note 211 /// Do not forget to deallocate memory buffer after usage. 212 /// Use free() or AutoPtr<char, CDeleter<char>>. 213 /// @sa 214 /// ExtractFileToMemory, EctractFileToCallback, CArchiveEntryInfo::GetSize, List 215 virtual void ExtractFileToHeap(const CArchiveEntryInfo& info, 216 void** buf_ptr, size_t* buf_size_ptr); 217 218 /// Extract single file entry using user-defined callback. 219 /// 220 /// @param info 221 /// [in] Entry to extract. 222 /// @param callback 223 /// [in] User callback for processing extracted data on the fly. 224 /// @sa 225 /// ExtractFileToMemory, EctractFileToHeap, CArchiveEntryInfo::GetSize, List 226 virtual void ExtractFileToCallback(const CArchiveEntryInfo& info, 227 IArchive::Callback_Write callback); 228 229 /// Append an entry to the archive. 230 /// 231 /// Appended entry can be either a file, directory, symbolic link or etc. 232 /// Each archive format have its own list of supported entry types. 233 /// The name is taken with respect to the base directory, if any set. 234 /// Adding a directory results in all its files and subdirectories to 235 /// get added (examine the return value to find out what has been added). 236 /// The names of all appended entries will be converted to Unix format 237 /// (only forward slashes in the paths, and drive letter, if any on 238 /// MS-Windows, stripped). 239 /// @param path 240 /// Path to appended entry. 241 /// @param level 242 /// Compression level (if selected format support it, or default). 243 /// @param comment 244 /// Optional comment for the appended entry (if selected format support it). 245 /// For directories the comment will be added to upper directory only. 246 /// @return 247 /// A list of entries appended. 248 /// @note 249 /// On the current moment you can use this method to add files to newly 250 /// created archive only, modifying existed archive is not allowed. 251 /// @sa 252 /// Create, AppendFileFromMemory, SetBaseDir, HaveSupport, Update 253 virtual unique_ptr<TEntries> Append(const string& path, 254 ELevel level = CCompression::eLevel_Default, 255 const string& comment = kEmptyStr); 256 257 /// Append a single file entry to the created archive using data from memory buffer. 258 /// 259 /// These function assign the current local time to added entry. 260 /// @param name_in_archive 261 /// Name of the file entry in the archive. You can use any symbols, 262 /// that allowed for file names. Also, you can use relative path here, 263 /// if you want to create some structure in the archive and put the data 264 /// to a file in the subdirectory. 265 /// @param buf 266 /// Buffer with data to add. 267 /// @param buf_size 268 /// Size of data in the buffer. 269 /// @param level 270 /// Compression level (if selected format support it, or default). 271 /// @param comment 272 /// Optional comment for the appended entry (if selected format support it). 273 /// @return 274 /// An information about added entry. 275 /// @note 276 /// On the current moment you can use this method to add files to newly 277 /// created archive only, modification existing archive is not allowed. 278 /// @sa 279 /// Create, Append 280 virtual unique_ptr<CArchive::TEntries> 281 AppendFileFromMemory(const string& name_in_archive, 282 void* buf, size_t buf_size, 283 ELevel level = CCompression::eLevel_Default, 284 const string& comment = kEmptyStr); 285 286 //------------------------------------------------------------------------ 287 // Utility functions 288 //------------------------------------------------------------------------ 289 290 /// Get flags. GetFlags(void) const291 virtual TFlags GetFlags(void) const { return m_Flags; } 292 /// Set flags. SetFlags(TFlags flags)293 virtual void SetFlags(TFlags flags) { m_Flags = flags; } 294 295 /// Get base directory to use for files while extracting from/adding to 296 /// the archive, and in the latter case used only for relative paths. 297 /// @sa 298 /// SetBaseDir GetBaseDir(void) const299 virtual const string& GetBaseDir(void) const { return m_BaseDir; } 300 301 /// Set base directory to use for files while extracting from/adding to 302 /// the archive, and in the latter case used only for relative paths. 303 /// @sa 304 /// GetBaseDir 305 virtual void SetBaseDir(const string& dirname); 306 307 /// Mask type enumerator. 308 /// @enum eFullPathMask 309 /// CMask can select both inclusions and exclusions (in this order) of 310 /// fully-qualified archive entries. Whole entry name will be matched. 311 /// It always use Unix format for path matching, so please use forward 312 /// slash "/" as directory delimiter in the masks. 313 /// @enum ePatternMask 314 /// CMask can select both inclusions and exclusions (in this order) of 315 /// patterns of archive entries. If eFullMask use the full path 316 /// to match mask, that ePatternMask allow to match each path component, 317 /// including names for each subdirectory and/or file name. This type 318 /// of mask is used only if eFullMask matches or not specified. 319 enum EMaskType { 320 eFullPathMask, 321 ePatternMask 322 }; 323 324 /// Set name mask for processing. 325 /// 326 /// The set of masks is used to process existing entries in the archive, 327 /// and apply to list, extract and append operations. 328 /// If masks are not defined then all archive entries will be processed. 329 /// Each "mask" is a set of inclusion and exclusion patterns, each of them 330 /// can be a wildcard file mask or regular expression. 331 /// @param mask 332 /// Set of masks (NULL unset the current set without setting a new one). 333 /// @param own 334 /// Whether to take ownership on the mask (delete upon CArchive destruction). 335 /// @param type 336 /// Type of the mask. You can set two types of masks at the same time. 337 /// The mask with type eFullPathMask applies to whole path name. 338 /// The mask with type ePatternMask applies to each path component, to all 339 /// subdirectories or file name, and if one of them matches, the entry 340 /// will be processed. If masks for both types are set, the entry will 341 /// be processed if it matches for each of them. 342 /// @sa 343 /// UnsetMask, CMaskFileName, CMaskRegexp 344 /// @note 345 /// Unset mask means wildcard processing (all entries match). 346 void SetMask(CMask* mask, 347 EOwnership own = eNoOwnership, 348 EMaskType type = eFullPathMask, 349 NStr::ECase acase = NStr::eNocase); 350 351 /// Unset name mask for processing. 352 /// 353 /// @sa 354 /// SetMask 355 /// @note 356 /// Unset mask means wildcard processing (all entries match). 357 void UnsetMask(EMaskType type); 358 void UnsetMask(void); 359 360 /// Support check enumerator. 361 /// 362 /// Use HaveSupport() to check that current archive format have support for 363 /// specific feature. 364 /// @enum eType 365 /// Check that archive can store entries with specific directory entry type. 366 /// @enum eAbsolutePath 367 /// Archive can store full absolute path entries. Otherwise they will 368 /// be converted to relative path from root directory. 369 /// @sa HaveSupport 370 enum ESupport { 371 eType, 372 eAbsolutePath 373 }; 374 375 /// Check that current archive format have support for specific features. 376 /// 377 /// @param feature 378 /// Name of the feature to check. 379 /// @param param 380 /// Additional parameter (for eType only). 381 /// @sa ESupport 382 bool HaveSupport(ESupport feature, int param = 0); 383 384 protected: 385 /// Archive open mode 386 enum EOpenMode { 387 eNone = 0, 388 eRO = 1, 389 eWO = 2, 390 eRW = eRO | eWO 391 }; 392 393 /// Action, performed on the archive 394 enum EAction { 395 eUndefined = eNone, 396 eCreate = (1 << 8) | eWO, 397 eAppend = (1 << 9) | eWO, 398 eList = (1 << 10) | eRO, 399 eUpdate = eList | eAppend, 400 eExtract = (1 << 11) | eRO, 401 eTest = eList | eExtract 402 }; 403 404 /// Mask storage 405 struct SMask { 406 CMask* mask; 407 NStr::ECase acase; 408 EOwnership owned; SMaskCArchive::SMask409 SMask(void) 410 : mask(0), acase(NStr::eNocase), owned(eNoOwnership) 411 {} 412 }; 413 414 protected: 415 //------------------------------------------------------------------------ 416 // User-redefinable callback 417 //------------------------------------------------------------------------ 418 419 /// Return false to skip the current entry when processing. 420 /// 421 /// Note that the callback can encounter multiple entries of the same file 422 /// in case the archive has been updated (so only the last occurrence is 423 /// the actual copy of the file when extracted). Checkpoint(const CArchiveEntryInfo &,EAction)424 virtual bool Checkpoint(const CArchiveEntryInfo& /*current*/, EAction /*action*/) 425 { return true; } 426 427 protected: 428 //------------------------------------------------------------------------ 429 // Redefinable methods for inherited classes 430 //------------------------------------------------------------------------ 431 432 /// Open archive. 433 virtual void Open(EAction action) = 0; 434 /// Process current entry (List/Test/Extract/Append) 435 virtual void SkipEntry (void); 436 virtual void TestEntry (void); 437 virtual void ExtractEntry(const CDirEntry& dst); 438 virtual void AppendEntry (const string& path, ELevel level); 439 440 protected: 441 //------------------------------------------------------------------------ 442 // Internal processing methods 443 //------------------------------------------------------------------------ 444 445 // Open archive. 446 // Wrapper around Open() that perform all necessary checks and processing. 447 void x_Open(EAction action); 448 449 // Read the archive and do the requested "action" on current entry. 450 unique_ptr<TEntries> x_ReadAndProcess(EAction action); 451 452 // Append an entry from the file system to the archive. 453 unique_ptr<TEntries> x_Append(const string& path, 454 ELevel level, 455 const string& comment, 456 const TEntries* toc = NULL); 457 // Append a single entry from the file system to the archive. 458 // Wrapper around AppendEntry(). 459 // Return FALSE if entry should be skipped (via user Checkpoint()). 460 bool x_AppendEntry(const string& path, ELevel level = CCompression::eLevel_Default); 461 462 // Extract current entry. 463 // Wrapper around ExtractEntry() that perform all necessary checks and 464 // flags processing. 465 void x_ExtractEntry(const TEntries* prev_entries); 466 467 // Restore attributes of an entry in the file system. 468 // If "dst" is not specified, then the destination path will be 469 // constructed from "info", and the base directory (if any). Otherwise, 470 // "dst" will be used "as is", assuming it corresponds to "info". 471 void x_RestoreAttrs(const CArchiveEntryInfo& info, 472 const CDirEntry* dst = NULL) const; 473 474 protected: 475 unique_ptr<IArchive> m_Archive; ///< Pointer to interface to EFormat-specific archive support 476 EFormat m_Format; ///< Archive format 477 IArchive::ELocation m_Location; ///< Archive location (file/memory) 478 TFlags m_Flags; ///< Bitwise OR of flags 479 string m_BaseDir; ///< Base directory for relative paths 480 CArchiveEntryInfo m_Current; ///< Information about current entry being processed 481 SMask m_MaskFullPath; ///< Set of masks for operations (full path) 482 SMask m_MaskPattern; ///< Set of masks for operations (path components) 483 EOpenMode m_OpenMode; ///< What was it opened for 484 bool m_Modified; ///< True after at least one write 485 486 protected: 487 // Prohibit assignment and copy 488 CArchive& operator=(const CArchive&); 489 CArchive(const CArchive&); 490 }; 491 492 493 494 ////////////////////////////////////////////////////////////////////////////// 495 /// 496 /// CArchiveFile -- file-based archive. 497 /// 498 /// Throws exceptions on errors. 499 500 class NCBI_XUTIL_EXPORT CArchiveFile : public CArchive 501 { 502 public: 503 /// Constructor for file-based archive. 504 /// 505 /// @param format 506 /// Archive format. 507 /// @param filename 508 /// Path to archive file name. 509 /// Note, that directory in that archive file will be create should exists. 510 /// @sa 511 /// Create, Extract, List, Test, Append 512 CArchiveFile(EFormat format, const string& filename); 513 514 protected: 515 /// Open the archive for specified action. 516 virtual void Open(EAction action); 517 518 protected: 519 string m_FileName; ///< Archive file name 520 521 private: 522 // Prohibit assignment and copy 523 CArchiveFile& operator=(const CArchiveFile&); 524 CArchiveFile(const CArchiveFile&); 525 }; 526 527 528 529 ////////////////////////////////////////////////////////////////////////////// 530 /// 531 /// CArchiveMemory -- memory-based archive. 532 /// 533 /// Throws exceptions on errors. 534 535 class NCBI_XUTIL_EXPORT CArchiveMemory : public CArchive 536 { 537 public: 538 /// Constructor for memory-based archive. 539 /// 540 /// @param format 541 /// Archive format. 542 /// @param buf 543 /// Pointer to an archive located in memory. Used only to open already 544 /// existed archive for reading. Never used if you would like to create 545 /// new archive, see Create(). 546 /// @param buf_size 547 /// Size of the archive. 548 /// @sa 549 /// Create, Extract, List, Test, Append 550 CArchiveMemory(EFormat format, const void* buf = NULL, size_t buf_size = 0); 551 552 /// Create a new empty archive in memory. 553 /// 554 /// @param initial_allocation_size 555 /// Estimated size of the archive, if known. 556 /// Bigger size allow to avoid extra memory reallocations. 557 /// @sa 558 /// Append, Finalize 559 virtual void Create(size_t initial_allocation_size); 560 virtual void Create(void); 561 562 /// Save current opened/created archive to file. 563 /// 564 /// @param filename 565 /// Path to the archive file name. The directory in that archive 566 /// file will be create should exists. If destination file 567 /// already exists, it will be overwritten. 568 /// @note 569 /// Newly created archive should be finalized first. 570 /// @sa 571 /// Create, Finalize, Load 572 void Save(const string& filename); 573 574 /// Load existing archive from file system to memory. 575 /// 576 /// @param filename 577 /// Path to the existing archive. 578 /// @note 579 /// If you have opened or created archive, it will be automatically closed. 580 /// @sa 581 /// Open, Save 582 void Load(const string& filename); 583 584 /// Finalize the archive created in memory. 585 /// 586 /// Return pointer to a buffer with created archive and its size. 587 /// After this call you cannot write to archive anymore, but you can 588 /// read from it. Returning pointer to buffer and its size also 589 /// will be saved internally and used for opening archive for reading 590 /// (see constructor). 591 /// @param buf_ptr 592 /// Pointer to an archive located in memory. 593 /// @param buf_size_ptr 594 /// Size of the newly created archive. 595 /// @note 596 /// Do not forget to deallocate memory buffer after usage. 597 /// Use free() or AutoPtr<char, CDeleter<char>>. 598 /// @sa 599 /// Create, Close 600 virtual void Finalize(void** buf_ptr, size_t* buf_size_ptr); 601 602 protected: 603 /// Open the archive for specified action. 604 virtual void Open(EAction action); 605 606 protected: 607 // Open 608 const void* m_Buf; ///< Buffer where the opening archive is located 609 size_t m_BufSize; ///< Size of m_Buf 610 /// Holder for the pointer to memory buffer that will be automatically 611 /// deallocated if we own it (used for Load() only). 612 /// m_Buf will have the same pointer value. 613 AutoArray<char> m_OwnBuf; 614 // Create 615 ///< Initial allocation size for created archive 616 size_t m_InitialAllocationSize; 617 618 private: 619 // Prohibit assignment and copy 620 CArchiveMemory& operator=(const CArchiveMemory&); 621 CArchiveMemory(const CArchiveMemory&); 622 }; 623 624 625 END_NCBI_SCOPE 626 627 628 /* @} */ 629 630 631 #endif /* UTIL_COMPRESS__ARCHIVE__HPP */ 632