1 /* $Id: tar.cpp 619589 2020-11-06 17:59:43Z lavr $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Vladimir Ivanov
27  *           Anton Lavrentiev
28  *
29  * File Description:
30  *   Tar archive API.
31  *
32  *   Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
33  *   GNU (POSIX 1003.1), and V7 formats (all partially but reasonably).  New
34  *   archives are created using POSIX (genuine ustar) format, using GNU
35  *   extensions for long names/links only when unavoidable.  It cannot,
36  *   however, handle all the exotics like sparse files (except for GNU/1.0
37  *   sparse PAX extension) and contiguous files (yet still can work around both
38  *   of them gracefully, if needed), multivolume / incremental archives, etc.
39  *   but just regular files, devices (character or block), FIFOs, directories,
40  *   and limited links:  can extract both hard- and symlinks, but can store
41  *   symlinks only.  Also, this implementation is only minimally PAX(Portable
42  *   Archive eXchange)-aware for file extractions (and does not yet use any PAX
43  *   extensions to store the files).
44  *
45  */
46 
47 #include <ncbi_pch.hpp>
48 // Cancel __wur (warn unused result) ill effects in GCC
49 #ifdef   _FORTIFY_SOURCE
50 #  undef _FORTIFY_SOURCE
51 #endif /*_FORTIFY_SOURCE*/
52 #define  _FORTIFY_SOURCE 0
53 #include <util/compress/tar.hpp>
54 #include <util/error_codes.hpp>
55 
56 #if !defined(NCBI_OS_UNIX)  &&  !defined(NCBI_OS_MSWIN)
57 #  error "Class CTar can be defined on UNIX and MS-Windows platforms only!"
58 #endif
59 
60 #if   defined(NCBI_OS_UNIX)
61 #  include "../../../corelib/ncbi_os_unix_p.hpp"
62 #  include <grp.h>
63 #  include <pwd.h>
64 #  include <unistd.h>
65 #  ifdef NCBI_OS_IRIX
66 #    include <sys/mkdev.h>
67 #  endif //NCBI_OS_IRIX
68 #  ifdef HAVE_SYS_SYSMACROS_H
69 #    include <sys/sysmacros.h>
70 #  endif //HAVE_SYS_SYSMACROS_H
71 #  ifdef NCBI_OS_DARWIN
72 // macOS supplies these as inline functions rather than macros.
73 #    define major major
74 #    define minor minor
75 #    define makedev makedev
76 #  endif
77 #  if !defined(major)  ||  !defined(minor)  ||  !defined(makedev)
78 #    error "Device macros undefined in this UNIX build!"
79 #  endif
80 #elif defined(NCBI_OS_MSWIN)
81 #  include "../../../corelib/ncbi_os_mswin_p.hpp"
82 #  include <io.h>
83 typedef unsigned int mode_t;
84 typedef unsigned int uid_t;
85 typedef unsigned int gid_t;
86 #endif //NCBI_OS...
87 
88 
89 #define NCBI_USE_ERRCODE_X  Util_Compress
90 #define NCBI_MODULE         NCBITAR
91 
92 
93 BEGIN_NCBI_SCOPE
94 
95 
96 /////////////////////////////////////////////////////////////////////////////
97 //
98 // TAR helper routines
99 //
100 
101 // Convert a number to an octal string padded to the left
102 // with [leading] zeros ('0') and having _no_ trailing '\0'.
s_NumToOctal(Uint8 val,char * ptr,size_t len)103 static bool s_NumToOctal(Uint8 val, char* ptr, size_t len)
104 {
105     _ASSERT(len > 0);
106     do {
107         ptr[--len] = char('0' + char(val & 7));
108         val >>= 3;
109     } while (len);
110     return val ? false : true;
111 }
112 
113 
114 // Convert an octal number (possibly preceded by spaces) to numeric form.
115 // Stop either at the end of the field or at first '\0' (if any).
s_OctalToNum(Uint8 & val,const char * ptr,size_t len)116 static bool s_OctalToNum(Uint8& val, const char* ptr, size_t len)
117 {
118     _ASSERT(ptr  &&  len > 0);
119     size_t i = *ptr ? 0 : 1;
120     while (i < len  &&  ptr[i]) {
121         if (!isspace((unsigned char) ptr[i]))
122             break;
123         ++i;
124     }
125     val = 0;
126     bool okay = false;
127     while (i < len  &&  '0' <= ptr[i]  &&  ptr[i] <= '7') {
128         okay  = true;
129         val <<= 3;
130         val  |= ptr[i++] - '0';
131     }
132     while (i < len  &&  ptr[i]) {
133         if (!isspace((unsigned char) ptr[i]))
134             return false;
135         ++i;
136     }
137     return okay;
138 }
139 
140 
s_NumToBase256(Uint8 val,char * ptr,size_t len)141 static bool s_NumToBase256(Uint8 val, char* ptr, size_t len)
142 {
143     _ASSERT(len > 0);
144     do {
145         ptr[--len] = (unsigned char)(val & 0xFF);
146         val >>= 8;
147     } while (len);
148     *ptr |= '\x80';  // set base-256 encoding flag
149     return val ? false : true;
150 }
151 
152 
153 // Return 0 (false) if conversion failed; 1 if the value converted to
154 // conventional octal representation (perhaps, with terminating '\0'
155 // sacrificed), or -1 if the value converted using base-256.
s_EncodeUint8(Uint8 val,char * ptr,size_t len)156 static int s_EncodeUint8(Uint8 val, char* ptr, size_t len)
157 {                                           // Max file size (for len == 12):
158     if (s_NumToOctal  (val, ptr,   len)) {  //   8GiB-1
159         return  1/*okay*/;
160     }
161     if (s_NumToOctal  (val, ptr, ++len)) {  //   64GiB-1
162         return  1/*okay*/;
163     }
164     if (s_NumToBase256(val, ptr,   len)) {  //   up to 2^94-1
165         return -1/*okay, base-256*/;
166     }
167     return 0/*failure*/;
168 }
169 
170 
171 // Return true if conversion succeeded;  false otherwise.
s_Base256ToNum(Uint8 & val,const char * ptr,size_t len)172 static bool s_Base256ToNum(Uint8& val, const char* ptr, size_t len)
173 {
174     const Uint8 lim = kMax_UI8 >> 8;
175     if (*ptr & '\x40') {  // negative base-256?
176         return false;
177     }
178     val = *ptr++ & '\x3F';
179     while (--len) {
180         if (val > lim) {
181             return false;
182         }
183         val <<= 8;
184         val  |= (unsigned char)(*ptr++);
185     }
186     return true;
187 }
188 
189 
190 // Return 0 (false) if conversion failed; 1 if the value was read into
191 // as a conventional octal string (perhaps, without the terminating '\0');
192 // or -1 if base-256 representation used.
s_DecodeUint8(Uint8 & val,const char * ptr,size_t len)193 static int s_DecodeUint8(Uint8& val, const char* ptr, size_t len)
194 {
195     if (*ptr & '\x80') {
196         return s_Base256ToNum(val, ptr, len) ? -1/*okay*/ : 0/*failure*/;
197     } else {
198         return s_OctalToNum  (val, ptr, len) ?  1/*okay*/ : 0/*failure*/;
199     }
200 }
201 
202 
s_TarToMode(TTarMode perm,CDirEntry::TMode * usr_mode,CDirEntry::TMode * grp_mode,CDirEntry::TMode * oth_mode,CDirEntry::TSpecialModeBits * special_bits)203 static void s_TarToMode(TTarMode                     perm,
204                         CDirEntry::TMode*            usr_mode,
205                         CDirEntry::TMode*            grp_mode,
206                         CDirEntry::TMode*            oth_mode,
207                         CDirEntry::TSpecialModeBits* special_bits)
208 {
209     // User
210     if (usr_mode) {
211         *usr_mode = ((perm & fTarURead    ? CDirEntry::fRead    : 0) |
212                      (perm & fTarUWrite   ? CDirEntry::fWrite   : 0) |
213                      (perm & fTarUExecute ? CDirEntry::fExecute : 0));
214     }
215 
216     // Group
217     if (grp_mode) {
218         *grp_mode = ((perm & fTarGRead    ? CDirEntry::fRead    : 0) |
219                      (perm & fTarGWrite   ? CDirEntry::fWrite   : 0) |
220                      (perm & fTarGExecute ? CDirEntry::fExecute : 0));
221     }
222 
223     // Others
224     if (oth_mode) {
225         *oth_mode = ((perm & fTarORead    ? CDirEntry::fRead    : 0) |
226                      (perm & fTarOWrite   ? CDirEntry::fWrite   : 0) |
227                      (perm & fTarOExecute ? CDirEntry::fExecute : 0));
228     }
229 
230     // Special bits
231     if (special_bits) {
232         *special_bits = ((perm & fTarSetUID ? CDirEntry::fSetUID : 0) |
233                          (perm & fTarSetGID ? CDirEntry::fSetGID : 0) |
234                          (perm & fTarSticky ? CDirEntry::fSticky : 0));
235     }
236 }
237 
238 
s_TarToMode(TTarMode perm)239 static mode_t s_TarToMode(TTarMode perm)
240 {
241     mode_t mode = (
242 #ifdef S_ISUID
243                    (perm & fTarSetUID   ? S_ISUID  : 0) |
244 #endif
245 #ifdef S_ISGID
246                    (perm & fTarSetGID   ? S_ISGID  : 0) |
247 #endif
248 #ifdef S_ISVTX
249                    (perm & fTarSticky   ? S_ISVTX  : 0) |
250 #endif
251 #if   defined(S_IRUSR)
252                    (perm & fTarURead    ? S_IRUSR  : 0) |
253 #elif defined(S_IREAD)
254                    (perm & fTarURead    ? S_IREAD  : 0) |
255 #endif
256 #if   defined(S_IWUSR)
257                    (perm & fTarUWrite   ? S_IWUSR  : 0) |
258 #elif defined(S_IWRITE)
259                    (perm & fTarUWrite   ? S_IWRITE : 0) |
260 #endif
261 #if   defined(S_IXUSR)
262                    (perm & fTarUExecute ? S_IXUSR  : 0) |
263 #elif defined(S_IEXEC)
264                    (perm & fTarUExecute ? S_IEXEC  : 0) |
265 #endif
266 #ifdef S_IRGRP
267                    (perm & fTarGRead    ? S_IRGRP  : 0) |
268 #endif
269 #ifdef S_IWGRP
270                    (perm & fTarGWrite   ? S_IWGRP  : 0) |
271 #endif
272 #ifdef S_IXGRP
273                    (perm & fTarGExecute ? S_IXGRP  : 0) |
274 #endif
275 #ifdef S_IROTH
276                    (perm & fTarORead    ? S_IROTH  : 0) |
277 #endif
278 #ifdef S_IWOTH
279                    (perm & fTarOWrite   ? S_IWOTH  : 0) |
280 #endif
281 #ifdef S_IXOTH
282                    (perm & fTarOExecute ? S_IXOTH  : 0) |
283 #endif
284                    0);
285     return mode;
286 }
287 
288 
s_ModeToTar(mode_t mode)289 static TTarMode s_ModeToTar(mode_t mode)
290 {
291     // Keep in mind that the mode may be extracted on a different platform
292     TTarMode perm = (
293 #ifdef S_ISUID
294                      (mode & S_ISUID  ? fTarSetUID   : 0) |
295 #endif
296 #ifdef S_ISGID
297                      (mode & S_ISGID  ? fTarSetGID   : 0) |
298 #endif
299 #ifdef S_ISVTX
300                      (mode & S_ISVTX  ? fTarSticky   : 0) |
301 #endif
302 #if   defined(S_IRUSR)
303                      (mode & S_IRUSR  ? fTarURead    : 0) |
304 #elif defined(S_IREAD)
305                      (mode & S_IREAD  ? fTarURead    : 0) |
306 #endif
307 #if   defined(S_IWUSR)
308                      (mode & S_IWUSR  ? fTarUWrite   : 0) |
309 #elif defined(S_IWRITE)
310                      (mode & S_IWRITE ? fTarUWrite   : 0) |
311 #endif
312 #if   defined(S_IXUSR)
313                      (mode & S_IXUSR  ? fTarUExecute : 0) |
314 #elif defined(S_IEXEC)
315                      (mode & S_IEXEC  ? fTarUExecute : 0) |
316 #endif
317 #if   defined(S_IRGRP)
318                      (mode & S_IRGRP  ? fTarGRead    : 0) |
319 #elif defined(S_IREAD)
320                      // emulate read permission when file is readable
321                      (mode & S_IREAD  ? fTarGRead    : 0) |
322 #endif
323 #ifdef S_IWGRP
324                      (mode & S_IWGRP  ? fTarGWrite   : 0) |
325 #endif
326 #ifdef S_IXGRP
327                      (mode & S_IXGRP  ? fTarGExecute : 0) |
328 #endif
329 #if   defined(S_IROTH)
330                      (mode & S_IROTH  ? fTarORead    : 0) |
331 #elif defined(S_IREAD)
332                      // emulate read permission when file is readable
333                      (mode & S_IREAD  ? fTarORead    : 0) |
334 #endif
335 #ifdef S_IWOTH
336                      (mode & S_IWOTH  ? fTarOWrite   : 0) |
337 #endif
338 #ifdef S_IXOTH
339                      (mode & S_IXOTH  ? fTarOExecute : 0) |
340 #endif
341                      0);
342 #if defined(S_IFMT)  ||  defined(_S_IFMT)
343     TTarMode mask = (TTarMode) mode;
344 #  ifdef S_IFMT
345     mask &=  S_IFMT;
346 #  else
347     mask &= _S_IFMT;
348 #  endif
349     if (!(mask & 07777)) {
350         perm |= mask;
351     }
352 #endif
353     return perm;
354 }
355 
356 
// Return the length of the string at "ptr":  the number of bytes preceding
// the first '\0', or "maxsize" if no '\0' occurs within the first "maxsize"
// bytes (i.e. the field is not terminated).
static size_t s_Length(const char* ptr, size_t maxsize)
{
    size_t n = 0;
    while (n < maxsize  &&  ptr[n] != '\0') {
        ++n;
    }
    return n;
}
362 
363 
364 //////////////////////////////////////////////////////////////////////////////
365 //
366 // Constants / macros / typedefs
367 //
368 
/// Convert a byte position to a block number, and a block count to bytes
/// (a block is 2^9 bytes)
#define BLOCK_OF(pos)     ((pos) >> 9)
#define SIZE_OF(blk)      ((blk) << 9)

/// Tar block size (512 bytes)
#define BLOCK_SIZE        SIZE_OF(1)

/// Round up to the nearest multiple of BLOCK_SIZE:
//#define ALIGN_SIZE(size)   SIZE_OF(BLOCK_OF(size + (BLOCK_SIZE-1)))
#define ALIGN_SIZE(size)  (((size) + (BLOCK_SIZE-1)) & ~(BLOCK_SIZE-1))
/// Byte offset within a block (i.e. the remainder modulo BLOCK_SIZE)
#define OFFSET_OF(size)   ( (size)                   &  (BLOCK_SIZE-1))
378 
379 
/// Recognized TAR formats.
/// The POSIX and STAR flavors include the eTar_Ustar bit, so that testing
/// "fmt & eTar_Ustar" covers the entire ustar family.
enum ETar_Format {
    eTar_Unknown = 0,
    eTar_Legacy  = 1,
    eTar_OldGNU  = 2,
    eTar_Ustar   = 4,
    eTar_Posix   = eTar_Ustar | 1,  // == 5
    eTar_Star    = eTar_Ustar | 2   // == 6
};
389 
390 
/// POSIX "ustar" tar archive member header.
/// All fields are plain character arrays, so the structure has no padding and
/// the byte offsets shown are exact.  The area from offset 345 on is shared
/// by three alternative layouts (ustar prefix, old GNU, and star).
struct STarHeader {
    // field                     offset
    char name[100];           //   0
    char mode[8];             // 100
    char uid[8];              // 108
    char gid[8];              // 116
    char size[12];            // 124
    char mtime[12];           // 136
    char checksum[8];         // 148
    char typeflag[1];         // 156
    char linkname[100];       // 157
    char magic[6];            // 257
    char version[2];          // 263
    char uname[32];           // 265
    char gname[32];           // 297
    char devmajor[8];         // 329
    char devminor[8];         // 337
    union {                   // 345
        // ustar:  NB: not valid with old GNU format (no need)
        char prefix[155];     // 345
        // NB: old GNU format only
        struct {
            char atime[12];   // 345
            char ctime[12];   // 357
            char unused[17];  // 369
            char sparse[96];  // 386 sparse map: ([12] offset + [12] size) x 4
            char contind[1];  // 482 non-zero if continued in the next header
            char realsize[12];// 483 true file size
        } gnu;
        // star
        struct {
            char prefix[131]; // 345 NB: prefix + 107: realsize (char[12]) for 'S'
            char atime[12];   // 476
            char ctime[12];   // 488
        } star;
    };                        // 500 (end of the header proper)
    // NCBI in last 4 bytes   // 508
};
typedef struct STarHeader SHeader;
426 
427 
/// Block as a header.
/// One tar block that can be accessed either as BLOCK_SIZE raw bytes
/// ("buffer") or as a member header ("header");  both views share the same
/// storage and start at the same address.
union TBlock {
    char    buffer[BLOCK_SIZE];  // raw bytes of the whole block
    SHeader header;              // the same bytes interpreted as a header
};
433 
434 
s_TarChecksum(TBlock * block,bool isgnu)435 static bool s_TarChecksum(TBlock* block, bool isgnu)
436 {
437     SHeader* h = &block->header;
438     size_t len = sizeof(h->checksum) - (isgnu ? 2 : 1);
439 
440     // Compute the checksum
441     memset(h->checksum, ' ', sizeof(h->checksum));
442     unsigned long checksum = 0;
443     const unsigned char* p = (const unsigned char*) block->buffer;
444     for (size_t i = 0;  i < sizeof(block->buffer);  ++i) {
445         checksum += *p++;
446     }
447     // ustar:       '\0'-terminated checksum
448     // GNU special: 6 digits, then '\0', then a space [already in place]
449     if (!s_NumToOctal(checksum, h->checksum, len)) {
450         return false;
451     }
452     h->checksum[len] = '\0';
453     return true;
454 }
455 
456 
457 
458 //////////////////////////////////////////////////////////////////////////////
459 //
460 // CTarEntryInfo
461 //
462 
GetMode(void) const463 TTarMode CTarEntryInfo::GetMode(void) const
464 {
465     // Raw tar mode gets returned here (as kept in the info)
466     return (TTarMode)(m_Stat.orig.st_mode & 07777);
467 }
468 
469 
GetMode(CDirEntry::TMode * usr_mode,CDirEntry::TMode * grp_mode,CDirEntry::TMode * oth_mode,CDirEntry::TSpecialModeBits * special_bits) const470 void CTarEntryInfo::GetMode(CDirEntry::TMode*            usr_mode,
471                             CDirEntry::TMode*            grp_mode,
472                             CDirEntry::TMode*            oth_mode,
473                             CDirEntry::TSpecialModeBits* special_bits) const
474 {
475     s_TarToMode(GetMode(), usr_mode, grp_mode, oth_mode, special_bits);
476 }
477 
478 
GetMajor(void) const479 unsigned int CTarEntryInfo::GetMajor(void) const
480 {
481 #ifdef major
482     if (m_Type == eCharDev  ||  m_Type == eBlockDev) {
483         return major(m_Stat.orig.st_rdev);
484     }
485 #else
486     if (sizeof(int) >= 4  &&  sizeof(m_Stat.orig.st_rdev) >= 4) {
487         return (*((unsigned int*) &m_Stat.orig.st_rdev) >> 16) & 0xFFFF;
488     }
489 #endif //major
490     return (unsigned int)(-1);
491 }
492 
493 
GetMinor(void) const494 unsigned int CTarEntryInfo::GetMinor(void) const
495 {
496 #ifdef minor
497     if (m_Type == eCharDev  ||  m_Type == eBlockDev) {
498         return minor(m_Stat.orig.st_rdev);
499     }
500 #else
501     if (sizeof(int) >= 4  &&  sizeof(m_Stat.orig.st_rdev) >= 4) {
502         return *((unsigned int*) &m_Stat.orig.st_rdev) & 0xFFFF;
503     }
504 #endif //minor
505     return (unsigned int)(-1);
506 }
507 
508 
s_ModeAsString(TTarMode mode)509 static string s_ModeAsString(TTarMode mode)
510 {
511     char buf[9];
512     memset(buf, '-', sizeof(buf));
513 
514     char* usr = buf;
515     char* grp = usr + 3;
516     char* oth = grp + 3;
517 
518     if (mode & fTarURead) {
519         usr[0] = 'r';
520     }
521     if (mode & fTarUWrite) {
522         usr[1] = 'w';
523     }
524     if (mode & fTarUExecute) {
525         usr[2] = mode & fTarSetUID ? 's' : 'x';
526     } else if (mode & fTarSetUID) {
527         usr[2] = 'S';
528     }
529     if (mode & fTarGRead) {
530         grp[0] = 'r';
531     }
532     if (mode & fTarGWrite) {
533         grp[1] = 'w';
534     }
535     if (mode & fTarGExecute) {
536         grp[2] = mode & fTarSetGID ? 's' : 'x';
537     } else if (mode & fTarSetGID) {
538         grp[2] = 'S';
539     }
540     if (mode & fTarORead) {
541         oth[0] = 'r';
542     }
543     if (mode & fTarOWrite) {
544         oth[1] = 'w';
545     }
546     if (mode & fTarOExecute) {
547         oth[2] = mode & fTarSticky ? 't' : 'x';
548     } else if (mode & fTarSticky) {
549         oth[2] = 'T';
550     }
551 
552     return string(buf, sizeof(buf));
553 }
554 
555 
s_TypeAsChar(CTarEntryInfo::EType type)556 static char s_TypeAsChar(CTarEntryInfo::EType type)
557 {
558     switch (type) {
559     case CTarEntryInfo::eFile:
560     case CTarEntryInfo::eHardLink:
561         return '-';
562     case CTarEntryInfo::eSymLink:
563         return 'l';
564     case CTarEntryInfo::eDir:
565         return 'd';
566     case CTarEntryInfo::ePipe:
567         return 'p';
568     case CTarEntryInfo::eCharDev:
569         return 'c';
570     case CTarEntryInfo::eBlockDev:
571         return 'b';
572     case CTarEntryInfo::eVolHeader:
573         return 'V';
574     case CTarEntryInfo::eSparseFile:
575         return 'S';
576     default:
577         break;
578     }
579     return '?';
580 }
581 
582 
s_UserGroupAsString(const CTarEntryInfo & info)583 static string s_UserGroupAsString(const CTarEntryInfo& info)
584 {
585     string user(info.GetUserName());
586     if (user.empty()) {
587         NStr::UIntToString(user, info.GetUserId());
588     }
589     string group(info.GetGroupName());
590     if (group.empty()) {
591         NStr::UIntToString(group, info.GetGroupId());
592     }
593     return user + '/' + group;
594 }
595 
596 
s_MajorMinor(unsigned int n)597 static string s_MajorMinor(unsigned int n)
598 {
599     return n != (unsigned int)(-1) ? NStr::UIntToString(n) : string(1, '?');
600 }
601 
602 
s_SizeOrMajorMinor(const CTarEntryInfo & info)603 static string s_SizeOrMajorMinor(const CTarEntryInfo& info)
604 {
605     if (info.GetType() == CTarEntryInfo::eCharDev  ||
606         info.GetType() == CTarEntryInfo::eBlockDev) {
607         unsigned int major = info.GetMajor();
608         unsigned int minor = info.GetMinor();
609         return s_MajorMinor(major) + ',' + s_MajorMinor(minor);
610     } else if (info.GetType() == CTarEntryInfo::eDir      ||
611                info.GetType() == CTarEntryInfo::ePipe     ||
612                info.GetType() == CTarEntryInfo::eSymLink  ||
613                info.GetType() == CTarEntryInfo::eVolHeader) {
614         return string("-");
615     } else if (info.GetType() == CTarEntryInfo::eSparseFile  &&
616                info.GetSize() == 0) {
617         return string("?");
618     }
619     return NStr::NumericToString(info.GetSize());
620 }
621 
622 
operator <<(CNcbiOstream & os,const CTarEntryInfo & info)623 CNcbiOstream& operator << (CNcbiOstream& os, const CTarEntryInfo& info)
624 {
625     CTime mtime(info.GetModificationTime());
626     os << s_TypeAsChar(info.GetType())
627        << s_ModeAsString(info.GetMode())        << ' '
628        << setw(17) << s_UserGroupAsString(info) << ' '
629        << setw(10) << s_SizeOrMajorMinor(info)  << ' '
630        << mtime.ToLocalTime().AsString(" Y-M-D h:m:s ")
631        << info.GetName();
632     if (info.GetType() == CTarEntryInfo::eSymLink  ||
633         info.GetType() == CTarEntryInfo::eHardLink) {
634         os << " -> " << info.GetLinkName();
635     }
636     return os;
637 }
638 
639 
640 
641 //////////////////////////////////////////////////////////////////////////////
642 //
643 // Debugging utilities
644 //
645 
s_OSReason(int x_errno)646 static string s_OSReason(int x_errno)
647 {
648     static const char kUnknownError[] = "Unknown error";
649     const char* strerr;
650     char errbuf[80];
651     if (!x_errno)
652         return kEmptyStr;
653     strerr = ::strerror(x_errno);
654     if (!strerr  ||  !*strerr
655         ||  !NStr::strncasecmp(strerr,
656                                kUnknownError, sizeof(kUnknownError) - 1)) {
657         if (x_errno > 0) {
658             ::sprintf(errbuf, "Error %d", x_errno);
659         } else if (x_errno != -1) {
660             ::sprintf(errbuf, "Error 0x%08X", (unsigned int) x_errno);
661         } else {
662             ::strcpy (errbuf, "Unknown error (-1)");
663         }
664         strerr = errbuf;
665     }
666     _ASSERT(strerr  &&  *strerr);
667     return string(": ") + strerr;
668 }
669 
670 
// Compose a message prefix that tells where in the archive something has
// happened:  the archive file (if known), the record number and, unless
// records are a single block long, the block within the record along with
// the absolute block number;  then the entry being processed (if any).
// "recsize" must be a non-zero multiple of BLOCK_SIZE.
static string s_PositionAsString(const string& file, Uint8 pos, size_t recsize,
                                 const string& entryname)
{
    _ASSERT(!OFFSET_OF(recsize));
    _ASSERT(recsize >= BLOCK_SIZE);

    string where;
    if (!file.empty()) {
        // Show just the name for an existing regular file, else as given
        CDirEntry entry(file);
        if (entry.GetType() == CDirEntry::eFile) {
            where = entry.GetName();
        } else {
            where = file;
        }
        where += ": ";
    }

    where += "At record ";
    where += NStr::NumericToString(pos / recsize);
    if (recsize != BLOCK_SIZE) {
        where += ", block ";
        where += NStr::NumericToString(BLOCK_OF(pos % recsize));
        where += " [thru #";
        where += NStr::NumericToString(BLOCK_OF(pos), NStr::fWithCommas);
        where += ']';
    }

    if (!entryname.empty()) {
        where += ", while in '";
        where += entryname;
        where += '\'';
    }

    where += ":\n";
    return where;
}
694 
695 
s_OffsetAsString(size_t offset)696 static string s_OffsetAsString(size_t offset)
697 {
698     char buf[20];
699     _ASSERT(offset < 1000);
700     _VERIFY(sprintf(buf, "%03u", (unsigned int) offset));
701     return buf;
702 }
703 
704 
// Return true if any of the first "len" bytes at "s" differs from "c";
// false if all of them are equal to "c" (or if "len" is 0).
static bool memcchr(const char* s, char c, size_t len)
{
    const char* const end = s + len;
    while (s != end) {
        if (*s++ != c)
            return true;
    }
    return false;
}
713 
714 
s_Printable(const char * field,size_t maxsize,bool text)715 static string s_Printable(const char* field, size_t maxsize, bool text)
716 {
717     bool check = false;
718     if (!text  &&  maxsize > 1  &&  !*field) {
719         field++, maxsize--;
720         check = true;
721     }
722     size_t len = s_Length(field, maxsize);
723     string retval = NStr::PrintableString(CTempString(field,
724                                                       memcchr(field + len,
725                                                               '\0',
726                                                               maxsize - len)
727                                                       ? maxsize
728                                                       : len));
729     return check  &&  !retval.empty() ? "\\0" + retval : retval;
730 }
731 
732 
// Fallback definition of offsetof() for non-GCC builds that do not have one
#if !defined(__GNUC__)  &&  !defined(offsetof)
#  define offsetof(T, F)  ((char*) &(((T*) 0)->F) - (char*) 0)
#endif


// Turn the argument into a string literal, exactly as written
#define _STR(s)  #s

// Build a dump line for a header field:  @<offset>[<field>]:<pad>"<value>"
// The padding depends on the length of the field name, so that the quoted
// values of differently named fields start at the same column.
// NB: Uses "h" (pointer to the header) and "ecxpt" (a flag, which is OR'ed
// with "text" for s_Printable()) from the scope where the macro is expanded.
#define TAR_PRINTABLE_EX(field, text, size)                             \
    "@" + s_OffsetAsString((size_t) offsetof(SHeader, field)) +         \
    "[" _STR(field) "]:" + string(14 - sizeof(_STR(field)), ' ') +      \
    '"' + s_Printable(h->field, size, text  ||  ecxpt) + '"'

// Shorthand for TAR_PRINTABLE_EX() that takes the size from the field itself
#define TAR_PRINTABLE(field, text)                                      \
    TAR_PRINTABLE_EX(field, text, sizeof(h->field))


// Tags for the sparse map entries and the continuation indicator in dumps
#define TAR_GNU_REGION   "[gnu.region]:   "
#define TAR_GNU_CONTIND  "[gnu.contind]:  "
751 
s_DumpSparseMap(const SHeader * h,const char * sparse,const char * contind,bool ecxpt=false)752 static string s_DumpSparseMap(const SHeader* h, const char* sparse,
753                               const char* contind, bool ecxpt = false)
754 {
755     string dump;
756     size_t offset;
757     bool done = false;
758     string region(TAR_GNU_REGION);
759 
760     do {
761         if (memcchr(sparse, '\0', 24)) {
762             offset = (size_t)(sparse - (const char*) h);
763             if (!dump.empty())
764                 dump += '\n';
765             dump += '@' + s_OffsetAsString(offset);
766             if (!done) {
767                 Uint8 off, len;
768                 int ok_off = s_DecodeUint8(off, sparse,      12);
769                 int ok_len = s_DecodeUint8(len, sparse + 12, 12);
770                 if (ok_off & ok_len) {
771                     dump += region;
772                     region = ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ');
773                     if (ok_off > 0) {
774                         dump += '"';
775                         dump += s_Printable(sparse, 12, ecxpt);
776                         dump += "\" ";
777                     } else {
778                         dump += string(14, ' ');
779                     }
780                     sparse += 12;
781                     if (ok_len > 0) {
782                         dump += '"';
783                         dump += s_Printable(sparse, 12, ecxpt);
784                         dump += "\" ";
785                     } else {
786                         dump += string(14, ' ');
787                     }
788                     sparse += 12;
789                     dump += "[@";
790                     dump += NStr::NumericToString(off);
791                     dump += ", ";
792                     dump += NStr::NumericToString(len);
793                     dump += ']';
794                     continue;
795                 }
796                 done = true;
797             }
798             dump += ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ')
799                 + '"' + NStr::PrintableString(string(sparse, 24)) + '"';
800         } else {
801             done = true;
802         }
803         sparse += 24;
804     } while (sparse < contind);
805     if (!dump.empty()) {
806         dump += '\n';
807     }
808     offset = (size_t)(contind - (const char*) h);
809     dump += '@' + s_OffsetAsString(offset) + TAR_GNU_CONTIND
810         "\"" + NStr::PrintableString(string(contind, 1))
811         + (*contind ? "\" [to-be-cont'd]" : "\" [last]");
812     return dump;
813 }
814 
815 
s_DumpSparseMap(const vector<pair<Uint8,Uint8>> & bmap)816 static string s_DumpSparseMap(const vector< pair<Uint8, Uint8> >& bmap)
817 {
818     size_t size = bmap.size();
819     string dump("Regions: " + NStr::NumericToString(size));
820     for (size_t n = 0;  n < size;  ++n) {
821         dump += "\n    [" + NStr::NumericToString(n) + "]: @"
822             + NStr::NumericToString(bmap[n].first) + ", "
823             + NStr::NumericToString(bmap[n].second);
824     }
825     return dump;
826 }
827 
828 
s_DumpHeader(const SHeader * h,ETar_Format fmt,bool ecxpt=false)829 static string s_DumpHeader(const SHeader* h, ETar_Format fmt,
830                            bool ecxpt = false)
831 {
832     string dump;
833     Uint8 val;
834     int ok;
835 
836     dump += TAR_PRINTABLE(name, true);
837     dump += '\n';
838 
839     ok = s_OctalToNum(val, h->mode, sizeof(h->mode));
840     dump += TAR_PRINTABLE(mode, !ok);
841     if (ok  &&  val) {
842         dump += " [" + s_ModeAsString((TTarMode) val) + ']';
843     }
844     dump += '\n';
845 
846     ok = s_DecodeUint8(val, h->uid, sizeof(h->uid));
847     dump += TAR_PRINTABLE(uid, ok <= 0);
848     if (ok  &&  (ok < 0  ||  val > 7)) {
849         dump += " [" + NStr::NumericToString(val) + ']';
850         if (ok < 0) {
851             dump += " (base-256)";
852         }
853     }
854     dump += '\n';
855 
856     ok = s_DecodeUint8(val, h->gid, sizeof(h->gid));
857     dump += TAR_PRINTABLE(gid, ok <= 0);
858     if (ok  &&  (ok < 0  ||  val > 7)) {
859         dump += " [" + NStr::NumericToString(val) + ']';
860         if (ok < 0) {
861             dump += " (base-256)";
862         }
863     }
864     dump += '\n';
865 
866     ok = s_DecodeUint8(val, h->size, sizeof(h->size));
867     dump += TAR_PRINTABLE(size, ok <= 0);
868     if (ok  &&  (ok < 0  ||  val > 7)) {
869         dump += " [" + NStr::NumericToString(val) + ']';
870         if (ok  &&  h->typeflag[0] == 'S'  &&  fmt == eTar_OldGNU) {
871             dump += " w/o map(s)!";
872         }
873         if (ok < 0) {
874             dump += " (base-256)";
875         }
876     }
877     dump += '\n';
878 
879     ok = s_OctalToNum(val, h->mtime, sizeof(h->mtime));
880     dump += TAR_PRINTABLE(mtime, !ok);
881     if (ok  &&  val) {
882         CTime mtime((time_t) val);
883         ok = (Uint8) mtime.GetTimeT() == val ? true : false;
884         if (ok  ||  val > 7) {
885             dump += (" ["
886                      + (val > 7 ? NStr::NumericToString(val) + ", "      : "")
887                      + (ok ? mtime.ToLocalTime().AsString("Y-M-D h:m:s") : "")
888                      + ']');
889         }
890     }
891     dump += '\n';
892 
893     ok = s_OctalToNum(val, h->checksum, sizeof(h->checksum));
894     dump += TAR_PRINTABLE(checksum, !ok);
895     dump += '\n';
896 
897     // Classify to the extent possible to help debug the problem (if any)
898     dump += TAR_PRINTABLE(typeflag, true);
899     ok = false;
900     const char* tname = 0;
901     switch (h->typeflag[0]) {
902     case '\0':
903     case '0':
904         ok = true;
905         if (!(fmt & eTar_Ustar)  &&  fmt != eTar_OldGNU) {
906             size_t namelen = s_Length(h->name, sizeof(h->name));
907             if (namelen  &&  h->name[namelen - 1] == '/')
908                 tname = "legacy regular entry (dir)";
909         }
910         if (!tname)
911             tname = "legacy regular entry (file)";
912         tname += h->typeflag[0] ? 7/*skip "legacy "*/ : 0;
913         break;
914     case '\1':
915     case '1':
916         ok = true;
917 #ifdef NCBI_OS_UNIX
918         tname = "legacy hard link";
919 #else
920         tname = "legacy hard link - not FULLY supported";
921 #endif //NCBI_OS_UNIX
922         tname += h->typeflag[0] != '\1' ? 7/*skip "legacy "*/ : 0;
923         break;
924     case '\2':
925     case '2':
926         ok = true;
927 #ifdef NCBI_OS_UNIX
928         tname = "legacy symbolic link";
929 #else
930         tname = "legacy symbolic link - not FULLY supported";
931 #endif //NCBI_OS_UNIX
932         tname += h->typeflag[0] != '\2' ? 7/*skip "legacy "*/ : 0;
933         break;
934     case '3':
935 #ifdef NCBI_OS_UNIX
936         ok = true;
937 #endif //NCBI_OS_UNIX
938         tname = "character device";
939         break;
940     case '4':
941 #ifdef NCBI_OS_UNIX
942         ok = true;
943 #endif //NCBI_OS_UNIX
944         tname = "block device";
945         break;
946     case '5':
947         ok = true;
948         tname = "directory";
949         break;
950     case '6':
951 #ifdef NCBI_OS_UNIX
952         ok = true;
953 #endif //NCBI_OS_UNIX
954         tname = "FIFO";
955         break;
956     case '7':
957         tname = "contiguous";
958         break;
959     case 'g':
960         tname = "global extended header";
961         break;
962     case 'x':
963     case 'X':
964         if (fmt & eTar_Ustar) {
965             ok = true;
966             if (h->typeflag[0] == 'x') {
967                 tname = "extended (POSIX 1003.1-2001 [PAX]) header"
968                     " - not FULLY supported";
969             } else {
970                 tname = "extended (POSIX 1003.1-2001 [PAX] by Sun) header"
971                     " - not FULLY supported";
972             }
973         } else {
974             tname = "extended header";
975         }
976         break;
977     case 'A':
978         tname = "Solaris ACL";
979         break;
980     case 'D':
981         if (fmt == eTar_OldGNU) {
982             tname = "GNU extension: directory dump";
983         }
984         break;
985     case 'E':
986         tname = "Solaris extended attribute file";
987         break;
988     case 'I':
989         // CAUTION:  Entry size shows actual file size in the filesystem but
990         // no actual data blocks stored in the archive following the header!
991         tname = "Inode metadata only";
992         break;
993     case 'K':
994         if (fmt == eTar_OldGNU) {
995             ok = true;
996             tname = "GNU extension: long link";
997         }
998         break;
999     case 'L':
1000         if (fmt == eTar_OldGNU) {
1001             ok = true;
1002             tname = "GNU extension: long name";
1003         }
1004         break;
1005     case 'M':
1006         switch (fmt) {
1007         case eTar_OldGNU:
1008             tname = "GNU extension: multi-volume entry";
1009             break;
1010         case eTar_Star:
1011             tname = "STAR extension: multi-volume entry";
1012             break;
1013         default:
1014             break;
1015         }
1016         break;
1017     case 'N':
1018         if (fmt == eTar_OldGNU) {
1019             tname = "GNU extension (obsolete): long filename(s)";
1020         }
1021         break;
1022     case 'S':
1023         switch (fmt) {
1024         case eTar_OldGNU:
1025             // CAUTION:  Entry size does not include sparse entry map stored in
1026             // additional (non-standard) headers that may follow this header!
1027             tname = "GNU extension: sparse file";
1028             break;
1029         case eTar_Star:
1030             // Entry size already includes size of additional sparse file maps
1031             // that may follow this header before the actual file data.
1032             tname = "STAR extension: sparse file";
1033             break;
1034         default:
1035             break;
1036         }
1037         break;
1038     case 'V':
1039         ok = true;
1040         tname = "Volume header";
1041         break;
1042     default:
1043         break;
1044     }
1045     if (!tname  &&  'A' <= h->typeflag[0]  &&  h->typeflag[0] <= 'Z') {
1046         tname = "local vendor enhancement / user-defined extension";
1047     }
1048     dump += (" [" + string(tname ? tname : "reserved")
1049              + (ok
1050                 ? "]\n"
1051                 : " -- NOT SUPPORTED]\n"));
1052 
1053     dump += TAR_PRINTABLE(linkname, true);
1054     dump += '\n';
1055 
1056     switch (fmt) {
1057     case eTar_Legacy:  // NCBI never writes this header
1058         tname = "legacy (V7)";
1059         break;
1060     case eTar_OldGNU:
1061         if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1062             tname = "old GNU (NCBI)";
1063         } else {
1064             tname = "old GNU";
1065         }
1066         break;
1067     case eTar_Ustar:
1068         if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1069             tname = "ustar (NCBI)";
1070         } else {
1071             tname = "ustar";
1072         }
1073         break;
1074     case eTar_Posix:  // aka "pax"
1075         if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1076             tname = "posix (NCBI)";
1077         } else {
1078             tname = "posix";
1079         }
1080         break;
1081     case eTar_Star:  // NCBI never writes this header
1082         tname = "star";
1083         break;
1084     default:
1085         tname = 0;
1086         break;
1087     }
1088     dump += TAR_PRINTABLE(magic, true);
1089     if (tname) {
1090         dump += " [" + string(tname) + ']';
1091     }
1092     dump += '\n';
1093 
1094     dump += TAR_PRINTABLE(version, true);
1095 
1096     if (fmt != eTar_Legacy) {
1097         dump += '\n';
1098 
1099         dump += TAR_PRINTABLE(uname, true);
1100         dump += '\n';
1101 
1102         dump += TAR_PRINTABLE(gname, true);
1103         dump += '\n';
1104 
1105         ok = s_OctalToNum(val, h->devmajor, sizeof(h->devmajor));
1106         dump += TAR_PRINTABLE(devmajor, !ok);
1107         if (ok  &&  val > 7) {
1108             dump += " [" + NStr::NumericToString(val) + ']';
1109         }
1110         dump += '\n';
1111 
1112         ok = s_OctalToNum(val, h->devminor, sizeof(h->devminor));
1113         dump += TAR_PRINTABLE(devminor, !ok);
1114         if (ok  &&  val > 7) {
1115             dump += " [" + NStr::NumericToString(val) + ']';
1116         }
1117         dump += '\n';
1118 
1119         switch (fmt) {
1120         case eTar_Star:
1121             if (h->typeflag[0] == 'S') {
1122                 dump += TAR_PRINTABLE_EX(star.prefix, true, 107);
1123                 const char* realsize = h->star.prefix + 107;
1124                 ok = s_DecodeUint8(val, realsize, 12);
1125                 dump += "@"
1126                     + s_OffsetAsString((size_t)(realsize - (const char*) h))
1127                     + "[star.realsize]:\""
1128                     + s_Printable(realsize, 12, !ok  ||  ecxpt) + '"';
1129                 if (ok  &&  (ok < 0  ||  val > 7)) {
1130                     dump += " [" + NStr::NumericToString(val) + ']';
1131                     if (ok < 0) {
1132                         dump += " (base-256)";
1133                     }
1134                 }
1135             } else {
1136                 dump += TAR_PRINTABLE(star.prefix, true);
1137             }
1138             dump += '\n';
1139 
1140             ok = s_OctalToNum(val, h->star.atime, sizeof(h->star.atime));
1141             dump += TAR_PRINTABLE(star.atime, !ok);
1142             if (ok  &&  val) {
1143                 CTime atime((time_t) val);
1144                 ok = (Uint8) atime.GetTimeT() == val ? true : false;
1145                 if (ok  ||  val > 7) {
1146                     dump += (" ["
1147                              + (val > 7 ? NStr::NumericToString(val)+", " : "")
1148                              + (ok
1149                                 ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1150                                 : "")
1151                              + ']');
1152                 }
1153             }
1154             dump += '\n';
1155 
1156             ok = s_OctalToNum(val, h->star.ctime, sizeof(h->star.ctime));
1157             dump += TAR_PRINTABLE(star.ctime, !ok);
1158             if (ok  &&  val) {
1159                 CTime ctime((time_t) val);
1160                 ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1161                 if (ok  ||  val > 7) {
1162                     dump += (" ["
1163                              + (val > 7 ? NStr::NumericToString(val)+", " : "")
1164                              + (ok
1165                                 ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1166                                 : "")
1167                              + ']');
1168                 }
1169             }
1170             tname = (const char*) &h->star + sizeof(h->star);
1171             break;
1172 
1173         case eTar_OldGNU:
1174             ok = s_OctalToNum(val, h->gnu.atime, sizeof(h->gnu.atime));
1175             dump += TAR_PRINTABLE(gnu.atime, !ok);
1176             if (ok  &&  val) {
1177                 CTime atime((time_t) val);
1178                 ok = (Uint8) atime.GetTimeT() == val ? true : false;
1179                 if (ok  ||  val > 7) {
1180                     dump += (" ["
1181                              + (val > 7 ? NStr::NumericToString(val)+", " : "")
1182                              + (ok
1183                                 ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1184                                 : "")
1185                              + ']');
1186                 }
1187             }
1188             dump += '\n';
1189 
1190             ok = s_OctalToNum(val, h->gnu.ctime, sizeof(h->gnu.ctime));
1191             dump += TAR_PRINTABLE(gnu.ctime, !ok);
1192             if (ok  &&  val) {
1193                 CTime ctime((time_t) val);
1194                 ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1195                 if (ok  ||  val > 7) {
1196                     dump += (" ["
1197                              + (val > 7 ? NStr::NumericToString(val)+", " : "")
1198                              + (ok
1199                                 ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1200                                 : "")
1201                              + ']');
1202                 }
1203             }
1204 
1205             if (h->typeflag[0] == 'S') {
1206                 if (memcchr(h->gnu.unused, '\0', sizeof(h->gnu.unused))) {
1207                     dump += '\n';
1208                     dump += TAR_PRINTABLE(gnu.unused, true);
1209                 }
1210                 dump += '\n' + s_DumpSparseMap(h, h->gnu.sparse,
1211                                                h->gnu.contind, ecxpt);
1212                 if (memcchr(h->gnu.realsize, '\0', sizeof(h->gnu.realsize))) {
1213                     ok = s_DecodeUint8(val, h->gnu.realsize,
1214                                        sizeof(h->gnu.realsize));
1215                     dump += '\n';
1216                     dump += TAR_PRINTABLE(gnu.realsize, ok <= 0);
1217                     if (ok  &&  (ok < 0  ||  val > 7)) {
1218                         dump += " [" + NStr::NumericToString(val) + ']';
1219                     }
1220                     if (ok < 0) {
1221                         dump += " (base-256)";
1222                     }
1223                 }
1224                 tname = (const char*) &h->gnu + sizeof(h->gnu);
1225             } else {
1226                 tname = h->gnu.ctime + sizeof(h->gnu.ctime);
1227             }
1228             break;
1229 
1230         default:
1231             dump += TAR_PRINTABLE(prefix, true);
1232             tname = h->prefix + sizeof(h->prefix);
1233             break;
1234         }
1235     } else {
1236         tname = h->version + sizeof(h->version);
1237     }
1238 
1239     size_t n = 0;
1240     while (&tname[n] < (const char*) h + BLOCK_SIZE) {
1241         if (tname[n]) {
1242             size_t offset = (size_t)(&tname[n] - (const char*) h);
1243             size_t len = BLOCK_SIZE - offset;
1244             if (len & ~0xF) {  // len > 16
1245                 len = 0x10;    // len = 16
1246             }
1247             const char* e = (const char*) memchr(&tname[n], '\0', len);
1248             if (e) {
1249                 len = (size_t)(e - &tname[n]);
1250                 ok = s_DecodeUint8(val, &tname[n], len);
1251             } else {
1252                 if (len  > (offset & 0xF)) {
1253                     len -= (offset & 0xF);
1254                 }
1255                 ok = false;
1256             }
1257             _ASSERT(len);
1258             dump += "\n@" + s_OffsetAsString(offset) + ':' + string(15, ' ')
1259                 + '"' + NStr::PrintableString(string(&tname[n], len)) + '"';
1260             if (ok) {
1261                 CTime time((time_t) val);
1262                 bool okaytime = (Uint8) time.GetTimeT() == val;
1263                 if (ok < 0  ||  val > 7  ||  okaytime) {
1264                     dump += " [";
1265                     if (ok < 0  ||  val > 7) {
1266                         dump += NStr::NumericToString(val);
1267                     }
1268                     if (ok < 0) {
1269                         dump += "] (base-256)";
1270                     } else if (okaytime) {
1271                         if (val > 7) {
1272                             dump += ", ";
1273                         }
1274                         dump += time.ToLocalTime().AsString("Y-M-D h:m:s]");
1275                     } else {
1276                         dump += ']';
1277                     }
1278                 }
1279             }
1280             n += len;
1281         } else {
1282             n++;
1283         }
1284     }
1285 
1286     return dump;
1287 }
1288 
1289 #undef TAR_PRINTABLE
1290 
1291 #undef _STR
1292 
1293 
s_SetStateSafe(CNcbiIos & ios,IOS_BASE::iostate state)1294 inline void s_SetStateSafe(CNcbiIos& ios, IOS_BASE::iostate state) throw()
1295 {
1296     try {
1297         ios.setstate(state);
1298     } catch (IOS_BASE::failure&) {
1299         ;
1300     }
1301 }
1302 
1303 
1304 //////////////////////////////////////////////////////////////////////////////
1305 //
1306 // CTar
1307 //
1308 
CTar(const string & filename,size_t blocking_factor)1309 CTar::CTar(const string& filename, size_t blocking_factor)
1310     : m_FileName(filename),
1311       m_FileStream(new CNcbiFstream),
1312       m_Stream(*m_FileStream),
1313       m_ZeroBlockCount(0),
1314       m_BufferSize(SIZE_OF(blocking_factor)),
1315       m_BufferPos(0),
1316       m_StreamPos(0),
1317       m_BufPtr(0),
1318       m_Buffer(0),
1319       m_OpenMode(eNone),
1320       m_Modified(false),
1321       m_Bad(false),
1322       m_Flags(fDefault)
1323 {
1324     x_Init();
1325 }
1326 
1327 
CTar(CNcbiIos & stream,size_t blocking_factor)1328 CTar::CTar(CNcbiIos& stream, size_t blocking_factor)
1329     : m_FileName(kEmptyStr),
1330       m_FileStream(0),
1331       m_Stream(stream),
1332       m_ZeroBlockCount(0),
1333       m_BufferSize(SIZE_OF(blocking_factor)),
1334       m_BufferPos(0),
1335       m_StreamPos(0),
1336       m_BufPtr(0),
1337       m_Buffer(0),
1338       m_OpenMode(eNone),
1339       m_Modified(false),
1340       m_Bad(false),
1341       m_Flags(fDefault)
1342 {
1343     x_Init();
1344 }
1345 
1346 
~CTar()1347 CTar::~CTar()
1348 {
1349     // Close stream(s)
1350     x_Close(x_Flush(true/*no_throw*/));
1351     delete m_FileStream;
1352     m_FileStream = 0;
1353 
1354     // Delete owned masks
1355     for (size_t i = 0;  i < sizeof(m_Mask) / sizeof(m_Mask[0]);  ++i) {
1356         SetMask(0, eNoOwnership, EMaskType(i));
1357     }
1358 
1359     // Delete buffer
1360     delete[] m_BufPtr;
1361     m_BufPtr = 0;
1362 }
1363 
1364 
// Throw CTarException with the error code "errcode".  The exception message
// is "message" prefixed with the archive position string that
// s_PositionAsString() builds from the file name, stream position, buffer
// size and current entry name of the object pointed to by "who".
// NB: "who" is parenthesized so that any pointer expression can be passed.
#define TAR_THROW(who, errcode, message)                                \
    NCBI_THROW(CTarException, errcode,                                  \
               s_PositionAsString((who)->m_FileName,                    \
                                  (who)->m_StreamPos,                   \
                                  (who)->m_BufferSize,                  \
                                  (who)->m_Current.GetName()) + (message))

// Same as TAR_THROW() but, if the fDumpEntryHeaders flag is set in "who",
// the message is followed by the dump of the header "h" (of format "fmt")
// as produced by s_DumpHeader().
#define TAR_THROW_EX(who, errcode, message, h, fmt)                     \
    TAR_THROW(who, errcode,                                             \
              (who)->m_Flags & fDumpEntryHeaders                        \
              ? string(message) + ":\n" + s_DumpHeader(h, fmt, true)    \
              : string(message))

// Post a diagnostic message (via ERR_POST_X) with the given error subcode
// and severity;  the message gets the same position prefix as in TAR_THROW().
// NB: uses the members of the current object directly, hence is only usable
// from within CTar's member functions.
#define TAR_POST(subcode, severity, message)                            \
    ERR_POST_X(subcode, (severity) <<                                   \
               s_PositionAsString(m_FileName, m_StreamPos, m_BufferSize,\
                                  m_Current.GetName()) + (message))
1381 
1382 
x_Init(void)1383 void CTar::x_Init(void)
1384 {
1385     _ASSERT(!OFFSET_OF(m_BufferSize));
1386     size_t pagesize = (size_t) CSystemInfo::GetVirtualMemoryPageSize();
1387     if (pagesize < 4096  ||  (pagesize & (pagesize - 1))) {
1388         pagesize = 4096;  // reasonable default
1389     }
1390     size_t pagemask = pagesize - 1;
1391     m_BufPtr = new char[m_BufferSize + pagemask];
1392     // Make m_Buffer page-aligned
1393     m_Buffer = m_BufPtr +
1394         ((((size_t) m_BufPtr + pagemask) & ~pagemask) - (size_t) m_BufPtr);
1395 }
1396 
1397 
x_Flush(bool no_throw)1398 bool CTar::x_Flush(bool no_throw)
1399 {
1400     m_Current.m_Name.erase();
1401     if (m_BufferPos == m_BufferSize) {
1402         m_Bad = true;  // In case of unhandled exception(s)
1403     }
1404     if (m_Bad  ||  !m_OpenMode) {
1405         return false;
1406     }
1407     if (!m_Modified  &&
1408         (m_FileStream  ||  !(m_Flags & fStreamPipeThrough)  ||  !m_StreamPos)){
1409         return false;
1410     }
1411 
1412     _ASSERT(m_BufferPos < m_BufferSize);
1413     if (m_BufferPos  ||  m_ZeroBlockCount < 2) {
1414         // Assure proper blocking factor and pad the archive as necessary
1415         size_t zbc = m_ZeroBlockCount;
1416         size_t pad = m_BufferSize - m_BufferPos;
1417         memset(m_Buffer + m_BufferPos, 0, pad);
1418         x_WriteArchive(pad, no_throw ? (const char*)(-1L) : 0);
1419         _ASSERT(!(m_BufferPos % m_BufferSize) // m_BufferSize if write error
1420                 &&  !m_Bad == !m_BufferPos);
1421         if (!m_Bad  &&  (zbc += BLOCK_OF(pad)) < 2) {
1422             // Write EOT (two zero blocks), if have not padded enough already
1423             memset(m_Buffer, 0, m_BufferSize - pad);
1424             x_WriteArchive(m_BufferSize, no_throw ? (const char*)(-1L) : 0);
1425             _ASSERT(!(m_BufferPos % m_BufferSize)
1426                     &&  !m_Bad == !m_BufferPos);
1427             if (!m_Bad  &&  (zbc += BLOCK_OF(m_BufferSize)) < 2) {
1428                 _ASSERT(zbc == 1  &&  m_BufferSize == BLOCK_SIZE);
1429                 x_WriteArchive(BLOCK_SIZE, no_throw ? (const char*)(-1L) : 0);
1430                 _ASSERT(!(m_BufferPos % m_BufferSize)
1431                         &&  !m_Bad == !m_BufferPos);
1432             }
1433         }
1434         m_ZeroBlockCount = zbc;
1435     }
1436     _ASSERT(!OFFSET_OF(m_BufferPos));
1437 
1438     if (!m_Bad  &&  m_Stream.rdbuf()->PUBSYNC() != 0) {
1439         m_Bad = true;
1440         int x_errno = errno;
1441         s_SetStateSafe(m_Stream, NcbiBadbit);
1442         if (!no_throw) {
1443             TAR_THROW(this, eWrite,
1444                       "Archive flush failed" + s_OSReason(x_errno));
1445         }
1446         TAR_POST(83, Error,
1447                  "Archive flush failed" + s_OSReason(x_errno));
1448     }
1449     if (!m_Bad) {
1450         m_Modified = false;
1451     }
1452     return true;
1453 }
1454 
1455 
// Set the size of the named file to "filesize" bytes.
//
// @param filename  name of the file to resize
// @param filesize  new file size, in bytes
// @return  0 on success;  otherwise an OS error code:  errno on UNIX, or
//          GetLastError() on MS-Windows.  On a platform that is neither,
//          nothing is done and 0 is returned.
static int s_TruncateFile(const string& filename, Uint8 filesize)
{
    int x_error = 0;
#ifdef NCBI_OS_UNIX
    const int rc = ::truncate(filename.c_str(), (off_t) filesize);
    if (rc != 0) {
        x_error = errno;
    }
#endif //NCBI_OS_UNIX
#ifdef NCBI_OS_MSWIN
    TXString x_filename(_T_XSTRING(filename));
    HANDLE handle = ::CreateFile(x_filename.c_str(), GENERIC_WRITE,
                                 0/*sharing*/, NULL, OPEN_EXISTING,
                                 FILE_ATTRIBUTE_NORMAL, NULL);
    if (handle == INVALID_HANDLE_VALUE) {
        x_error = (int) ::GetLastError();
    } else {
        // Move the file pointer to the new size, and cut the file off there
        LARGE_INTEGER x_filesize;
        x_filesize.QuadPart = filesize;
        const bool resized
            =    ::SetFilePointerEx(handle, x_filesize, NULL, FILE_BEGIN)
              && ::SetEndOfFile(handle);
        if (!resized) {
            x_error = (int) ::GetLastError();
        }
        // Always close the handle, but let an earlier error take precedence
        if (!::CloseHandle(handle)  &&  !x_error) {
            x_error = (int) ::GetLastError();
        }
    }
#endif //NCBI_OS_MSWIN
    return x_error;
}
1485 
1486 
x_Close(bool truncate)1487 void CTar::x_Close(bool truncate)
1488 {
1489     if (m_FileStream  &&  m_FileStream->is_open()) {
1490         m_FileStream->close();
1491         if (!m_Bad  &&  !m_FileStream->good()) {
1492             int x_errno = errno;
1493             TAR_POST(104, Error,
1494                      "Cannot close archive" + s_OSReason(x_errno));
1495             m_Bad = true;
1496         }
1497         if (!m_Bad  &&  !(m_Flags & fTarfileNoTruncate)  &&  truncate) {
1498             s_TruncateFile(m_FileName, m_StreamPos);
1499         }
1500     }
1501     m_OpenMode  = eNone;
1502     m_Modified  = false;
1503     m_BufferPos = 0;
1504     m_Bad = false;
1505 }
1506 
1507 
x_Open(EAction action)1508 void CTar::x_Open(EAction action)
1509 {
1510     _ASSERT(action);
1511     bool toend = false;
1512     // We can only open a named file here, and if an external stream is being
1513     // used as an archive, it must be explicitly repositioned by user's code
1514     // (outside of this class) before each archive operation.
1515     if (!m_FileStream) {
1516         if (!m_Modified) {
1517             // Check if Create() is followed by Append()
1518             if (m_OpenMode != eWO  &&  action == eAppend
1519                 &&  (m_Flags & fStreamPipeThrough)) {
1520                 toend = true;
1521             }
1522         } else if (action != eAppend) {
1523             _ASSERT(m_OpenMode != eWO);  // NB: Prev action != eCreate
1524             if (m_Flags & fStreamPipeThrough) {
1525                 x_Flush();  // NB: resets m_Modified to false if successful
1526             }
1527             if (m_Modified) {
1528                 if (!m_Bad) {
1529                     TAR_POST(1, Warning,
1530                              "Pending changes may be discarded"
1531                              " upon reopen of in-stream archive");
1532                 }
1533                 m_Modified = false;
1534             }
1535         }
1536         m_Current.m_Name.erase();
1537         if (m_Bad || (m_Stream.rdstate() & ~NcbiEofbit) || !m_Stream.rdbuf()) {
1538             TAR_THROW(this, eOpen,
1539                       "Archive I/O stream is in bad state");
1540         } else {
1541             m_OpenMode = EOpenMode(int(action) & eRW);
1542             _ASSERT(m_OpenMode != eNone);
1543         }
1544         if (action != eAppend  &&  action != eInternal) {
1545             m_BufferPos = 0;
1546             m_StreamPos = 0;
1547         }
1548 #ifdef NCBI_OS_MSWIN
1549         if (&m_Stream == &cin) {
1550             HANDLE handle = (HANDLE) _get_osfhandle(_fileno(stdin));
1551             if (GetFileType(handle) != FILE_TYPE_DISK) {
1552                 m_Flags |= fSlowSkipWithRead;
1553             }
1554         }
1555 #endif //NCBI_OS_MSWIN
1556     } else {
1557         _ASSERT(&m_Stream == m_FileStream);
1558         EOpenMode mode = EOpenMode(int(action) & eRW);
1559         _ASSERT(mode != eNone);
1560         if (action != eAppend  &&  action != eCreate/*mode == eWO*/) {
1561             x_Flush();
1562         } else {
1563             m_Current.m_Name.erase();
1564         }
1565         if (mode == eWO  ||  m_OpenMode < mode) {
1566             // Need to (re-)open the archive file
1567             if (m_OpenMode != eWO  &&  action == eAppend) {
1568                 toend = true;
1569             }
1570             x_Close(false);  // NB: m_OpenMode = eNone; m_Modified = false
1571             m_StreamPos = 0;
1572             switch (mode) {
1573             case eWO:
1574                 // WO access
1575                 _ASSERT(action == eCreate);
1576                 // Note that m_Modified is untouched
1577                 m_FileStream->open(m_FileName.c_str(),
1578                                    IOS_BASE::out    |
1579                                    IOS_BASE::binary | IOS_BASE::trunc);
1580                 break;
1581             case eRO:
1582                 // RO access
1583                 _ASSERT(action != eCreate);
1584                 m_FileStream->open(m_FileName.c_str(),
1585                                    IOS_BASE::in     |
1586                                    IOS_BASE::binary);
1587                 break;
1588             case eRW:
1589                 // RW access
1590                 _ASSERT(action != eCreate);
1591                 m_FileStream->open(m_FileName.c_str(),
1592                                    IOS_BASE::in     | IOS_BASE::out |
1593                                    IOS_BASE::binary);
1594                 break;
1595             default:
1596                 _TROUBLE;
1597                 break;
1598             }
1599             if (!m_FileStream->is_open()  ||  !m_FileStream->good()) {
1600                 int x_errno = errno;
1601                 TAR_THROW(this, eOpen,
1602                           "Cannot open archive" + s_OSReason(x_errno));
1603             } else {
1604                 m_OpenMode = mode;
1605             }
1606         } else {
1607             // No need to reopen the archive file
1608             _ASSERT(m_OpenMode > eWO  &&  action != eCreate);
1609             if (m_Bad) {
1610                 TAR_THROW(this, eOpen,
1611                           "Archive file is in bad state");
1612             }
1613             if (action != eAppend  &&  action != eInternal) {
1614                 m_BufferPos = 0;
1615                 m_StreamPos = 0;
1616                 m_FileStream->seekg(0);
1617             }
1618         }
1619     }
1620     if (toend) {
1621         _ASSERT(!m_Modified  &&  action == eAppend);
1622         // There may be an extra and unnecessary archive file scanning
1623         // if Append() follows Update() that caused no modifications;
1624         // but there is no way to distinguish this, currently :-/
1625         // Also, this sequence should be a real rarity in practice.
1626         x_ReadAndProcess(eAppend);  // to position at logical EOF
1627     }
1628     _ASSERT(!(m_Stream.rdstate() & ~NcbiEofbit));
1629     _ASSERT(m_Stream.rdbuf());
1630 }
1631 
1632 
Extract(void)1633 unique_ptr<CTar::TEntries> CTar::Extract(void)
1634 {
1635     x_Open(eExtract);
1636     unique_ptr<TEntries> entries = x_ReadAndProcess(eExtract);
1637 
1638     // Restore attributes of "postponed" directory entries
1639     if (m_Flags & fPreserveAll) {
1640         ITERATE(TEntries, e, *entries) {
1641             if (e->GetType() == CTarEntryInfo::eDir) {
1642                 x_RestoreAttrs(*e, m_Flags);
1643             }
1644         }
1645     }
1646 
1647     return entries;
1648 }
1649 
1650 
GetNextEntryInfo(void)1651 const CTarEntryInfo* CTar::GetNextEntryInfo(void)
1652 {
1653     if (m_Bad) {
1654         return 0;
1655     }
1656     if (m_OpenMode & eRO) {
1657         x_Skip(BLOCK_OF(m_Current.GetPosition(CTarEntryInfo::ePos_Data)
1658                         + ALIGN_SIZE(m_Current.GetSize()) - m_StreamPos));
1659     } else {
1660         x_Open(eInternal);
1661     }
1662     unique_ptr<TEntries> temp = x_ReadAndProcess(eInternal);
1663     _ASSERT(temp  &&  temp->size() < 2);
1664     if (temp->size() < 1) {
1665         return 0;
1666     }
1667     _ASSERT(m_Current == temp->front());
1668     return &m_Current;
1669 }
1670 
1671 
1672 // Return a pointer to buffer, which is always block-aligned, and reflect the
1673 // number of bytes available via the parameter.  Return NULL when unable to
1674 // read (either EOF or other read error).
x_ReadArchive(size_t & n)1675 const char* CTar::x_ReadArchive(size_t& n)
1676 {
1677     _ASSERT(!OFFSET_OF(m_BufferPos)  &&  m_BufferPos < m_BufferSize);
1678     _ASSERT(!OFFSET_OF(m_StreamPos));
1679     _ASSERT(n != 0);
1680     size_t nread;
1681     if (!m_BufferPos) {
1682         nread = 0;
1683         do {
1684             streamsize xread;
1685             IOS_BASE::iostate iostate = m_Stream.rdstate();
1686             if (!iostate) {  // NB: good()
1687 #ifdef NCBI_COMPILER_MIPSPRO
1688                 try {
1689                     // Work around a bug in MIPSPro 7.3's streambuf::xsgetn()
1690                     CNcbiIstream* is = dynamic_cast<CNcbiIstream*>(&m_Stream);
1691                     _ASSERT(is);
1692                     is->read (m_Buffer                  + nread,
1693                               (streamsize)(m_BufferSize - nread));
1694                     xread = is->gcount();
1695                     if (xread > 0) {
1696                         is->clear();
1697                     }
1698                 } catch (IOS_BASE::failure&) {
1699                     xread = m_Stream.rdstate() & NcbiEofbit ? 0 : -1;
1700                 }
1701 #else
1702                 try {
1703                     xread = m_Stream.rdbuf()->
1704                         sgetn(m_Buffer                  + nread,
1705                               (streamsize)(m_BufferSize - nread));
1706 #  ifdef NCBI_COMPILER_WORKSHOP
1707                     if (xread < 0) {
1708                         xread = 0;  // NB: WS6 is known to return -1 :-/
1709                     }
1710 #  endif //NCBI_COMPILER_WORKSHOP
1711                 } catch (IOS_BASE::failure&) {
1712                     xread = -1;
1713                 }
1714 #endif //NCBI_COMPILER_MIPSPRO
1715             } else {
1716                 xread = iostate == NcbiEofbit ? 0 : -1;
1717             }
1718             if (xread <= 0) {
1719                 if (nread  &&  (m_Flags & fDumpEntryHeaders)) {
1720                     TAR_POST(57, xread ? Error : Warning,
1721                              "Short read (" + NStr::NumericToString(nread)
1722                              + (xread ? ")" : "): EOF"));
1723                 }
1724                 s_SetStateSafe(m_Stream, xread < 0 ? NcbiBadbit : NcbiEofbit);
1725                 if (nread) {
1726                     break;
1727                 }
1728                 return 0;
1729             }
1730             nread += (size_t) xread;
1731         } while (nread < m_BufferSize);
1732         memset(m_Buffer + nread, 0, m_BufferSize - nread);
1733     } else {
1734         nread = m_BufferSize - m_BufferPos;
1735     }
1736     if (n > nread) {
1737         n = nread;
1738     }
1739     size_t xpos = m_BufferPos;
1740     m_BufferPos += ALIGN_SIZE(n);
1741     _ASSERT(xpos < m_BufferPos  &&  m_BufferPos <= m_BufferSize);
1742     if (m_BufferPos == m_BufferSize) {
1743         m_BufferPos  = 0;
1744         if (!m_FileStream  &&  (m_Flags & fStreamPipeThrough)) {
1745             size_t zbc = m_ZeroBlockCount;
1746             x_WriteArchive(m_BufferSize);
1747             m_StreamPos -= m_BufferSize;
1748             _ASSERT(m_BufferPos == 0);
1749             m_ZeroBlockCount = zbc;
1750         }
1751     }
1752     _ASSERT(!OFFSET_OF(m_BufferPos)  &&  m_BufferPos < m_BufferSize);
1753     return m_Buffer + xpos;
1754 }
1755 
1756 
1757 // All partial internal (i.e. in-buffer) block writes are _not_ block-aligned;
1758 // but all external writes (i.e. when "src" is provided) _are_ block-aligned.
x_WriteArchive(size_t nwrite,const char * src)1759 void CTar::x_WriteArchive(size_t nwrite, const char* src)
1760 {
1761     if (!nwrite  ||  m_Bad) {
1762         return;
1763     }
1764     m_Modified = true;
1765     m_ZeroBlockCount = 0;
1766     do {
1767         _ASSERT(m_BufferPos < m_BufferSize);
1768         size_t avail = m_BufferSize - m_BufferPos;
1769         if (avail > nwrite) {
1770             avail = nwrite;
1771         }
1772         size_t advance = avail;
1773         if (src  &&  src != (const char*)(-1L)) {
1774             memcpy(m_Buffer + m_BufferPos, src, avail);
1775             size_t pad = ALIGN_SIZE(avail) - avail;
1776             memset(m_Buffer + m_BufferPos + avail, 0, pad);
1777             advance += pad;
1778             src += avail;
1779         }
1780         m_BufferPos += advance;
1781         _ASSERT(m_BufferPos <= m_BufferSize);
1782         if (m_BufferPos == m_BufferSize) {
1783             size_t nwritten = 0;
1784             do {
1785                 int x_errno;
1786                 streamsize xwritten;
1787                 IOS_BASE::iostate iostate = m_Stream.rdstate();
1788                 if (!(iostate & ~NcbiEofbit)) {  // NB: good() OR eof()
1789                     try {
1790                         xwritten = m_Stream.rdbuf()
1791                             ->sputn(m_Buffer                  + nwritten,
1792                                     (streamsize)(m_BufferSize - nwritten));
1793                     } catch (IOS_BASE::failure&) {
1794                         xwritten = -1;
1795                     }
1796                     if (xwritten > 0) {
1797                         if (iostate) {
1798                             m_Stream.clear();
1799                         }
1800                         x_errno = 0;
1801                     } else {
1802                         x_errno = errno;
1803                     }
1804                 } else {
1805                     xwritten = -1;
1806                     x_errno = 0;
1807                 }
1808                 if (xwritten <= 0) {
1809                     m_Bad = true;
1810                     s_SetStateSafe(m_Stream, NcbiBadbit);
1811                     if (src != (const char*)(-1L)) {
1812                         TAR_THROW(this, eWrite,
1813                                   "Archive write failed" +s_OSReason(x_errno));
1814                     }
1815                     TAR_POST(84, Error,
1816                              "Archive write failed" + s_OSReason(x_errno));
1817                     return;
1818                 }
1819                 nwritten += (size_t) xwritten;
1820             } while (nwritten < m_BufferSize);
1821             m_BufferPos = 0;
1822         }
1823         m_StreamPos += advance;
1824         nwrite      -= avail;
1825     } while (nwrite);
1826     _ASSERT(m_BufferPos < m_BufferSize);
1827 }
1828 
1829 
1830 // PAX (Portable Archive Interchange) extraction support
1831 
// Bit flags that tell which pieces of extended (mostly numeric) information
// have been obtained from a PAX header (NB: all must fit in the perm mask)
typedef enum {
    fPAXNone          = 0x00,
    fPAXSparseGNU_1_0 = 0x01,
    fPAXSparse        = 0x02,
    fPAXMtime         = 0x04,
    fPAXAtime         = 0x08,
    fPAXCtime         = 0x10,
    fPAXSize          = 0x20,
    fPAXUid           = 0x40,
    fPAXGid           = 0x80
} EPAXBit;
typedef unsigned int TPAXBits;  // Bitwise-OR of EPAXBit(s)
1845 
1846 
1847 // Parse "len" bytes of "str" as numeric "valp[.fraq]"
s_ParsePAXNumeric(Uint8 * valp,const char * str,size_t len,string * fraq,EPAXBit assign)1848 static bool s_ParsePAXNumeric(Uint8* valp, const char* str, size_t len,
1849                               string* fraq, EPAXBit assign)
1850 {
1851     _ASSERT(valp  &&  str[len] == '\n');
1852     if (!isdigit((unsigned char)(*str))) {
1853         return false;
1854     }
1855     const char* p = (const char*) memchr(str, '.', len);
1856     if (!p) {
1857         p = str + len;
1858     } else if (fraq == (string*)(-1L)) {
1859         // no decimal point allowed
1860         return false;
1861     }
1862     Uint8 val;
1863     try {
1864         val = NStr::StringToUInt8(CTempString(str, (size_t)(p - str)));
1865     } catch (...) {
1866         return false;
1867     }
1868     if (*p == '.'  &&  ++p != str + len) {
1869         len -= (size_t)(p - str);
1870         _ASSERT(len);
1871         for (size_t n = 0;  n < len;  ++n) {
1872             if (!isdigit((unsigned char) p[n])) {
1873                 return false;
1874             }
1875         }
1876         if (assign  &&  fraq) {
1877             fraq->assign(p, len);
1878         }
1879     } // else (*p == '\n' || !*p)
1880     if (assign) {
1881         *valp = val;
1882     }
1883     return true;
1884 }
1885 
1886 
// Return true if each of the "len" characters at "str" is a lowercase letter
// (which is trivially so for an empty range)
static bool s_AllLowerCase(const char* str, size_t len)
{
    const char* const end = str + len;
    while (str != end) {
        const unsigned char ch = (unsigned char)(*str++);
        if (!(isalpha(ch)  &&  islower(ch))) {
            return false;
        }
    }
    return true;
}
1896 
1897 
1898 // Raise 10 to the power of n
ipow10(unsigned int n)1899 static Uint8 ipow10(unsigned int n)
1900 {
1901     _ASSERT(n < 10);
1902     // for small n this is the fastest
1903     return n ? 10 * ipow10(n - 1) : 1;
1904 }
1905 
1906 
1907 // NB: assumes fraq is all digits
s_FraqToNanosec(const string & fraq)1908 static long s_FraqToNanosec(const string& fraq)
1909 {
1910     size_t len = fraq.size();
1911     if (!len)
1912         return 0;
1913     long result;
1914     if (len < 10) {
1915         Uint8 temp = NStr::StringToUInt8(fraq,
1916                                          NStr::fConvErr_NoThrow |
1917                                          NStr::fConvErr_NoErrMessage);
1918         result = (long)(temp * ipow10((unsigned int)(9 - len)));
1919     } else {
1920         Uint8 temp = NStr::StringToUInt8(CTempString(fraq, 0, 10),
1921                                          NStr::fConvErr_NoThrow |
1922                                          NStr::fConvErr_NoErrMessage);
1923         result = (long)((temp + 5) / 10);
1924     }
1925     _ASSERT(0L <= result  &&  result < 1000000000L);
1926     return result;
1927 }
1928 
1929 
x_ParsePAXData(const string & data)1930 CTar::EStatus CTar::x_ParsePAXData(const string& data)
1931 {
1932     Uint8 major = 0, minor = 0, size = 0, sparse = 0, uid = 0, gid = 0;
1933     Uint8 mtime = 0, atime = 0, ctime = 0, dummy = 0;
1934     string mtime_fraq, atime_fraq, ctime_fraq;
1935     string path, linkpath, name, uname, gname;
1936     string* nodot = (string*)(-1L);
1937     const struct SPAXParseTable {
1938         const char* key;
1939         Uint8*      val;  // non-null for numeric, else do as string
1940         string*     str;  // string or fraction part (if not -1)
1941         EPAXBit     bit;  // for numerics only
1942     } parser[] = {
1943         { "mtime",    &mtime, &mtime_fraq, fPAXMtime },  // num w/fraq: assign
1944         { "atime",    &atime, &atime_fraq, fPAXAtime },
1945         { "ctime",    &ctime, &ctime_fraq, fPAXCtime },
1946       /*{ "dummy",    &dummy, 0,           fPAXSome  },*/// num w/fraq: asg int
1947       /*{ "dummy",    &dummy, &fraq or 0,  fPAXNone  },*/// num w/fraq: ck.only
1948         { "size",     &size,  nodot,       fPAXSize  },  // number:     assign
1949         { "uid",      &uid,   nodot,       fPAXUid   },
1950         { "gid",      &gid,   nodot,       fPAXGid   },
1951       /*{ "dummy",    &dummy, nodot,       fPAXNone  },*/// number:     ck.only
1952         { "path",     0,      &path,       fPAXNone  },  // string:     assign
1953         { "linkpath", 0,      &linkpath,   fPAXNone  },
1954         { "uname",    0,      &uname,      fPAXNone  },
1955         { "gname",    0,      &gname,      fPAXNone  },
1956         { "comment",  0,      0,           fPAXNone  },  // string:     ck.only
1957         { "charset",  0,      0,           fPAXNone  },
1958         // GNU sparse extensions (NB: .size and .realsize don't go together)
1959         { "GNU.sparse.realsize", &sparse, nodot, fPAXSparse },
1960         { "GNU.sparse.major",    &major,  nodot, fPAXSparse },
1961         { "GNU.sparse.minor",    &minor,  nodot, fPAXSparse },
1962         { "GNU.sparse.size",     &dummy,  nodot, fPAXSparse },
1963         { "GNU.sparse.name",     0,       &name, fPAXNone   },
1964         // Other
1965         { "SCHILY.realsize",     &sparse, nodot, fPAXSparse }
1966     };
1967     const char* s = data.c_str();
1968     TPAXBits parsed = fPAXNone;
1969     size_t l = data.size();
1970 
1971     _ASSERT(l  &&  l == strlen(s));
1972     do {
1973         unsigned long len;
1974         size_t klen, vlen;
1975         const char* e;
1976         char *k, *v;
1977 
1978         if (!(e = (char*) memchr(s, '\n', l))) {
1979             e = s + l;
1980         }
1981         errno = 0;
1982         if (!isdigit((unsigned char)(*s))  ||  !(len = strtoul(s, &k, 10))
1983             ||  errno  ||  s + len - 1 != e  ||  (*k != ' '  &&  *k != '\t')
1984             ||  !(v = (char*) memchr(k, '=', (size_t)(e - k))) // NB: k < e
1985             ||  !(klen = (size_t)(v++ - ++k))
1986             ||  memchr(k, ' ', klen)  ||  memchr(k, '\t', klen)
1987             ||  !(vlen = (size_t)(e - v))) {
1988             TAR_POST(74, Error,
1989                      "Skipping malformed PAX data");
1990             return eFailure;
1991         }
1992         bool done = false;
1993         for (size_t n = 0;  n < sizeof(parser) / sizeof(parser[0]);  ++n) {
1994             if (strlen(parser[n].key) == klen
1995                 &&  memcmp(parser[n].key, k, klen) == 0) {
1996                 if (!parser[n].val) {
1997                     if (parser[n].str) {
1998                         parser[n].str->assign(v, vlen);
1999                     }
2000                 } else if (!s_ParsePAXNumeric(parser[n].val, v, vlen,
2001                                               parser[n].str, parser[n].bit)) {
2002                     TAR_POST(75, Error,
2003                              "Ignoring bad numeric \""
2004                              + CTempString(v, vlen)
2005                              + "\" in PAX value \""
2006                              + CTempString(k, klen) + '"');
2007                 } else {
2008                     parsed |= parser[n].bit;
2009                 }
2010                 done = true;
2011                 break;
2012             }
2013         }
2014         if (!done  &&  s_AllLowerCase(k, klen)/*&&  !memchr(k, '.', klen)*/) {
2015             TAR_POST(76, Warning,
2016                      "Ignoring unrecognized PAX value \""
2017                      + CTempString(k, klen) + '"');
2018         }
2019         if (!*e) {
2020             break;
2021         }
2022         l -= len;
2023         s  = ++e;
2024         _ASSERT(l == strlen(s));
2025     } while (l);
2026 
2027     if ((parsed & fPAXSparse)  &&  (sparse | dummy)) {
2028         if (sparse  &&  dummy  &&  sparse != dummy) {
2029             TAR_POST(95, Warning,
2030                      "Ignoring PAX GNU sparse file size "
2031                      + NStr::NumericToString(dummy)
2032                      + " when real size "
2033                      + NStr::NumericToString(sparse)
2034                      + " is also present");
2035         } else if (!dummy  &&  major == 1  &&  minor == 0) {
2036             if (!(m_Flags & fSparseUnsupported)) {
2037                 if (!name.empty()) {
2038                     if (!path.empty()) {
2039                         TAR_POST(96, Warning,
2040                                  "Replacing PAX file name \"" + path
2041                                  + "\" with GNU sparse file name \"" + name
2042                                  + '"');
2043                     }
2044                     path.swap(name);
2045                 }
2046                 parsed |= fPAXSparseGNU_1_0;
2047             }
2048             _ASSERT(sparse);
2049         } else if (!sparse) {
2050             sparse = dummy;
2051         }
2052         size = sparse;
2053     }
2054 
2055     m_Current.m_Name.swap(path);
2056     m_Current.m_LinkName.swap(linkpath);
2057     m_Current.m_UserName.swap(uname);
2058     m_Current.m_GroupName.swap(gname);
2059     m_Current.m_Stat.mtime_nsec    = s_FraqToNanosec(mtime_fraq);
2060     m_Current.m_Stat.atime_nsec    = s_FraqToNanosec(atime_fraq);
2061     m_Current.m_Stat.ctime_nsec    = s_FraqToNanosec(ctime_fraq);
2062     m_Current.m_Stat.orig.st_mtime = (time_t) mtime;
2063     m_Current.m_Stat.orig.st_atime = (time_t) atime;
2064     m_Current.m_Stat.orig.st_ctime = (time_t) ctime;
2065     m_Current.m_Stat.orig.st_size  = (off_t)  size;
2066     m_Current.m_Stat.orig.st_uid   = (uid_t)  uid;
2067     m_Current.m_Stat.orig.st_gid   = (gid_t)  gid;
2068     m_Current.m_Pos                = sparse;  //  real (expanded) file size
2069 
2070     m_Current.m_Stat.orig.st_mode  = (mode_t) parsed;
2071     return eContinue;
2072 }
2073 
2074 
s_Dump(const string & file,Uint8 pos,size_t recsize,const string & entryname,const SHeader * h,ETar_Format fmt,Uint8 datasize)2075 static void s_Dump(const string& file, Uint8 pos, size_t recsize,
2076                    const string& entryname, const SHeader* h,
2077                    ETar_Format fmt, Uint8 datasize)
2078 {
2079     _ASSERT(!OFFSET_OF(pos));
2080     EDiagSev level = SetDiagPostLevel(eDiag_Info);
2081     Uint8 blocks = BLOCK_OF(ALIGN_SIZE(datasize));
2082     ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2083              + s_DumpHeader(h, fmt) + '\n'
2084              + (blocks
2085                 &&  (h->typeflag[0] != 'S'
2086                      ||  fmt != eTar_OldGNU
2087                      ||  !*h->gnu.contind)
2088                 ? "Blocks of data:     " + NStr::NumericToString(blocks) + '\n'
2089                 : kEmptyStr));
2090     SetDiagPostLevel(level);
2091 }
2092 
2093 
s_DumpSparse(const string & file,Uint8 pos,size_t recsize,const string & entryname,const SHeader * h,const char * contind,Uint8 datasize)2094 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2095                          const string& entryname, const SHeader* h,
2096                          const char* contind, Uint8 datasize)
2097 {
2098     _ASSERT(!OFFSET_OF(pos));
2099     EDiagSev level = SetDiagPostLevel(eDiag_Info);
2100     Uint8 blocks = !*contind ? BLOCK_OF(ALIGN_SIZE(datasize)) : 0;
2101     ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2102              + "GNU sparse file map header (cont'd):\n"
2103              + s_DumpSparseMap(h, (const char*) h, contind) + '\n'
2104              + (blocks
2105                 ? "Blocks of data:     " + NStr::NumericToString(blocks) + '\n'
2106                 : kEmptyStr));
2107     SetDiagPostLevel(level);
2108 }
2109 
2110 
s_DumpSparse(const string & file,Uint8 pos,size_t recsize,const string & entryname,const vector<pair<Uint8,Uint8>> & bmap)2111 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2112                          const string& entryname,
2113                          const vector< pair<Uint8, Uint8> >& bmap)
2114 {
2115     _ASSERT(!OFFSET_OF(pos));
2116     EDiagSev level = SetDiagPostLevel(eDiag_Info);
2117     ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2118              + "PAX GNU/1.0 sparse file map data:\n"
2119              + s_DumpSparseMap(bmap) + '\n');
2120     SetDiagPostLevel(level);
2121 }
2122 
2123 
s_DumpZero(const string & file,Uint8 pos,size_t recsize,size_t zeroblock_count,bool eot=false)2124 static void s_DumpZero(const string& file, Uint8 pos, size_t recsize,
2125                        size_t zeroblock_count, bool eot = false)
2126 {
2127     _ASSERT(!OFFSET_OF(pos));
2128     EDiagSev level = SetDiagPostLevel(eDiag_Info);
2129     ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, kEmptyStr)
2130              + (zeroblock_count
2131                 ? "Zero block " + NStr::NumericToString(zeroblock_count)
2132                 : (eot ? "End-Of-Tape" : "End-Of-File")) + '\n');
2133     SetDiagPostLevel(level);
2134 }
2135 
2136 
// Return true if "c" is an octal digit, i.e. one of '0' through '7'
static inline bool s_IsOctal(char c)
{
    return !(c < '0'  ||  '7' < c);
}
2141 
2142 
x_ReadEntryInfo(bool dump,bool pax)2143 CTar::EStatus CTar::x_ReadEntryInfo(bool dump, bool pax)
2144 {
2145     // Read block
2146     const TBlock* block;
2147     size_t nread = sizeof(block->buffer);
2148     _ASSERT(sizeof(*block) == BLOCK_SIZE/*== sizeof(block->buffer)*/);
2149     if (!(block = (const TBlock*) x_ReadArchive(nread))) {
2150         return eEOF;
2151     }
2152     if (nread != BLOCK_SIZE) {
2153         TAR_THROW(this, eRead,
2154                   "Unexpected EOF in archive");
2155     }
2156     const SHeader* h = &block->header;
2157 
2158     // Check header format
2159     ETar_Format fmt = eTar_Unknown;
2160     if (memcmp(h->magic, "ustar", 6) == 0) {
2161         if ((h->star.prefix[sizeof(h->star.prefix) - 1] == '\0'
2162              &&  s_IsOctal(h->star.atime[0])  &&  h->star.atime[0] == ' '
2163              &&  s_IsOctal(h->star.ctime[0])  &&  h->star.ctime[0] == ' ')
2164             ||  strcmp(block->buffer + BLOCK_SIZE - 4, "tar") == 0) {
2165             fmt = eTar_Star;
2166         } else {
2167             fmt = pax ? eTar_Posix : eTar_Ustar;
2168         }
2169     } else if (memcmp(h->magic, "ustar  ", 8) == 0) {
2170         // Here the magic is protruded into the adjacent version field
2171         fmt = eTar_OldGNU;
2172     } else if (memcmp(h->magic, "\0\0\0\0\0", 6) == 0) {
2173         // We'll use this also to speedup corruption checks w/checksum
2174         fmt = eTar_Legacy;
2175     } else {
2176         TAR_THROW_EX(this, eUnsupportedTarFormat,
2177                      "Unrecognized header format", h, fmt);
2178     }
2179 
2180     Uint8 val;
2181     // Get checksum from header
2182     if (!s_OctalToNum(val, h->checksum, sizeof(h->checksum))) {
2183         // We must allow all zero bytes here in case of pad/zero blocks
2184         bool corrupt;
2185         if (fmt == eTar_Legacy) {
2186             corrupt = false;
2187             for (size_t i = 0;  i < sizeof(block->buffer);  ++i) {
2188                 if (block->buffer[i]) {
2189                     corrupt = true;
2190                     break;
2191                 }
2192             }
2193         } else {
2194             corrupt = true;
2195         }
2196         if (corrupt) {
2197             TAR_THROW_EX(this, eUnsupportedTarFormat,
2198                          "Bad checksum", h, fmt);
2199         }
2200         m_StreamPos += BLOCK_SIZE;  // NB: nread
2201         return eZeroBlock;
2202     }
2203     int checksum = int(val);
2204 
2205     // Compute both signed and unsigned checksums (for compatibility)
2206     int ssum = 0;
2207     unsigned int usum = 0;
2208     const char* p = block->buffer;
2209     for (size_t i = 0;  i < sizeof(block->buffer);  ++i)  {
2210         ssum +=                 *p;
2211         usum += (unsigned char)(*p);
2212         p++;
2213     }
2214     p = h->checksum;
2215     for (size_t j = 0;  j < sizeof(h->checksum);  ++j) {
2216         ssum -=                 *p  - ' ';
2217         usum -= (unsigned char)(*p) - ' ';
2218         p++;
2219     }
2220 
2221     // Compare checksum(s)
2222     if (checksum != ssum   &&  (unsigned int) checksum != usum) {
2223         string message = "Header checksum failed";
2224         if (m_Flags & fDumpEntryHeaders) {
2225             message += ", expected ";
2226             if (usum != (unsigned int) ssum) {
2227                 message += "either ";
2228             }
2229             if (usum > 7) {
2230                 message += "0";
2231             }
2232             message += NStr::NumericToString(usum, 0, 8);
2233             if (usum != (unsigned int) ssum) {
2234                 message += " or ";
2235                 if ((unsigned int) ssum > 7) {
2236                     message += "0";
2237                 }
2238                 message += NStr::NumericToString((unsigned int) ssum, 0, 8);
2239             }
2240         }
2241         TAR_THROW_EX(this, eChecksum,
2242                      message, h, fmt);
2243     }
2244 
2245     // Set all info members now (thus, validating the header block)
2246 
2247     m_Current.m_HeaderSize = BLOCK_SIZE;
2248     unsigned char tflag = toupper((unsigned char) h->typeflag[0]);
2249 
2250     // Name
2251     if (m_Current.GetName().empty()) {
2252         if ((fmt & eTar_Ustar)  &&  h->prefix[0]  &&  tflag != 'X') {
2253             const char* prefix = fmt != eTar_Star ? h->prefix : h->star.prefix;
2254             size_t      pfxlen = fmt != eTar_Star
2255                 ? s_Length(h->prefix,      sizeof(h->prefix))
2256                 : s_Length(h->star.prefix, h->typeflag[0] == 'S'
2257                            ? 107 :         sizeof(h->star.prefix));
2258             m_Current.m_Name
2259                 = CDirEntry::ConcatPath(string(prefix, pfxlen),
2260                                         string(h->name,
2261                                                s_Length(h->name,
2262                                                         sizeof(h->name))));
2263         } else {
2264             // Name prefix cannot be used
2265             m_Current.m_Name.assign(h->name,
2266                                     s_Length(h->name, sizeof(h->name)));
2267         }
2268     }
2269 
2270     // Mode
2271     if (!s_OctalToNum(val, h->mode, sizeof(h->mode))
2272         &&  (val  ||  h->typeflag[0] != 'V')) {
2273         TAR_THROW_EX(this, eUnsupportedTarFormat,
2274                      "Bad entry mode", h, fmt);
2275     }
2276     m_Current.m_Stat.orig.st_mode = (mode_t) val;
2277 
2278     // User Id
2279     if (!s_DecodeUint8(val, h->uid, sizeof(h->uid))
2280         &&  (val  ||  h->typeflag[0] != 'V')) {
2281         TAR_THROW_EX(this, eUnsupportedTarFormat,
2282                      "Bad user ID", h, fmt);
2283     }
2284     m_Current.m_Stat.orig.st_uid = (uid_t) val;
2285 
2286     // Group Id
2287     if (!s_DecodeUint8(val, h->gid, sizeof(h->gid))
2288         &&  (val  ||  h->typeflag[0] != 'V')) {
2289         TAR_THROW_EX(this, eUnsupportedTarFormat,
2290                      "Bad group ID", h, fmt);
2291     }
2292     m_Current.m_Stat.orig.st_gid = (gid_t) val;
2293 
2294     // Size
2295     if (!s_DecodeUint8(val, h->size, sizeof(h->size))
2296         &&  (val  ||  h->typeflag[0] != 'V')) {
2297         TAR_THROW_EX(this, eUnsupportedTarFormat,
2298                      "Bad entry size", h, fmt);
2299     }
2300     m_Current.m_Stat.orig.st_size = (off_t) val;
2301     if (m_Current.GetSize() != val) {
2302         ERR_POST_ONCE(Critical << "CAUTION:"
2303                       " ***"
2304                       " This run-time may not support large TAR entries"
2305                       " (have you built it --with-lfs?)"
2306                       " ***");
2307     }
2308 
2309     // Modification time
2310     if (!s_OctalToNum(val, h->mtime, sizeof(h->mtime))) {
2311         TAR_THROW_EX(this, eUnsupportedTarFormat,
2312                      "Bad modification time", h, fmt);
2313     }
2314     m_Current.m_Stat.orig.st_mtime = (time_t) val;
2315 
2316     if (fmt == eTar_OldGNU  ||  (fmt & eTar_Ustar)) {
2317         // User name
2318         m_Current.m_UserName.assign(h->uname,
2319                                     s_Length(h->uname, sizeof(h->uname)));
2320         // Group name
2321         m_Current.m_GroupName.assign(h->gname,
2322                                      s_Length(h->gname,sizeof(h->gname)));
2323     }
2324 
2325     if (fmt == eTar_OldGNU  ||  fmt == eTar_Star) {
2326         // GNU times may not be valid so checks are relaxed
2327         const char* time;
2328         size_t      tlen;
2329         time = fmt == eTar_Star ?        h->star.atime  :        h->gnu.atime;
2330         tlen = fmt == eTar_Star ? sizeof(h->star.atime) : sizeof(h->gnu.atime);
2331         if (!s_OctalToNum(val, time, tlen)) {
2332             if (fmt == eTar_Star  ||  memcchr(time, '\0', tlen)) {
2333                 TAR_THROW_EX(this, eUnsupportedTarFormat,
2334                              "Bad last access time", h, fmt);
2335             }
2336         } else {
2337             m_Current.m_Stat.orig.st_atime = (time_t) val;
2338         }
2339         time = fmt == eTar_Star ?        h->star.ctime  :        h->gnu.ctime;
2340         tlen = fmt == eTar_Star ? sizeof(h->star.ctime) : sizeof(h->gnu.ctime);
2341         if (!s_OctalToNum(val, time, tlen)) {
2342             if (fmt == eTar_Star  ||  memcchr(time, '\0', tlen)) {
2343                 TAR_THROW_EX(this, eUnsupportedTarFormat,
2344                              "Bad creation time", h, fmt);
2345             }
2346         } else {
2347             m_Current.m_Stat.orig.st_ctime = (time_t) val;
2348         }
2349     }
2350 
2351     // Entry type
2352     switch (h->typeflag[0]) {
2353     case '\0':
2354     case '0':
2355         if (!(fmt & eTar_Ustar)  &&  fmt != eTar_OldGNU) {
2356             size_t namelen = s_Length(h->name, sizeof(h->name));
2357             if (namelen  &&  h->name[namelen - 1] == '/') {
2358                 m_Current.m_Type = CTarEntryInfo::eDir;
2359                 m_Current.m_Stat.orig.st_size = 0;
2360                 break;
2361             }
2362         }
2363         m_Current.m_Type = CTarEntryInfo::eFile;
2364         break;
2365     case '\1':
2366     case '\2':
2367     case '1':
2368     case '2':
2369         m_Current.m_Type = (h->typeflag[0] == '\2'  ||  h->typeflag[0] == '2'
2370                             ? CTarEntryInfo::eSymLink
2371                             : CTarEntryInfo::eHardLink);
2372         m_Current.m_LinkName.assign(h->linkname,
2373                                     s_Length(h->linkname,sizeof(h->linkname)));
2374         if (m_Current.GetSize()) {
2375             if (m_Current.GetType() == CTarEntryInfo::eSymLink) {
2376                 // Mandatory to ignore
2377                 m_Current.m_Stat.orig.st_size = 0;
2378             } else if (fmt != eTar_Posix) {
2379                 TAR_POST(77, Warning,
2380                          "Non-zero hard-link size ("
2381                          + NStr::NumericToString(m_Current.GetSize())
2382                          + ") is ignored (non-PAX)");
2383                 m_Current.m_Stat.orig.st_size = 0;
2384             } // else POSIX (re-)allowed hard links to be followed by file data
2385         }
2386         break;
2387     case '3':
2388     case '4':
2389         m_Current.m_Type = (h->typeflag[0] == '3'
2390                             ? CTarEntryInfo::eCharDev
2391                             : CTarEntryInfo::eBlockDev);
2392         if (!s_OctalToNum(val, h->devminor, sizeof(h->devminor))) {
2393             TAR_THROW_EX(this, eUnsupportedTarFormat,
2394                          "Bad device minor number", h, fmt);
2395         }
2396         usum = (unsigned int) val;  // set aside
2397         if (!s_OctalToNum(val, h->devmajor, sizeof(h->devmajor))) {
2398             TAR_THROW_EX(this, eUnsupportedTarFormat,
2399                          "Bad device major number", h, fmt);
2400         }
2401 #ifdef makedev
2402         m_Current.m_Stat.orig.st_rdev = makedev((unsigned int) val, usum);
2403 #else
2404         if (sizeof(int) >= 4  &&  sizeof(m_Current.m_Stat.orig.st_rdev) >= 4) {
2405             *((unsigned int*) &m_Current.m_Stat.orig.st_rdev) =
2406                 (unsigned int)((val << 16) | usum);
2407         }
2408 #endif //makedev
2409         m_Current.m_Stat.orig.st_size = 0;
2410         break;
2411     case '5':
2412         m_Current.m_Type = CTarEntryInfo::eDir;
2413         m_Current.m_Stat.orig.st_size = 0;
2414         break;
2415     case '6':
2416         m_Current.m_Type = CTarEntryInfo::ePipe;
2417         m_Current.m_Stat.orig.st_size = 0;
2418         break;
2419     case '7':
2420         ERR_POST_ONCE(Critical << "CAUTION:"
2421                       " *** Contiguous TAR entries processed as regular files"
2422                       " ***");
2423         m_Current.m_Type = CTarEntryInfo::eFile;
2424         break;
2425     case 'K':
2426     case 'L':
2427     case 'S':
2428     case 'x':
2429     case 'X':
2430         if ((tflag == 'X'  &&  (fmt & eTar_Ustar))  ||
2431             (tflag != 'X'  &&  fmt == eTar_OldGNU)  ||
2432             (tflag == 'S'  &&  fmt == eTar_Star)) {
2433             // Assign actual type
2434             switch (tflag) {
2435             case 'K':
2436                 m_Current.m_Type = CTarEntryInfo::eGNULongLink;
2437                 break;
2438             case 'L':
2439                 m_Current.m_Type = CTarEntryInfo::eGNULongName;
2440                 break;
2441             case 'S':
2442                 m_Current.m_Type = CTarEntryInfo::eSparseFile;
2443                 break;
2444             case 'X':
2445                 if (pax) {
2446                     TAR_POST(78, Warning,
2447                              "Repetitious PAX headers,"
2448                              " archive may be corrupt");
2449                 }
2450                 fmt = eTar_Posix;  // upgrade
2451                 m_Current.m_Type = CTarEntryInfo::ePAXHeader;
2452                 break;
2453             default:
2454                 _TROUBLE;
2455                 break;
2456             }
2457 
2458             // Dump header
2459             size_t hsize = (size_t) m_Current.GetSize();
2460             if (dump) {
2461                 s_Dump(m_FileName, m_StreamPos, m_BufferSize,
2462                        m_Current.GetName(), h, fmt, hsize);
2463             }
2464             m_StreamPos += BLOCK_SIZE;  // NB: nread
2465 
2466             if (m_Current.m_Type == CTarEntryInfo::eSparseFile) {
2467                 const char* realsize = fmt != eTar_Star
2468                     ?        h->gnu.realsize  : h->star.prefix + 107;
2469                 size_t   realsizelen = fmt != eTar_Star
2470                     ? sizeof(h->gnu.realsize) : 12;
2471                 // Real file size (if present)
2472                 if (!s_DecodeUint8(val, realsize, realsizelen)) {
2473                     val = 0;
2474                 }
2475                 if (fmt == eTar_Star) {
2476                     // Archive file size includes sparse map, and already valid
2477                     m_Current.m_Pos = val;  // NB: real (expanded) file size
2478                     return eSuccess;
2479                 }
2480                 // Skip all GNU sparse file headers (they are not counted
2481                 // towards the sparse file size in the archive ("hsize")!)
2482                 const char* contind = h->gnu.contind;
2483                 while (*contind) {
2484                     _ASSERT(nread == BLOCK_SIZE);
2485                     if (!(block = (const TBlock*) x_ReadArchive(nread))
2486                         ||  nread != BLOCK_SIZE) {
2487                         TAR_THROW(this, eRead,
2488                                   "Unexpected EOF in GNU sparse file map"
2489                                   " extended header");
2490                     }
2491                     h = &block->header;
2492                     contind = block->buffer + (24 * 21)/*504*/;
2493                     if (dump) {
2494                         s_DumpSparse(m_FileName, m_StreamPos, m_BufferSize,
2495                                      m_Current.GetName(), h, contind, hsize);
2496                     }
2497                     m_Current.m_HeaderSize += BLOCK_SIZE;
2498                     m_StreamPos            += BLOCK_SIZE;  // NB: nread
2499                 }
2500                 m_Current.m_Pos = val;  // NB: real (expanded) file size
2501                 return eSuccess;
2502             }
2503 
2504             // Read in the extended header information
2505             val = ALIGN_SIZE(hsize);
2506             string data;
2507             while (hsize) {
2508                 nread = hsize;
2509                 const char* xbuf = x_ReadArchive(nread);
2510                 if (!xbuf) {
2511                     TAR_THROW(this, eRead,
2512                               string("Unexpected EOF in ") +
2513                               (m_Current.GetType()
2514                                == CTarEntryInfo::ePAXHeader
2515                                ? "PAX data" :
2516                                m_Current.GetType()
2517                                == CTarEntryInfo::eGNULongName
2518                                ? "long name"
2519                                : "long link"));
2520                 }
2521                 _ASSERT(nread);
2522                 data.append(xbuf, nread);
2523                 hsize       -=            nread;
2524                 m_StreamPos += ALIGN_SIZE(nread);
2525             }
2526             if (m_Current.GetType() != CTarEntryInfo::ePAXHeader) {
2527                 // Make sure there's no embedded '\0'(s)
2528                 data.resize(strlen(data.c_str()));
2529             }
2530             if (dump) {
2531                 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2532                 ERR_POST(Info << '\n' + s_PositionAsString(m_FileName,
2533                                                            m_StreamPos - val,
2534                                                            m_BufferSize,
2535                                                            m_Current.GetName())
2536                          + (m_Current.GetType() == CTarEntryInfo::ePAXHeader
2537                             ? "PAX data:\n" :
2538                             m_Current.GetType() == CTarEntryInfo::eGNULongName
2539                             ? "Long name:          \""
2540                             : "Long link name:     \"")
2541                          + NStr::PrintableString(data,
2542                                                  m_Current.GetType()
2543                                                  == CTarEntryInfo::ePAXHeader
2544                                                  ? NStr::fNewLine_Passthru
2545                                                  : NStr::fNewLine_Quote)
2546                          + (m_Current.GetType() == CTarEntryInfo::ePAXHeader
2547                             ? data.size()  &&  data[data.size() - 1] == '\n'
2548                             ? kEmptyStr : "\n" : "\"\n"));
2549                 SetDiagPostLevel(level);
2550             }
2551             // Reset size because the data blocks have been all read
2552             m_Current.m_HeaderSize += val;
2553             m_Current.m_Stat.orig.st_size = 0;
2554             if (!val  ||  !data.size()) {
2555                 TAR_POST(79, Error,
2556                          "Skipping " + string(val ? "empty" : "zero-sized")
2557                          + " extended header data");
2558                 return eFailure;
2559             }
2560             switch (m_Current.GetType()) {
2561             case CTarEntryInfo::ePAXHeader:
2562                 return x_ParsePAXData(data);
2563             case CTarEntryInfo::eGNULongName:
2564                 m_Current.m_Name.swap(data);
2565                 return eContinue;
2566             case CTarEntryInfo::eGNULongLink:
2567                 m_Current.m_LinkName.swap(data);
2568                 return eContinue;
2569             default:
2570                 _TROUBLE;
2571                 break;
2572             }
2573             return eFailure;
2574         }
2575         /*FALLTHRU*/
2576     case 'V':
2577     case 'I':
2578         if (h->typeflag[0] == 'V'  ||  h->typeflag[0] == 'I') {
2579             // Safety for no data to actually follow
2580             m_Current.m_Stat.orig.st_size = 0;
2581             if (h->typeflag[0] == 'V') {
2582                 m_Current.m_Type = CTarEntryInfo::eVolHeader;
2583                 break;
2584             }
2585         }
2586         /*FALLTHRU*/
2587     default:
2588         m_Current.m_Type = CTarEntryInfo::eUnknown;
2589         break;
2590     }
2591 
2592     if (dump) {
2593         s_Dump(m_FileName, m_StreamPos, m_BufferSize,
2594                m_Current.GetName(), h, fmt, m_Current.GetSize());
2595     }
2596     m_StreamPos += BLOCK_SIZE;  // NB: nread
2597 
2598     return eSuccess;
2599 }
2600 
2601 
sx_Signature(TBlock * block)2602 static inline void sx_Signature(TBlock* block)
2603 {
2604     _ASSERT(sizeof(block->header) + 4 < sizeof(block->buffer));
2605     memcpy(block->buffer + sizeof(*block) - 4, "NCBI", 4);
2606 }
2607 
2608 
// Build a tar header block for the current entry (m_Current) and write it to
// the archive.
//
// A zero-filled block is populated field by field (name, link name, mode,
// uid, gid, size, mtime, typeflag, device numbers, user/group names, magic,
// optional NCBI signature, checksum), written out with x_WriteArchive(), and
// then m_Current.m_HeaderSize is set from the stream position advance and
// Checkpoint() is called.
//
// @param name
//   Used here only to compose error messages.
//
// Throws (via TAR_THROW): eNameTooLong if a name or link name cannot be
// packed; eMemory if a numeric field or the checksum cannot be stored;
// eUnsupportedEntryType for an entry type that has no typeflag mapping below.
//
// NB: x_PackCurrentName() may itself write extended (long name) blocks to the
// archive before this header block gets written.
void CTar::x_WriteEntryInfo(const string& name)
{
    // Prepare block info
    TBlock block;
    _ASSERT(sizeof(block) == BLOCK_SIZE/*== sizeof(block.buffer)*/);
    memset(block.buffer, 0, sizeof(block.buffer));
    SHeader* h = &block.header;

    // Name(s) ('\0'-terminated if fit entirely, otherwise not)
    if (!x_PackCurrentName(h, false)) {
        TAR_THROW(this, eNameTooLong,
                  "Name '" + m_Current.GetName()
                  + "' too long in entry '" + name + '\'');
    }

    CTarEntryInfo::EType type = m_Current.GetType();

    // Link name is packed for symbolic links only
    if (type == CTarEntryInfo::eSymLink  &&  !x_PackCurrentName(h, true)) {
        TAR_THROW(this, eNameTooLong,
                  "Link '" + m_Current.GetLinkName()
                  + "' too long in entry '" + name + '\'');
    }

    /* NOTE:  Although some sources on the Internet indicate that all but size,
     * mtime, and version numeric fields are '\0'-terminated, we could not
     * confirm that with existing tar programs, all of which we saw using
     * either '\0' or ' '-terminated values in both size and mtime fields.
     * For the ustar archive we have found a document that definitively tells
     * that _all_ numeric fields are '\0'-terminated, and that they can keep
     * up to "sizeof(field)-1" octal digits.  We follow it here.
     * However, GNU and ustar checksums seem to be different indeed, so we
     * don't use a trailing space for ustar, but for GNU only.
     */

    // Mode
    if (!s_NumToOctal(m_Current.GetMode(), h->mode, sizeof(h->mode) - 1)) {
        TAR_THROW(this, eMemory,
                  "Cannot store file mode");
    }

    // Update format as we go:  start out as ustar, and switch to old GNU
    // whenever s_EncodeUint8() returns a negative value
    // NOTE(review): a negative result presumably means the value was stored
    // in a non-octal (GNU-specific) encoding -- confirm with s_EncodeUint8()
    ETar_Format fmt = eTar_Ustar;
    int ok;

    // User ID
    ok = s_EncodeUint8(m_Current.GetUserId(), h->uid, sizeof(h->uid) - 1);
    if (!ok) {
        TAR_THROW(this, eMemory,
                  "Cannot store user ID");
    }
    if (ok < 0) {
        fmt = eTar_OldGNU;
    }

    // Group ID
    ok = s_EncodeUint8(m_Current.GetGroupId(), h->gid, sizeof(h->gid) - 1);
    if (!ok) {
        TAR_THROW(this, eMemory,
                  "Cannot store group ID");
    }
    if (ok < 0) {
        fmt = eTar_OldGNU;
    }

    // Size (expected to be zero for anything but a regular file)
    _ASSERT(type == CTarEntryInfo::eFile  ||  m_Current.GetSize() == 0);
    ok = s_EncodeUint8(m_Current.GetSize(), h->size, sizeof(h->size) - 1);
    if (!ok) {
        TAR_THROW(this, eMemory,
                  "Cannot store file size");
    }
    if (ok < 0) {
        fmt = eTar_OldGNU;
    }

    // If the name packing above has filled in the prefix field, the header
    // is kept as ustar regardless of how the numeric fields got encoded
    if (fmt != eTar_Ustar  &&  h->prefix[0]) {
        // Cannot downgrade to reflect encoding
        fmt  = eTar_Ustar;
    }

    // Modification time
    if (!s_NumToOctal(m_Current.GetModificationTime(),
                      h->mtime, sizeof(h->mtime) - 1)) {
        TAR_THROW(this, eMemory,
                  "Cannot store modification time");
    }

    // Gets set if the device major/minor fields are filled in by the switch
    bool device = false;
    // Type (GNU extension for SymLink)
    switch (type) {
    case CTarEntryInfo::eFile:
        h->typeflag[0] = '0';
        break;
    case CTarEntryInfo::eSymLink:
        h->typeflag[0] = '2';
        break;
    case CTarEntryInfo::eCharDev:
    case CTarEntryInfo::eBlockDev:
        h->typeflag[0] = type == CTarEntryInfo::eCharDev ? '3' : '4';
        if (!s_NumToOctal(m_Current.GetMajor(),
                          h->devmajor, sizeof(h->devmajor) - 1)) {
            TAR_THROW(this, eMemory,
                      "Cannot store major number");
        }
        if (!s_NumToOctal(m_Current.GetMinor(),
                          h->devminor, sizeof(h->devminor) - 1)) {
            TAR_THROW(this, eMemory,
                      "Cannot store minor number");
        }
        device = true;
        break;
    case CTarEntryInfo::eDir:
        h->typeflag[0] = '5';
        break;
    case CTarEntryInfo::ePipe:
        h->typeflag[0] = '6';
        break;
    default:
        _TROUBLE;
        TAR_THROW(this, eUnsupportedEntryType,
                  "Do not know how to archive entry '" + name
                  + "' of type #" + NStr::IntToString(int(type))
                  + ": Internal error");
        /*NOTREACHED*/
        break;
    }

    // User and group names:  copied only if strictly shorter than the field
    // (so that the zero-filled block keeps them '\0'-terminated); a longer
    // name is left out entirely
    const string& usr = m_Current.GetUserName();
    size_t len = usr.size();
    if (len < sizeof(h->uname)) {
        memcpy(h->uname, usr.c_str(), len);
    }
    const string& grp = m_Current.GetGroupName();
    len = grp.size();
    if (len < sizeof(h->gname)) {
        memcpy(h->gname, grp.c_str(), len);
    }

    // Device numbers to complete the ustar header protocol (all fields ok)
    if (!device  &&  fmt != eTar_OldGNU) {
        s_NumToOctal(0, h->devmajor, sizeof(h->devmajor) - 1);
        s_NumToOctal(0, h->devminor, sizeof(h->devminor) - 1);
    }

    if (fmt != eTar_OldGNU) {
        // Magic
        strcpy(h->magic,   "ustar");
        // Version (EXCEPTION:  not '\0' terminated)
        memcpy(h->version, "00", 2);
    } else {
        // NB: Old GNU magic protrudes into adjacent version field
        memcpy(h->magic,   "ustar  ", 8);  // 2 spaces and '\0'-terminated
    }

    // NCBI signature if allowed
    if (!(m_Flags & fStandardHeaderOnly)) {
        sx_Signature(&block);
    }

    // Final step: checksumming (second argument selects the old GNU variant)
    if (!s_TarChecksum(&block, fmt == eTar_OldGNU ? true : false)) {
        TAR_THROW(this, eMemory,
                  "Cannot store checksum");
    }

    // Write header
    x_WriteArchive(sizeof(block.buffer), block.buffer);
    // Header size is everything written since the entry's start position
    m_Current.m_HeaderSize = (streamsize)(m_StreamPos - m_Current.m_Pos);

    Checkpoint(m_Current, true/*write*/);
}
2781 
2782 
x_PackCurrentName(STarHeader * h,bool link)2783 bool CTar::x_PackCurrentName(STarHeader* h, bool link)
2784 {
2785     const string& name = link ? m_Current.GetLinkName() : m_Current.GetName();
2786     size_t        size = link ? sizeof(h->linkname)     : sizeof(h->name);
2787     char*          dst = link ? h->linkname             : h->name;
2788     const char*    src = name.c_str();
2789     size_t         len = name.size();
2790 
2791     if (len <= size) {
2792         // Name fits!
2793         memcpy(dst, src, len);
2794         return true;
2795     }
2796 
2797     bool packed = false;
2798     if (!link  &&  len <= sizeof(h->prefix) + 1 + sizeof(h->name)) {
2799         // Try to split the long name into a prefix and a short name (POSIX)
2800         size_t i = len;
2801         if (i > sizeof(h->prefix)) {
2802             i = sizeof(h->prefix);
2803         }
2804         while (i > 0  &&  src[--i] != '/');
2805         if (i  &&  len - i <= sizeof(h->name) + 1) {
2806             memcpy(h->prefix, src,         i);
2807             memcpy(h->name,   src + i + 1, len - i - 1);
2808             if (!(m_Flags & fLongNameSupplement))
2809                 return true;
2810             packed = true;
2811         }
2812     }
2813 
2814     // Still, store the initial part in the original header
2815     if (!packed) {
2816         memcpy(dst, src, size);
2817     }
2818 
2819     // Prepare extended block header with the long name info (old GNU style)
2820     _ASSERT(!OFFSET_OF(m_BufferPos)  &&  m_BufferPos < m_BufferSize);
2821     TBlock* block = (TBlock*)(m_Buffer + m_BufferPos);
2822     memset(block->buffer, 0, sizeof(block->buffer));
2823     h = &block->header;
2824 
2825     // See above for comments about header filling
2826     ++len;  // write terminating '\0' as it can always be made to fit in
2827     strcpy(h->name, "././@LongLink");
2828     s_NumToOctal(0,         h->mode,  sizeof(h->mode) - 1);
2829     s_NumToOctal(0,         h->uid,   sizeof(h->uid)  - 1);
2830     s_NumToOctal(0,         h->gid,   sizeof(h->gid)  - 1);
2831     if (!s_EncodeUint8(len, h->size,  sizeof(h->size) - 1)) {
2832         return false;
2833     }
2834     s_NumToOctal(0,         h->mtime, sizeof(h->mtime)- 1);
2835     h->typeflag[0] = link ? 'K' : 'L';
2836 
2837     // Old GNU magic protrudes into adjacent version field
2838     memcpy(h->magic, "ustar  ", 8);  // 2 spaces and '\0'-terminated
2839 
2840     // NCBI signature if allowed
2841     if (!(m_Flags & fStandardHeaderOnly)) {
2842         sx_Signature(block);
2843     }
2844 
2845     s_TarChecksum(block, true);
2846 
2847     // Write the header
2848     x_WriteArchive(sizeof(block->buffer));
2849 
2850     // Store the full name in the extended block (will be aligned as necessary)
2851     x_WriteArchive(len, src);
2852 
2853     return true;
2854 }
2855 
2856 
// Step the archive position back over the trailing zero blocks counted in
// m_ZeroBlockCount.
//
// Always clears the current entry name; does nothing else when there are no
// zero blocks to back over.
//
// For a non-file stream only the in-buffer position can be rewound:  if the
// zero blocks extend beyond the buffered data, the rewind is limited to
// what is in the buffer (with a warning for eAppend / eUpdate).
//
// For a file stream, the record that contains the new position is re-read
// when necessary, and the put position is then reset to the beginning of
// that record.  On a seek / read failure an error is posted and the stream
// is marked bad via s_SetStateSafe().
//
// @param action
//   Used only to decide whether to post the "gapped archive" warning.
void CTar::x_Backspace(EAction action)
{
    _ASSERT(SIZE_OF(m_ZeroBlockCount) <= m_StreamPos);
    _ASSERT(!OFFSET_OF(m_StreamPos));
    m_Current.m_Name.erase();
    if (!m_ZeroBlockCount) {
        return;
    }

    // Number of bytes to back up
    size_t gap = SIZE_OF(m_ZeroBlockCount);
    if (!m_FileStream) {
        if (gap > m_BufferPos) {
            if (action == eAppend  ||  action == eUpdate) {
                TAR_POST(4, Warning,
                         "In-stream update may result in gapped tar archive");
            }
            // Can only back up by as much as there is in the buffer
            gap = m_BufferPos;
            m_ZeroBlockCount -= BLOCK_OF(gap);
        }
        m_BufferPos -= gap;
        m_StreamPos -= gap;
        return;
    }

    // Tarfile here
    m_StreamPos -= gap;
    // Record number and the offset within that record for the new position
    CT_POS_TYPE rec  = (CT_OFF_TYPE)(m_StreamPos / m_BufferSize);
    size_t      off  = (size_t)     (m_StreamPos % m_BufferSize);
    if (m_BufferPos == 0) {
        // Zero buffer position is treated as the end of a full record
        m_BufferPos += m_BufferSize;
    }
    if (gap > m_BufferPos) {
        // New position is not within the currently buffered record
        m_BufferPos  = 0;
        size_t temp  = BLOCK_SIZE;
        // Re-fetch the entire record
        if (!m_FileStream->seekg(rec * m_BufferSize)
            // NB: successful positioning guarantees the stream was !fail(),
            // which means it might have only been either good() or eof()
            ||  (m_FileStream->clear(), !x_ReadArchive(temp))
            ||  temp != BLOCK_SIZE) {
            TAR_POST(65, Error,
                     "Archive backspace error in record reget");
            s_SetStateSafe(m_Stream, NcbiBadbit);
            return;
        }
        m_BufferPos  = off;
    } else {
        m_BufferPos -= gap;
    }
    _ASSERT(!OFFSET_OF(m_BufferPos)  &&  m_BufferPos < m_BufferSize);

    // Always reset the put position there
#if defined(_LIBCPP_VERSION)  &&  _LIBCPP_VERSION <= 1101
    m_FileStream->clear();  // This is to only work around a bug
#endif //_LIBCPP_VERSION
    if (!m_FileStream->seekp(rec * m_BufferSize)) {
        TAR_POST(80, Error,
                 "Archive backspace error in record reset");
        s_SetStateSafe(m_Stream, NcbiBadbit);
        return;
    }
    // NB: reset only upon success on this (file stream) path
    m_ZeroBlockCount = 0;
}
2920 
2921 
s_MatchExcludeMask(const CTempString & name,const list<CTempString> & elems,const CMask * mask,NStr::ECase acase)2922 static bool s_MatchExcludeMask(const CTempString&       name,
2923                                const list<CTempString>& elems,
2924                                const CMask*             mask,
2925                                NStr::ECase              acase)
2926 {
2927     _ASSERT(!name.empty()  &&  mask);
2928     if (elems.empty()) {
2929         return mask->Match(name, acase);
2930     }
2931     if (elems.size() == 1) {
2932         return mask->Match(elems.front(), acase);
2933     }
2934     string temp;
2935     REVERSE_ITERATE(list<CTempString>, e, elems) {
2936         temp = temp.empty() ? string(*e) : string(*e) + '/' + temp;
2937         if (mask->Match(temp, acase)) {
2938             return true;
2939         }
2940     }
2941     return false;
2942 }
2943 
2944 
// Read the archive entry by entry, starting at the current stream position,
// and process each entry according to "action".
//
// Every iteration reads one header with x_ReadEntryInfo().  Extended headers
// (PAX, GNU long name, GNU long link) are not processed on their own:  their
// contents are accumulated in the local "xinfo" and then merged into the next
// regular entry.  Each regular entry is passed to Checkpoint(), matched
// against the extract / exclude masks, and handed over to x_ProcessEntry().
//
// The loop ends at EOF, or at two consecutive zero blocks (unless
// fIgnoreZeroBlocks is set, in which case zero blocks are always skipped);
// x_Backspace() is called before returning in these cases.  For eInternal,
// the loop ends right after the first entry that gets added to the result.
//
// @return
//   The list of entries that have been accepted (see the end of the loop body
//   for the exact condition).
//
// Throws (via TAR_THROW): eRead if EOF is encountered at stream position 0
// (and the action is not eAppend);  eBadName for an entry with an empty name.
unique_ptr<CTar::TEntries> CTar::x_ReadAndProcess(EAction action)
{
    unique_ptr<TEntries> done(new TEntries);
    _ASSERT(!OFFSET_OF(m_StreamPos));
    // Position of the entry being read (including its extended headers)
    Uint8 pos = m_StreamPos;
    // Extended information collected for the upcoming entry;  its type is
    // eUnknown while there is none pending (see the checks below)
    CTarEntryInfo xinfo;

    m_ZeroBlockCount = 0;
    for (;;) {
        // Next block is supposed to be a header
        m_Current = CTarEntryInfo(pos);
        m_Current.m_Name = xinfo.GetName();
        // 1st arg: whether to dump headers;  2nd: whether a PAX header has
        // just been seen
        EStatus status = x_ReadEntryInfo
            (action == eTest  &&  (m_Flags & fDumpEntryHeaders),
             xinfo.GetType() == CTarEntryInfo::ePAXHeader);
        switch (status) {
        case eFailure:
        case eSuccess:
        case eContinue:
            if (m_ZeroBlockCount  &&  !(m_Flags & fIgnoreZeroBlocks)) {
                // Temporarily move the stream position back to where the
                // zero block(s) were, so that the message refers to them
                Uint8 save_pos = m_StreamPos;
                m_StreamPos   -= xinfo.m_HeaderSize + m_Current.m_HeaderSize;
                m_StreamPos   -= SIZE_OF(m_ZeroBlockCount);
                TAR_POST(5, Error,
                         "Interspersing zero block ignored");
                m_StreamPos    = save_pos;
            }
            break;

        case eZeroBlock:
            m_ZeroBlockCount++;
            if (action == eTest  &&  (m_Flags & fDumpEntryHeaders)) {
                s_DumpZero(m_FileName, m_StreamPos - BLOCK_SIZE, m_BufferSize,
                           m_ZeroBlockCount);
            }
            if ((m_Flags & fIgnoreZeroBlocks)  ||  m_ZeroBlockCount < 2) {
                if (xinfo.GetType() == CTarEntryInfo::eUnknown) {
                    // Not yet reading an entry -- advance
                    pos += BLOCK_SIZE;
                }
                continue;
            }
            // Two zero blocks -> eEOF
            /*FALLTHRU*/

        case eEOF:
            if (action == eTest  &&  (m_Flags & fDumpEntryHeaders)) {
                s_DumpZero(m_FileName, m_StreamPos, m_BufferSize, 0,
                           status != eEOF ? true : false);
            }
            if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
                // Extended header(s) without an entry to follow
                TAR_POST(6, Error,
                         "Orphaned extended information ignored");
            } else if (m_ZeroBlockCount < 2  &&  action != eAppend) {
                if (!m_StreamPos) {
                    TAR_THROW(this, eRead,
                              "Unexpected EOF in archive");
                }
                TAR_POST(58, Warning,
                         m_ZeroBlockCount
                         ? "Incomplete EOT in archive"
                         : "Missing EOT in archive");
            }
            x_Backspace(action);
            return done;
        }
        m_ZeroBlockCount = 0;

        //
        // Process entry
        //
        if (status == eContinue) {
            // Extended header information has just been read in
            xinfo.m_HeaderSize += m_Current.m_HeaderSize;

            switch (m_Current.GetType()) {
            case CTarEntryInfo::ePAXHeader:
                xinfo.m_Pos = m_Current.m_Pos;  // NB: real (expanded) filesize
                m_Current.m_Pos = pos;
                if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
                    TAR_POST(7, Error,
                             "Unused extended header replaced");
                }
                // Take over everything that the PAX header has provided
                xinfo.m_Type = CTarEntryInfo::ePAXHeader;
                xinfo.m_Name.swap(m_Current.m_Name);
                xinfo.m_LinkName.swap(m_Current.m_LinkName);
                xinfo.m_UserName.swap(m_Current.m_UserName);
                xinfo.m_GroupName.swap(m_Current.m_GroupName);
                xinfo.m_Stat = m_Current.m_Stat;
                continue;

            case CTarEntryInfo::eGNULongName:
                if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
                    ||  !xinfo.GetName().empty()) {
                    TAR_POST(8, Error,
                             "Unused long name \"" + xinfo.GetName()
                             + "\" replaced");
                }
                // Latch next long name here then just skip
                xinfo.m_Type = CTarEntryInfo::eGNULongName;
                xinfo.m_Name.swap(m_Current.m_Name);
                continue;

            case CTarEntryInfo::eGNULongLink:
                if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
                    ||  !xinfo.GetLinkName().empty()) {
                    TAR_POST(9, Error,
                             "Unused long link \"" + xinfo.GetLinkName()
                             + "\" replaced");
                }
                // Latch next long link here then just skip
                xinfo.m_Type = CTarEntryInfo::eGNULongLink;
                xinfo.m_LinkName.swap(m_Current.m_LinkName);
                continue;

            default:
                _TROUBLE;
                NCBI_THROW(CCoreException, eCore, "Internal error");
                /*NOTREACHED*/
                break;
            }
        }

        // Fixup current 'info' with extended information obtained previously
        m_Current.m_HeaderSize += xinfo.m_HeaderSize;
        xinfo.m_HeaderSize = 0;
        if (!xinfo.GetName().empty()) {
            xinfo.m_Name.swap(m_Current.m_Name);
            xinfo.m_Name.erase();
        }
        if (!xinfo.GetLinkName().empty()) {
            xinfo.m_LinkName.swap(m_Current.m_LinkName);
            xinfo.m_LinkName.erase();
        }
        TPAXBits parsed;
        if (xinfo.GetType() == CTarEntryInfo::ePAXHeader) {
            // NB: the st_mode field of the PAX info is read here as the set
            // of bits that tell which PAX fields have been provided
            parsed = (TPAXBits) xinfo.m_Stat.orig.st_mode;
            if (!xinfo.GetUserName().empty()) {
                xinfo.m_UserName.swap(m_Current.m_UserName);
                xinfo.m_UserName.erase();
            }
            if (!xinfo.GetGroupName().empty()) {
                xinfo.m_GroupName.swap(m_Current.m_GroupName);
                xinfo.m_GroupName.erase();
            }
            if (parsed & fPAXMtime) {
                m_Current.m_Stat.orig.st_mtime = xinfo.m_Stat.orig.st_mtime;
                m_Current.m_Stat.mtime_nsec    = xinfo.m_Stat.mtime_nsec;
            }
            if (parsed & fPAXAtime) {
                m_Current.m_Stat.orig.st_atime = xinfo.m_Stat.orig.st_atime;
                m_Current.m_Stat.atime_nsec    = xinfo.m_Stat.atime_nsec;
            }
            if (parsed & fPAXCtime) {
                m_Current.m_Stat.orig.st_ctime = xinfo.m_Stat.orig.st_ctime;
                m_Current.m_Stat.ctime_nsec    = xinfo.m_Stat.ctime_nsec;
            }
            if (parsed & fPAXSparse) {
                // Mark to post-process below
                xinfo.m_Type = CTarEntryInfo::eSparseFile;
            }
            if (parsed & fPAXSize) {
                m_Current.m_Stat.orig.st_size = xinfo.m_Stat.orig.st_size;
            }
            if (parsed & fPAXUid) {
                m_Current.m_Stat.orig.st_uid = xinfo.m_Stat.orig.st_uid;
            }
            if (parsed & fPAXGid) {
                m_Current.m_Stat.orig.st_gid = xinfo.m_Stat.orig.st_gid;
            }
        } else {
            parsed = fPAXNone/*0*/;
        }
        if (m_Current.GetType() == CTarEntryInfo::eSparseFile) {
            // The entry header itself has denoted a sparse file:  keep the
            // larger of the real sizes, and restore the entry position
            xinfo.m_Type = CTarEntryInfo::eSparseFile;
            if (xinfo.m_Pos < m_Current.m_Pos) {
                xinfo.m_Pos = m_Current.m_Pos;  // NB: real (expanded) filesize
            }
            m_Current.m_Pos = pos;
        }
        Uint8 size = m_Current.GetSize();  // NB: archive size to read
        if (xinfo.GetType() == CTarEntryInfo::eSparseFile) {
            if (m_Current.GetType() != CTarEntryInfo::eFile  &&
                m_Current.GetType() != CTarEntryInfo::eSparseFile) {
                TAR_POST(103, Error,
                         "Ignoring sparse data for non-plain file");
            } else if (parsed & fPAXSparseGNU_1_0) {
                // Report the real (expanded) size unless there's no data
                m_Current.m_Stat.orig.st_size = size ? (off_t) xinfo.m_Pos : 0;
                m_Current.m_Type = CTarEntryInfo::eSparseFile;
            } else {
                // Any other sparse file gets the entry type eUnknown
                m_Current.m_Type = CTarEntryInfo::eUnknown;
                if (size < xinfo.m_Pos) {
                    m_Current.m_Stat.orig.st_size = (off_t) xinfo.m_Pos;
                }
            }
        }
        // Extended information has been consumed
        xinfo.m_Pos = 0;
        xinfo.m_Type = CTarEntryInfo::eUnknown;
        _ASSERT(status == eFailure  ||  status == eSuccess);

        // Last sanity check
        if (status != eFailure  &&  m_Current.GetName().empty()) {
            TAR_THROW(this, eBadName,
                      "Empty entry name in archive");
        }
        // User callback
        if (!Checkpoint(m_Current, false/*read*/)) {
            status = eFailure;
        }

        // Match file name with the set of masks:  the extract mask (if any)
        // applies for eList, eExtract, and eInternal only
        bool match = (status != eSuccess ? false
                      : m_Mask[eExtractMask].mask  &&  (action == eList     ||
                                                        action == eExtract  ||
                                                        action == eInternal)
                      ? m_Mask[eExtractMask].mask->Match(m_Current.GetName(),
                                                         m_Mask[eExtractMask]
                                                         .acase)
                      : true);
        // The exclude mask (if any) applies for all actions but eTest
        if (match  &&  m_Mask[eExcludeMask].mask  &&  action != eTest) {
            list<CTempString> elems;
            _ASSERT(!m_Current.GetName().empty());
            NStr::Split(m_Current.GetName(), "/", elems,
                        NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
            match = !s_MatchExcludeMask(m_Current.GetName(), elems,
                                        m_Mask[eExcludeMask].mask,
                                        m_Mask[eExcludeMask].acase);
        }

        // NB: match is 'false' when processing a failing entry
        // NB: x_ProcessEntry() is not called for a matching eInternal entry
        if ((match  &&  action == eInternal)
            ||  x_ProcessEntry(match  &&  action == eExtract ? eExtract :
                               action == eTest ? eTest : eUndefined,
                               size, done.get())
            ||  (match  &&  (action == eList  ||  action == eUpdate))) {
            _ASSERT(status == eSuccess  &&  action != eTest);
            done->push_back(m_Current);
            if (action == eInternal) {
                break;
            }
        }

        _ASSERT(!OFFSET_OF(m_StreamPos));
        pos = m_StreamPos;
    }

    return done;
}
3193 
3194 
// Convert an archive entry name into a (normalized) file system path.
//
// If "base_dir" is not empty, and "name" is either relative or "noabs" is
// requested, the result is "name" appended to "base_dir".  Otherwise "name"
// is used as is, except that with "noabs" an absolute name gets its leading
// drive / network part (Windows only) and one leading path separator
// removed;  should nothing remain, "." is used.
static string s_ToFilesystemPath(const string& base_dir, const string& name,
                                 bool noabs = false)
{
    _ASSERT(!name.empty());

    const bool absolute = CDirEntry::IsAbsolutePath(name);
    const bool rebase   = !base_dir.empty()  &&  (noabs  ||  !absolute);
    auto is_separator   = [](char c) { return c == '/'  ||  c == '\\'; };

    string result;
    if (rebase) {
        result = CDirEntry::ConcatPath(base_dir, name);
    } else {
        result = name;
        if (absolute  &&  noabs) {
#ifdef NCBI_OS_MSWIN
            if (isalpha((unsigned char) result[0])  &&  result[1] == ':') {
                // Drop the drive letter with its colon
                result.erase(0, 2);
            } else if (is_separator(result[0])  &&  is_separator(result[1])) {
                // Drop the network part, up to the next separator
                result.erase(0, result.find_first_of("/\\", 2));
            }
#endif //NCBI_OS_MSWIN
            if (is_separator(result[0])) {
                result.erase(0, 1);
            }
            if (result.empty()) {
                result.assign(1, '.');
            }
        }
    }

    _ASSERT(!result.empty());
    return CDirEntry::NormalizePath(result);
}
3226 
3227 
// Convert a file system path into the name to store in the archive.
//
// The leading "base_dir" (when the path starts with it) is removed, and so
// are any leading and trailing slashes;  a path equal to "base_dir" becomes
// ".".  A path that does not start with "base_dir" and is absolute keeps
// exactly one leading '/'.  On Windows, backslashes are converted to forward
// slashes, "base_dir" is compared case-insensitively, and a leading drive or
// network name is removed.
static string s_ToArchiveName(const string& base_dir, const string& path)
{
    // NB: Path assumed to have been normalized
    string result = CDirEntry::AddTrailingPathSeparator(path);

#ifdef NCBI_OS_MSWIN
    // Use Unix-style forward slashes throughout
    NStr::ReplaceInPlace(result, "\\", "/");
    const NStr::ECase how = NStr::eNocase;
#else
    const NStr::ECase how = NStr::eCase;
#endif //NCBI_OS_MSWIN

    SIZE_TYPE head     = 0;      // how much to cut off at the front
    bool      absolute = false;

    if (base_dir.empty()  ||  !NStr::StartsWith(result, base_dir, how)) {
        // Not under the base dir
        absolute = CDirEntry::IsAbsolutePath(result);
#ifdef NCBI_OS_MSWIN
        if (isalpha((unsigned char) result[0])  &&  result[1] == ':') {
            // Skip a disk name if present
            head = 2;
        } else if (result[0] == '/'  &&  result[1] == '/') {
            // Skip a network name if present
            head = result.find('/', 2);
            absolute = true;
        }
#endif //NCBI_OS_MSWIN
    } else if (result.size() > base_dir.size()) {
        // Strip the base dir off (separator too)
        result.erase(0, base_dir.size());
    } else {
        result.assign(1, '.');
    }

    // Leading slashes go away...
    for ( ;  head < result.size()  &&  result[head] == '/';  ++head) {
        continue;
    }
    if (head) {
        result.erase(0, head);
    }
    // ...and so do the trailing ones
    result.erase(result.find_last_not_of('/') + 1);

    if (absolute) {
        result.insert(result.begin(), '/');
    }
    return result;
}
3286 
3287 
3288 class CTarTempDirEntry : public CDirEntry
3289 {
3290 public:
CTarTempDirEntry(const CDirEntry & entry)3291     CTarTempDirEntry(const CDirEntry& entry)
3292         : CDirEntry(GetTmpNameEx(entry.GetDir(), "xNCBItArX")),
3293           m_Entry(entry), m_Pending(false), m_Activated(false)
3294     {
3295         _ASSERT(!Exists()  &&  m_Entry.GetType() != eDir);
3296         if (CDirEntry(m_Entry.GetPath()).Rename(GetPath())) {
3297             m_Activated = m_Pending = true;
3298             errno = 0;
3299         }
3300     }
3301 
~CTarTempDirEntry()3302     virtual ~CTarTempDirEntry()
3303     {
3304         if (m_Activated) {
3305             (void)(m_Pending ? Restore() : RemoveEntry());
3306         }
3307     }
3308 
Restore(void)3309     bool Restore(void)
3310     {
3311         m_Entry.Remove();
3312         errno = 0;
3313         bool renamed = Rename(m_Entry.GetPath());
3314         m_Activated = !renamed;
3315         m_Pending = false;
3316         return renamed;
3317     }
3318 
Release(void)3319     void Release(void)
3320     {
3321         m_Pending = false;
3322     }
3323 
3324 private:
3325     const CDirEntry& m_Entry;
3326     bool             m_Pending;
3327     bool             m_Activated;
3328 };
3329 
3330 
/// Process the current archive entry (m_Current) whose header has been read.
///
/// For eExtract, the entry gets extracted into the file system, subject to
/// the overwrite / update / equal-types / backup flags checked against an
/// already existing destination.  For eTest with fDumpEntryHeaders, the map of
/// a non-empty sparse file gets dumped.  In all cases whatever remains of the
/// entry data (as counted by "size") is skipped in the archive at the end.
///
/// @param action
///   Action being performed on the archive.
/// @param size
///   Size of the entry data that follows the header in the archive.
/// @param entries
///   Entries processed so far (may be NULL);  an existing destination that
///   matches one of them by name and type is considered extracted by this run
///   and is replaced without the flag checks and without a backup.
/// @return
///   true if the entry has been extracted; false otherwise.
bool CTar::x_ProcessEntry(EAction action, Uint8 size,
                          const CTar::TEntries* entries)
{
    CTarEntryInfo::EType type = m_Current.GetType();
    bool extract = action == eExtract;

    if (extract) {
        // Destination for extraction
        // (NB: sparse files are created as regular files)
        unique_ptr<CDirEntry> dst
            (CDirEntry::CreateObject(type == CTarEntryInfo::eSparseFile ?
                                     CDirEntry::eFile : CDirEntry::EType(type),
                                     s_ToFilesystemPath
                                     (m_BaseDir, m_Current.GetName(),
                                      !(m_Flags & fKeepAbsolutePath))));
        // Source for extraction
        unique_ptr<CDirEntry> src;
        // Direntry pending removal
        AutoPtr<CTarTempDirEntry> pending;

        // Dereference symlink if requested
        if (type != CTarEntryInfo::eSymLink  &&
            type != CTarEntryInfo::eHardLink  &&  (m_Flags & fFollowLinks)) {
            dst->DereferenceLink();
        }

        // Actual type in file system (if exists)
        CDirEntry::EType dst_type = dst->GetType();

        // Look if extraction is allowed (when the destination exists)
        if (dst_type != CDirEntry::eUnknown) {
            bool extracted = false; // check if ours (prev. revision extracted)
            if (entries) {
                ITERATE(TEntries, e, *entries) {
                    if (e->GetName() == m_Current.GetName()  &&
                        e->GetType() == m_Current.GetType()) {
                        extracted = true;
                        break;
                    }
                }
            }
            if (!extracted) {
                // Can overwrite it?
                if (!(m_Flags & fOverwrite)) {
                    // File already exists, and cannot be changed
                    extract = false;
                }
                // Can update?
                else if ((m_Flags & fUpdate) == fUpdate  // NB: fOverwrite set
                         &&  (type == CTarEntryInfo::eDir  ||
                              // Make sure that dst is not newer than the entry
                              dst->IsNewer(m_Current.GetModificationCTime(),
                                           // NB: dst must exist
                                           CDirEntry::eIfAbsent_Throw))) {
                    extract = false;
                }
                // Have equal types?
                else if (m_Flags & fEqualTypes) {
                    if (type == CTarEntryInfo::eHardLink) {
                        // For a hard link, it is the type of the link target
                        // that the destination is compared with
                        src.reset(new CDirEntry
                                  (s_ToFilesystemPath
                                   (m_BaseDir, m_Current.GetLinkName(),
                                    !(m_Flags & fKeepAbsolutePath))));
                        if (dst_type != src->GetType()) {
                            extract = false;
                        }
                    } else if (dst_type != CDirEntry::EType(type)) {
                        extract = false;
                    }
                }
            }
            // NB: a directory extracted over an existing directory needs
            // neither a backup nor a removal of the destination
            if (extract  &&  (type != CTarEntryInfo::eDir  ||
                              dst_type != CDirEntry::eDir)) {
                if (!extracted  &&  (m_Flags & fBackup) == fBackup) {
                    // Need to backup the existing destination?
                    CDirEntry tmp(*dst);
                    if (!tmp.Backup(kEmptyStr, CDirEntry::eBackup_Rename)) {
                        int x_errno = CNcbiError::GetLast().Code();
                        TAR_THROW(this, eBackup,
                                  "Failed to backup '" + dst->GetPath() + '\''
                                  + s_OSReason(x_errno));
                    }
                } else {
                    // Do removal safely until extraction is confirmed
                    pending.reset(new CTarTempDirEntry(*dst));
                    // The destination still being there means it could not
                    // have been moved out of the way
                    if (/*!pending->Exists()  ||*/  dst->Exists()) {
                        // Security concern:  do not attempt data extraction
                        // into special files etc, which can harm the system.
#ifdef __GNUC__
                        int x_errno = errno ?: EEXIST;
#else
                        int x_errno  = errno;
                        if (x_errno == 0) {
                            x_errno  = EEXIST;
                        }
#endif //__GNUC__
                        extract = false;
                        TAR_THROW(this, eWrite,
                                  "Cannot extract '" + dst->GetPath() + '\''
                                  + s_OSReason(x_errno));
                    }
                }
            }
        }
        if (extract) {
#ifdef NCBI_OS_UNIX
            // Make sure the owner's permission bits are not masked out by
            // umask during extraction;  the original umask gets restored
            // afterwards, including when an exception propagates
            mode_t u;
            u = umask(022);
            umask(u & ~(S_IRUSR | S_IWUSR | S_IXUSR));
            try {
#endif //NCBI_OS_UNIX
                extract = x_ExtractEntry(size, dst.get(), src.get());
#ifdef NCBI_OS_UNIX
            } catch (...) {
                umask(u);
                throw;
            }
            umask(u);
#endif //NCBI_OS_UNIX
            if (pending) {
                if (extract) {
                    // Confirmed:  the old destination is no longer needed
                    pending->Release();
                } else if (!pending->Restore()) {  // Undo delete
                    int x_errno = errno;
                    TAR_THROW(this, eWrite,
                              "Cannot restore '" + dst->GetPath()
                              + "' back in place" + s_OSReason(x_errno));
                }
            }
        }
    } else if (m_Current.GetType() == CTarEntryInfo::eSparseFile  &&  size
               &&  action == eTest  &&  (m_Flags & fDumpEntryHeaders)) {
        // Only read and dump the sparse map (last argument), no file created
        unique_ptr<CDirEntry> dst
            (CDirEntry::CreateObject(CDirEntry::eFile,
                                     s_ToFilesystemPath
                                     (m_BaseDir, m_Current.GetName(),
                                      !(m_Flags & fKeepAbsolutePath))));
        (void) x_ExtractSparseFile(size, dst.get(), true);
    }

    // Skip whatever entry data have not been consumed above
    x_Skip(BLOCK_OF(ALIGN_SIZE(size)));

    return extract;
}
3474 
3475 
x_Skip(Uint8 blocks)3476 void CTar::x_Skip(Uint8 blocks)
3477 {
3478     _ASSERT(!OFFSET_OF(m_StreamPos));
3479     while (blocks) {
3480 #ifndef NCBI_COMPILER_WORKSHOP
3481         // RogueWave RTL is buggy in seeking pipes -- it clobbers
3482         // (discards) streambuf data instead of leaving it alone..
3483         if (!(m_Flags & (fSlowSkipWithRead | fStreamPipeThrough))
3484             &&  m_BufferPos == 0  &&  blocks >= BLOCK_OF(m_BufferSize)) {
3485             CT_OFF_TYPE fskip =
3486                 (CT_OFF_TYPE)(blocks / BLOCK_OF(m_BufferSize) * m_BufferSize);
3487             _ASSERT(ALIGN_SIZE(fskip) == fskip);
3488             if (m_Stream.rdbuf()->PUBSEEKOFF(fskip, IOS_BASE::cur)
3489                 != (CT_POS_TYPE)((CT_OFF_TYPE)(-1))) {
3490                 blocks      -= BLOCK_OF(fskip);
3491                 m_StreamPos +=          fskip;
3492                 continue;
3493             }
3494             if (m_FileStream) {
3495                 TAR_POST(2, Warning,
3496                          "Cannot fast skip in file archive,"
3497                          " reverting to slow skip");
3498             }
3499             m_Flags |= fSlowSkipWithRead;
3500         }
3501 #endif //NCBI_COMPILER_WORKSHOP
3502         size_t nskip = (blocks < BLOCK_OF(m_BufferSize)
3503                         ? (size_t) SIZE_OF(blocks)
3504                         : m_BufferSize);
3505         _ASSERT(ALIGN_SIZE(nskip) == nskip);
3506         if (!x_ReadArchive(nskip)) {
3507             TAR_THROW(this, eRead,
3508                       "Archive skip failed (EOF)");
3509         }
3510         _ASSERT(nskip);
3511         nskip        = ALIGN_SIZE(nskip);
3512         blocks      -= BLOCK_OF  (nskip);
3513         m_StreamPos +=            nskip;
3514     }
3515     _ASSERT(!OFFSET_OF(m_StreamPos));
3516 }
3517 
3518 
/// Create the file system object for the current archive entry (m_Current).
///
/// @param size
///   Size of the entry data in the archive;  passed on by reference to the
///   file extraction helpers, which read the data.
/// @param dst
///   Destination entry;  for directories and symlinks it must be of the
///   matching CDir / CSymLink class (checked with dynamic_cast).
/// @param src
///   Hard link source if already known to the caller, or NULL to have it
///   figured out from the entry's link name.
/// @return
///   true if the entry has been extracted;  false if it was skipped or could
///   not be created and the failure was only posted as an error.
///   Other failures are thrown via TAR_THROW.
// NB: Clobbers umask, must be restored after the call
bool CTar::x_ExtractEntry(Uint8& size, const CDirEntry* dst,
                          const CDirEntry* src)
{
    CTarEntryInfo::EType type = m_Current.GetType();
    unique_ptr<CDirEntry> src_ptr;  // deleter
    bool extracted = true;  // assume best

    if (type == CTarEntryInfo::eUnknown  &&  !(m_Flags & fSkipUnsupported)) {
        // Conform to POSIX-mandated behavior to extract as files
        type = CTarEntryInfo::eFile;
    }
    switch (type) {
    case CTarEntryInfo::eSparseFile:  // NB: only PAX GNU/1.0 sparse file here
    case CTarEntryInfo::eHardLink:
    case CTarEntryInfo::eFile:
        {{
            _ASSERT(!dst->Exists());
            // Create base directory
            CDir dir(dst->GetDir());
            if (/*dir.GetPath() != "."  &&  */!dir.CreatePath()) {
                int x_errno = errno;
                TAR_THROW(this, eCreate,
                          "Cannot create directory '" + dir.GetPath() + '\''
                          + s_OSReason(x_errno));
            }

            if (type == CTarEntryInfo::eHardLink) {
                if (!src) {
                    // Link source was not provided by the caller
                    src_ptr.reset(new CDirEntry
                                  (s_ToFilesystemPath
                                   (m_BaseDir, m_Current.GetLinkName(),
                                    !(m_Flags & fKeepAbsolutePath))));
                    src = src_ptr.get();
                }
                if (src->GetType() == CDirEntry::eUnknown  &&  size) {
                    // Looks like a dangling hard link but luckily we have
                    // the actual file data (POSIX extension) so use it here.
                    type = CTarEntryInfo::eFile;
                }
            }

            if (type == CTarEntryInfo::eHardLink) {
                _ASSERT(src);
#ifdef NCBI_OS_UNIX
                if (link(src->GetPath().c_str(), dst->GetPath().c_str()) == 0){
                    if (m_Flags & fPreserveAll) {
                        x_RestoreAttrs(m_Current, m_Flags, dst);
                    }
                    break;
                }
                int x_errno = errno;
                TAR_POST(10, Warning,
                         "Cannot hard-link '" + src->GetPath()
                         + "' and '" + dst->GetPath() + '\''
                         + s_OSReason(x_errno) + ", trying to copy");
#endif //NCBI_OS_UNIX
                // Fall back to (or, off UNIX, always do) a copy of the source
                if (!src->Copy(dst->GetPath(),
                               CDirEntry::fCF_Overwrite |
                               CDirEntry::fCF_PreserveAll)) {
                    TAR_POST(11, Error,
                             "Cannot hard-link '" + src->GetPath()
                             + "' and '" + dst->GetPath() + "' via copy");
                    extracted = false;
                    break;
                }
            } else if (type == CTarEntryInfo::eSparseFile  &&  size) {
                if (!(extracted = x_ExtractSparseFile(size, dst)))
                    break;
            } else {
                // Regular file (NB: also a sparse file with no data)
                x_ExtractPlainFile(size, dst);
            }

            // Restore attributes
            if (m_Flags & fPreserveAll) {
                x_RestoreAttrs(m_Current, m_Flags, dst);
            }
        }}
        break;

    case CTarEntryInfo::eDir:
        {{
            // dst is expected to have been created as a CDir object
            const CDir* dir = dynamic_cast<const CDir*>(dst);
            if (!dir  ||  !dir->CreatePath()) {
                int x_errno = !dir ? 0 : CNcbiError::GetLast().Code();
                TAR_THROW(this, eCreate,
                          "Cannot create directory '" + dst->GetPath() + '\''
                          + (!dir
                             ? string(": Internal error")
                             : s_OSReason(x_errno)));
            }
            // NB: Attributes for a directory must be set only after all of its
            // entries have been already extracted.
            _ASSERT(size == 0);
        }}
        break;

    case CTarEntryInfo::eSymLink:
        {{
            // dst is expected to have been created as a CSymLink object
            const CSymLink* symlink = dynamic_cast<const CSymLink*>(dst);
            if (!symlink  ||  !symlink->Create(m_Current.GetLinkName())) {
                int x_errno = !symlink ? 0 : CNcbiError::GetLast().Code();
                string error = "Cannot create symlink '" + dst->GetPath()
                    + "' -> '" + m_Current.GetLinkName() + '\''
                    + (!symlink
                       ? string(": Internal error")
                       : s_OSReason(x_errno));
                // Only an unsupported feature can be skipped (if so allowed)
                if (!symlink  ||  x_errno != ENOTSUP
                    ||  !(m_Flags & fSkipUnsupported)) {
                    TAR_THROW(this, eCreate, error);
                }
                TAR_POST(12, Error, error);
                extracted = false;
            }
            _ASSERT(size == 0);
        }}
        break;

    case CTarEntryInfo::ePipe:
        {{
            _ASSERT(size == 0);
#ifdef NCBI_OS_UNIX
            // Clear umask so that it does not mask out any of the mode bits
            // passed to mkfifo() below
            umask(0);
            int x_errno = 0;
            if (mkfifo(dst->GetPath().c_str(), m_Current.GetMode())/*!= 0*/) {
                x_errno = errno;
                extracted = false;
            }
            if (extracted) {
                break;
            }
            string reason = s_OSReason(x_errno);
#else
            int x_errno = ENOTSUP;
            string reason = ": Feature not supported by host OS";
            extracted = false;
#endif //NCBI_OS_UNIX
            string error
                = "Cannot create FIFO '" + dst->GetPath() + '\'' + reason;
            // Only an unsupported feature can be skipped (if so allowed)
            if (x_errno != ENOTSUP  ||  !(m_Flags & fSkipUnsupported)) {
                TAR_THROW(this, eCreate, error);
            }
            TAR_POST(81, Error, error);
        }}
        break;

    case CTarEntryInfo::eCharDev:
    case CTarEntryInfo::eBlockDev:
        {{
            _ASSERT(size == 0);
#ifdef NCBI_OS_UNIX
            // Clear umask so that it does not mask out any of the mode bits
            // passed to mknod() below
            umask(0);
            int x_errno = 0;
            mode_t m = (m_Current.GetMode() |
                        (type == CTarEntryInfo::eCharDev ? S_IFCHR : S_IFBLK));
            if (mknod(dst->GetPath().c_str(),m,m_Current.m_Stat.orig.st_rdev)){
                x_errno = errno;
                extracted = false;
            }
            if (extracted) {
                break;
            }
            string reason = s_OSReason(x_errno);
#else
            int x_errno = ENOTSUP;
            string reason = ": Feature not supported by host OS";
            extracted = false;
#endif //NCBI_OS_UNIX
            string error
                = "Cannot create " + string(type == CTarEntryInfo::eCharDev
                                            ? "character" : "block")
                + " device '" + dst->GetPath() + '\'' + reason;
            // Only an unsupported feature can be skipped (if so allowed)
            if (x_errno != ENOTSUP  ||  !(m_Flags & fSkipUnsupported)) {
                TAR_THROW(this, eCreate, error);
            }
            TAR_POST(82, Error, error);
        }}
        break;

    case CTarEntryInfo::eVolHeader:
        _ASSERT(size == 0);
        /*NOOP*/
        break;

    case CTarEntryInfo::ePAXHeader:
    case CTarEntryInfo::eGNULongName:
    case CTarEntryInfo::eGNULongLink:
        // Extended headers should have already been processed and not be here
        _TROUBLE;
        /*FALLTHRU*/

    default:
        TAR_POST(13, Error,
                 "Skipping unsupported entry '" + m_Current.GetName()
                 + "' of type #" + NStr::IntToString(int(type)));
        extracted = false;
        break;
    }

    return extracted;
}
3720 
3721 
x_ExtractPlainFile(Uint8 & size,const CDirEntry * dst)3722 void CTar::x_ExtractPlainFile(Uint8& size, const CDirEntry* dst)
3723 {
3724     // FIXME:  Switch to CFileIO eventually to bypass ofstream's obscurity
3725     // w.r.t. errors, extra buffering etc.
3726     CNcbiOfstream ofs(dst->GetPath().c_str(),
3727                       IOS_BASE::trunc |
3728                       IOS_BASE::out   |
3729                       IOS_BASE::binary);
3730     if (!ofs) {
3731         int x_errno = errno;
3732         TAR_THROW(this, eCreate,
3733                   "Cannot create file '" + dst->GetPath() + '\''
3734                   + s_OSReason(x_errno));
3735     }
3736     if (m_Flags & fPreserveMode) {  // NB: secure
3737         x_RestoreAttrs(m_Current, fPreserveMode,
3738                        dst, fTarURead | fTarUWrite);
3739     }
3740 
3741     bool okay = ofs.good();
3742     if (okay) while (size) {
3743         // Read from the archive
3744         size_t nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3745         const char* data = x_ReadArchive(nread);
3746         if (!data) {
3747             TAR_THROW(this, eRead,
3748                       "Unexpected EOF in archive");
3749         }
3750         _ASSERT(nread  &&  ofs.good());
3751         // Write file to disk
3752         try {
3753             okay = ofs.write(data, (streamsize) nread) ? true : false;
3754         } catch (IOS_BASE::failure&) {
3755             okay = false;
3756         }
3757         if (!okay) {
3758             break;
3759         }
3760         size        -=            nread;
3761         m_StreamPos += ALIGN_SIZE(nread);
3762     }
3763 
3764     ofs.close();
3765     if (!okay  ||  !ofs.good()) {
3766         int x_errno = errno;
3767         TAR_THROW(this, eWrite,
3768                   "Cannot " + string(okay ? "close" : "write")
3769                   + " file '" + dst->GetPath()+ '\'' + s_OSReason(x_errno));
3770     }
3771 }
3772 
3773 
x_ReadLine(Uint8 & size,const char * & data,size_t & nread)3774 string CTar::x_ReadLine(Uint8& size, const char*& data, size_t& nread)
3775 {
3776     string line;
3777     for (;;) {
3778         size_t n;
3779         for (n = 0;  n < nread;  ++n) {
3780             if (!isprint((unsigned char) data[n])) {
3781                 break;
3782             }
3783         }
3784         line.append(data, n);
3785         if (n < nread) {
3786             if (data[n] == '\n') {
3787                 ++n;
3788             }
3789             data  += n;
3790             nread -= n;
3791             break;
3792         }
3793         if (!(nread = size < BLOCK_SIZE ? size : BLOCK_SIZE)) {
3794             break;
3795         }
3796         if (!(data = x_ReadArchive(nread))) {
3797             return kEmptyStr;
3798         }
3799         _ASSERT(nread);
3800         if (size >= nread) {
3801             size -= nread;
3802         } else {
3803             size  = 0;
3804         }
3805         m_StreamPos += ALIGN_SIZE(nread);
3806     }
3807     return line;
3808 }
3809 
3810 
3811 template<>
3812 struct Deleter<FILE>
3813 {
DeleteDeleter3814     static void Delete(FILE* fp) { fclose(fp); }
3815 };
3816 
3817 
// fopen() mode string for creating a file for writing only:
// the binary mode ("b") is requested explicitly on MS-Windows.
#ifdef NCBI_OS_MSWIN
#  define NCBI_FILE_WO  "wb"
#else
#  define NCBI_FILE_WO  "w"
#endif /*NCBI_OS_MSWIN*/
3823 
x_ExtractSparseFile(Uint8 & size,const CDirEntry * dst,bool dump)3824 bool CTar::x_ExtractSparseFile(Uint8& size, const CDirEntry* dst, bool dump)
3825 {
3826     _ASSERT(size);
3827 
3828     // Read sparse map first
3829     Uint8 pos = m_StreamPos;
3830     size_t nread = size < BLOCK_SIZE ? (size_t) size : BLOCK_SIZE;
3831     const char* data = x_ReadArchive(nread);
3832     if (!data) {
3833         TAR_THROW(this, eRead,
3834                   "Unexpected EOF in archive");
3835     }
3836     _ASSERT(nread);
3837     if (size >= nread) {
3838         size -= nread;
3839     } else {
3840         size  = 0;
3841     }
3842 
3843     string num(x_ReadLine(size, data, nread));  // "numblocks"
3844     Uint8 n = NStr::StringToUInt8(num,
3845                                   NStr::fConvErr_NoThrow |
3846                                   NStr::fConvErr_NoErrMessage);
3847     if (!n) {
3848         TAR_POST(97, Error,
3849                  "Cannot expand sparse file '" + dst->GetPath()
3850                  + "': Region count is "
3851                  + string(num.empty() ? "missing" : "invalid")
3852                  + " (\"" + num + "\")");
3853         m_StreamPos += ALIGN_SIZE(nread);
3854         return false;
3855     }
3856     m_StreamPos += ALIGN_SIZE(nread);
3857     vector< pair<Uint8, Uint8> > bmap(n);
3858 
3859     for (Uint8 i = 0;  i < n;  ++i) {  // "offset numbytes" pairs
3860         Uint8 val[2];
3861         for (int k = 0;  k < 2;  ++k) {
3862             num = x_ReadLine(size, data, nread);
3863             try {
3864                 val[k] = NStr::StringToUInt8(num);
3865             } catch (...) {
3866                 TAR_POST(98, Error,
3867                          "Cannot expand sparse file '" + dst->GetPath()
3868                          + "': Sparse map "
3869                          + string(k == 0 ? "offset" : "region size")
3870                          + '[' + NStr::NumericToString(i) + "] is "
3871                          + string(num.empty() ? "missing" : "invalid")
3872                          + " (\"" + num + "\")");
3873                 return false;
3874             }
3875         }
3876         bmap[i] = pair<Uint8, Uint8>(val[0], val[1]);
3877     }
3878     if (dump) {
3879         s_DumpSparse(m_FileName, pos, m_BufferSize, m_Current.GetName(), bmap);
3880         /* dontcare */
3881         return false;
3882     }
3883 
3884     // Write the file out
3885     AutoPtr<FILE> fp(::fopen(dst->GetPath().c_str(), NCBI_FILE_WO));
3886     if (!fp) {
3887         int x_errno = errno;
3888         TAR_THROW(this, eCreate,
3889                   "Cannot create file '" + dst->GetPath() + '\''
3890                   + s_OSReason(x_errno));
3891     }
3892     if (m_Flags & fPreserveMode) {  // NB: secure
3893         x_RestoreAttrs(m_Current, fPreserveMode,
3894                        dst, fTarURead | fTarUWrite);
3895     }
3896 
3897     nread = 0;
3898     Uint8 eof = 0;
3899     int x_error = 0;
3900     for (Uint8 i = 0;  i < n;  ++i) {
3901         Uint8 top = bmap[i].first + bmap[i].second;
3902         if (eof < top) {
3903             eof = top;
3904         }
3905         if (!bmap[i].second) {
3906             continue;
3907         }
3908         // non-empty region
3909         if (::fseek(fp.get(), (long) bmap[i].first, SEEK_SET) != 0) {
3910             x_error = errno;
3911             break;
3912         }
3913         Uint8 done = 0;
3914         do {
3915             if (!nread) {
3916                 nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3917                 if (!nread  ||  !(data = x_ReadArchive(nread))) {
3918                     x_error = errno;
3919                     TAR_POST(99, Error,
3920                              "Cannot read archive data for sparse file '"
3921                              + dst->GetPath() + "', region #"
3922                              + NStr::NumericToString(i)
3923                              + (nread
3924                                 ? s_OSReason(x_error)
3925                                 : string(": End-of-data")));
3926                     x_error = -1;
3927                     eof = 0;
3928                     break;
3929                 }
3930                 _ASSERT(nread);
3931                 size        -=            nread;
3932                 m_StreamPos += ALIGN_SIZE(nread);
3933             }
3934             size_t xread = nread;
3935             if (xread >          bmap[i].second - done) {
3936                 xread = (size_t)(bmap[i].second - done);
3937             }
3938             if (::fwrite(data, 1, xread, fp.get()) != xread) {
3939                 if (!(x_error = errno)) {
3940                     x_error = -1;  // Make sure non-zero
3941                 }
3942                 break;
3943             }
3944             done  += xread;
3945             data  += xread;
3946             nread -= xread;
3947         } while (done < bmap[i].second);
3948         if (x_error) {
3949             break;
3950         }
3951     }
3952 
3953     // Finalize the file
3954     bool closed = ::fclose(fp.release()) == 0 ? true : false;
3955     if (!x_error  &&  !closed) {
3956         x_error = errno;
3957     }
3958     string reason;
3959     if (x_error) {
3960         reason = s_OSReason(x_error);
3961     } else if (eof) {
3962         x_error = s_TruncateFile(dst->GetPath(), eof);
3963         if (x_error) {
3964 #ifdef NCBI_OS_MSWIN
3965             TCHAR* str = NULL;
3966             DWORD  rv = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
3967                                       FORMAT_MESSAGE_FROM_SYSTEM     |
3968                                       FORMAT_MESSAGE_MAX_WIDTH_MASK  |
3969                                       FORMAT_MESSAGE_IGNORE_INSERTS,
3970                                       NULL, (DWORD) x_error,
3971                                       MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT),
3972                                       (LPTSTR) &str, 0, NULL);
3973             if (str) {
3974                 if (rv) {
3975                     _ASSERT(*str);
3976                     reason = string(": ") + _T_STDSTRING(str);
3977                 }
3978                 ::LocalFree((HLOCAL) str);
3979             }
3980             if (reason.empty()) {
3981                 reason = ": Error 0x" + NStr::UIntToString(x_error, 0, 16);
3982             }
3983 #else
3984             reason = s_OSReason(x_error);
3985 #endif //NCBI_OS_MSWIN
3986         }
3987     }
3988     if (x_error) {
3989         _ASSERT(!reason.empty());
3990         TAR_POST(100, Error,
3991                  "Cannot write sparse file '" + dst->GetPath() + '\''+ reason);
3992         dst->Remove();
3993         return false;
3994     }
3995 
3996     return true;
3997 }
3998 
3999 
x_RestoreAttrs(const CTarEntryInfo & info,TFlags what,const CDirEntry * path,TTarMode perm) const4000 void CTar::x_RestoreAttrs(const CTarEntryInfo& info,
4001                           TFlags               what,
4002                           const CDirEntry*     path,
4003                           TTarMode             perm) const
4004 {
4005     unique_ptr<CDirEntry> path_ptr;  // deleter
4006     if (!path) {
4007         path_ptr.reset(new CDirEntry(s_ToFilesystemPath
4008                                      (m_BaseDir, info.GetName(),
4009                                       !(m_Flags & fKeepAbsolutePath))));
4010         path = path_ptr.get();
4011     }
4012 
4013     // Date/time.
4014     // Set the time before permissions because on some platforms this setting
4015     // can also affect file permissions.
4016     if (what & fPreserveTime) {
4017         CTime modification(info.GetModificationTime());
4018         CTime last_access(info.GetLastAccessTime());
4019         CTime creation(info.GetCreationTime());
4020         modification.SetNanoSecond(info.m_Stat.mtime_nsec);
4021         last_access.SetNanoSecond(info.m_Stat.atime_nsec);
4022         creation.SetNanoSecond(info.m_Stat.ctime_nsec);
4023         if (!path->SetTime(&modification, &last_access, &creation)) {
4024             int x_errno = CNcbiError::GetLast().Code();
4025             TAR_THROW(this, eRestoreAttrs,
4026                       "Cannot restore date/time of '" + path->GetPath() + '\''
4027                       + s_OSReason(x_errno));
4028         }
4029     }
4030 
4031     // Owner.
4032     // This must precede changing permissions because on some
4033     // systems chown() clears the set[ug]id bits for non-superusers
4034     // thus resulting in incorrect permissions.
4035     if (what & fPreserveOwner) {
4036         unsigned int uid, gid;
4037         // 2-tier trial:  first using the names, then using numeric IDs.
4038         // Note that it is often impossible to restore the original owner
4039         // without the super-user rights so no error checking is done here.
4040         if (!path->SetOwner(info.GetUserName(),
4041                             info.GetGroupName(),
4042                             eIgnoreLinks, &uid, &gid)  &&
4043             !path->SetOwner(kEmptyStr, info.GetGroupName(), eIgnoreLinks)) {
4044             if (uid != info.GetUserId()  ||  gid != info.GetGroupId()) {
4045                 string user = NStr::UIntToString(info.GetUserId());
4046                 string group = NStr::UIntToString(info.GetGroupId());
4047                 if (!path->SetOwner(user, group, eIgnoreLinks)) {
4048                     path->SetOwner(kEmptyStr, group, eIgnoreLinks);
4049                 }
4050             }
4051         }
4052     }
4053 
4054     // Mode.
4055     // Set them last.
4056     if ((what & fPreserveMode)
4057         &&  info.GetType() != CTarEntryInfo::ePipe
4058         &&  info.GetType() != CTarEntryInfo::eCharDev
4059         &&  info.GetType() != CTarEntryInfo::eBlockDev) {
4060         bool failed = false;
4061 #ifdef NCBI_OS_UNIX
4062         // We won't change permissions for sym.links because lchmod() is not
4063         // portable, and also is not implemented on majority of platforms.
4064         if (info.GetType() != CTarEntryInfo::eSymLink) {
4065             // Use raw mode here to restore most of the bits
4066             mode_t mode = s_TarToMode(perm ? perm : info.m_Stat.orig.st_mode);
4067             if (chmod(path->GetPath().c_str(), mode) != 0) {
4068                 // May fail due to setuid/setgid bits -- strip'em and try again
4069                 if (mode &   (S_ISUID | S_ISGID)) {
4070                     mode &= ~(S_ISUID | S_ISGID);
4071                     failed = chmod(path->GetPath().c_str(), mode) != 0;
4072                 } else {
4073                     failed = true;
4074                 }
4075                 CNcbiError::SetFromErrno();
4076             }
4077         }
4078 #else
4079         CDirEntry::TMode user, group, other;
4080         CDirEntry::TSpecialModeBits special_bits;
4081         if (perm) {
4082             s_TarToMode(perm, &user, &group, &other, &special_bits);
4083         } else {
4084             info.GetMode(&user, &group, &other, &special_bits);
4085         }
4086         failed = !path->SetMode(user, group, other, special_bits);
4087 #endif //NCBI_OS_UNIX
4088         if (failed) {
4089             int x_errno = CNcbiError::GetLast().Code();
4090             TAR_THROW(this, eRestoreAttrs,
4091                       "Cannot " + string(perm ? "change" : "restore")
4092                       + " mode bits of '" + path->GetPath() + '\''
4093                       + s_OSReason(x_errno));
4094         }
4095     }
4096 }
4097 
4098 
// Convert "dirname" into a base directory string:  pass it through
// s_ToFilesystemPath() (with no base of its own), use forward slashes only
// (on MS-Windows), and make sure the result ends with a slash.
static string s_BaseDir(const string& dirname)
{
    string result = s_ToFilesystemPath(kEmptyStr, dirname);
#ifdef NCBI_OS_MSWIN
    // Forward slashes are used as the only separator
    NStr::ReplaceInPlace(result, "\\", "/");
#endif //NCBI_OS_MSWIN
    const bool slashed = NStr::EndsWith(result, '/');
    if (!slashed) {
        result += '/';
    }
    return result;
}
4110 
4111 
x_Append(const string & name,const TEntries * toc)4112 unique_ptr<CTar::TEntries> CTar::x_Append(const string&   name,
4113                                           const TEntries* toc)
4114 {
4115     unique_ptr<TEntries>       entries(new TEntries);
4116     unique_ptr<CDir::TEntries> dir;
4117 
4118     const EFollowLinks follow_links = (m_Flags & fFollowLinks ?
4119                                        eFollowLinks : eIgnoreLinks);
4120     unsigned int uid = 0, gid = 0;
4121     bool update = true;
4122 
4123     // Create the entry info
4124     m_Current = CTarEntryInfo(m_StreamPos);
4125 
4126     // Compose entry name for relative names
4127     string path = s_ToFilesystemPath(m_BaseDir, name);
4128 
4129     // Get direntry information
4130     CDirEntry entry(path);
4131     CDirEntry::SStat st;
4132     if (!entry.Stat(&st, follow_links)) {
4133         int x_errno = errno;
4134         TAR_THROW(this, eOpen,
4135                   "Cannot get status of '" + path + '\''+ s_OSReason(x_errno));
4136     }
4137     CDirEntry::EType type = CDirEntry::GetType(st.orig);
4138 
4139     string temp = s_ToArchiveName(m_BaseDir, path);
4140 
4141     if (temp.empty()) {
4142         TAR_THROW(this, eBadName,
4143                   "Empty entry name not allowed");
4144     }
4145 
4146     list<CTempString> elems;
4147     NStr::Split(temp, "/", elems,
4148                 NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
4149     if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4150         TAR_THROW(this, eBadName,
4151                   "Name '" + temp + "' embeds parent directory (\"..\")");
4152     }
4153     if (m_Mask[eExcludeMask].mask
4154         &&  s_MatchExcludeMask(temp, elems,
4155                                m_Mask[eExcludeMask].mask,
4156                                m_Mask[eExcludeMask].acase)) {
4157         goto out;
4158     }
4159     elems.clear();
4160     if (type == CDirEntry::eDir  &&  temp != "/") {
4161         temp += '/';
4162     }
4163 
4164     m_Current.m_Name.swap(temp);
4165     m_Current.m_Type = CTarEntryInfo::EType(type);
4166     if (m_Current.GetType() == CTarEntryInfo::eSymLink) {
4167         _ASSERT(!follow_links);
4168         m_Current.m_LinkName = entry.LookupLink();
4169         if (m_Current.GetLinkName().empty()) {
4170             TAR_THROW(this, eBadName,
4171                       "Empty link name not allowed");
4172         }
4173     }
4174 
4175     entry.GetOwner(&m_Current.m_UserName, &m_Current.m_GroupName,
4176                    follow_links, &uid, &gid);
4177 #ifdef NCBI_OS_UNIX
4178     if (NStr::UIntToString(uid) == m_Current.GetUserName()) {
4179         m_Current.m_UserName.erase();
4180     }
4181     if (NStr::UIntToString(gid) == m_Current.GetGroupName()) {
4182         m_Current.m_GroupName.erase();
4183     }
4184 #endif //NCBI_OS_UNIX
4185 #ifdef NCBI_OS_MSWIN
4186     // These are fake but we don't want to leave plain 0 (Unix root) in there
4187     st.orig.st_uid = (uid_t) uid;
4188     st.orig.st_gid = (gid_t) gid;
4189 #endif //NCBI_OS_MSWIN
4190 
4191     m_Current.m_Stat = st;
4192     // Fixup for mode bits
4193     m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(st.orig.st_mode);
4194 
4195     // Check if we need to update this entry in the archive
4196     if (toc) {
4197         bool found = false;
4198 
4199         if (type != CDirEntry::eUnknown) {
4200             // Start searching from the end of the list, to find
4201             // the most recent entry (if any) first
4202             _ASSERT(temp.empty());
4203             REVERSE_ITERATE(TEntries, e, *toc) {
4204                 if (!temp.empty()) {
4205                     if (e->GetType() == CTarEntryInfo::eHardLink  ||
4206                         temp != s_ToFilesystemPath(m_BaseDir, e->GetName())) {
4207                         continue;
4208                     }
4209                 } else if (path == s_ToFilesystemPath(m_BaseDir,e->GetName())){
4210                     found = true;
4211                     if (e->GetType() == CTarEntryInfo::eHardLink) {
4212                         temp = s_ToFilesystemPath(m_BaseDir, e->GetLinkName());
4213                         continue;
4214                     }
4215                 } else {
4216                     continue;
4217                 }
4218                 if (m_Current.GetType() != e->GetType()) {
4219                     if (m_Flags & fEqualTypes) {
4220                         goto out;
4221                     }
4222                 } else if (m_Current.GetType() == CTarEntryInfo::eSymLink
4223                            &&  m_Current.GetLinkName() == e->GetLinkName()) {
4224                     goto out;
4225                 }
4226                 if (m_Current.GetModificationCTime()
4227                     <= e->GetModificationCTime()) {
4228                     update = false;  // same(or older), no update
4229                 }
4230                 break;
4231             }
4232         }
4233 
4234         if (!update  ||  (!found  &&  (m_Flags & (fUpdate & ~fOverwrite)))) {
4235             if (type != CDirEntry::eDir  &&  type != CDirEntry::eUnknown) {
4236                 goto out;
4237             }
4238             // Directories always get recursive treatment later
4239             update = false;
4240         }
4241     }
4242 
4243     // Append the entry
4244     switch (type) {
4245     case CDirEntry::eFile:
4246         _ASSERT(update);
4247         if (x_AppendFile(path)) {
4248             entries->push_back(m_Current);
4249         }
4250         break;
4251 
4252     case CDirEntry::eBlockSpecial:
4253     case CDirEntry::eCharSpecial:
4254     case CDirEntry::eSymLink:
4255     case CDirEntry::ePipe:
4256         _ASSERT(update);
4257         m_Current.m_Stat.orig.st_size = 0;
4258         x_WriteEntryInfo(path);
4259         entries->push_back(m_Current);
4260         break;
4261 
4262     case CDirEntry::eDir:
4263         dir.reset(CDir(path).GetEntriesPtr(kEmptyStr, CDir::eIgnoreRecursive));
4264         if (!dir) {
4265             int x_errno = CNcbiError::GetLast().Code();
4266             string error =
4267                 "Cannot list directory '" + path + '\'' + s_OSReason(x_errno);
4268             if (m_Flags & fIgnoreUnreadable) {
4269                 TAR_POST(101, Error, error);
4270                 break;
4271             }
4272             TAR_THROW(this, eRead, error);
4273         }
4274         if (update) {
4275             m_Current.m_Stat.orig.st_size = 0;
4276             x_WriteEntryInfo(path);
4277             entries->push_back(m_Current);
4278         }
4279         // Append/update all files from that directory
4280         ITERATE(CDir::TEntries, e, *dir) {
4281             unique_ptr<TEntries> add = x_Append((*e)->GetPath(), toc);
4282             entries->splice(entries->end(), *add);
4283         }
4284         break;
4285 
4286     case CDirEntry::eDoor:
4287     case CDirEntry::eSocket:
4288         // Tar does not have any provisions to store this kind of entries
4289         if (!(m_Flags & fSkipUnsupported)) {
4290             TAR_POST(3, Warning,
4291                      "Skipping non-archiveable "
4292                      + string(type == CDirEntry::eSocket ? "socket" : "door")
4293                      + " '" + path + '\'');
4294         }
4295         break;
4296 
4297     case CDirEntry::eUnknown:
4298         if (!(m_Flags & fSkipUnsupported)) {
4299             TAR_THROW(this, eUnsupportedSource,
4300                       "Unable to archive '" + path + '\'');
4301         }
4302         /*FALLTHRU*/
4303 
4304     default:
4305         if (type != CDirEntry::eUnknown) {
4306             _TROUBLE;
4307         }
4308         TAR_POST(14, Error,
4309                  "Skipping unsupported source '" + path
4310                  + "' of type #" + NStr::IntToString(int(type)));
4311         break;
4312     }
4313 
4314  out:
4315     return entries;
4316 }
4317 
4318 
x_Append(const CTarUserEntryInfo & entry,CNcbiIstream & is)4319 unique_ptr<CTar::TEntries> CTar::x_Append(const CTarUserEntryInfo& entry,
4320                                           CNcbiIstream& is)
4321 {
4322     unique_ptr<TEntries> entries(new TEntries);
4323 
4324     // Create a temp entry info first
4325     m_Current = CTarEntryInfo(m_StreamPos);
4326 
4327     string temp = s_ToArchiveName(kEmptyStr, entry.GetName());
4328 
4329     while (NStr::EndsWith(temp, '/')) { // NB: directories are not allowed here
4330         temp.resize(temp.size() - 1);
4331     }
4332     if (temp.empty()) {
4333         TAR_THROW(this, eBadName,
4334                   "Empty entry name not allowed");
4335     }
4336 
4337     list<CTempString> elems;
4338     NStr::Split(temp, "/", elems,
4339                 NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
4340     if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4341         TAR_THROW(this, eBadName,
4342                   "Name '" + temp + "' embeds parent directory (\"..\")");
4343     }
4344     elems.clear();
4345 
4346     // Recreate entry info
4347     m_Current = entry;
4348     m_Current.m_Name.swap(temp);
4349     m_Current.m_Pos = m_StreamPos;
4350     m_Current.m_Type = CTarEntryInfo::eFile;
4351 
4352     if (!is.good()) {
4353         TAR_THROW(this, eRead,
4354                   "Bad input file stream");
4355     }
4356 
4357     CTime::GetCurrentTimeT(&m_Current.m_Stat.orig.st_ctime,
4358                            &m_Current.m_Stat.ctime_nsec);
4359     m_Current.m_Stat.orig.st_mtime
4360         = m_Current.m_Stat.orig.st_atime
4361         = m_Current.m_Stat.orig.st_ctime;
4362     m_Current.m_Stat.mtime_nsec
4363         = m_Current.m_Stat.atime_nsec
4364         = m_Current.m_Stat.ctime_nsec;
4365 
4366 #ifdef NCBI_OS_UNIX
4367     // use regular file mode, adjusted with umask()
4368     mode_t mode = s_TarToMode(fTarURead | fTarUWrite |
4369                               fTarGRead | fTarGWrite |
4370                               fTarORead | fTarOWrite);
4371     mode_t u;
4372 #  ifdef HAVE_GETUMASK
4373     // NB: thread-safe
4374     u = getumask();
4375 #  else
4376     u = umask(022);
4377     umask(u);
4378 #  endif //HAVE_GETUMASK
4379     mode &= ~u;
4380     m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(mode);
4381 
4382     m_Current.m_Stat.orig.st_uid = geteuid();
4383     m_Current.m_Stat.orig.st_gid = getegid();
4384 
4385     CUnixFeature::GetUserNameByUID(m_Current.m_Stat.orig.st_uid)
4386         .swap(m_Current.m_UserName);
4387     CUnixFeature::GetGroupNameByGID(m_Current.m_Stat.orig.st_gid)
4388         .swap(m_Current.m_GroupName);
4389 #endif //NCBI_OS_UNIX
4390 #ifdef NCBI_OS_MSWIN
4391     // safe file mode
4392     m_Current.m_Stat.orig.st_mode = (fTarURead | fTarUWrite |
4393                                      fTarGRead | fTarORead);
4394 
4395     unsigned int uid = 0, gid = 0;
4396     CWinSecurity::GetObjectOwner(CCurrentProcess::GetHandle(),
4397                                  SE_KERNEL_OBJECT,
4398                                  &m_Current.m_UserName,
4399                                  &m_Current.m_GroupName,
4400                                  &uid, &gid);
4401     // These are fake but we don't want to leave plain 0 (Unix root) in there
4402     m_Current.m_Stat.orig.st_uid = (uid_t) uid;
4403     m_Current.m_Stat.orig.st_gid = (gid_t) gid;
4404 #endif //NCBI_OS_MSWIN
4405 
4406     x_AppendStream(entry.GetName(), is);
4407 
4408     entries->push_back(m_Current);
4409     return entries;
4410 }
4411 
4412 
4413 // Regular entries only!
x_AppendStream(const string & name,CNcbiIstream & is)4414 void CTar::x_AppendStream(const string& name, CNcbiIstream& is)
4415 {
4416     _ASSERT(m_Current.GetType() == CTarEntryInfo::eFile);
4417 
4418     // Write entry header
4419     x_WriteEntryInfo(name);
4420 
4421     errno = 0;
4422     Uint8 size = m_Current.GetSize();
4423     while (size) {
4424         // Write file contents
4425         _ASSERT(m_BufferPos < m_BufferSize);
4426         size_t avail = m_BufferSize - m_BufferPos;
4427         if (avail > size) {
4428             avail = (size_t) size;
4429         }
4430         // Read file
4431         int x_errno = 0;
4432         streamsize xread;
4433         if (is.good()) {
4434             try {
4435                 if (!is.read(m_Buffer + m_BufferPos, (streamsize) avail)) {
4436                     x_errno = errno;
4437                     xread = -1;
4438                 } else {
4439                     xread = is.gcount();
4440                 }
4441             } catch (IOS_BASE::failure&) {
4442                 xread = -1;
4443             }
4444         } else {
4445             xread = -1;
4446         }
4447         if (xread <= 0) {
4448             ifstream* ifs = dynamic_cast<ifstream*>(&is);
4449             TAR_THROW(this, eRead,
4450                       "Cannot read "
4451                       + string(ifs ? "file" : "stream")
4452                       + " '" + name + '\'' + s_OSReason(x_errno));
4453         }
4454         // Write buffer to the archive
4455         avail = (size_t) xread;
4456         x_WriteArchive(avail);
4457         size -= avail;
4458     }
4459 
4460     // Write zeros to get the written size a multiple of BLOCK_SIZE
4461     size_t zero = ALIGN_SIZE(m_BufferPos) - m_BufferPos;
4462     memset(m_Buffer + m_BufferPos, 0, zero);
4463     x_WriteArchive(zero);
4464     _ASSERT(!OFFSET_OF(m_BufferPos)  &&  !OFFSET_OF(m_StreamPos));
4465 }
4466 
4467 
4468 // Regular files only!
x_AppendFile(const string & file)4469 bool CTar::x_AppendFile(const string& file)
4470 {
4471     _ASSERT(m_Current.GetType() == CTarEntryInfo::eFile);
4472 
4473     // FIXME:  Switch to CFileIO eventually to avoid ifstream's obscurity
4474     // w.r.t. errors, an extra layer of buffering etc.
4475     CNcbiIfstream ifs;
4476 
4477     // Open file
4478     ifs.open(file.c_str(), IOS_BASE::binary | IOS_BASE::in);
4479     if (!ifs) {
4480         int x_errno = errno;
4481         string error
4482             = "Cannot open file '" + file + '\'' + s_OSReason(x_errno);
4483         if (m_Flags & fIgnoreUnreadable) {
4484             TAR_POST(102, Error, error);
4485             return false;
4486         }
4487         TAR_THROW(this, eOpen, error);
4488     }
4489 
4490     x_AppendStream(file, ifs);
4491     return true;
4492 }
4493 
4494 
SetMask(CMask * mask,EOwnership own,EMaskType type,NStr::ECase acase)4495 void CTar::SetMask(CMask*    mask, EOwnership  own,
4496                    EMaskType type, NStr::ECase acase)
4497 {
4498     int idx = int(type);
4499     if (idx < 0  ||  sizeof(m_Mask)/sizeof(m_Mask[0]) <= (size_t) idx){
4500         TAR_THROW(this, eMemory,
4501                   "Mask type is out of range: " + NStr::IntToString(idx));
4502     }
4503     if (m_Mask[idx].owned) {
4504         delete m_Mask[idx].mask;
4505     }
4506     m_Mask[idx].mask  = mask;
4507     m_Mask[idx].acase = acase;
4508     m_Mask[idx].owned = mask ? own : eNoOwnership;
4509 }
4510 
4511 
SetBaseDir(const string & dirname)4512 void CTar::SetBaseDir(const string& dirname)
4513 {
4514     string dir = s_BaseDir(dirname);
4515     m_BaseDir.swap(dir);
4516 }
4517 
4518 
EstimateArchiveSize(const TFiles & files,size_t blocking_factor,const string & base_dir)4519 Uint8 CTar::EstimateArchiveSize(const TFiles& files,
4520                                 size_t blocking_factor,
4521                                 const string& base_dir)
4522 {
4523     const size_t buffer_size = SIZE_OF(blocking_factor);
4524     string prefix = s_BaseDir(base_dir);
4525     Uint8 result = 0;
4526 
4527     ITERATE(TFiles, f, files) {
4528         // Count in the file size
4529         result += BLOCK_SIZE/*header*/ + ALIGN_SIZE(f->second);
4530 
4531         // Count in the long name (if any)
4532         string path    = s_ToFilesystemPath(prefix, f->first);
4533         string name    = s_ToArchiveName   (prefix, path);
4534         size_t namelen = name.size() + 1;
4535         if (namelen > sizeof(((SHeader*) 0)->name)) {
4536             result += BLOCK_SIZE/*long name header*/ + ALIGN_SIZE(namelen);
4537         }
4538     }
4539     if (result) {
4540         result += BLOCK_SIZE << 1;  // EOT
4541         Uint8 padding = result % buffer_size;
4542         if (padding) {
4543             result += buffer_size - padding;
4544         }
4545     }
4546 
4547     return result;
4548 }
4549 
4550 
4551 class CTarReader : public IReader
4552 {
4553 public:
CTarReader(CTar * tar,EOwnership own=eNoOwnership)4554     CTarReader(CTar* tar, EOwnership own = eNoOwnership)
4555         : m_Read(0), m_Eof(false), m_Bad(false), m_Tar(tar, own)
4556     { }
4557 
4558     virtual ERW_Result Read(void* buf, size_t count, size_t* bytes_read = 0);
4559     virtual ERW_Result PendingCount(size_t* count);
4560 
4561 private:
4562     Uint8         m_Read;
4563     bool          m_Eof;
4564     bool          m_Bad;
4565     AutoPtr<CTar> m_Tar;
4566 };
4567 
4568 
Read(void * buf,size_t count,size_t * bytes_read)4569 ERW_Result CTarReader::Read(void* buf, size_t count, size_t* bytes_read)
4570 {
4571     if (m_Bad  ||  !count) {
4572         if (bytes_read) {
4573             *bytes_read = 0;
4574         }
4575         return m_Bad ? eRW_Error
4576             : (m_Read < m_Tar->m_Current.GetSize()  ||  !m_Eof) ? eRW_Success
4577             : eRW_Eof;
4578     }
4579 
4580     size_t read;
4581     _ASSERT(m_Tar->m_Current.GetSize() >= m_Read);
4582     Uint8  left = m_Tar->m_Current.GetSize() - m_Read;
4583     if (!left) {
4584         m_Eof = true;
4585         read = 0;
4586     } else {
4587         if (count >          left) {
4588             count = (size_t) left;
4589         }
4590 
4591         size_t off = (size_t) OFFSET_OF(m_Read);
4592         if (off) {
4593             read = BLOCK_SIZE - off;
4594             if (m_Tar->m_BufferPos) {
4595                 off += m_Tar->m_BufferPos  - BLOCK_SIZE;
4596             } else {
4597                 off += m_Tar->m_BufferSize - BLOCK_SIZE;
4598             }
4599             if (read > count) {
4600                 read = count;
4601             }
4602             memcpy(buf, m_Tar->m_Buffer + off, read);
4603             m_Read += read;
4604             count  -= read;
4605             if (!count) {
4606                 goto out;
4607             }
4608             buf = (char*) buf + read;
4609         } else {
4610             read = 0;
4611         }
4612 
4613         off = m_Tar->m_BufferPos;  // NB: x_ReadArchive() changes m_BufferPos
4614         if (m_Tar->x_ReadArchive(count)) {
4615             _ASSERT(count);
4616             memcpy(buf, m_Tar->m_Buffer + off, count);
4617             m_Read             +=            count;
4618             read               +=            count;
4619             m_Tar->m_StreamPos += ALIGN_SIZE(count);
4620             _ASSERT(!OFFSET_OF(m_Tar->m_StreamPos));
4621         } else {
4622             m_Bad = true;
4623             _ASSERT(!m_Tar->m_Stream.good());
4624             // If we don't throw here, it may look like an ordinary EOF
4625             TAR_THROW(m_Tar, eRead,
4626                       "Read error while streaming");
4627         }
4628     }
4629 
4630  out:
4631     _ASSERT(!m_Bad);
4632     if (bytes_read) {
4633         *bytes_read = read;
4634     }
4635     return m_Eof ? eRW_Eof : eRW_Success;
4636 }
4637 
4638 
PendingCount(size_t * count)4639 ERW_Result CTarReader::PendingCount(size_t* count)
4640 {
4641     if (m_Bad) {
4642         return eRW_Error;
4643     }
4644     _ASSERT(m_Tar->m_Current.GetSize() >= m_Read);
4645     Uint8 left = m_Tar->m_Current.GetSize() - m_Read;
4646     if (!left  &&  m_Eof) {
4647         return eRW_Eof;
4648     }
4649     size_t avail = BLOCK_SIZE - (size_t) OFFSET_OF(m_Read);
4650     _ASSERT(m_Tar->m_BufferPos < m_Tar->m_BufferSize);
4651     if (m_Tar->m_BufferPos) {
4652         avail += m_Tar->m_BufferSize - m_Tar->m_BufferPos;
4653     }
4654     if (!avail  &&  m_Tar->m_Stream.good()) {
4655         // NB: good() subsumes there's streambuf (bad() otherwise)
4656         streamsize sb_avail = m_Tar->m_Stream.rdbuf()->in_avail();
4657         if (sb_avail != -1) {
4658             avail = (size_t) sb_avail;
4659         }
4660     }
4661     *count = avail > left ? (size_t) left : avail;
4662     return eRW_Success;
4663 }
4664 
4665 
Extract(CNcbiIstream & is,const string & name,CTar::TFlags flags)4666 IReader* CTar::Extract(CNcbiIstream& is,
4667                        const string& name, CTar::TFlags flags)
4668 {
4669     unique_ptr<CTar> tar(new CTar(is, 1/*blocking factor*/));
4670     tar->SetFlags(flags & ~fStreamPipeThrough);
4671 
4672     unique_ptr<CMaskFileName> mask(new CMaskFileName);
4673     mask->Add(name);
4674     tar->SetMask(mask.get(), eTakeOwnership);
4675     mask.release();
4676 
4677     tar->x_Open(eInternal);
4678     unique_ptr<TEntries> temp = tar->x_ReadAndProcess(eInternal);
4679     _ASSERT(temp  &&  temp->size() < 2);
4680     if (temp->size() < 1) {
4681         return 0;
4682     }
4683 
4684     _ASSERT(tar->m_Current == temp->front());
4685     CTarEntryInfo::EType type = tar->m_Current.GetType();
4686     if (type != CTarEntryInfo::eFile
4687         &&  (type != CTarEntryInfo::eUnknown  ||  (flags & fSkipUnsupported))){
4688         return 0;
4689     }
4690 
4691     IReader* ir = new CTarReader(tar.get(), eTakeOwnership);
4692     tar.release();
4693     return ir;
4694 }
4695 
4696 
GetNextEntryData(void)4697 IReader* CTar::GetNextEntryData(void)
4698 {
4699     CTarEntryInfo::EType type = m_Current.GetType();
4700     return type != CTarEntryInfo::eFile
4701         &&  (type != CTarEntryInfo::eUnknown  ||  (m_Flags & fSkipUnsupported))
4702         ? 0 : new CTarReader(this);
4703 }
4704 
4705 
4706 END_NCBI_SCOPE
4707