1 /* $Id: tar.cpp 619589 2020-11-06 17:59:43Z lavr $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Vladimir Ivanov
27 * Anton Lavrentiev
28 *
29 * File Description:
30 * Tar archive API.
31 *
32 * Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
33 * GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
34 * archives are created using POSIX (genuine ustar) format, using GNU
35 * extensions for long names/links only when unavoidable. It cannot,
36 * however, handle all the exotics like sparse files (except for GNU/1.0
37 * sparse PAX extension) and contiguous files (yet still can work around both
38 * of them gracefully, if needed), multivolume / incremental archives, etc.
39 * but just regular files, devices (character or block), FIFOs, directories,
40 * and limited links: can extract both hard- and symlinks, but can store
41 * symlinks only. Also, this implementation is only minimally PAX(Portable
42 * Archive eXchange)-aware for file extractions (and does not yet use any PAX
43 * extensions to store the files).
44 *
45 */
46
47 #include <ncbi_pch.hpp>
48 // Cancel __wur (warn unused result) ill effects in GCC
49 #ifdef _FORTIFY_SOURCE
50 # undef _FORTIFY_SOURCE
51 #endif /*_FORTIFY_SOURCE*/
52 #define _FORTIFY_SOURCE 0
53 #include <util/compress/tar.hpp>
54 #include <util/error_codes.hpp>
55
56 #if !defined(NCBI_OS_UNIX) && !defined(NCBI_OS_MSWIN)
57 # error "Class CTar can be defined on UNIX and MS-Windows platforms only!"
58 #endif
59
60 #if defined(NCBI_OS_UNIX)
61 # include "../../../corelib/ncbi_os_unix_p.hpp"
62 # include <grp.h>
63 # include <pwd.h>
64 # include <unistd.h>
65 # ifdef NCBI_OS_IRIX
66 # include <sys/mkdev.h>
67 # endif //NCBI_OS_IRIX
68 # ifdef HAVE_SYS_SYSMACROS_H
69 # include <sys/sysmacros.h>
70 # endif //HAVE_SYS_SYSMACROS_H
71 # ifdef NCBI_OS_DARWIN
72 // macOS supplies these as inline functions rather than macros.
73 # define major major
74 # define minor minor
75 # define makedev makedev
76 # endif
77 # if !defined(major) || !defined(minor) || !defined(makedev)
78 # error "Device macros undefined in this UNIX build!"
79 # endif
80 #elif defined(NCBI_OS_MSWIN)
81 # include "../../../corelib/ncbi_os_mswin_p.hpp"
82 # include <io.h>
83 typedef unsigned int mode_t;
84 typedef unsigned int uid_t;
85 typedef unsigned int gid_t;
86 #endif //NCBI_OS...
87
88
89 #define NCBI_USE_ERRCODE_X Util_Compress
90 #define NCBI_MODULE NCBITAR
91
92
93 BEGIN_NCBI_SCOPE
94
95
96 /////////////////////////////////////////////////////////////////////////////
97 //
98 // TAR helper routines
99 //
100
101 // Convert a number to an octal string padded to the left
102 // with [leading] zeros ('0') and having _no_ trailing '\0'.
s_NumToOctal(Uint8 val,char * ptr,size_t len)103 static bool s_NumToOctal(Uint8 val, char* ptr, size_t len)
104 {
105 _ASSERT(len > 0);
106 do {
107 ptr[--len] = char('0' + char(val & 7));
108 val >>= 3;
109 } while (len);
110 return val ? false : true;
111 }
112
113
114 // Convert an octal number (possibly preceded by spaces) to numeric form.
115 // Stop either at the end of the field or at first '\0' (if any).
s_OctalToNum(Uint8 & val,const char * ptr,size_t len)116 static bool s_OctalToNum(Uint8& val, const char* ptr, size_t len)
117 {
118 _ASSERT(ptr && len > 0);
119 size_t i = *ptr ? 0 : 1;
120 while (i < len && ptr[i]) {
121 if (!isspace((unsigned char) ptr[i]))
122 break;
123 ++i;
124 }
125 val = 0;
126 bool okay = false;
127 while (i < len && '0' <= ptr[i] && ptr[i] <= '7') {
128 okay = true;
129 val <<= 3;
130 val |= ptr[i++] - '0';
131 }
132 while (i < len && ptr[i]) {
133 if (!isspace((unsigned char) ptr[i]))
134 return false;
135 ++i;
136 }
137 return okay;
138 }
139
140
s_NumToBase256(Uint8 val,char * ptr,size_t len)141 static bool s_NumToBase256(Uint8 val, char* ptr, size_t len)
142 {
143 _ASSERT(len > 0);
144 do {
145 ptr[--len] = (unsigned char)(val & 0xFF);
146 val >>= 8;
147 } while (len);
148 *ptr |= '\x80'; // set base-256 encoding flag
149 return val ? false : true;
150 }
151
152
153 // Return 0 (false) if conversion failed; 1 if the value converted to
154 // conventional octal representation (perhaps, with terminating '\0'
155 // sacrificed), or -1 if the value converted using base-256.
s_EncodeUint8(Uint8 val,char * ptr,size_t len)156 static int s_EncodeUint8(Uint8 val, char* ptr, size_t len)
157 { // Max file size (for len == 12):
158 if (s_NumToOctal (val, ptr, len)) { // 8GiB-1
159 return 1/*okay*/;
160 }
161 if (s_NumToOctal (val, ptr, ++len)) { // 64GiB-1
162 return 1/*okay*/;
163 }
164 if (s_NumToBase256(val, ptr, len)) { // up to 2^94-1
165 return -1/*okay, base-256*/;
166 }
167 return 0/*failure*/;
168 }
169
170
171 // Return true if conversion succeeded; false otherwise.
s_Base256ToNum(Uint8 & val,const char * ptr,size_t len)172 static bool s_Base256ToNum(Uint8& val, const char* ptr, size_t len)
173 {
174 const Uint8 lim = kMax_UI8 >> 8;
175 if (*ptr & '\x40') { // negative base-256?
176 return false;
177 }
178 val = *ptr++ & '\x3F';
179 while (--len) {
180 if (val > lim) {
181 return false;
182 }
183 val <<= 8;
184 val |= (unsigned char)(*ptr++);
185 }
186 return true;
187 }
188
189
190 // Return 0 (false) if conversion failed; 1 if the value was read into
191 // as a conventional octal string (perhaps, without the terminating '\0');
192 // or -1 if base-256 representation used.
s_DecodeUint8(Uint8 & val,const char * ptr,size_t len)193 static int s_DecodeUint8(Uint8& val, const char* ptr, size_t len)
194 {
195 if (*ptr & '\x80') {
196 return s_Base256ToNum(val, ptr, len) ? -1/*okay*/ : 0/*failure*/;
197 } else {
198 return s_OctalToNum (val, ptr, len) ? 1/*okay*/ : 0/*failure*/;
199 }
200 }
201
202
s_TarToMode(TTarMode perm,CDirEntry::TMode * usr_mode,CDirEntry::TMode * grp_mode,CDirEntry::TMode * oth_mode,CDirEntry::TSpecialModeBits * special_bits)203 static void s_TarToMode(TTarMode perm,
204 CDirEntry::TMode* usr_mode,
205 CDirEntry::TMode* grp_mode,
206 CDirEntry::TMode* oth_mode,
207 CDirEntry::TSpecialModeBits* special_bits)
208 {
209 // User
210 if (usr_mode) {
211 *usr_mode = ((perm & fTarURead ? CDirEntry::fRead : 0) |
212 (perm & fTarUWrite ? CDirEntry::fWrite : 0) |
213 (perm & fTarUExecute ? CDirEntry::fExecute : 0));
214 }
215
216 // Group
217 if (grp_mode) {
218 *grp_mode = ((perm & fTarGRead ? CDirEntry::fRead : 0) |
219 (perm & fTarGWrite ? CDirEntry::fWrite : 0) |
220 (perm & fTarGExecute ? CDirEntry::fExecute : 0));
221 }
222
223 // Others
224 if (oth_mode) {
225 *oth_mode = ((perm & fTarORead ? CDirEntry::fRead : 0) |
226 (perm & fTarOWrite ? CDirEntry::fWrite : 0) |
227 (perm & fTarOExecute ? CDirEntry::fExecute : 0));
228 }
229
230 // Special bits
231 if (special_bits) {
232 *special_bits = ((perm & fTarSetUID ? CDirEntry::fSetUID : 0) |
233 (perm & fTarSetGID ? CDirEntry::fSetGID : 0) |
234 (perm & fTarSticky ? CDirEntry::fSticky : 0));
235 }
236 }
237
238
s_TarToMode(TTarMode perm)239 static mode_t s_TarToMode(TTarMode perm)
240 {
241 mode_t mode = (
242 #ifdef S_ISUID
243 (perm & fTarSetUID ? S_ISUID : 0) |
244 #endif
245 #ifdef S_ISGID
246 (perm & fTarSetGID ? S_ISGID : 0) |
247 #endif
248 #ifdef S_ISVTX
249 (perm & fTarSticky ? S_ISVTX : 0) |
250 #endif
251 #if defined(S_IRUSR)
252 (perm & fTarURead ? S_IRUSR : 0) |
253 #elif defined(S_IREAD)
254 (perm & fTarURead ? S_IREAD : 0) |
255 #endif
256 #if defined(S_IWUSR)
257 (perm & fTarUWrite ? S_IWUSR : 0) |
258 #elif defined(S_IWRITE)
259 (perm & fTarUWrite ? S_IWRITE : 0) |
260 #endif
261 #if defined(S_IXUSR)
262 (perm & fTarUExecute ? S_IXUSR : 0) |
263 #elif defined(S_IEXEC)
264 (perm & fTarUExecute ? S_IEXEC : 0) |
265 #endif
266 #ifdef S_IRGRP
267 (perm & fTarGRead ? S_IRGRP : 0) |
268 #endif
269 #ifdef S_IWGRP
270 (perm & fTarGWrite ? S_IWGRP : 0) |
271 #endif
272 #ifdef S_IXGRP
273 (perm & fTarGExecute ? S_IXGRP : 0) |
274 #endif
275 #ifdef S_IROTH
276 (perm & fTarORead ? S_IROTH : 0) |
277 #endif
278 #ifdef S_IWOTH
279 (perm & fTarOWrite ? S_IWOTH : 0) |
280 #endif
281 #ifdef S_IXOTH
282 (perm & fTarOExecute ? S_IXOTH : 0) |
283 #endif
284 0);
285 return mode;
286 }
287
288
s_ModeToTar(mode_t mode)289 static TTarMode s_ModeToTar(mode_t mode)
290 {
291 // Keep in mind that the mode may be extracted on a different platform
292 TTarMode perm = (
293 #ifdef S_ISUID
294 (mode & S_ISUID ? fTarSetUID : 0) |
295 #endif
296 #ifdef S_ISGID
297 (mode & S_ISGID ? fTarSetGID : 0) |
298 #endif
299 #ifdef S_ISVTX
300 (mode & S_ISVTX ? fTarSticky : 0) |
301 #endif
302 #if defined(S_IRUSR)
303 (mode & S_IRUSR ? fTarURead : 0) |
304 #elif defined(S_IREAD)
305 (mode & S_IREAD ? fTarURead : 0) |
306 #endif
307 #if defined(S_IWUSR)
308 (mode & S_IWUSR ? fTarUWrite : 0) |
309 #elif defined(S_IWRITE)
310 (mode & S_IWRITE ? fTarUWrite : 0) |
311 #endif
312 #if defined(S_IXUSR)
313 (mode & S_IXUSR ? fTarUExecute : 0) |
314 #elif defined(S_IEXEC)
315 (mode & S_IEXEC ? fTarUExecute : 0) |
316 #endif
317 #if defined(S_IRGRP)
318 (mode & S_IRGRP ? fTarGRead : 0) |
319 #elif defined(S_IREAD)
320 // emulate read permission when file is readable
321 (mode & S_IREAD ? fTarGRead : 0) |
322 #endif
323 #ifdef S_IWGRP
324 (mode & S_IWGRP ? fTarGWrite : 0) |
325 #endif
326 #ifdef S_IXGRP
327 (mode & S_IXGRP ? fTarGExecute : 0) |
328 #endif
329 #if defined(S_IROTH)
330 (mode & S_IROTH ? fTarORead : 0) |
331 #elif defined(S_IREAD)
332 // emulate read permission when file is readable
333 (mode & S_IREAD ? fTarORead : 0) |
334 #endif
335 #ifdef S_IWOTH
336 (mode & S_IWOTH ? fTarOWrite : 0) |
337 #endif
338 #ifdef S_IXOTH
339 (mode & S_IXOTH ? fTarOExecute : 0) |
340 #endif
341 0);
342 #if defined(S_IFMT) || defined(_S_IFMT)
343 TTarMode mask = (TTarMode) mode;
344 # ifdef S_IFMT
345 mask &= S_IFMT;
346 # else
347 mask &= _S_IFMT;
348 # endif
349 if (!(mask & 07777)) {
350 perm |= mask;
351 }
352 #endif
353 return perm;
354 }
355
356
/// Length of the string stored in a fixed-size field:  the number of bytes
/// before the first '\0', or MAXSIZE if the field has no '\0' at all.
static size_t s_Length(const char* ptr, size_t maxsize)
{
    size_t n = 0;
    while (n < maxsize  &&  ptr[n] != '\0') {
        ++n;
    }
    return n;
}
362
363
364 //////////////////////////////////////////////////////////////////////////////
365 //
366 // Constants / macros / typedefs
367 //
368
// Block arithmetic helpers.  All of them rely on the tar block size being
// 512 == 1 << 9 (a power of two), which is why shifts by 9 and masking with
// (BLOCK_SIZE-1) are used instead of division and modulo.
// NB: BLOCK_SIZE is #define'd after the macros that mention it;  that is fine
// because a macro body is only expanded where the macro gets used.
369 /// Round up to the nearest multiple of BLOCK_SIZE:
370 //#define ALIGN_SIZE(size) SIZE_OF(BLOCK_OF(size + (BLOCK_SIZE-1)))
371 #define ALIGN_SIZE(size) (((size) + (BLOCK_SIZE-1)) & ~(BLOCK_SIZE-1))
// Remainder of "size" within its block (size modulo BLOCK_SIZE)
372 #define OFFSET_OF(size) ( (size) & (BLOCK_SIZE-1))
// Byte position -> block number (position divided by 512, rounded down)
373 #define BLOCK_OF(pos) ((pos) >> 9)
// Block count -> size in bytes (count multiplied by 512)
374 #define SIZE_OF(blk) ((blk) << 9)
375
376 /// Tar block size (512 bytes)
377 #define BLOCK_SIZE SIZE_OF(1)
378
379
380 /// Recognized TAR formats
// The values are chosen so that both eTar_Posix (5) and eTar_Star (6) have
// the eTar_Ustar bit (4) set:  a test like "fmt & eTar_Ustar" is therefore
// true for ustar, posix and star alike, and false for the other formats.
381 enum ETar_Format {
382 eTar_Unknown = 0,
383 eTar_Legacy = 1,
384 eTar_OldGNU = 2,
385 eTar_Ustar = 4,
386 eTar_Posix = 5, // |= eTar_Ustar
387 eTar_Star = 6 // |= eTar_Ustar
388 };
389
390
/// POSIX "ustar" tar archive member header.
/// Every member is a plain char array, so the structure has no padding and
/// the byte offsets noted on the right are the actual offsets within a block.
/// The area from offset 345 on is interpreted differently depending on the
/// archive format, hence the anonymous union.
struct STarHeader {
    char name[100];                 //   0
    char mode[8];                   // 100
    char uid[8];                    // 108
    char gid[8];                    // 116
    char size[12];                  // 124
    char mtime[12];                 // 136
    char checksum[8];               // 148
    char typeflag[1];               // 156
    char linkname[100];             // 157
    char magic[6];                  // 257
    char version[2];                // 263
    char uname[32];                 // 265
    char gname[32];                 // 297
    char devmajor[8];               // 329
    char devminor[8];               // 337
    union {                         // 345
        /// NB: not valid with old GNU format (no need)
        char prefix[155];
        /// NB: old GNU format only
        struct {
            char atime[12];         // 345
            char ctime[12];         // 357
            char unused[17];        // 369
            char sparse[96];        // 386 sparse map: ([12] offset + [12] size) x 4
            char contind[1];        // 482 non-zero if continued in the next header
            char realsize[12];      // 483 true file size
        } gnu;
        struct {
            char prefix[131];       // 345 NB: prefix + 107: realsize (char[12]) for 'S'
            char atime[12];         // 476
            char ctime[12];         // 488
        } star;
    };                              // 500
    // NCBI in last 4 bytes         // 508
};

/// Short name used for the header structure throughout this file
typedef STarHeader SHeader;
426
427
428 /// Block as a header.
// One tar block viewed two ways over the same storage:  as BLOCK_SIZE raw
// bytes ("buffer", e.g. for summing all bytes when computing the checksum),
// or as the structured member header ("header").  The header structure is
// shorter than the block;  the bytes past its end are reachable only via
// "buffer".
429 union TBlock {
430 char buffer[BLOCK_SIZE];
431 SHeader header;
432 };
433
434
s_TarChecksum(TBlock * block,bool isgnu)435 static bool s_TarChecksum(TBlock* block, bool isgnu)
436 {
437 SHeader* h = &block->header;
438 size_t len = sizeof(h->checksum) - (isgnu ? 2 : 1);
439
440 // Compute the checksum
441 memset(h->checksum, ' ', sizeof(h->checksum));
442 unsigned long checksum = 0;
443 const unsigned char* p = (const unsigned char*) block->buffer;
444 for (size_t i = 0; i < sizeof(block->buffer); ++i) {
445 checksum += *p++;
446 }
447 // ustar: '\0'-terminated checksum
448 // GNU special: 6 digits, then '\0', then a space [already in place]
449 if (!s_NumToOctal(checksum, h->checksum, len)) {
450 return false;
451 }
452 h->checksum[len] = '\0';
453 return true;
454 }
455
456
457
458 //////////////////////////////////////////////////////////////////////////////
459 //
460 // CTarEntryInfo
461 //
462
GetMode(void) const463 TTarMode CTarEntryInfo::GetMode(void) const
464 {
465 // Raw tar mode gets returned here (as kept in the info)
466 return (TTarMode)(m_Stat.orig.st_mode & 07777);
467 }
468
469
GetMode(CDirEntry::TMode * usr_mode,CDirEntry::TMode * grp_mode,CDirEntry::TMode * oth_mode,CDirEntry::TSpecialModeBits * special_bits) const470 void CTarEntryInfo::GetMode(CDirEntry::TMode* usr_mode,
471 CDirEntry::TMode* grp_mode,
472 CDirEntry::TMode* oth_mode,
473 CDirEntry::TSpecialModeBits* special_bits) const
474 {
475 s_TarToMode(GetMode(), usr_mode, grp_mode, oth_mode, special_bits);
476 }
477
478
GetMajor(void) const479 unsigned int CTarEntryInfo::GetMajor(void) const
480 {
481 #ifdef major
482 if (m_Type == eCharDev || m_Type == eBlockDev) {
483 return major(m_Stat.orig.st_rdev);
484 }
485 #else
486 if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
487 return (*((unsigned int*) &m_Stat.orig.st_rdev) >> 16) & 0xFFFF;
488 }
489 #endif //major
490 return (unsigned int)(-1);
491 }
492
493
GetMinor(void) const494 unsigned int CTarEntryInfo::GetMinor(void) const
495 {
496 #ifdef minor
497 if (m_Type == eCharDev || m_Type == eBlockDev) {
498 return minor(m_Stat.orig.st_rdev);
499 }
500 #else
501 if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
502 return *((unsigned int*) &m_Stat.orig.st_rdev) & 0xFFFF;
503 }
504 #endif //minor
505 return (unsigned int)(-1);
506 }
507
508
s_ModeAsString(TTarMode mode)509 static string s_ModeAsString(TTarMode mode)
510 {
511 char buf[9];
512 memset(buf, '-', sizeof(buf));
513
514 char* usr = buf;
515 char* grp = usr + 3;
516 char* oth = grp + 3;
517
518 if (mode & fTarURead) {
519 usr[0] = 'r';
520 }
521 if (mode & fTarUWrite) {
522 usr[1] = 'w';
523 }
524 if (mode & fTarUExecute) {
525 usr[2] = mode & fTarSetUID ? 's' : 'x';
526 } else if (mode & fTarSetUID) {
527 usr[2] = 'S';
528 }
529 if (mode & fTarGRead) {
530 grp[0] = 'r';
531 }
532 if (mode & fTarGWrite) {
533 grp[1] = 'w';
534 }
535 if (mode & fTarGExecute) {
536 grp[2] = mode & fTarSetGID ? 's' : 'x';
537 } else if (mode & fTarSetGID) {
538 grp[2] = 'S';
539 }
540 if (mode & fTarORead) {
541 oth[0] = 'r';
542 }
543 if (mode & fTarOWrite) {
544 oth[1] = 'w';
545 }
546 if (mode & fTarOExecute) {
547 oth[2] = mode & fTarSticky ? 't' : 'x';
548 } else if (mode & fTarSticky) {
549 oth[2] = 'T';
550 }
551
552 return string(buf, sizeof(buf));
553 }
554
555
s_TypeAsChar(CTarEntryInfo::EType type)556 static char s_TypeAsChar(CTarEntryInfo::EType type)
557 {
558 switch (type) {
559 case CTarEntryInfo::eFile:
560 case CTarEntryInfo::eHardLink:
561 return '-';
562 case CTarEntryInfo::eSymLink:
563 return 'l';
564 case CTarEntryInfo::eDir:
565 return 'd';
566 case CTarEntryInfo::ePipe:
567 return 'p';
568 case CTarEntryInfo::eCharDev:
569 return 'c';
570 case CTarEntryInfo::eBlockDev:
571 return 'b';
572 case CTarEntryInfo::eVolHeader:
573 return 'V';
574 case CTarEntryInfo::eSparseFile:
575 return 'S';
576 default:
577 break;
578 }
579 return '?';
580 }
581
582
s_UserGroupAsString(const CTarEntryInfo & info)583 static string s_UserGroupAsString(const CTarEntryInfo& info)
584 {
585 string user(info.GetUserName());
586 if (user.empty()) {
587 NStr::UIntToString(user, info.GetUserId());
588 }
589 string group(info.GetGroupName());
590 if (group.empty()) {
591 NStr::UIntToString(group, info.GetGroupId());
592 }
593 return user + '/' + group;
594 }
595
596
s_MajorMinor(unsigned int n)597 static string s_MajorMinor(unsigned int n)
598 {
599 return n != (unsigned int)(-1) ? NStr::UIntToString(n) : string(1, '?');
600 }
601
602
s_SizeOrMajorMinor(const CTarEntryInfo & info)603 static string s_SizeOrMajorMinor(const CTarEntryInfo& info)
604 {
605 if (info.GetType() == CTarEntryInfo::eCharDev ||
606 info.GetType() == CTarEntryInfo::eBlockDev) {
607 unsigned int major = info.GetMajor();
608 unsigned int minor = info.GetMinor();
609 return s_MajorMinor(major) + ',' + s_MajorMinor(minor);
610 } else if (info.GetType() == CTarEntryInfo::eDir ||
611 info.GetType() == CTarEntryInfo::ePipe ||
612 info.GetType() == CTarEntryInfo::eSymLink ||
613 info.GetType() == CTarEntryInfo::eVolHeader) {
614 return string("-");
615 } else if (info.GetType() == CTarEntryInfo::eSparseFile &&
616 info.GetSize() == 0) {
617 return string("?");
618 }
619 return NStr::NumericToString(info.GetSize());
620 }
621
622
operator <<(CNcbiOstream & os,const CTarEntryInfo & info)623 CNcbiOstream& operator << (CNcbiOstream& os, const CTarEntryInfo& info)
624 {
625 CTime mtime(info.GetModificationTime());
626 os << s_TypeAsChar(info.GetType())
627 << s_ModeAsString(info.GetMode()) << ' '
628 << setw(17) << s_UserGroupAsString(info) << ' '
629 << setw(10) << s_SizeOrMajorMinor(info) << ' '
630 << mtime.ToLocalTime().AsString(" Y-M-D h:m:s ")
631 << info.GetName();
632 if (info.GetType() == CTarEntryInfo::eSymLink ||
633 info.GetType() == CTarEntryInfo::eHardLink) {
634 os << " -> " << info.GetLinkName();
635 }
636 return os;
637 }
638
639
640
641 //////////////////////////////////////////////////////////////////////////////
642 //
643 // Debugging utilities
644 //
645
s_OSReason(int x_errno)646 static string s_OSReason(int x_errno)
647 {
648 static const char kUnknownError[] = "Unknown error";
649 const char* strerr;
650 char errbuf[80];
651 if (!x_errno)
652 return kEmptyStr;
653 strerr = ::strerror(x_errno);
654 if (!strerr || !*strerr
655 || !NStr::strncasecmp(strerr,
656 kUnknownError, sizeof(kUnknownError) - 1)) {
657 if (x_errno > 0) {
658 ::sprintf(errbuf, "Error %d", x_errno);
659 } else if (x_errno != -1) {
660 ::sprintf(errbuf, "Error 0x%08X", (unsigned int) x_errno);
661 } else {
662 ::strcpy (errbuf, "Unknown error (-1)");
663 }
664 strerr = errbuf;
665 }
666 _ASSERT(strerr && *strerr);
667 return string(": ") + strerr;
668 }
669
670
/// Build the location prefix used in diagnostics:
///   "<file>: At record N[, block M [thru #K]][, while in '<entry>']:\n"
/// The file part is omitted if FILE is empty;  if FILE names a regular file,
/// only its base name is shown.  The block details are omitted when a record
/// is exactly one block long.  RECSIZE is expected to be a non-zero multiple
/// of BLOCK_SIZE.
static string s_PositionAsString(const string& file, Uint8 pos, size_t recsize,
                                 const string& entryname)
{
    _ASSERT(!OFFSET_OF(recsize));
    _ASSERT(recsize >= BLOCK_SIZE);

    string result;
    if (!file.empty()) {
        CDirEntry temp(file);
        if (temp.GetType() == CDirEntry::eFile) {
            result = temp.GetName() + ": ";
        } else {
            result = file + ": ";
        }
    }

    result += "At record ";
    result += NStr::NumericToString(pos / recsize);
    if (recsize != BLOCK_SIZE) {
        result += ", block ";
        result += NStr::NumericToString(BLOCK_OF(pos % recsize));
        result += " [thru #";
        result += NStr::NumericToString(BLOCK_OF(pos), NStr::fWithCommas);
        result += ']';
    }
    if (!entryname.empty()) {
        result += ", while in '";
        result += entryname;
        result += '\'';
    }
    result += ":\n";
    return result;
}
694
695
s_OffsetAsString(size_t offset)696 static string s_OffsetAsString(size_t offset)
697 {
698 char buf[20];
699 _ASSERT(offset < 1000);
700 _VERIFY(sprintf(buf, "%03u", (unsigned int) offset));
701 return buf;
702 }
703
704
/// Check whether the first LEN bytes at S contain any byte other than C.
/// @return
///   true if at least one byte differs from C;  false if all LEN bytes are
///   equal to C (which includes the case of LEN == 0).
static bool memcchr(const char* s, char c, size_t len)
{
    const char* const end = s + len;
    while (s != end) {
        if (*s++ != c)
            return true;
    }
    return false;
}
713
714
s_Printable(const char * field,size_t maxsize,bool text)715 static string s_Printable(const char* field, size_t maxsize, bool text)
716 {
717 bool check = false;
718 if (!text && maxsize > 1 && !*field) {
719 field++, maxsize--;
720 check = true;
721 }
722 size_t len = s_Length(field, maxsize);
723 string retval = NStr::PrintableString(CTempString(field,
724 memcchr(field + len,
725 '\0',
726 maxsize - len)
727 ? maxsize
728 : len));
729 return check && !retval.empty() ? "\\0" + retval : retval;
730 }
731
732
// Fallback definition of offsetof() for non-GNU compilers that do not
// provide it:  computed as the address of member F in an object imagined at
// address 0.
733 #if !defined(__GNUC__) && !defined(offsetof)
734 # define offsetof(T, F) ((char*) &(((T*) 0)->F) - (char*) 0)
735 #endif
736
737
// Stringize the argument as written (no macro expansion of the argument)
738 #define _STR(s) #s
739
// Build one line of a header dump for the given SHeader member:
//   @<offset>[<field>]:<padding>"<printable contents>"
// The padding length depends on the length of the field name so that the
// quoted values line up.  NB: the macro expects two names to be in scope at
// the point of use:  "h" (pointer to the header being dumped) and "ecxpt"
// (a bool which, when true, forces the field to be printed as text).
740 #define TAR_PRINTABLE_EX(field, text, size) \
741 "@" + s_OffsetAsString((size_t) offsetof(SHeader, field)) + \
742 "[" _STR(field) "]:" + string(14 - sizeof(_STR(field)), ' ') + \
743 '"' + s_Printable(h->field, size, text || ecxpt) + '"'
744
// Same as above, with the size taken from the declared size of the member
745 #define TAR_PRINTABLE(field, text) \
746 TAR_PRINTABLE_EX(field, text, sizeof(h->field))
747
748
// Labels for the sparse map dump (map entries and the continuation flag)
749 #define TAR_GNU_REGION "[gnu.region]: "
750 #define TAR_GNU_CONTIND "[gnu.contind]: "
751
s_DumpSparseMap(const SHeader * h,const char * sparse,const char * contind,bool ecxpt=false)752 static string s_DumpSparseMap(const SHeader* h, const char* sparse,
753 const char* contind, bool ecxpt = false)
754 {
755 string dump;
756 size_t offset;
757 bool done = false;
758 string region(TAR_GNU_REGION);
759
760 do {
761 if (memcchr(sparse, '\0', 24)) {
762 offset = (size_t)(sparse - (const char*) h);
763 if (!dump.empty())
764 dump += '\n';
765 dump += '@' + s_OffsetAsString(offset);
766 if (!done) {
767 Uint8 off, len;
768 int ok_off = s_DecodeUint8(off, sparse, 12);
769 int ok_len = s_DecodeUint8(len, sparse + 12, 12);
770 if (ok_off & ok_len) {
771 dump += region;
772 region = ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ');
773 if (ok_off > 0) {
774 dump += '"';
775 dump += s_Printable(sparse, 12, ecxpt);
776 dump += "\" ";
777 } else {
778 dump += string(14, ' ');
779 }
780 sparse += 12;
781 if (ok_len > 0) {
782 dump += '"';
783 dump += s_Printable(sparse, 12, ecxpt);
784 dump += "\" ";
785 } else {
786 dump += string(14, ' ');
787 }
788 sparse += 12;
789 dump += "[@";
790 dump += NStr::NumericToString(off);
791 dump += ", ";
792 dump += NStr::NumericToString(len);
793 dump += ']';
794 continue;
795 }
796 done = true;
797 }
798 dump += ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ')
799 + '"' + NStr::PrintableString(string(sparse, 24)) + '"';
800 } else {
801 done = true;
802 }
803 sparse += 24;
804 } while (sparse < contind);
805 if (!dump.empty()) {
806 dump += '\n';
807 }
808 offset = (size_t)(contind - (const char*) h);
809 dump += '@' + s_OffsetAsString(offset) + TAR_GNU_CONTIND
810 "\"" + NStr::PrintableString(string(contind, 1))
811 + (*contind ? "\" [to-be-cont'd]" : "\" [last]");
812 return dump;
813 }
814
815
s_DumpSparseMap(const vector<pair<Uint8,Uint8>> & bmap)816 static string s_DumpSparseMap(const vector< pair<Uint8, Uint8> >& bmap)
817 {
818 size_t size = bmap.size();
819 string dump("Regions: " + NStr::NumericToString(size));
820 for (size_t n = 0; n < size; ++n) {
821 dump += "\n [" + NStr::NumericToString(n) + "]: @"
822 + NStr::NumericToString(bmap[n].first) + ", "
823 + NStr::NumericToString(bmap[n].second);
824 }
825 return dump;
826 }
827
828
s_DumpHeader(const SHeader * h,ETar_Format fmt,bool ecxpt=false)829 static string s_DumpHeader(const SHeader* h, ETar_Format fmt,
830 bool ecxpt = false)
831 {
832 string dump;
833 Uint8 val;
834 int ok;
835
836 dump += TAR_PRINTABLE(name, true);
837 dump += '\n';
838
839 ok = s_OctalToNum(val, h->mode, sizeof(h->mode));
840 dump += TAR_PRINTABLE(mode, !ok);
841 if (ok && val) {
842 dump += " [" + s_ModeAsString((TTarMode) val) + ']';
843 }
844 dump += '\n';
845
846 ok = s_DecodeUint8(val, h->uid, sizeof(h->uid));
847 dump += TAR_PRINTABLE(uid, ok <= 0);
848 if (ok && (ok < 0 || val > 7)) {
849 dump += " [" + NStr::NumericToString(val) + ']';
850 if (ok < 0) {
851 dump += " (base-256)";
852 }
853 }
854 dump += '\n';
855
856 ok = s_DecodeUint8(val, h->gid, sizeof(h->gid));
857 dump += TAR_PRINTABLE(gid, ok <= 0);
858 if (ok && (ok < 0 || val > 7)) {
859 dump += " [" + NStr::NumericToString(val) + ']';
860 if (ok < 0) {
861 dump += " (base-256)";
862 }
863 }
864 dump += '\n';
865
866 ok = s_DecodeUint8(val, h->size, sizeof(h->size));
867 dump += TAR_PRINTABLE(size, ok <= 0);
868 if (ok && (ok < 0 || val > 7)) {
869 dump += " [" + NStr::NumericToString(val) + ']';
870 if (ok && h->typeflag[0] == 'S' && fmt == eTar_OldGNU) {
871 dump += " w/o map(s)!";
872 }
873 if (ok < 0) {
874 dump += " (base-256)";
875 }
876 }
877 dump += '\n';
878
879 ok = s_OctalToNum(val, h->mtime, sizeof(h->mtime));
880 dump += TAR_PRINTABLE(mtime, !ok);
881 if (ok && val) {
882 CTime mtime((time_t) val);
883 ok = (Uint8) mtime.GetTimeT() == val ? true : false;
884 if (ok || val > 7) {
885 dump += (" ["
886 + (val > 7 ? NStr::NumericToString(val) + ", " : "")
887 + (ok ? mtime.ToLocalTime().AsString("Y-M-D h:m:s") : "")
888 + ']');
889 }
890 }
891 dump += '\n';
892
893 ok = s_OctalToNum(val, h->checksum, sizeof(h->checksum));
894 dump += TAR_PRINTABLE(checksum, !ok);
895 dump += '\n';
896
897 // Classify to the extent possible to help debug the problem (if any)
898 dump += TAR_PRINTABLE(typeflag, true);
899 ok = false;
900 const char* tname = 0;
901 switch (h->typeflag[0]) {
902 case '\0':
903 case '0':
904 ok = true;
905 if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
906 size_t namelen = s_Length(h->name, sizeof(h->name));
907 if (namelen && h->name[namelen - 1] == '/')
908 tname = "legacy regular entry (dir)";
909 }
910 if (!tname)
911 tname = "legacy regular entry (file)";
912 tname += h->typeflag[0] ? 7/*skip "legacy "*/ : 0;
913 break;
914 case '\1':
915 case '1':
916 ok = true;
917 #ifdef NCBI_OS_UNIX
918 tname = "legacy hard link";
919 #else
920 tname = "legacy hard link - not FULLY supported";
921 #endif //NCBI_OS_UNIX
922 tname += h->typeflag[0] != '\1' ? 7/*skip "legacy "*/ : 0;
923 break;
924 case '\2':
925 case '2':
926 ok = true;
927 #ifdef NCBI_OS_UNIX
928 tname = "legacy symbolic link";
929 #else
930 tname = "legacy symbolic link - not FULLY supported";
931 #endif //NCBI_OS_UNIX
932 tname += h->typeflag[0] != '\2' ? 7/*skip "legacy "*/ : 0;
933 break;
934 case '3':
935 #ifdef NCBI_OS_UNIX
936 ok = true;
937 #endif //NCBI_OS_UNIX
938 tname = "character device";
939 break;
940 case '4':
941 #ifdef NCBI_OS_UNIX
942 ok = true;
943 #endif //NCBI_OS_UNIX
944 tname = "block device";
945 break;
946 case '5':
947 ok = true;
948 tname = "directory";
949 break;
950 case '6':
951 #ifdef NCBI_OS_UNIX
952 ok = true;
953 #endif //NCBI_OS_UNIX
954 tname = "FIFO";
955 break;
956 case '7':
957 tname = "contiguous";
958 break;
959 case 'g':
960 tname = "global extended header";
961 break;
962 case 'x':
963 case 'X':
964 if (fmt & eTar_Ustar) {
965 ok = true;
966 if (h->typeflag[0] == 'x') {
967 tname = "extended (POSIX 1003.1-2001 [PAX]) header"
968 " - not FULLY supported";
969 } else {
970 tname = "extended (POSIX 1003.1-2001 [PAX] by Sun) header"
971 " - not FULLY supported";
972 }
973 } else {
974 tname = "extended header";
975 }
976 break;
977 case 'A':
978 tname = "Solaris ACL";
979 break;
980 case 'D':
981 if (fmt == eTar_OldGNU) {
982 tname = "GNU extension: directory dump";
983 }
984 break;
985 case 'E':
986 tname = "Solaris extended attribute file";
987 break;
988 case 'I':
989 // CAUTION: Entry size shows actual file size in the filesystem but
990 // no actual data blocks stored in the archive following the header!
991 tname = "Inode metadata only";
992 break;
993 case 'K':
994 if (fmt == eTar_OldGNU) {
995 ok = true;
996 tname = "GNU extension: long link";
997 }
998 break;
999 case 'L':
1000 if (fmt == eTar_OldGNU) {
1001 ok = true;
1002 tname = "GNU extension: long name";
1003 }
1004 break;
1005 case 'M':
1006 switch (fmt) {
1007 case eTar_OldGNU:
1008 tname = "GNU extension: multi-volume entry";
1009 break;
1010 case eTar_Star:
1011 tname = "STAR extension: multi-volume entry";
1012 break;
1013 default:
1014 break;
1015 }
1016 break;
1017 case 'N':
1018 if (fmt == eTar_OldGNU) {
1019 tname = "GNU extension (obsolete): long filename(s)";
1020 }
1021 break;
1022 case 'S':
1023 switch (fmt) {
1024 case eTar_OldGNU:
1025 // CAUTION: Entry size does not include sparse entry map stored in
1026 // additional (non-standard) headers that may follow this header!
1027 tname = "GNU extension: sparse file";
1028 break;
1029 case eTar_Star:
1030 // Entry size already includes size of additional sparse file maps
1031 // that may follow this header before the actual file data.
1032 tname = "STAR extension: sparse file";
1033 break;
1034 default:
1035 break;
1036 }
1037 break;
1038 case 'V':
1039 ok = true;
1040 tname = "Volume header";
1041 break;
1042 default:
1043 break;
1044 }
1045 if (!tname && 'A' <= h->typeflag[0] && h->typeflag[0] <= 'Z') {
1046 tname = "local vendor enhancement / user-defined extension";
1047 }
1048 dump += (" [" + string(tname ? tname : "reserved")
1049 + (ok
1050 ? "]\n"
1051 : " -- NOT SUPPORTED]\n"));
1052
1053 dump += TAR_PRINTABLE(linkname, true);
1054 dump += '\n';
1055
1056 switch (fmt) {
1057 case eTar_Legacy: // NCBI never writes this header
1058 tname = "legacy (V7)";
1059 break;
1060 case eTar_OldGNU:
1061 if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1062 tname = "old GNU (NCBI)";
1063 } else {
1064 tname = "old GNU";
1065 }
1066 break;
1067 case eTar_Ustar:
1068 if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1069 tname = "ustar (NCBI)";
1070 } else {
1071 tname = "ustar";
1072 }
1073 break;
1074 case eTar_Posix: // aka "pax"
1075 if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1076 tname = "posix (NCBI)";
1077 } else {
1078 tname = "posix";
1079 }
1080 break;
1081 case eTar_Star: // NCBI never writes this header
1082 tname = "star";
1083 break;
1084 default:
1085 tname = 0;
1086 break;
1087 }
1088 dump += TAR_PRINTABLE(magic, true);
1089 if (tname) {
1090 dump += " [" + string(tname) + ']';
1091 }
1092 dump += '\n';
1093
1094 dump += TAR_PRINTABLE(version, true);
1095
1096 if (fmt != eTar_Legacy) {
1097 dump += '\n';
1098
1099 dump += TAR_PRINTABLE(uname, true);
1100 dump += '\n';
1101
1102 dump += TAR_PRINTABLE(gname, true);
1103 dump += '\n';
1104
1105 ok = s_OctalToNum(val, h->devmajor, sizeof(h->devmajor));
1106 dump += TAR_PRINTABLE(devmajor, !ok);
1107 if (ok && val > 7) {
1108 dump += " [" + NStr::NumericToString(val) + ']';
1109 }
1110 dump += '\n';
1111
1112 ok = s_OctalToNum(val, h->devminor, sizeof(h->devminor));
1113 dump += TAR_PRINTABLE(devminor, !ok);
1114 if (ok && val > 7) {
1115 dump += " [" + NStr::NumericToString(val) + ']';
1116 }
1117 dump += '\n';
1118
1119 switch (fmt) {
1120 case eTar_Star:
1121 if (h->typeflag[0] == 'S') {
1122 dump += TAR_PRINTABLE_EX(star.prefix, true, 107);
1123 const char* realsize = h->star.prefix + 107;
1124 ok = s_DecodeUint8(val, realsize, 12);
1125 dump += "@"
1126 + s_OffsetAsString((size_t)(realsize - (const char*) h))
1127 + "[star.realsize]:\""
1128 + s_Printable(realsize, 12, !ok || ecxpt) + '"';
1129 if (ok && (ok < 0 || val > 7)) {
1130 dump += " [" + NStr::NumericToString(val) + ']';
1131 if (ok < 0) {
1132 dump += " (base-256)";
1133 }
1134 }
1135 } else {
1136 dump += TAR_PRINTABLE(star.prefix, true);
1137 }
1138 dump += '\n';
1139
1140 ok = s_OctalToNum(val, h->star.atime, sizeof(h->star.atime));
1141 dump += TAR_PRINTABLE(star.atime, !ok);
1142 if (ok && val) {
1143 CTime atime((time_t) val);
1144 ok = (Uint8) atime.GetTimeT() == val ? true : false;
1145 if (ok || val > 7) {
1146 dump += (" ["
1147 + (val > 7 ? NStr::NumericToString(val)+", " : "")
1148 + (ok
1149 ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1150 : "")
1151 + ']');
1152 }
1153 }
1154 dump += '\n';
1155
1156 ok = s_OctalToNum(val, h->star.ctime, sizeof(h->star.ctime));
1157 dump += TAR_PRINTABLE(star.ctime, !ok);
1158 if (ok && val) {
1159 CTime ctime((time_t) val);
1160 ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1161 if (ok || val > 7) {
1162 dump += (" ["
1163 + (val > 7 ? NStr::NumericToString(val)+", " : "")
1164 + (ok
1165 ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1166 : "")
1167 + ']');
1168 }
1169 }
1170 tname = (const char*) &h->star + sizeof(h->star);
1171 break;
1172
1173 case eTar_OldGNU:
1174 ok = s_OctalToNum(val, h->gnu.atime, sizeof(h->gnu.atime));
1175 dump += TAR_PRINTABLE(gnu.atime, !ok);
1176 if (ok && val) {
1177 CTime atime((time_t) val);
1178 ok = (Uint8) atime.GetTimeT() == val ? true : false;
1179 if (ok || val > 7) {
1180 dump += (" ["
1181 + (val > 7 ? NStr::NumericToString(val)+", " : "")
1182 + (ok
1183 ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1184 : "")
1185 + ']');
1186 }
1187 }
1188 dump += '\n';
1189
1190 ok = s_OctalToNum(val, h->gnu.ctime, sizeof(h->gnu.ctime));
1191 dump += TAR_PRINTABLE(gnu.ctime, !ok);
1192 if (ok && val) {
1193 CTime ctime((time_t) val);
1194 ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1195 if (ok || val > 7) {
1196 dump += (" ["
1197 + (val > 7 ? NStr::NumericToString(val)+", " : "")
1198 + (ok
1199 ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1200 : "")
1201 + ']');
1202 }
1203 }
1204
1205 if (h->typeflag[0] == 'S') {
1206 if (memcchr(h->gnu.unused, '\0', sizeof(h->gnu.unused))) {
1207 dump += '\n';
1208 dump += TAR_PRINTABLE(gnu.unused, true);
1209 }
1210 dump += '\n' + s_DumpSparseMap(h, h->gnu.sparse,
1211 h->gnu.contind, ecxpt);
1212 if (memcchr(h->gnu.realsize, '\0', sizeof(h->gnu.realsize))) {
1213 ok = s_DecodeUint8(val, h->gnu.realsize,
1214 sizeof(h->gnu.realsize));
1215 dump += '\n';
1216 dump += TAR_PRINTABLE(gnu.realsize, ok <= 0);
1217 if (ok && (ok < 0 || val > 7)) {
1218 dump += " [" + NStr::NumericToString(val) + ']';
1219 }
1220 if (ok < 0) {
1221 dump += " (base-256)";
1222 }
1223 }
1224 tname = (const char*) &h->gnu + sizeof(h->gnu);
1225 } else {
1226 tname = h->gnu.ctime + sizeof(h->gnu.ctime);
1227 }
1228 break;
1229
1230 default:
1231 dump += TAR_PRINTABLE(prefix, true);
1232 tname = h->prefix + sizeof(h->prefix);
1233 break;
1234 }
1235 } else {
1236 tname = h->version + sizeof(h->version);
1237 }
1238
1239 size_t n = 0;
1240 while (&tname[n] < (const char*) h + BLOCK_SIZE) {
1241 if (tname[n]) {
1242 size_t offset = (size_t)(&tname[n] - (const char*) h);
1243 size_t len = BLOCK_SIZE - offset;
1244 if (len & ~0xF) { // len > 16
1245 len = 0x10; // len = 16
1246 }
1247 const char* e = (const char*) memchr(&tname[n], '\0', len);
1248 if (e) {
1249 len = (size_t)(e - &tname[n]);
1250 ok = s_DecodeUint8(val, &tname[n], len);
1251 } else {
1252 if (len > (offset & 0xF)) {
1253 len -= (offset & 0xF);
1254 }
1255 ok = false;
1256 }
1257 _ASSERT(len);
1258 dump += "\n@" + s_OffsetAsString(offset) + ':' + string(15, ' ')
1259 + '"' + NStr::PrintableString(string(&tname[n], len)) + '"';
1260 if (ok) {
1261 CTime time((time_t) val);
1262 bool okaytime = (Uint8) time.GetTimeT() == val;
1263 if (ok < 0 || val > 7 || okaytime) {
1264 dump += " [";
1265 if (ok < 0 || val > 7) {
1266 dump += NStr::NumericToString(val);
1267 }
1268 if (ok < 0) {
1269 dump += "] (base-256)";
1270 } else if (okaytime) {
1271 if (val > 7) {
1272 dump += ", ";
1273 }
1274 dump += time.ToLocalTime().AsString("Y-M-D h:m:s]");
1275 } else {
1276 dump += ']';
1277 }
1278 }
1279 }
1280 n += len;
1281 } else {
1282 n++;
1283 }
1284 }
1285
1286 return dump;
1287 }
1288
1289 #undef TAR_PRINTABLE
1290
1291 #undef _STR
1292
1293
s_SetStateSafe(CNcbiIos & ios,IOS_BASE::iostate state)1294 inline void s_SetStateSafe(CNcbiIos& ios, IOS_BASE::iostate state) throw()
1295 {
1296 try {
1297 ios.setstate(state);
1298 } catch (IOS_BASE::failure&) {
1299 ;
1300 }
1301 }
1302
1303
1304 //////////////////////////////////////////////////////////////////////////////
1305 //
1306 // CTar
1307 //
1308
CTar(const string & filename,size_t blocking_factor)1309 CTar::CTar(const string& filename, size_t blocking_factor)
1310 : m_FileName(filename),
1311 m_FileStream(new CNcbiFstream),
1312 m_Stream(*m_FileStream),
1313 m_ZeroBlockCount(0),
1314 m_BufferSize(SIZE_OF(blocking_factor)),
1315 m_BufferPos(0),
1316 m_StreamPos(0),
1317 m_BufPtr(0),
1318 m_Buffer(0),
1319 m_OpenMode(eNone),
1320 m_Modified(false),
1321 m_Bad(false),
1322 m_Flags(fDefault)
1323 {
1324 x_Init();
1325 }
1326
1327
// Construct an archive object on top of the caller-provided "stream".
// No file name is kept (m_FileName is empty) and no own file stream is
// created (m_FileStream is null), so the rest of the class tells a stream
// archive from a file archive by checking m_FileStream.  The record (buffer)
// size is SIZE_OF(blocking_factor);  the I/O buffer is allocated by x_Init().
CTar::CTar(CNcbiIos& stream, size_t blocking_factor)
    : m_FileName(kEmptyStr),
      m_FileStream(0),
      m_Stream(stream),
      m_ZeroBlockCount(0),
      m_BufferSize(SIZE_OF(blocking_factor)),
      m_BufferPos(0),
      m_StreamPos(0),
      m_BufPtr(0),
      m_Buffer(0),
      m_OpenMode(eNone),
      m_Modified(false),
      m_Bad(false),
      m_Flags(fDefault)
{
    x_Init();
}
1345
1346
~CTar()1347 CTar::~CTar()
1348 {
1349 // Close stream(s)
1350 x_Close(x_Flush(true/*no_throw*/));
1351 delete m_FileStream;
1352 m_FileStream = 0;
1353
1354 // Delete owned masks
1355 for (size_t i = 0; i < sizeof(m_Mask) / sizeof(m_Mask[0]); ++i) {
1356 SetMask(0, eNoOwnership, EMaskType(i));
1357 }
1358
1359 // Delete buffer
1360 delete[] m_BufPtr;
1361 m_BufPtr = 0;
1362 }
1363
1364
// Throw CTarException with the error code "errcode" on behalf of the CTar
// object pointed to by "who".  The exception text is "message" prepended with
// whatever s_PositionAsString() builds from the archive file name, the current
// stream position, the record (buffer) size, and the current entry name.
#define TAR_THROW(who, errcode, message) \
    NCBI_THROW(CTarException, errcode, \
               s_PositionAsString(who->m_FileName, who->m_StreamPos, \
                                  who->m_BufferSize, \
                                  who->m_Current.GetName()) + (message))

// Same as TAR_THROW() but if fDumpEntryHeaders is set in the flags of "who",
// then a dump of the header "h" (taken to be of the format "fmt") is appended
// to the message after ":\n".
#define TAR_THROW_EX(who, errcode, message, h, fmt) \
    TAR_THROW(who, errcode, \
              who->m_Flags & fDumpEntryHeaders \
              ? string(message) + ":\n" + s_DumpHeader(h, fmt, true) \
              : string(message))

// Post (not throw) "message" with the given error "subcode" and "severity",
// prepended with the same position information as in TAR_THROW().
// NB:  refers to the data members directly (without any "who"), so it is
// only usable where those members are in scope.
#define TAR_POST(subcode, severity, message) \
    ERR_POST_X(subcode, (severity) << \
               s_PositionAsString(m_FileName, m_StreamPos, m_BufferSize,\
                                  m_Current.GetName()) + (message))
1381
1382
x_Init(void)1383 void CTar::x_Init(void)
1384 {
1385 _ASSERT(!OFFSET_OF(m_BufferSize));
1386 size_t pagesize = (size_t) CSystemInfo::GetVirtualMemoryPageSize();
1387 if (pagesize < 4096 || (pagesize & (pagesize - 1))) {
1388 pagesize = 4096; // reasonable default
1389 }
1390 size_t pagemask = pagesize - 1;
1391 m_BufPtr = new char[m_BufferSize + pagemask];
1392 // Make m_Buffer page-aligned
1393 m_Buffer = m_BufPtr +
1394 ((((size_t) m_BufPtr + pagemask) & ~pagemask) - (size_t) m_BufPtr);
1395 }
1396
1397
// Flush pending archive output:  pad the current record with zeros up to the
// record size, make sure that at least two zero blocks (EOT) conclude the
// archive, and then sync the underlying stream buffer.
//
// If "no_throw" is true, then the special "src" value of (const char*)(-1L)
// is passed to x_WriteArchive(), and a stream sync failure gets posted with
// TAR_POST() instead of being thrown with TAR_THROW().
//
// Return false if nothing was attempted (the archive is bad or not open, or
// there is nothing to flush);  return true otherwise -- NB: also when an error
// has occurred in the no-throw mode (m_Bad gets set in that case).
bool CTar::x_Flush(bool no_throw)
{
    // Reset the name of the current entry
    m_Current.m_Name.erase();
    if (m_BufferPos == m_BufferSize) {
        m_Bad = true; // In case of unhandled exception(s)
    }
    if (m_Bad || !m_OpenMode) {
        return false;
    }
    // An unmodified archive needs no flushing, except for a stream archive
    // (no m_FileStream) in the pipe-through mode that has already advanced
    if (!m_Modified &&
        (m_FileStream || !(m_Flags & fStreamPipeThrough) || !m_StreamPos)){
        return false;
    }

    _ASSERT(m_BufferPos < m_BufferSize);
    if (m_BufferPos || m_ZeroBlockCount < 2) {
        // Assure proper blocking factor and pad the archive as necessary
        // NB: x_WriteArchive() resets m_ZeroBlockCount, hence the local copy
        size_t zbc = m_ZeroBlockCount;
        size_t pad = m_BufferSize - m_BufferPos;
        memset(m_Buffer + m_BufferPos, 0, pad);
        x_WriteArchive(pad, no_throw ? (const char*)(-1L) : 0);
        _ASSERT(!(m_BufferPos % m_BufferSize) // m_BufferSize if write error
                && !m_Bad == !m_BufferPos);
        if (!m_Bad && (zbc += BLOCK_OF(pad)) < 2) {
            // Write EOT (two zero blocks), if have not padded enough already
            // (the last "pad" bytes of the buffer are zeros from above)
            memset(m_Buffer, 0, m_BufferSize - pad);
            x_WriteArchive(m_BufferSize, no_throw ? (const char*)(-1L) : 0);
            _ASSERT(!(m_BufferPos % m_BufferSize)
                    && !m_Bad == !m_BufferPos);
            if (!m_Bad && (zbc += BLOCK_OF(m_BufferSize)) < 2) {
                // Still short by one zero block, which (per the assertion
                // below) is only expected with a single-block record
                _ASSERT(zbc == 1 && m_BufferSize == BLOCK_SIZE);
                x_WriteArchive(BLOCK_SIZE, no_throw ? (const char*)(-1L) : 0);
                _ASSERT(!(m_BufferPos % m_BufferSize)
                        && !m_Bad == !m_BufferPos);
            }
        }
        m_ZeroBlockCount = zbc;
    }
    _ASSERT(!OFFSET_OF(m_BufferPos));

    // Have the stream buffer push everything out to its destination
    if (!m_Bad && m_Stream.rdbuf()->PUBSYNC() != 0) {
        m_Bad = true;
        int x_errno = errno;
        s_SetStateSafe(m_Stream, NcbiBadbit);
        if (!no_throw) {
            TAR_THROW(this, eWrite,
                      "Archive flush failed" + s_OSReason(x_errno));
        }
        TAR_POST(83, Error,
                 "Archive flush failed" + s_OSReason(x_errno));
    }
    if (!m_Bad) {
        m_Modified = false;
    }
    return true;
}
1454
1455
// Set the size of the file "filename" to "filesize" bytes.
// Return 0 on success, or an OS error code otherwise (errno on UNIX, the
// result of GetLastError() on MS-Windows).  On any other platform this is
// a no-op that returns 0.
static int s_TruncateFile(const string& filename, Uint8 filesize)
{
    int x_error = 0;
#ifdef NCBI_OS_UNIX
    x_error = ::truncate(filename.c_str(), (off_t) filesize) != 0 ? errno : 0;
#endif //NCBI_OS_UNIX
#ifdef NCBI_OS_MSWIN
    TXString x_filename(_T_XSTRING(filename));
    HANDLE handle = ::CreateFile(x_filename.c_str(), GENERIC_WRITE,
                                 0/*sharing*/, NULL, OPEN_EXISTING,
                                 FILE_ATTRIBUTE_NORMAL, NULL);
    if (handle == INVALID_HANDLE_VALUE) {
        x_error = (int) ::GetLastError();
    } else {
        // Move the file pointer to the new size, and cut the file off there
        LARGE_INTEGER x_filesize;
        x_filesize.QuadPart = filesize;
        const bool resized
            = ::SetFilePointerEx(handle, x_filesize, NULL, FILE_BEGIN)
            && ::SetEndOfFile(handle);
        if (!resized) {
            x_error = (int) ::GetLastError();
        }
        // The handle gets closed regardless;  a close error is only
        // reported when there has been no earlier error to report
        const bool closed = ::CloseHandle(handle) ? true : false;
        if (!closed && !x_error) {
            x_error = (int) ::GetLastError();
        }
    }
#endif //NCBI_OS_MSWIN
    return x_error;
}
1485
1486
x_Close(bool truncate)1487 void CTar::x_Close(bool truncate)
1488 {
1489 if (m_FileStream && m_FileStream->is_open()) {
1490 m_FileStream->close();
1491 if (!m_Bad && !m_FileStream->good()) {
1492 int x_errno = errno;
1493 TAR_POST(104, Error,
1494 "Cannot close archive" + s_OSReason(x_errno));
1495 m_Bad = true;
1496 }
1497 if (!m_Bad && !(m_Flags & fTarfileNoTruncate) && truncate) {
1498 s_TruncateFile(m_FileName, m_StreamPos);
1499 }
1500 }
1501 m_OpenMode = eNone;
1502 m_Modified = false;
1503 m_BufferPos = 0;
1504 m_Bad = false;
1505 }
1506
1507
// Prepare the archive for the requested "action".
//
// For a stream archive (no m_FileStream) nothing can be (re)opened here:  the
// state of the stream gets checked, and the open mode is derived from the
// action (as "action & eRW").  For a file archive, the file gets (re)opened
// whenever the action is write-only (eCreate) or requires more access than
// the current open mode provides;  otherwise, the already open file is reused
// (and rewound, unless the action is eAppend or eInternal).
//
// If the archive has to be appended to (flagged by "toend"), it is first read
// through with x_ReadAndProcess(eAppend) in order to get to its logical end.
//
// TAR_THROW(eOpen) is used if the archive is in a bad state or the file
// cannot be opened.
void CTar::x_Open(EAction action)
{
    _ASSERT(action);
    bool toend = false;
    // We can only open a named file here, and if an external stream is being
    // used as an archive, it must be explicitly repositioned by user's code
    // (outside of this class) before each archive operation.
    if (!m_FileStream) {
        if (!m_Modified) {
            // Check if Create() is followed by Append()
            if (m_OpenMode != eWO && action == eAppend
                && (m_Flags & fStreamPipeThrough)) {
                toend = true;
            }
        } else if (action != eAppend) {
            _ASSERT(m_OpenMode != eWO); // NB: Prev action != eCreate
            if (m_Flags & fStreamPipeThrough) {
                x_Flush(); // NB: resets m_Modified to false if successful
            }
            if (m_Modified) {
                // Either not flushed at all, or the flush did not succeed
                if (!m_Bad) {
                    TAR_POST(1, Warning,
                             "Pending changes may be discarded"
                             " upon reopen of in-stream archive");
                }
                m_Modified = false;
            }
        }
        m_Current.m_Name.erase();
        // NB: the EOF bit alone does not make the stream unusable here
        if (m_Bad || (m_Stream.rdstate() & ~NcbiEofbit) || !m_Stream.rdbuf()) {
            TAR_THROW(this, eOpen,
                      "Archive I/O stream is in bad state");
        } else {
            m_OpenMode = EOpenMode(int(action) & eRW);
            _ASSERT(m_OpenMode != eNone);
        }
        if (action != eAppend && action != eInternal) {
            // Start over (position-wise)
            m_BufferPos = 0;
            m_StreamPos = 0;
        }
#ifdef NCBI_OS_MSWIN
        // If the archive is read from stdin that is not a disk file, then
        // request skipping by means of reading
        if (&m_Stream == &cin) {
            HANDLE handle = (HANDLE) _get_osfhandle(_fileno(stdin));
            if (GetFileType(handle) != FILE_TYPE_DISK) {
                m_Flags |= fSlowSkipWithRead;
            }
        }
#endif //NCBI_OS_MSWIN
    } else {
        _ASSERT(&m_Stream == m_FileStream);
        EOpenMode mode = EOpenMode(int(action) & eRW);
        _ASSERT(mode != eNone);
        if (action != eAppend && action != eCreate/*mode == eWO*/) {
            x_Flush();
        } else {
            m_Current.m_Name.erase();
        }
        if (mode == eWO || m_OpenMode < mode) {
            // Need to (re-)open the archive file
            if (m_OpenMode != eWO && action == eAppend) {
                toend = true;
            }
            x_Close(false); // NB: m_OpenMode = eNone; m_Modified = false
            m_StreamPos = 0;
            switch (mode) {
            case eWO:
                // WO access
                _ASSERT(action == eCreate);
                // Note that m_Modified is untouched
                m_FileStream->open(m_FileName.c_str(),
                                   IOS_BASE::out |
                                   IOS_BASE::binary | IOS_BASE::trunc);
                break;
            case eRO:
                // RO access
                _ASSERT(action != eCreate);
                m_FileStream->open(m_FileName.c_str(),
                                   IOS_BASE::in |
                                   IOS_BASE::binary);
                break;
            case eRW:
                // RW access
                _ASSERT(action != eCreate);
                m_FileStream->open(m_FileName.c_str(),
                                   IOS_BASE::in | IOS_BASE::out |
                                   IOS_BASE::binary);
                break;
            default:
                _TROUBLE;
                break;
            }
            if (!m_FileStream->is_open() || !m_FileStream->good()) {
                int x_errno = errno;
                TAR_THROW(this, eOpen,
                          "Cannot open archive" + s_OSReason(x_errno));
            } else {
                m_OpenMode = mode;
            }
        } else {
            // No need to reopen the archive file
            _ASSERT(m_OpenMode > eWO && action != eCreate);
            if (m_Bad) {
                TAR_THROW(this, eOpen,
                          "Archive file is in bad state");
            }
            if (action != eAppend && action != eInternal) {
                // Rewind to the beginning of the archive
                m_BufferPos = 0;
                m_StreamPos = 0;
                m_FileStream->seekg(0);
            }
        }
    }
    if (toend) {
        _ASSERT(!m_Modified && action == eAppend);
        // There may be an extra and unnecessary archive file scanning
        // if Append() follows Update() that caused no modifications;
        // but there is no way to distinguish this, currently :-/
        // Also, this sequence should be a real rarity in practice.
        x_ReadAndProcess(eAppend); // to position at logical EOF
    }
    _ASSERT(!(m_Stream.rdstate() & ~NcbiEofbit));
    _ASSERT(m_Stream.rdbuf());
}
1631
1632
Extract(void)1633 unique_ptr<CTar::TEntries> CTar::Extract(void)
1634 {
1635 x_Open(eExtract);
1636 unique_ptr<TEntries> entries = x_ReadAndProcess(eExtract);
1637
1638 // Restore attributes of "postponed" directory entries
1639 if (m_Flags & fPreserveAll) {
1640 ITERATE(TEntries, e, *entries) {
1641 if (e->GetType() == CTarEntryInfo::eDir) {
1642 x_RestoreAttrs(*e, m_Flags);
1643 }
1644 }
1645 }
1646
1647 return entries;
1648 }
1649
1650
GetNextEntryInfo(void)1651 const CTarEntryInfo* CTar::GetNextEntryInfo(void)
1652 {
1653 if (m_Bad) {
1654 return 0;
1655 }
1656 if (m_OpenMode & eRO) {
1657 x_Skip(BLOCK_OF(m_Current.GetPosition(CTarEntryInfo::ePos_Data)
1658 + ALIGN_SIZE(m_Current.GetSize()) - m_StreamPos));
1659 } else {
1660 x_Open(eInternal);
1661 }
1662 unique_ptr<TEntries> temp = x_ReadAndProcess(eInternal);
1663 _ASSERT(temp && temp->size() < 2);
1664 if (temp->size() < 1) {
1665 return 0;
1666 }
1667 _ASSERT(m_Current == temp->front());
1668 return &m_Current;
1669 }
1670
1671
// Return a pointer to buffer, which is always block-aligned, and reflect the
// number of bytes available via the parameter.  Return NULL when unable to
// read (either EOF or other read error).
//
// On entry "n" is the number of bytes wanted (must be non-zero);  on return
// it may have been lowered to what is actually available in the buffer.
// A new record (of up to m_BufferSize bytes) is read from the stream only
// when the buffer position is 0;  a short record gets padded with zeros.
// The buffer position is then advanced by ALIGN_SIZE(n), wrapping around to
// 0 at the end of the record.
const char* CTar::x_ReadArchive(size_t& n)
{
    _ASSERT(!OFFSET_OF(m_BufferPos) && m_BufferPos < m_BufferSize);
    _ASSERT(!OFFSET_OF(m_StreamPos));
    _ASSERT(n != 0);
    size_t nread;
    if (!m_BufferPos) {
        // The buffer is empty:  (try to) fill it up with an entire record
        nread = 0;
        do {
            // xread:  > 0 bytes read;  0 = EOF;  < 0 = error
            streamsize xread;
            IOS_BASE::iostate iostate = m_Stream.rdstate();
            if (!iostate) { // NB: good()
#ifdef NCBI_COMPILER_MIPSPRO
                try {
                    // Work around a bug in MIPSPro 7.3's streambuf::xsgetn()
                    CNcbiIstream* is = dynamic_cast<CNcbiIstream*>(&m_Stream);
                    _ASSERT(is);
                    is->read (m_Buffer + nread,
                              (streamsize)(m_BufferSize - nread));
                    xread = is->gcount();
                    if (xread > 0) {
                        is->clear();
                    }
                } catch (IOS_BASE::failure&) {
                    xread = m_Stream.rdstate() & NcbiEofbit ? 0 : -1;
                }
#else
                try {
                    // Read straight from the stream buffer
                    xread = m_Stream.rdbuf()->
                        sgetn(m_Buffer + nread,
                              (streamsize)(m_BufferSize - nread));
#  ifdef NCBI_COMPILER_WORKSHOP
                    if (xread < 0) {
                        xread = 0; // NB: WS6 is known to return -1 :-/
                    }
#  endif //NCBI_COMPILER_WORKSHOP
                } catch (IOS_BASE::failure&) {
                    xread = -1;
                }
#endif //NCBI_COMPILER_MIPSPRO
            } else {
                // Not good():  EOF only if the EOF bit is the sole bit set
                xread = iostate == NcbiEofbit ? 0 : -1;
            }
            if (xread <= 0) {
                if (nread && (m_Flags & fDumpEntryHeaders)) {
                    TAR_POST(57, xread ? Error : Warning,
                             "Short read (" + NStr::NumericToString(nread)
                             + (xread ? ")" : "): EOF"));
                }
                s_SetStateSafe(m_Stream, xread < 0 ? NcbiBadbit : NcbiEofbit);
                if (nread) {
                    // Use whatever has been read so far (a short record)
                    break;
                }
                return 0;
            }
            nread += (size_t) xread;
        } while (nread < m_BufferSize);
        // Zero out the unread remainder of the record (if any)
        memset(m_Buffer + nread, 0, m_BufferSize - nread);
    } else {
        // Serve from what remains in the buffer of the current record
        nread = m_BufferSize - m_BufferPos;
    }
    if (n > nread) {
        n = nread;
    }
    size_t xpos = m_BufferPos;
    m_BufferPos += ALIGN_SIZE(n);
    _ASSERT(xpos < m_BufferPos && m_BufferPos <= m_BufferSize);
    if (m_BufferPos == m_BufferSize) {
        // The record has been used up entirely
        m_BufferPos = 0;
        if (!m_FileStream && (m_Flags & fStreamPipeThrough)) {
            // Pipe-through:  write the whole record back out.  Since
            // x_WriteArchive() resets m_ZeroBlockCount and advances
            // m_StreamPos, both get restored to their values from before
            size_t zbc = m_ZeroBlockCount;
            x_WriteArchive(m_BufferSize);
            m_StreamPos -= m_BufferSize;
            _ASSERT(m_BufferPos == 0);
            m_ZeroBlockCount = zbc;
        }
    }
    _ASSERT(!OFFSET_OF(m_BufferPos) && m_BufferPos < m_BufferSize);
    return m_Buffer + xpos;
}
1755
1756
// All partial internal (i.e. in-buffer) block writes are _not_ block-aligned;
// but all external writes (i.e. when "src" is provided) _are_ block-aligned.
//
// Write "nwrite" bytes to the archive via the record buffer, which gets
// output to the stream each time it fills up (at m_BufferSize bytes).
// "src" is one of the following:
//   - a pointer to the data to copy into the buffer (each copied chunk is
//     then zero-padded up to ALIGN_SIZE() of its size);
//   - 0 if the data are already in the buffer at the current position (only
//     the position is then advanced);
//   - (const char*)(-1L), same as 0 but in addition a write error gets posted
//     with TAR_POST() rather than thrown with TAR_THROW().
// On a write error the archive is marked bad, and the bad bit is set in the
// stream.
void CTar::x_WriteArchive(size_t nwrite, const char* src)
{
    if (!nwrite || m_Bad) {
        return;
    }
    m_Modified = true;
    m_ZeroBlockCount = 0;
    do {
        _ASSERT(m_BufferPos < m_BufferSize);
        // How much of the data can go into the buffer in this pass
        size_t avail = m_BufferSize - m_BufferPos;
        if (avail > nwrite) {
            avail = nwrite;
        }
        size_t advance = avail;
        if (src && src != (const char*)(-1L)) {
            // External data:  copy in, and zero-pad to ALIGN_SIZE(avail)
            memcpy(m_Buffer + m_BufferPos, src, avail);
            size_t pad = ALIGN_SIZE(avail) - avail;
            memset(m_Buffer + m_BufferPos + avail, 0, pad);
            advance += pad;
            src += avail;
        }
        m_BufferPos += advance;
        _ASSERT(m_BufferPos <= m_BufferSize);
        if (m_BufferPos == m_BufferSize) {
            // The record is complete:  output it (may take several writes)
            size_t nwritten = 0;
            do {
                int x_errno;
                streamsize xwritten;
                IOS_BASE::iostate iostate = m_Stream.rdstate();
                if (!(iostate & ~NcbiEofbit)) { // NB: good() OR eof()
                    try {
                        xwritten = m_Stream.rdbuf()
                            ->sputn(m_Buffer + nwritten,
                                    (streamsize)(m_BufferSize - nwritten));
                    } catch (IOS_BASE::failure&) {
                        xwritten = -1;
                    }
                    if (xwritten > 0) {
                        if (iostate) {
                            // Clear the EOF bit after a successful write
                            m_Stream.clear();
                        }
                        x_errno = 0;
                    } else {
                        x_errno = errno;
                    }
                } else {
                    // The stream had already been bad:  no errno to report
                    xwritten = -1;
                    x_errno = 0;
                }
                if (xwritten <= 0) {
                    m_Bad = true;
                    s_SetStateSafe(m_Stream, NcbiBadbit);
                    if (src != (const char*)(-1L)) {
                        TAR_THROW(this, eWrite,
                                  "Archive write failed" +s_OSReason(x_errno));
                    }
                    // No-throw mode:  post the error, and bail out
                    TAR_POST(84, Error,
                             "Archive write failed" + s_OSReason(x_errno));
                    return;
                }
                nwritten += (size_t) xwritten;
            } while (nwritten < m_BufferSize);
            m_BufferPos = 0;
        }
        m_StreamPos += advance;
        nwrite -= avail;
    } while (nwrite);
    _ASSERT(m_BufferPos < m_BufferSize);
}
1828
1829
// PAX (Portable Archive Interchange) extraction support

// Bit flags for the pieces of extended numeric information that can be
// encountered in a PAX header (altogether they must fit in a perm mask)
enum EPAXBit {
    fPAXNone          = 0x00,
    fPAXSparseGNU_1_0 = 0x01,
    fPAXSparse        = 0x02,
    fPAXMtime         = 0x04,
    fPAXAtime         = 0x08,
    fPAXCtime         = 0x10,
    fPAXSize          = 0x20,
    fPAXUid           = 0x40,
    fPAXGid           = 0x80
};
using TPAXBits = unsigned int;  // Bitwise-OR of EPAXBit(s)
1845
1846
1847 // Parse "len" bytes of "str" as numeric "valp[.fraq]"
s_ParsePAXNumeric(Uint8 * valp,const char * str,size_t len,string * fraq,EPAXBit assign)1848 static bool s_ParsePAXNumeric(Uint8* valp, const char* str, size_t len,
1849 string* fraq, EPAXBit assign)
1850 {
1851 _ASSERT(valp && str[len] == '\n');
1852 if (!isdigit((unsigned char)(*str))) {
1853 return false;
1854 }
1855 const char* p = (const char*) memchr(str, '.', len);
1856 if (!p) {
1857 p = str + len;
1858 } else if (fraq == (string*)(-1L)) {
1859 // no decimal point allowed
1860 return false;
1861 }
1862 Uint8 val;
1863 try {
1864 val = NStr::StringToUInt8(CTempString(str, (size_t)(p - str)));
1865 } catch (...) {
1866 return false;
1867 }
1868 if (*p == '.' && ++p != str + len) {
1869 len -= (size_t)(p - str);
1870 _ASSERT(len);
1871 for (size_t n = 0; n < len; ++n) {
1872 if (!isdigit((unsigned char) p[n])) {
1873 return false;
1874 }
1875 }
1876 if (assign && fraq) {
1877 fraq->assign(p, len);
1878 }
1879 } // else (*p == '\n' || !*p)
1880 if (assign) {
1881 *valp = val;
1882 }
1883 return true;
1884 }
1885
1886
// Return true if each of the "len" characters at "str" is a lowercase letter
// (vacuously true when "len" is 0);  return false otherwise.
static bool s_AllLowerCase(const char* str, size_t len)
{
    for (const char* p = str; len; --len, ++p) {
        const unsigned char ch = (unsigned char)(*p);
        if (!(isalpha(ch) && islower(ch))) {
            return false;
        }
    }
    return true;
}
1896
1897
1898 // Raise 10 to the power of n
ipow10(unsigned int n)1899 static Uint8 ipow10(unsigned int n)
1900 {
1901 _ASSERT(n < 10);
1902 // for small n this is the fastest
1903 return n ? 10 * ipow10(n - 1) : 1;
1904 }
1905
1906
1907 // NB: assumes fraq is all digits
s_FraqToNanosec(const string & fraq)1908 static long s_FraqToNanosec(const string& fraq)
1909 {
1910 size_t len = fraq.size();
1911 if (!len)
1912 return 0;
1913 long result;
1914 if (len < 10) {
1915 Uint8 temp = NStr::StringToUInt8(fraq,
1916 NStr::fConvErr_NoThrow |
1917 NStr::fConvErr_NoErrMessage);
1918 result = (long)(temp * ipow10((unsigned int)(9 - len)));
1919 } else {
1920 Uint8 temp = NStr::StringToUInt8(CTempString(fraq, 0, 10),
1921 NStr::fConvErr_NoThrow |
1922 NStr::fConvErr_NoErrMessage);
1923 result = (long)((temp + 5) / 10);
1924 }
1925 _ASSERT(0L <= result && result < 1000000000L);
1926 return result;
1927 }
1928
1929
// Parse the contents of a PAX extended header, "data", which is a sequence
// of records of the form "<len> <key>=<value>\n", where <len> is the decimal
// length of the entire record (including <len> itself and the newline).
//
// Recognized keys are looked up in the "parser" table below:  numeric values
// are checked (and stored), strings are stored (or just skipped);  and a
// warning is posted for an unrecognized key if it is all-lowercase.
//
// The results are stored in m_Current:  the names, times, size and ids in
// their respective fields, the real (expanded) sparse file size in m_Pos,
// and the mask of the numeric values that have been parsed (TPAXBits) in
// m_Stat.orig.st_mode.
//
// Return eFailure if a malformed record is encountered (nothing gets stored
// in m_Current in this case);  eContinue otherwise.
CTar::EStatus CTar::x_ParsePAXData(const string& data)
{
    Uint8 major = 0, minor = 0, size = 0, sparse = 0, uid = 0, gid = 0;
    Uint8 mtime = 0, atime = 0, ctime = 0, dummy = 0;
    string mtime_fraq, atime_fraq, ctime_fraq;
    string path, linkpath, name, uname, gname;
    // Special marker (never dereferenced):  "no decimal point allowed"
    string* nodot = (string*)(-1L);
    const struct SPAXParseTable {
        const char* key;
        Uint8* val; // non-null for numeric, else do as string
        string* str; // string or fraction part (if not -1)
        EPAXBit bit; // for numerics only
    } parser[] = {
        { "mtime", &mtime, &mtime_fraq, fPAXMtime }, // num w/fraq: assign
        { "atime", &atime, &atime_fraq, fPAXAtime },
        { "ctime", &ctime, &ctime_fraq, fPAXCtime },
        /*{ "dummy", &dummy, 0, fPAXSome },*/// num w/fraq: asg int
        /*{ "dummy", &dummy, &fraq or 0, fPAXNone },*/// num w/fraq: ck.only
        { "size", &size, nodot, fPAXSize }, // number: assign
        { "uid", &uid, nodot, fPAXUid },
        { "gid", &gid, nodot, fPAXGid },
        /*{ "dummy", &dummy, nodot, fPAXNone },*/// number: ck.only
        { "path", 0, &path, fPAXNone }, // string: assign
        { "linkpath", 0, &linkpath, fPAXNone },
        { "uname", 0, &uname, fPAXNone },
        { "gname", 0, &gname, fPAXNone },
        { "comment", 0, 0, fPAXNone }, // string: ck.only
        { "charset", 0, 0, fPAXNone },
        // GNU sparse extensions (NB: .size and .realsize don't go together)
        { "GNU.sparse.realsize", &sparse, nodot, fPAXSparse },
        { "GNU.sparse.major", &major, nodot, fPAXSparse },
        { "GNU.sparse.minor", &minor, nodot, fPAXSparse },
        { "GNU.sparse.size", &dummy, nodot, fPAXSparse },
        { "GNU.sparse.name", 0, &name, fPAXNone },
        // Other
        { "SCHILY.realsize", &sparse, nodot, fPAXSparse }
    };
    const char* s = data.c_str();  // start of the current record
    TPAXBits parsed = fPAXNone;    // which numerics have been parsed okay
    size_t l = data.size();        // how much of the data is still left

    _ASSERT(l && l == strlen(s));
    do {
        unsigned long len;  // record length, as stated in the record
        size_t klen, vlen;  // key and value lengths
        const char* e;      // end of the record ('\n', or end of the data)
        char *k, *v;        // key and value

        if (!(e = (char*) memchr(s, '\n', l))) {
            e = s + l;
        }
        errno = 0;
        // A well-formed record:
        // - begins with a non-zero decimal length (parsed w/o an error);
        // - has its last byte (per the length) located exactly at "e";
        // - has a blank or a tab separating the length and the key;
        // - has '=' somewhere between the separator and "e";
        // - has a non-empty key, free of blanks and tabs;
        // - has a non-empty value.
        // NB: "k" gets advanced past the separator, and "v" past the '='.
        if (!isdigit((unsigned char)(*s)) || !(len = strtoul(s, &k, 10))
            || errno || s + len - 1 != e || (*k != ' ' && *k != '\t')
            || !(v = (char*) memchr(k, '=', (size_t)(e - k))) // NB: k < e
            || !(klen = (size_t)(v++ - ++k))
            || memchr(k, ' ', klen) || memchr(k, '\t', klen)
            || !(vlen = (size_t)(e - v))) {
            TAR_POST(74, Error,
                     "Skipping malformed PAX data");
            return eFailure;
        }
        bool done = false;
        for (size_t n = 0; n < sizeof(parser) / sizeof(parser[0]); ++n) {
            if (strlen(parser[n].key) == klen
                && memcmp(parser[n].key, k, klen) == 0) {
                if (!parser[n].val) {
                    // String value:  store only if there is where to
                    if (parser[n].str) {
                        parser[n].str->assign(v, vlen);
                    }
                } else if (!s_ParsePAXNumeric(parser[n].val, v, vlen,
                                              parser[n].str, parser[n].bit)) {
                    TAR_POST(75, Error,
                             "Ignoring bad numeric \""
                             + CTempString(v, vlen)
                             + "\" in PAX value \""
                             + CTempString(k, klen) + '"');
                } else {
                    parsed |= parser[n].bit;
                }
                done = true;
                break;
            }
        }
        if (!done && s_AllLowerCase(k, klen)/*&& !memchr(k, '.', klen)*/) {
            TAR_POST(76, Warning,
                     "Ignoring unrecognized PAX value \""
                     + CTempString(k, klen) + '"');
        }
        if (!*e) {
            // End of the data (the last record had no newline)
            break;
        }
        // Proceed to the next record
        l -= len;
        s = ++e;
        _ASSERT(l == strlen(s));
    } while (l);

    // Sort out sparse file sizes:  "sparse" is the real size as given by
    // either GNU.sparse.realsize or SCHILY.realsize;  "dummy" is the one
    // given by GNU.sparse.size
    if ((parsed & fPAXSparse) && (sparse | dummy)) {
        if (sparse && dummy && sparse != dummy) {
            TAR_POST(95, Warning,
                     "Ignoring PAX GNU sparse file size "
                     + NStr::NumericToString(dummy)
                     + " when real size "
                     + NStr::NumericToString(sparse)
                     + " is also present");
        } else if (!dummy && major == 1 && minor == 0) {
            // GNU sparse format version 1.0
            if (!(m_Flags & fSparseUnsupported)) {
                if (!name.empty()) {
                    if (!path.empty()) {
                        TAR_POST(96, Warning,
                                 "Replacing PAX file name \"" + path
                                 + "\" with GNU sparse file name \"" + name
                                 + '"');
                    }
                    path.swap(name);
                }
                parsed |= fPAXSparseGNU_1_0;
            }
            _ASSERT(sparse);
        } else if (!sparse) {
            sparse = dummy;
        }
        // The real size takes over whatever "size" might have been given
        size = sparse;
    }

    // Store the results in the current entry
    m_Current.m_Name.swap(path);
    m_Current.m_LinkName.swap(linkpath);
    m_Current.m_UserName.swap(uname);
    m_Current.m_GroupName.swap(gname);
    m_Current.m_Stat.mtime_nsec = s_FraqToNanosec(mtime_fraq);
    m_Current.m_Stat.atime_nsec = s_FraqToNanosec(atime_fraq);
    m_Current.m_Stat.ctime_nsec = s_FraqToNanosec(ctime_fraq);
    m_Current.m_Stat.orig.st_mtime = (time_t) mtime;
    m_Current.m_Stat.orig.st_atime = (time_t) atime;
    m_Current.m_Stat.orig.st_ctime = (time_t) ctime;
    m_Current.m_Stat.orig.st_size = (off_t) size;
    m_Current.m_Stat.orig.st_uid = (uid_t) uid;
    m_Current.m_Stat.orig.st_gid = (gid_t) gid;
    m_Current.m_Pos = sparse; // real (expanded) file size

    // NB: the mode field is used to carry the mask of what has been parsed
    m_Current.m_Stat.orig.st_mode = (mode_t) parsed;
    return eContinue;
}
2073
2074
s_Dump(const string & file,Uint8 pos,size_t recsize,const string & entryname,const SHeader * h,ETar_Format fmt,Uint8 datasize)2075 static void s_Dump(const string& file, Uint8 pos, size_t recsize,
2076 const string& entryname, const SHeader* h,
2077 ETar_Format fmt, Uint8 datasize)
2078 {
2079 _ASSERT(!OFFSET_OF(pos));
2080 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2081 Uint8 blocks = BLOCK_OF(ALIGN_SIZE(datasize));
2082 ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2083 + s_DumpHeader(h, fmt) + '\n'
2084 + (blocks
2085 && (h->typeflag[0] != 'S'
2086 || fmt != eTar_OldGNU
2087 || !*h->gnu.contind)
2088 ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2089 : kEmptyStr));
2090 SetDiagPostLevel(level);
2091 }
2092
2093
s_DumpSparse(const string & file,Uint8 pos,size_t recsize,const string & entryname,const SHeader * h,const char * contind,Uint8 datasize)2094 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2095 const string& entryname, const SHeader* h,
2096 const char* contind, Uint8 datasize)
2097 {
2098 _ASSERT(!OFFSET_OF(pos));
2099 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2100 Uint8 blocks = !*contind ? BLOCK_OF(ALIGN_SIZE(datasize)) : 0;
2101 ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2102 + "GNU sparse file map header (cont'd):\n"
2103 + s_DumpSparseMap(h, (const char*) h, contind) + '\n'
2104 + (blocks
2105 ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2106 : kEmptyStr));
2107 SetDiagPostLevel(level);
2108 }
2109
2110
s_DumpSparse(const string & file,Uint8 pos,size_t recsize,const string & entryname,const vector<pair<Uint8,Uint8>> & bmap)2111 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2112 const string& entryname,
2113 const vector< pair<Uint8, Uint8> >& bmap)
2114 {
2115 _ASSERT(!OFFSET_OF(pos));
2116 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2117 ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2118 + "PAX GNU/1.0 sparse file map data:\n"
2119 + s_DumpSparseMap(bmap) + '\n');
2120 SetDiagPostLevel(level);
2121 }
2122
2123
s_DumpZero(const string & file,Uint8 pos,size_t recsize,size_t zeroblock_count,bool eot=false)2124 static void s_DumpZero(const string& file, Uint8 pos, size_t recsize,
2125 size_t zeroblock_count, bool eot = false)
2126 {
2127 _ASSERT(!OFFSET_OF(pos));
2128 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2129 ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, kEmptyStr)
2130 + (zeroblock_count
2131 ? "Zero block " + NStr::NumericToString(zeroblock_count)
2132 : (eot ? "End-Of-Tape" : "End-Of-File")) + '\n');
2133 SetDiagPostLevel(level);
2134 }
2135
2136
// Return true if "c" is an octal digit, '0' through '7'
static inline bool s_IsOctal(char c)
{
    if (c < '0') {
        return false;
    }
    return c <= '7';
}
2141
2142
x_ReadEntryInfo(bool dump,bool pax)2143 CTar::EStatus CTar::x_ReadEntryInfo(bool dump, bool pax)
2144 {
2145 // Read block
2146 const TBlock* block;
2147 size_t nread = sizeof(block->buffer);
2148 _ASSERT(sizeof(*block) == BLOCK_SIZE/*== sizeof(block->buffer)*/);
2149 if (!(block = (const TBlock*) x_ReadArchive(nread))) {
2150 return eEOF;
2151 }
2152 if (nread != BLOCK_SIZE) {
2153 TAR_THROW(this, eRead,
2154 "Unexpected EOF in archive");
2155 }
2156 const SHeader* h = &block->header;
2157
2158 // Check header format
2159 ETar_Format fmt = eTar_Unknown;
2160 if (memcmp(h->magic, "ustar", 6) == 0) {
2161 if ((h->star.prefix[sizeof(h->star.prefix) - 1] == '\0'
2162 && s_IsOctal(h->star.atime[0]) && h->star.atime[0] == ' '
2163 && s_IsOctal(h->star.ctime[0]) && h->star.ctime[0] == ' ')
2164 || strcmp(block->buffer + BLOCK_SIZE - 4, "tar") == 0) {
2165 fmt = eTar_Star;
2166 } else {
2167 fmt = pax ? eTar_Posix : eTar_Ustar;
2168 }
2169 } else if (memcmp(h->magic, "ustar ", 8) == 0) {
2170 // Here the magic is protruded into the adjacent version field
2171 fmt = eTar_OldGNU;
2172 } else if (memcmp(h->magic, "\0\0\0\0\0", 6) == 0) {
2173 // We'll use this also to speedup corruption checks w/checksum
2174 fmt = eTar_Legacy;
2175 } else {
2176 TAR_THROW_EX(this, eUnsupportedTarFormat,
2177 "Unrecognized header format", h, fmt);
2178 }
2179
2180 Uint8 val;
2181 // Get checksum from header
2182 if (!s_OctalToNum(val, h->checksum, sizeof(h->checksum))) {
2183 // We must allow all zero bytes here in case of pad/zero blocks
2184 bool corrupt;
2185 if (fmt == eTar_Legacy) {
2186 corrupt = false;
2187 for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2188 if (block->buffer[i]) {
2189 corrupt = true;
2190 break;
2191 }
2192 }
2193 } else {
2194 corrupt = true;
2195 }
2196 if (corrupt) {
2197 TAR_THROW_EX(this, eUnsupportedTarFormat,
2198 "Bad checksum", h, fmt);
2199 }
2200 m_StreamPos += BLOCK_SIZE; // NB: nread
2201 return eZeroBlock;
2202 }
2203 int checksum = int(val);
2204
2205 // Compute both signed and unsigned checksums (for compatibility)
2206 int ssum = 0;
2207 unsigned int usum = 0;
2208 const char* p = block->buffer;
2209 for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2210 ssum += *p;
2211 usum += (unsigned char)(*p);
2212 p++;
2213 }
2214 p = h->checksum;
2215 for (size_t j = 0; j < sizeof(h->checksum); ++j) {
2216 ssum -= *p - ' ';
2217 usum -= (unsigned char)(*p) - ' ';
2218 p++;
2219 }
2220
2221 // Compare checksum(s)
2222 if (checksum != ssum && (unsigned int) checksum != usum) {
2223 string message = "Header checksum failed";
2224 if (m_Flags & fDumpEntryHeaders) {
2225 message += ", expected ";
2226 if (usum != (unsigned int) ssum) {
2227 message += "either ";
2228 }
2229 if (usum > 7) {
2230 message += "0";
2231 }
2232 message += NStr::NumericToString(usum, 0, 8);
2233 if (usum != (unsigned int) ssum) {
2234 message += " or ";
2235 if ((unsigned int) ssum > 7) {
2236 message += "0";
2237 }
2238 message += NStr::NumericToString((unsigned int) ssum, 0, 8);
2239 }
2240 }
2241 TAR_THROW_EX(this, eChecksum,
2242 message, h, fmt);
2243 }
2244
2245 // Set all info members now (thus, validating the header block)
2246
2247 m_Current.m_HeaderSize = BLOCK_SIZE;
2248 unsigned char tflag = toupper((unsigned char) h->typeflag[0]);
2249
2250 // Name
2251 if (m_Current.GetName().empty()) {
2252 if ((fmt & eTar_Ustar) && h->prefix[0] && tflag != 'X') {
2253 const char* prefix = fmt != eTar_Star ? h->prefix : h->star.prefix;
2254 size_t pfxlen = fmt != eTar_Star
2255 ? s_Length(h->prefix, sizeof(h->prefix))
2256 : s_Length(h->star.prefix, h->typeflag[0] == 'S'
2257 ? 107 : sizeof(h->star.prefix));
2258 m_Current.m_Name
2259 = CDirEntry::ConcatPath(string(prefix, pfxlen),
2260 string(h->name,
2261 s_Length(h->name,
2262 sizeof(h->name))));
2263 } else {
2264 // Name prefix cannot be used
2265 m_Current.m_Name.assign(h->name,
2266 s_Length(h->name, sizeof(h->name)));
2267 }
2268 }
2269
2270 // Mode
2271 if (!s_OctalToNum(val, h->mode, sizeof(h->mode))
2272 && (val || h->typeflag[0] != 'V')) {
2273 TAR_THROW_EX(this, eUnsupportedTarFormat,
2274 "Bad entry mode", h, fmt);
2275 }
2276 m_Current.m_Stat.orig.st_mode = (mode_t) val;
2277
2278 // User Id
2279 if (!s_DecodeUint8(val, h->uid, sizeof(h->uid))
2280 && (val || h->typeflag[0] != 'V')) {
2281 TAR_THROW_EX(this, eUnsupportedTarFormat,
2282 "Bad user ID", h, fmt);
2283 }
2284 m_Current.m_Stat.orig.st_uid = (uid_t) val;
2285
2286 // Group Id
2287 if (!s_DecodeUint8(val, h->gid, sizeof(h->gid))
2288 && (val || h->typeflag[0] != 'V')) {
2289 TAR_THROW_EX(this, eUnsupportedTarFormat,
2290 "Bad group ID", h, fmt);
2291 }
2292 m_Current.m_Stat.orig.st_gid = (gid_t) val;
2293
2294 // Size
2295 if (!s_DecodeUint8(val, h->size, sizeof(h->size))
2296 && (val || h->typeflag[0] != 'V')) {
2297 TAR_THROW_EX(this, eUnsupportedTarFormat,
2298 "Bad entry size", h, fmt);
2299 }
2300 m_Current.m_Stat.orig.st_size = (off_t) val;
2301 if (m_Current.GetSize() != val) {
2302 ERR_POST_ONCE(Critical << "CAUTION:"
2303 " ***"
2304 " This run-time may not support large TAR entries"
2305 " (have you built it --with-lfs?)"
2306 " ***");
2307 }
2308
2309 // Modification time
2310 if (!s_OctalToNum(val, h->mtime, sizeof(h->mtime))) {
2311 TAR_THROW_EX(this, eUnsupportedTarFormat,
2312 "Bad modification time", h, fmt);
2313 }
2314 m_Current.m_Stat.orig.st_mtime = (time_t) val;
2315
2316 if (fmt == eTar_OldGNU || (fmt & eTar_Ustar)) {
2317 // User name
2318 m_Current.m_UserName.assign(h->uname,
2319 s_Length(h->uname, sizeof(h->uname)));
2320 // Group name
2321 m_Current.m_GroupName.assign(h->gname,
2322 s_Length(h->gname,sizeof(h->gname)));
2323 }
2324
2325 if (fmt == eTar_OldGNU || fmt == eTar_Star) {
2326 // GNU times may not be valid so checks are relaxed
2327 const char* time;
2328 size_t tlen;
2329 time = fmt == eTar_Star ? h->star.atime : h->gnu.atime;
2330 tlen = fmt == eTar_Star ? sizeof(h->star.atime) : sizeof(h->gnu.atime);
2331 if (!s_OctalToNum(val, time, tlen)) {
2332 if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2333 TAR_THROW_EX(this, eUnsupportedTarFormat,
2334 "Bad last access time", h, fmt);
2335 }
2336 } else {
2337 m_Current.m_Stat.orig.st_atime = (time_t) val;
2338 }
2339 time = fmt == eTar_Star ? h->star.ctime : h->gnu.ctime;
2340 tlen = fmt == eTar_Star ? sizeof(h->star.ctime) : sizeof(h->gnu.ctime);
2341 if (!s_OctalToNum(val, time, tlen)) {
2342 if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2343 TAR_THROW_EX(this, eUnsupportedTarFormat,
2344 "Bad creation time", h, fmt);
2345 }
2346 } else {
2347 m_Current.m_Stat.orig.st_ctime = (time_t) val;
2348 }
2349 }
2350
2351 // Entry type
2352 switch (h->typeflag[0]) {
2353 case '\0':
2354 case '0':
2355 if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
2356 size_t namelen = s_Length(h->name, sizeof(h->name));
2357 if (namelen && h->name[namelen - 1] == '/') {
2358 m_Current.m_Type = CTarEntryInfo::eDir;
2359 m_Current.m_Stat.orig.st_size = 0;
2360 break;
2361 }
2362 }
2363 m_Current.m_Type = CTarEntryInfo::eFile;
2364 break;
2365 case '\1':
2366 case '\2':
2367 case '1':
2368 case '2':
2369 m_Current.m_Type = (h->typeflag[0] == '\2' || h->typeflag[0] == '2'
2370 ? CTarEntryInfo::eSymLink
2371 : CTarEntryInfo::eHardLink);
2372 m_Current.m_LinkName.assign(h->linkname,
2373 s_Length(h->linkname,sizeof(h->linkname)));
2374 if (m_Current.GetSize()) {
2375 if (m_Current.GetType() == CTarEntryInfo::eSymLink) {
2376 // Mandatory to ignore
2377 m_Current.m_Stat.orig.st_size = 0;
2378 } else if (fmt != eTar_Posix) {
2379 TAR_POST(77, Warning,
2380 "Non-zero hard-link size ("
2381 + NStr::NumericToString(m_Current.GetSize())
2382 + ") is ignored (non-PAX)");
2383 m_Current.m_Stat.orig.st_size = 0;
2384 } // else POSIX (re-)allowed hard links to be followed by file data
2385 }
2386 break;
2387 case '3':
2388 case '4':
2389 m_Current.m_Type = (h->typeflag[0] == '3'
2390 ? CTarEntryInfo::eCharDev
2391 : CTarEntryInfo::eBlockDev);
2392 if (!s_OctalToNum(val, h->devminor, sizeof(h->devminor))) {
2393 TAR_THROW_EX(this, eUnsupportedTarFormat,
2394 "Bad device minor number", h, fmt);
2395 }
2396 usum = (unsigned int) val; // set aside
2397 if (!s_OctalToNum(val, h->devmajor, sizeof(h->devmajor))) {
2398 TAR_THROW_EX(this, eUnsupportedTarFormat,
2399 "Bad device major number", h, fmt);
2400 }
2401 #ifdef makedev
2402 m_Current.m_Stat.orig.st_rdev = makedev((unsigned int) val, usum);
2403 #else
2404 if (sizeof(int) >= 4 && sizeof(m_Current.m_Stat.orig.st_rdev) >= 4) {
2405 *((unsigned int*) &m_Current.m_Stat.orig.st_rdev) =
2406 (unsigned int)((val << 16) | usum);
2407 }
2408 #endif //makedev
2409 m_Current.m_Stat.orig.st_size = 0;
2410 break;
2411 case '5':
2412 m_Current.m_Type = CTarEntryInfo::eDir;
2413 m_Current.m_Stat.orig.st_size = 0;
2414 break;
2415 case '6':
2416 m_Current.m_Type = CTarEntryInfo::ePipe;
2417 m_Current.m_Stat.orig.st_size = 0;
2418 break;
2419 case '7':
2420 ERR_POST_ONCE(Critical << "CAUTION:"
2421 " *** Contiguous TAR entries processed as regular files"
2422 " ***");
2423 m_Current.m_Type = CTarEntryInfo::eFile;
2424 break;
2425 case 'K':
2426 case 'L':
2427 case 'S':
2428 case 'x':
2429 case 'X':
2430 if ((tflag == 'X' && (fmt & eTar_Ustar)) ||
2431 (tflag != 'X' && fmt == eTar_OldGNU) ||
2432 (tflag == 'S' && fmt == eTar_Star)) {
2433 // Assign actual type
2434 switch (tflag) {
2435 case 'K':
2436 m_Current.m_Type = CTarEntryInfo::eGNULongLink;
2437 break;
2438 case 'L':
2439 m_Current.m_Type = CTarEntryInfo::eGNULongName;
2440 break;
2441 case 'S':
2442 m_Current.m_Type = CTarEntryInfo::eSparseFile;
2443 break;
2444 case 'X':
2445 if (pax) {
2446 TAR_POST(78, Warning,
2447 "Repetitious PAX headers,"
2448 " archive may be corrupt");
2449 }
2450 fmt = eTar_Posix; // upgrade
2451 m_Current.m_Type = CTarEntryInfo::ePAXHeader;
2452 break;
2453 default:
2454 _TROUBLE;
2455 break;
2456 }
2457
2458 // Dump header
2459 size_t hsize = (size_t) m_Current.GetSize();
2460 if (dump) {
2461 s_Dump(m_FileName, m_StreamPos, m_BufferSize,
2462 m_Current.GetName(), h, fmt, hsize);
2463 }
2464 m_StreamPos += BLOCK_SIZE; // NB: nread
2465
2466 if (m_Current.m_Type == CTarEntryInfo::eSparseFile) {
2467 const char* realsize = fmt != eTar_Star
2468 ? h->gnu.realsize : h->star.prefix + 107;
2469 size_t realsizelen = fmt != eTar_Star
2470 ? sizeof(h->gnu.realsize) : 12;
2471 // Real file size (if present)
2472 if (!s_DecodeUint8(val, realsize, realsizelen)) {
2473 val = 0;
2474 }
2475 if (fmt == eTar_Star) {
2476 // Archive file size includes sparse map, and already valid
2477 m_Current.m_Pos = val; // NB: real (expanded) file size
2478 return eSuccess;
2479 }
2480 // Skip all GNU sparse file headers (they are not counted
2481 // towards the sparse file size in the archive ("hsize")!)
2482 const char* contind = h->gnu.contind;
2483 while (*contind) {
2484 _ASSERT(nread == BLOCK_SIZE);
2485 if (!(block = (const TBlock*) x_ReadArchive(nread))
2486 || nread != BLOCK_SIZE) {
2487 TAR_THROW(this, eRead,
2488 "Unexpected EOF in GNU sparse file map"
2489 " extended header");
2490 }
2491 h = &block->header;
2492 contind = block->buffer + (24 * 21)/*504*/;
2493 if (dump) {
2494 s_DumpSparse(m_FileName, m_StreamPos, m_BufferSize,
2495 m_Current.GetName(), h, contind, hsize);
2496 }
2497 m_Current.m_HeaderSize += BLOCK_SIZE;
2498 m_StreamPos += BLOCK_SIZE; // NB: nread
2499 }
2500 m_Current.m_Pos = val; // NB: real (expanded) file size
2501 return eSuccess;
2502 }
2503
2504 // Read in the extended header information
2505 val = ALIGN_SIZE(hsize);
2506 string data;
2507 while (hsize) {
2508 nread = hsize;
2509 const char* xbuf = x_ReadArchive(nread);
2510 if (!xbuf) {
2511 TAR_THROW(this, eRead,
2512 string("Unexpected EOF in ") +
2513 (m_Current.GetType()
2514 == CTarEntryInfo::ePAXHeader
2515 ? "PAX data" :
2516 m_Current.GetType()
2517 == CTarEntryInfo::eGNULongName
2518 ? "long name"
2519 : "long link"));
2520 }
2521 _ASSERT(nread);
2522 data.append(xbuf, nread);
2523 hsize -= nread;
2524 m_StreamPos += ALIGN_SIZE(nread);
2525 }
2526 if (m_Current.GetType() != CTarEntryInfo::ePAXHeader) {
2527 // Make sure there's no embedded '\0'(s)
2528 data.resize(strlen(data.c_str()));
2529 }
2530 if (dump) {
2531 EDiagSev level = SetDiagPostLevel(eDiag_Info);
2532 ERR_POST(Info << '\n' + s_PositionAsString(m_FileName,
2533 m_StreamPos - val,
2534 m_BufferSize,
2535 m_Current.GetName())
2536 + (m_Current.GetType() == CTarEntryInfo::ePAXHeader
2537 ? "PAX data:\n" :
2538 m_Current.GetType() == CTarEntryInfo::eGNULongName
2539 ? "Long name: \""
2540 : "Long link name: \"")
2541 + NStr::PrintableString(data,
2542 m_Current.GetType()
2543 == CTarEntryInfo::ePAXHeader
2544 ? NStr::fNewLine_Passthru
2545 : NStr::fNewLine_Quote)
2546 + (m_Current.GetType() == CTarEntryInfo::ePAXHeader
2547 ? data.size() && data[data.size() - 1] == '\n'
2548 ? kEmptyStr : "\n" : "\"\n"));
2549 SetDiagPostLevel(level);
2550 }
2551 // Reset size because the data blocks have been all read
2552 m_Current.m_HeaderSize += val;
2553 m_Current.m_Stat.orig.st_size = 0;
2554 if (!val || !data.size()) {
2555 TAR_POST(79, Error,
2556 "Skipping " + string(val ? "empty" : "zero-sized")
2557 + " extended header data");
2558 return eFailure;
2559 }
2560 switch (m_Current.GetType()) {
2561 case CTarEntryInfo::ePAXHeader:
2562 return x_ParsePAXData(data);
2563 case CTarEntryInfo::eGNULongName:
2564 m_Current.m_Name.swap(data);
2565 return eContinue;
2566 case CTarEntryInfo::eGNULongLink:
2567 m_Current.m_LinkName.swap(data);
2568 return eContinue;
2569 default:
2570 _TROUBLE;
2571 break;
2572 }
2573 return eFailure;
2574 }
2575 /*FALLTHRU*/
2576 case 'V':
2577 case 'I':
2578 if (h->typeflag[0] == 'V' || h->typeflag[0] == 'I') {
2579 // Safety for no data to actually follow
2580 m_Current.m_Stat.orig.st_size = 0;
2581 if (h->typeflag[0] == 'V') {
2582 m_Current.m_Type = CTarEntryInfo::eVolHeader;
2583 break;
2584 }
2585 }
2586 /*FALLTHRU*/
2587 default:
2588 m_Current.m_Type = CTarEntryInfo::eUnknown;
2589 break;
2590 }
2591
2592 if (dump) {
2593 s_Dump(m_FileName, m_StreamPos, m_BufferSize,
2594 m_Current.GetName(), h, fmt, m_Current.GetSize());
2595 }
2596 m_StreamPos += BLOCK_SIZE; // NB: nread
2597
2598 return eSuccess;
2599 }
2600
2601
sx_Signature(TBlock * block)2602 static inline void sx_Signature(TBlock* block)
2603 {
2604 _ASSERT(sizeof(block->header) + 4 < sizeof(block->buffer));
2605 memcpy(block->buffer + sizeof(*block) - 4, "NCBI", 4);
2606 }
2607
2608
x_WriteEntryInfo(const string & name)2609 void CTar::x_WriteEntryInfo(const string& name)
2610 {
2611 // Prepare block info
2612 TBlock block;
2613 _ASSERT(sizeof(block) == BLOCK_SIZE/*== sizeof(block.buffer)*/);
2614 memset(block.buffer, 0, sizeof(block.buffer));
2615 SHeader* h = &block.header;
2616
2617 // Name(s) ('\0'-terminated if fit entirely, otherwise not)
2618 if (!x_PackCurrentName(h, false)) {
2619 TAR_THROW(this, eNameTooLong,
2620 "Name '" + m_Current.GetName()
2621 + "' too long in entry '" + name + '\'');
2622 }
2623
2624 CTarEntryInfo::EType type = m_Current.GetType();
2625
2626 if (type == CTarEntryInfo::eSymLink && !x_PackCurrentName(h, true)) {
2627 TAR_THROW(this, eNameTooLong,
2628 "Link '" + m_Current.GetLinkName()
2629 + "' too long in entry '" + name + '\'');
2630 }
2631
2632 /* NOTE: Although some sources on the Internet indicate that all but size,
2633 * mtime, and version numeric fields are '\0'-terminated, we could not
2634 * confirm that with existing tar programs, all of which we saw using
2635 * either '\0' or ' '-terminated values in both size and mtime fields.
2636 * For the ustar archive we have found a document that definitively tells
2637 * that _all_ numeric fields are '\0'-terminated, and that they can keep
2638 * up to "sizeof(field)-1" octal digits. We follow it here.
2639 * However, GNU and ustar checksums seem to be different indeed, so we
2640 * don't use a trailing space for ustar, but for GNU only.
2641 */
2642
2643 // Mode
2644 if (!s_NumToOctal(m_Current.GetMode(), h->mode, sizeof(h->mode) - 1)) {
2645 TAR_THROW(this, eMemory,
2646 "Cannot store file mode");
2647 }
2648
2649 // Update format as we go
2650 ETar_Format fmt = eTar_Ustar;
2651 int ok;
2652
2653 // User ID
2654 ok = s_EncodeUint8(m_Current.GetUserId(), h->uid, sizeof(h->uid) - 1);
2655 if (!ok) {
2656 TAR_THROW(this, eMemory,
2657 "Cannot store user ID");
2658 }
2659 if (ok < 0) {
2660 fmt = eTar_OldGNU;
2661 }
2662
2663 // Group ID
2664 ok = s_EncodeUint8(m_Current.GetGroupId(), h->gid, sizeof(h->gid) - 1);
2665 if (!ok) {
2666 TAR_THROW(this, eMemory,
2667 "Cannot store group ID");
2668 }
2669 if (ok < 0) {
2670 fmt = eTar_OldGNU;
2671 }
2672
2673 // Size
2674 _ASSERT(type == CTarEntryInfo::eFile || m_Current.GetSize() == 0);
2675 ok = s_EncodeUint8(m_Current.GetSize(), h->size, sizeof(h->size) - 1);
2676 if (!ok) {
2677 TAR_THROW(this, eMemory,
2678 "Cannot store file size");
2679 }
2680 if (ok < 0) {
2681 fmt = eTar_OldGNU;
2682 }
2683
2684 if (fmt != eTar_Ustar && h->prefix[0]) {
2685 // Cannot downgrade to reflect encoding
2686 fmt = eTar_Ustar;
2687 }
2688
2689 // Modification time
2690 if (!s_NumToOctal(m_Current.GetModificationTime(),
2691 h->mtime, sizeof(h->mtime) - 1)) {
2692 TAR_THROW(this, eMemory,
2693 "Cannot store modification time");
2694 }
2695
2696 bool device = false;
2697 // Type (GNU extension for SymLink)
2698 switch (type) {
2699 case CTarEntryInfo::eFile:
2700 h->typeflag[0] = '0';
2701 break;
2702 case CTarEntryInfo::eSymLink:
2703 h->typeflag[0] = '2';
2704 break;
2705 case CTarEntryInfo::eCharDev:
2706 case CTarEntryInfo::eBlockDev:
2707 h->typeflag[0] = type == CTarEntryInfo::eCharDev ? '3' : '4';
2708 if (!s_NumToOctal(m_Current.GetMajor(),
2709 h->devmajor, sizeof(h->devmajor) - 1)) {
2710 TAR_THROW(this, eMemory,
2711 "Cannot store major number");
2712 }
2713 if (!s_NumToOctal(m_Current.GetMinor(),
2714 h->devminor, sizeof(h->devminor) - 1)) {
2715 TAR_THROW(this, eMemory,
2716 "Cannot store minor number");
2717 }
2718 device = true;
2719 break;
2720 case CTarEntryInfo::eDir:
2721 h->typeflag[0] = '5';
2722 break;
2723 case CTarEntryInfo::ePipe:
2724 h->typeflag[0] = '6';
2725 break;
2726 default:
2727 _TROUBLE;
2728 TAR_THROW(this, eUnsupportedEntryType,
2729 "Do not know how to archive entry '" + name
2730 + "' of type #" + NStr::IntToString(int(type))
2731 + ": Internal error");
2732 /*NOTREACHED*/
2733 break;
2734 }
2735
2736 // User and group
2737 const string& usr = m_Current.GetUserName();
2738 size_t len = usr.size();
2739 if (len < sizeof(h->uname)) {
2740 memcpy(h->uname, usr.c_str(), len);
2741 }
2742 const string& grp = m_Current.GetGroupName();
2743 len = grp.size();
2744 if (len < sizeof(h->gname)) {
2745 memcpy(h->gname, grp.c_str(), len);
2746 }
2747
2748 // Device numbers to complete the ustar header protocol (all fields ok)
2749 if (!device && fmt != eTar_OldGNU) {
2750 s_NumToOctal(0, h->devmajor, sizeof(h->devmajor) - 1);
2751 s_NumToOctal(0, h->devminor, sizeof(h->devminor) - 1);
2752 }
2753
2754 if (fmt != eTar_OldGNU) {
2755 // Magic
2756 strcpy(h->magic, "ustar");
2757 // Version (EXCEPTION: not '\0' terminated)
2758 memcpy(h->version, "00", 2);
2759 } else {
2760 // NB: Old GNU magic protrudes into adjacent version field
2761 memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2762 }
2763
2764 // NCBI signature if allowed
2765 if (!(m_Flags & fStandardHeaderOnly)) {
2766 sx_Signature(&block);
2767 }
2768
2769 // Final step: checksumming
2770 if (!s_TarChecksum(&block, fmt == eTar_OldGNU ? true : false)) {
2771 TAR_THROW(this, eMemory,
2772 "Cannot store checksum");
2773 }
2774
2775 // Write header
2776 x_WriteArchive(sizeof(block.buffer), block.buffer);
2777 m_Current.m_HeaderSize = (streamsize)(m_StreamPos - m_Current.m_Pos);
2778
2779 Checkpoint(m_Current, true/*write*/);
2780 }
2781
2782
x_PackCurrentName(STarHeader * h,bool link)2783 bool CTar::x_PackCurrentName(STarHeader* h, bool link)
2784 {
2785 const string& name = link ? m_Current.GetLinkName() : m_Current.GetName();
2786 size_t size = link ? sizeof(h->linkname) : sizeof(h->name);
2787 char* dst = link ? h->linkname : h->name;
2788 const char* src = name.c_str();
2789 size_t len = name.size();
2790
2791 if (len <= size) {
2792 // Name fits!
2793 memcpy(dst, src, len);
2794 return true;
2795 }
2796
2797 bool packed = false;
2798 if (!link && len <= sizeof(h->prefix) + 1 + sizeof(h->name)) {
2799 // Try to split the long name into a prefix and a short name (POSIX)
2800 size_t i = len;
2801 if (i > sizeof(h->prefix)) {
2802 i = sizeof(h->prefix);
2803 }
2804 while (i > 0 && src[--i] != '/');
2805 if (i && len - i <= sizeof(h->name) + 1) {
2806 memcpy(h->prefix, src, i);
2807 memcpy(h->name, src + i + 1, len - i - 1);
2808 if (!(m_Flags & fLongNameSupplement))
2809 return true;
2810 packed = true;
2811 }
2812 }
2813
2814 // Still, store the initial part in the original header
2815 if (!packed) {
2816 memcpy(dst, src, size);
2817 }
2818
2819 // Prepare extended block header with the long name info (old GNU style)
2820 _ASSERT(!OFFSET_OF(m_BufferPos) && m_BufferPos < m_BufferSize);
2821 TBlock* block = (TBlock*)(m_Buffer + m_BufferPos);
2822 memset(block->buffer, 0, sizeof(block->buffer));
2823 h = &block->header;
2824
2825 // See above for comments about header filling
2826 ++len; // write terminating '\0' as it can always be made to fit in
2827 strcpy(h->name, "././@LongLink");
2828 s_NumToOctal(0, h->mode, sizeof(h->mode) - 1);
2829 s_NumToOctal(0, h->uid, sizeof(h->uid) - 1);
2830 s_NumToOctal(0, h->gid, sizeof(h->gid) - 1);
2831 if (!s_EncodeUint8(len, h->size, sizeof(h->size) - 1)) {
2832 return false;
2833 }
2834 s_NumToOctal(0, h->mtime, sizeof(h->mtime)- 1);
2835 h->typeflag[0] = link ? 'K' : 'L';
2836
2837 // Old GNU magic protrudes into adjacent version field
2838 memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2839
2840 // NCBI signature if allowed
2841 if (!(m_Flags & fStandardHeaderOnly)) {
2842 sx_Signature(block);
2843 }
2844
2845 s_TarChecksum(block, true);
2846
2847 // Write the header
2848 x_WriteArchive(sizeof(block->buffer));
2849
2850 // Store the full name in the extended block (will be aligned as necessary)
2851 x_WriteArchive(len, src);
2852
2853 return true;
2854 }
2855
2856
x_Backspace(EAction action)2857 void CTar::x_Backspace(EAction action)
2858 {
2859 _ASSERT(SIZE_OF(m_ZeroBlockCount) <= m_StreamPos);
2860 _ASSERT(!OFFSET_OF(m_StreamPos));
2861 m_Current.m_Name.erase();
2862 if (!m_ZeroBlockCount) {
2863 return;
2864 }
2865
2866 size_t gap = SIZE_OF(m_ZeroBlockCount);
2867 if (!m_FileStream) {
2868 if (gap > m_BufferPos) {
2869 if (action == eAppend || action == eUpdate) {
2870 TAR_POST(4, Warning,
2871 "In-stream update may result in gapped tar archive");
2872 }
2873 gap = m_BufferPos;
2874 m_ZeroBlockCount -= BLOCK_OF(gap);
2875 }
2876 m_BufferPos -= gap;
2877 m_StreamPos -= gap;
2878 return;
2879 }
2880
2881 // Tarfile here
2882 m_StreamPos -= gap;
2883 CT_POS_TYPE rec = (CT_OFF_TYPE)(m_StreamPos / m_BufferSize);
2884 size_t off = (size_t) (m_StreamPos % m_BufferSize);
2885 if (m_BufferPos == 0) {
2886 m_BufferPos += m_BufferSize;
2887 }
2888 if (gap > m_BufferPos) {
2889 m_BufferPos = 0;
2890 size_t temp = BLOCK_SIZE;
2891 // Re-fetch the entire record
2892 if (!m_FileStream->seekg(rec * m_BufferSize)
2893 // NB: successful positioning guarantees the stream was !fail(),
2894 // which means it might have only been either good() or eof()
2895 || (m_FileStream->clear(), !x_ReadArchive(temp))
2896 || temp != BLOCK_SIZE) {
2897 TAR_POST(65, Error,
2898 "Archive backspace error in record reget");
2899 s_SetStateSafe(m_Stream, NcbiBadbit);
2900 return;
2901 }
2902 m_BufferPos = off;
2903 } else {
2904 m_BufferPos -= gap;
2905 }
2906 _ASSERT(!OFFSET_OF(m_BufferPos) && m_BufferPos < m_BufferSize);
2907
2908 // Always reset the put position there
2909 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 1101
2910 m_FileStream->clear(); // This is to only work around a bug
2911 #endif //_LIBCPP_VERSION
2912 if (!m_FileStream->seekp(rec * m_BufferSize)) {
2913 TAR_POST(80, Error,
2914 "Archive backspace error in record reset");
2915 s_SetStateSafe(m_Stream, NcbiBadbit);
2916 return;
2917 }
2918 m_ZeroBlockCount = 0;
2919 }
2920
2921
s_MatchExcludeMask(const CTempString & name,const list<CTempString> & elems,const CMask * mask,NStr::ECase acase)2922 static bool s_MatchExcludeMask(const CTempString& name,
2923 const list<CTempString>& elems,
2924 const CMask* mask,
2925 NStr::ECase acase)
2926 {
2927 _ASSERT(!name.empty() && mask);
2928 if (elems.empty()) {
2929 return mask->Match(name, acase);
2930 }
2931 if (elems.size() == 1) {
2932 return mask->Match(elems.front(), acase);
2933 }
2934 string temp;
2935 REVERSE_ITERATE(list<CTempString>, e, elems) {
2936 temp = temp.empty() ? string(*e) : string(*e) + '/' + temp;
2937 if (mask->Match(temp, acase)) {
2938 return true;
2939 }
2940 }
2941 return false;
2942 }
2943
2944
x_ReadAndProcess(EAction action)2945 unique_ptr<CTar::TEntries> CTar::x_ReadAndProcess(EAction action)
2946 {
2947 unique_ptr<TEntries> done(new TEntries);
2948 _ASSERT(!OFFSET_OF(m_StreamPos));
2949 Uint8 pos = m_StreamPos;
2950 CTarEntryInfo xinfo;
2951
2952 m_ZeroBlockCount = 0;
2953 for (;;) {
2954 // Next block is supposed to be a header
2955 m_Current = CTarEntryInfo(pos);
2956 m_Current.m_Name = xinfo.GetName();
2957 EStatus status = x_ReadEntryInfo
2958 (action == eTest && (m_Flags & fDumpEntryHeaders),
2959 xinfo.GetType() == CTarEntryInfo::ePAXHeader);
2960 switch (status) {
2961 case eFailure:
2962 case eSuccess:
2963 case eContinue:
2964 if (m_ZeroBlockCount && !(m_Flags & fIgnoreZeroBlocks)) {
2965 Uint8 save_pos = m_StreamPos;
2966 m_StreamPos -= xinfo.m_HeaderSize + m_Current.m_HeaderSize;
2967 m_StreamPos -= SIZE_OF(m_ZeroBlockCount);
2968 TAR_POST(5, Error,
2969 "Interspersing zero block ignored");
2970 m_StreamPos = save_pos;
2971 }
2972 break;
2973
2974 case eZeroBlock:
2975 m_ZeroBlockCount++;
2976 if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2977 s_DumpZero(m_FileName, m_StreamPos - BLOCK_SIZE, m_BufferSize,
2978 m_ZeroBlockCount);
2979 }
2980 if ((m_Flags & fIgnoreZeroBlocks) || m_ZeroBlockCount < 2) {
2981 if (xinfo.GetType() == CTarEntryInfo::eUnknown) {
2982 // Not yet reading an entry -- advance
2983 pos += BLOCK_SIZE;
2984 }
2985 continue;
2986 }
2987 // Two zero blocks -> eEOF
2988 /*FALLTHRU*/
2989
2990 case eEOF:
2991 if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2992 s_DumpZero(m_FileName, m_StreamPos, m_BufferSize, 0,
2993 status != eEOF ? true : false);
2994 }
2995 if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
2996 TAR_POST(6, Error,
2997 "Orphaned extended information ignored");
2998 } else if (m_ZeroBlockCount < 2 && action != eAppend) {
2999 if (!m_StreamPos) {
3000 TAR_THROW(this, eRead,
3001 "Unexpected EOF in archive");
3002 }
3003 TAR_POST(58, Warning,
3004 m_ZeroBlockCount
3005 ? "Incomplete EOT in archive"
3006 : "Missing EOT in archive");
3007 }
3008 x_Backspace(action);
3009 return done;
3010 }
3011 m_ZeroBlockCount = 0;
3012
3013 //
3014 // Process entry
3015 //
3016 if (status == eContinue) {
3017 // Extended header information has just been read in
3018 xinfo.m_HeaderSize += m_Current.m_HeaderSize;
3019
3020 switch (m_Current.GetType()) {
3021 case CTarEntryInfo::ePAXHeader:
3022 xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3023 m_Current.m_Pos = pos;
3024 if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
3025 TAR_POST(7, Error,
3026 "Unused extended header replaced");
3027 }
3028 xinfo.m_Type = CTarEntryInfo::ePAXHeader;
3029 xinfo.m_Name.swap(m_Current.m_Name);
3030 xinfo.m_LinkName.swap(m_Current.m_LinkName);
3031 xinfo.m_UserName.swap(m_Current.m_UserName);
3032 xinfo.m_GroupName.swap(m_Current.m_GroupName);
3033 xinfo.m_Stat = m_Current.m_Stat;
3034 continue;
3035
3036 case CTarEntryInfo::eGNULongName:
3037 if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3038 || !xinfo.GetName().empty()) {
3039 TAR_POST(8, Error,
3040 "Unused long name \"" + xinfo.GetName()
3041 + "\" replaced");
3042 }
3043 // Latch next long name here then just skip
3044 xinfo.m_Type = CTarEntryInfo::eGNULongName;
3045 xinfo.m_Name.swap(m_Current.m_Name);
3046 continue;
3047
3048 case CTarEntryInfo::eGNULongLink:
3049 if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3050 || !xinfo.GetLinkName().empty()) {
3051 TAR_POST(9, Error,
3052 "Unused long link \"" + xinfo.GetLinkName()
3053 + "\" replaced");
3054 }
3055 // Latch next long link here then just skip
3056 xinfo.m_Type = CTarEntryInfo::eGNULongLink;
3057 xinfo.m_LinkName.swap(m_Current.m_LinkName);
3058 continue;
3059
3060 default:
3061 _TROUBLE;
3062 NCBI_THROW(CCoreException, eCore, "Internal error");
3063 /*NOTREACHED*/
3064 break;
3065 }
3066 }
3067
3068 // Fixup current 'info' with extended information obtained previously
3069 m_Current.m_HeaderSize += xinfo.m_HeaderSize;
3070 xinfo.m_HeaderSize = 0;
3071 if (!xinfo.GetName().empty()) {
3072 xinfo.m_Name.swap(m_Current.m_Name);
3073 xinfo.m_Name.erase();
3074 }
3075 if (!xinfo.GetLinkName().empty()) {
3076 xinfo.m_LinkName.swap(m_Current.m_LinkName);
3077 xinfo.m_LinkName.erase();
3078 }
3079 TPAXBits parsed;
3080 if (xinfo.GetType() == CTarEntryInfo::ePAXHeader) {
3081 parsed = (TPAXBits) xinfo.m_Stat.orig.st_mode;
3082 if (!xinfo.GetUserName().empty()) {
3083 xinfo.m_UserName.swap(m_Current.m_UserName);
3084 xinfo.m_UserName.erase();
3085 }
3086 if (!xinfo.GetGroupName().empty()) {
3087 xinfo.m_GroupName.swap(m_Current.m_GroupName);
3088 xinfo.m_GroupName.erase();
3089 }
3090 if (parsed & fPAXMtime) {
3091 m_Current.m_Stat.orig.st_mtime = xinfo.m_Stat.orig.st_mtime;
3092 m_Current.m_Stat.mtime_nsec = xinfo.m_Stat.mtime_nsec;
3093 }
3094 if (parsed & fPAXAtime) {
3095 m_Current.m_Stat.orig.st_atime = xinfo.m_Stat.orig.st_atime;
3096 m_Current.m_Stat.atime_nsec = xinfo.m_Stat.atime_nsec;
3097 }
3098 if (parsed & fPAXCtime) {
3099 m_Current.m_Stat.orig.st_ctime = xinfo.m_Stat.orig.st_ctime;
3100 m_Current.m_Stat.ctime_nsec = xinfo.m_Stat.ctime_nsec;
3101 }
3102 if (parsed & fPAXSparse) {
3103 // Mark to post-process below
3104 xinfo.m_Type = CTarEntryInfo::eSparseFile;
3105 }
3106 if (parsed & fPAXSize) {
3107 m_Current.m_Stat.orig.st_size = xinfo.m_Stat.orig.st_size;
3108 }
3109 if (parsed & fPAXUid) {
3110 m_Current.m_Stat.orig.st_uid = xinfo.m_Stat.orig.st_uid;
3111 }
3112 if (parsed & fPAXGid) {
3113 m_Current.m_Stat.orig.st_gid = xinfo.m_Stat.orig.st_gid;
3114 }
3115 } else {
3116 parsed = fPAXNone/*0*/;
3117 }
3118 if (m_Current.GetType() == CTarEntryInfo::eSparseFile) {
3119 xinfo.m_Type = CTarEntryInfo::eSparseFile;
3120 if (xinfo.m_Pos < m_Current.m_Pos) {
3121 xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3122 }
3123 m_Current.m_Pos = pos;
3124 }
3125 Uint8 size = m_Current.GetSize(); // NB: archive size to read
3126 if (xinfo.GetType() == CTarEntryInfo::eSparseFile) {
3127 if (m_Current.GetType() != CTarEntryInfo::eFile &&
3128 m_Current.GetType() != CTarEntryInfo::eSparseFile) {
3129 TAR_POST(103, Error,
3130 "Ignoring sparse data for non-plain file");
3131 } else if (parsed & fPAXSparseGNU_1_0) {
3132 m_Current.m_Stat.orig.st_size = size ? (off_t) xinfo.m_Pos : 0;
3133 m_Current.m_Type = CTarEntryInfo::eSparseFile;
3134 } else {
3135 m_Current.m_Type = CTarEntryInfo::eUnknown;
3136 if (size < xinfo.m_Pos) {
3137 m_Current.m_Stat.orig.st_size = (off_t) xinfo.m_Pos;
3138 }
3139 }
3140 }
3141 xinfo.m_Pos = 0;
3142 xinfo.m_Type = CTarEntryInfo::eUnknown;
3143 _ASSERT(status == eFailure || status == eSuccess);
3144
3145 // Last sanity check
3146 if (status != eFailure && m_Current.GetName().empty()) {
3147 TAR_THROW(this, eBadName,
3148 "Empty entry name in archive");
3149 }
3150 // User callback
3151 if (!Checkpoint(m_Current, false/*read*/)) {
3152 status = eFailure;
3153 }
3154
3155 // Match file name with the set of masks
3156 bool match = (status != eSuccess ? false
3157 : m_Mask[eExtractMask].mask && (action == eList ||
3158 action == eExtract ||
3159 action == eInternal)
3160 ? m_Mask[eExtractMask].mask->Match(m_Current.GetName(),
3161 m_Mask[eExtractMask]
3162 .acase)
3163 : true);
3164 if (match && m_Mask[eExcludeMask].mask && action != eTest) {
3165 list<CTempString> elems;
3166 _ASSERT(!m_Current.GetName().empty());
3167 NStr::Split(m_Current.GetName(), "/", elems,
3168 NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
3169 match = !s_MatchExcludeMask(m_Current.GetName(), elems,
3170 m_Mask[eExcludeMask].mask,
3171 m_Mask[eExcludeMask].acase);
3172 }
3173
3174 // NB: match is 'false' when processing a failing entry
3175 if ((match && action == eInternal)
3176 || x_ProcessEntry(match && action == eExtract ? eExtract :
3177 action == eTest ? eTest : eUndefined,
3178 size, done.get())
3179 || (match && (action == eList || action == eUpdate))) {
3180 _ASSERT(status == eSuccess && action != eTest);
3181 done->push_back(m_Current);
3182 if (action == eInternal) {
3183 break;
3184 }
3185 }
3186
3187 _ASSERT(!OFFSET_OF(m_StreamPos));
3188 pos = m_StreamPos;
3189 }
3190
3191 return done;
3192 }
3193
3194
// Convert an archive entry name into a file system path.
//
// If "base_dir" is non-empty and "name" is either relative or must not be
// kept absolute ("noabs" set), the result is "name" appended to "base_dir".
// Otherwise "name" is taken as is, except that an absolute "name" with
// "noabs" set first loses its root: on MS-Windows a drive designator or a
// network host prefix, and then one leading slash or backslash;  if nothing
// remains, "." is used instead.
//
// The resultant path is passed through CDirEntry::NormalizePath().
static string s_ToFilesystemPath(const string& base_dir, const string& name,
                                 bool noabs = false)
{
    _ASSERT(!name.empty());
    const bool absolute = CDirEntry::IsAbsolutePath(name);

    string result;
    if (!base_dir.empty()  &&  (noabs  ||  !absolute)) {
        result = CDirEntry::ConcatPath(base_dir, name);
    } else {
        result = name;
        if (noabs  &&  absolute) {
#ifdef NCBI_OS_MSWIN
            const bool drive
                = isalpha((unsigned char) result[0])  &&  result[1] == ':';
            if (drive) {
                // Drive
                result.erase(0, 2);
            } else if ((result[0] == '/'  ||  result[0] == '\\')  &&
                       (result[1] == '/'  ||  result[1] == '\\')) {
                // Network
                result.erase(0, result.find_first_of("/\\", 2));
            }
#endif //NCBI_OS_MSWIN
            // Drop a single leading path separator, if any
            if (result[0] == '/'  ||  result[0] == '\\') {
                result.erase(0, 1);
            }
            // Nothing left -- refer to the current directory
            if (result.empty()) {
                result.assign(1, '.');
            }
        }
    }
    _ASSERT(!result.empty());
    return CDirEntry::NormalizePath(result);
}
3226
3227
// Convert a file system path into a name suitable for storing in an archive.
//
// The name uses forward slashes only.  A leading "base_dir" (compared
// case-insensitively on MS-Windows) is removed, making the name relative
// ("." if nothing but the base directory was given).  Otherwise a disk or
// network prefix (MS-Windows) is dropped.  All leading and trailing slashes
// are then removed, and an absolute name gets exactly one leading slash back.
static string s_ToArchiveName(const string& base_dir, const string& path)
{
    // NB: "path" is assumed to have been normalized
    string name = CDirEntry::AddTrailingPathSeparator(path);

#ifdef NCBI_OS_MSWIN
    // Convert to Unix format with forward slashes
    NStr::ReplaceInPlace(name, "\\", "/");
    const NStr::ECase how = NStr::eNocase;
#else
    const NStr::ECase how = NStr::eCase;
#endif //NCBI_OS_MSWIN

    bool      absolute = false;
    SIZE_TYPE skip     = 0;  // how many leading characters to drop

    if (!base_dir.empty()  &&  NStr::StartsWith(name, base_dir, how)) {
        // Remove leading base dir from the path
        if (name.size() > base_dir.size()) {
            name.erase(0, base_dir.size()/*separator too*/);
        } else {
            name.assign(1, '.');
        }
    } else {
        absolute = CDirEntry::IsAbsolutePath(name);
#ifdef NCBI_OS_MSWIN
        if (isalpha((unsigned char) name[0])  &&  name[1] == ':') {
            // Remove a disk name if present
            skip = 2;
        } else if (name[0] == '/'  &&  name[1] == '/') {
            // Network name if present
            skip = name.find('/', 2);
            absolute = true;
        }
#endif //NCBI_OS_MSWIN
    }

    // Remove any leading slashes (starting at the current skip position)...
    if (skip < name.size()) {
        // npos here means only slashes remain, so everything gets erased
        skip = name.find_first_not_of('/', skip);
    }
    if (skip) {
        name.erase(0, skip);
    }
    // ...and any trailing ones (npos + 1 wraps to 0, thus erasing all)
    name.erase(name.find_last_not_of('/') + 1);

    if (absolute) {
        name.insert((SIZE_TYPE) 0, 1, '/');
    }
    return name;
}
3286
3287
3288 class CTarTempDirEntry : public CDirEntry
3289 {
3290 public:
CTarTempDirEntry(const CDirEntry & entry)3291 CTarTempDirEntry(const CDirEntry& entry)
3292 : CDirEntry(GetTmpNameEx(entry.GetDir(), "xNCBItArX")),
3293 m_Entry(entry), m_Pending(false), m_Activated(false)
3294 {
3295 _ASSERT(!Exists() && m_Entry.GetType() != eDir);
3296 if (CDirEntry(m_Entry.GetPath()).Rename(GetPath())) {
3297 m_Activated = m_Pending = true;
3298 errno = 0;
3299 }
3300 }
3301
~CTarTempDirEntry()3302 virtual ~CTarTempDirEntry()
3303 {
3304 if (m_Activated) {
3305 (void)(m_Pending ? Restore() : RemoveEntry());
3306 }
3307 }
3308
Restore(void)3309 bool Restore(void)
3310 {
3311 m_Entry.Remove();
3312 errno = 0;
3313 bool renamed = Rename(m_Entry.GetPath());
3314 m_Activated = !renamed;
3315 m_Pending = false;
3316 return renamed;
3317 }
3318
Release(void)3319 void Release(void)
3320 {
3321 m_Pending = false;
3322 }
3323
3324 private:
3325 const CDirEntry& m_Entry;
3326 bool m_Pending;
3327 bool m_Activated;
3328 };
3329
3330
x_ProcessEntry(EAction action,Uint8 size,const CTar::TEntries * entries)3331 bool CTar::x_ProcessEntry(EAction action, Uint8 size,
3332 const CTar::TEntries* entries)
3333 {
3334 CTarEntryInfo::EType type = m_Current.GetType();
3335 bool extract = action == eExtract;
3336
3337 if (extract) {
3338 // Destination for extraction
3339 unique_ptr<CDirEntry> dst
3340 (CDirEntry::CreateObject(type == CTarEntryInfo::eSparseFile ?
3341 CDirEntry::eFile : CDirEntry::EType(type),
3342 s_ToFilesystemPath
3343 (m_BaseDir, m_Current.GetName(),
3344 !(m_Flags & fKeepAbsolutePath))));
3345 // Source for extraction
3346 unique_ptr<CDirEntry> src;
3347 // Direntry pending removal
3348 AutoPtr<CTarTempDirEntry> pending;
3349
3350 // Dereference symlink if requested
3351 if (type != CTarEntryInfo::eSymLink &&
3352 type != CTarEntryInfo::eHardLink && (m_Flags & fFollowLinks)) {
3353 dst->DereferenceLink();
3354 }
3355
3356 // Actual type in file system (if exists)
3357 CDirEntry::EType dst_type = dst->GetType();
3358
3359 // Look if extraction is allowed (when the destination exists)
3360 if (dst_type != CDirEntry::eUnknown) {
3361 bool extracted = false; // check if ours (prev. revision extracted)
3362 if (entries) {
3363 ITERATE(TEntries, e, *entries) {
3364 if (e->GetName() == m_Current.GetName() &&
3365 e->GetType() == m_Current.GetType()) {
3366 extracted = true;
3367 break;
3368 }
3369 }
3370 }
3371 if (!extracted) {
3372 // Can overwrite it?
3373 if (!(m_Flags & fOverwrite)) {
3374 // File already exists, and cannot be changed
3375 extract = false;
3376 }
3377 // Can update?
3378 else if ((m_Flags & fUpdate) == fUpdate // NB: fOverwrite set
3379 && (type == CTarEntryInfo::eDir ||
3380 // Make sure that dst is not newer than the entry
3381 dst->IsNewer(m_Current.GetModificationCTime(),
3382 // NB: dst must exist
3383 CDirEntry::eIfAbsent_Throw))) {
3384 extract = false;
3385 }
3386 // Have equal types?
3387 else if (m_Flags & fEqualTypes) {
3388 if (type == CTarEntryInfo::eHardLink) {
3389 src.reset(new CDirEntry
3390 (s_ToFilesystemPath
3391 (m_BaseDir, m_Current.GetLinkName(),
3392 !(m_Flags & fKeepAbsolutePath))));
3393 if (dst_type != src->GetType()) {
3394 extract = false;
3395 }
3396 } else if (dst_type != CDirEntry::EType(type)) {
3397 extract = false;
3398 }
3399 }
3400 }
3401 if (extract && (type != CTarEntryInfo::eDir ||
3402 dst_type != CDirEntry::eDir)) {
3403 if (!extracted && (m_Flags & fBackup) == fBackup) {
3404 // Need to backup the existing destination?
3405 CDirEntry tmp(*dst);
3406 if (!tmp.Backup(kEmptyStr, CDirEntry::eBackup_Rename)) {
3407 int x_errno = CNcbiError::GetLast().Code();
3408 TAR_THROW(this, eBackup,
3409 "Failed to backup '" + dst->GetPath() + '\''
3410 + s_OSReason(x_errno));
3411 }
3412 } else {
3413 // Do removal safely until extraction is confirmed
3414 pending.reset(new CTarTempDirEntry(*dst));
3415 if (/*!pending->Exists() ||*/ dst->Exists()) {
3416 // Security concern: do not attempt data extraction
3417 // into special files etc, which can harm the system.
3418 #ifdef __GNUC__
3419 int x_errno = errno ?: EEXIST;
3420 #else
3421 int x_errno = errno;
3422 if (x_errno == 0) {
3423 x_errno = EEXIST;
3424 }
3425 #endif //__GNUC__
3426 extract = false;
3427 TAR_THROW(this, eWrite,
3428 "Cannot extract '" + dst->GetPath() + '\''
3429 + s_OSReason(x_errno));
3430 }
3431 }
3432 }
3433 }
3434 if (extract) {
3435 #ifdef NCBI_OS_UNIX
3436 mode_t u;
3437 u = umask(022);
3438 umask(u & ~(S_IRUSR | S_IWUSR | S_IXUSR));
3439 try {
3440 #endif //NCBI_OS_UNIX
3441 extract = x_ExtractEntry(size, dst.get(), src.get());
3442 #ifdef NCBI_OS_UNIX
3443 } catch (...) {
3444 umask(u);
3445 throw;
3446 }
3447 umask(u);
3448 #endif //NCBI_OS_UNIX
3449 if (pending) {
3450 if (extract) {
3451 pending->Release();
3452 } else if (!pending->Restore()) { // Undo delete
3453 int x_errno = errno;
3454 TAR_THROW(this, eWrite,
3455 "Cannot restore '" + dst->GetPath()
3456 + "' back in place" + s_OSReason(x_errno));
3457 }
3458 }
3459 }
3460 } else if (m_Current.GetType() == CTarEntryInfo::eSparseFile && size
3461 && action == eTest && (m_Flags & fDumpEntryHeaders)) {
3462 unique_ptr<CDirEntry> dst
3463 (CDirEntry::CreateObject(CDirEntry::eFile,
3464 s_ToFilesystemPath
3465 (m_BaseDir, m_Current.GetName(),
3466 !(m_Flags & fKeepAbsolutePath))));
3467 (void) x_ExtractSparseFile(size, dst.get(), true);
3468 }
3469
3470 x_Skip(BLOCK_OF(ALIGN_SIZE(size)));
3471
3472 return extract;
3473 }
3474
3475
x_Skip(Uint8 blocks)3476 void CTar::x_Skip(Uint8 blocks)
3477 {
3478 _ASSERT(!OFFSET_OF(m_StreamPos));
3479 while (blocks) {
3480 #ifndef NCBI_COMPILER_WORKSHOP
3481 // RogueWave RTL is buggy in seeking pipes -- it clobbers
3482 // (discards) streambuf data instead of leaving it alone..
3483 if (!(m_Flags & (fSlowSkipWithRead | fStreamPipeThrough))
3484 && m_BufferPos == 0 && blocks >= BLOCK_OF(m_BufferSize)) {
3485 CT_OFF_TYPE fskip =
3486 (CT_OFF_TYPE)(blocks / BLOCK_OF(m_BufferSize) * m_BufferSize);
3487 _ASSERT(ALIGN_SIZE(fskip) == fskip);
3488 if (m_Stream.rdbuf()->PUBSEEKOFF(fskip, IOS_BASE::cur)
3489 != (CT_POS_TYPE)((CT_OFF_TYPE)(-1))) {
3490 blocks -= BLOCK_OF(fskip);
3491 m_StreamPos += fskip;
3492 continue;
3493 }
3494 if (m_FileStream) {
3495 TAR_POST(2, Warning,
3496 "Cannot fast skip in file archive,"
3497 " reverting to slow skip");
3498 }
3499 m_Flags |= fSlowSkipWithRead;
3500 }
3501 #endif //NCBI_COMPILER_WORKSHOP
3502 size_t nskip = (blocks < BLOCK_OF(m_BufferSize)
3503 ? (size_t) SIZE_OF(blocks)
3504 : m_BufferSize);
3505 _ASSERT(ALIGN_SIZE(nskip) == nskip);
3506 if (!x_ReadArchive(nskip)) {
3507 TAR_THROW(this, eRead,
3508 "Archive skip failed (EOF)");
3509 }
3510 _ASSERT(nskip);
3511 nskip = ALIGN_SIZE(nskip);
3512 blocks -= BLOCK_OF (nskip);
3513 m_StreamPos += nskip;
3514 }
3515 _ASSERT(!OFFSET_OF(m_StreamPos));
3516 }
3517
3518
3519 // NB: Clobbers umask, must be restored after the call
x_ExtractEntry(Uint8 & size,const CDirEntry * dst,const CDirEntry * src)3520 bool CTar::x_ExtractEntry(Uint8& size, const CDirEntry* dst,
3521 const CDirEntry* src)
3522 {
3523 CTarEntryInfo::EType type = m_Current.GetType();
3524 unique_ptr<CDirEntry> src_ptr; // deleter
3525 bool extracted = true; // assume best
3526
3527 if (type == CTarEntryInfo::eUnknown && !(m_Flags & fSkipUnsupported)) {
3528 // Conform to POSIX-mandated behavior to extract as files
3529 type = CTarEntryInfo::eFile;
3530 }
3531 switch (type) {
3532 case CTarEntryInfo::eSparseFile: // NB: only PAX GNU/1.0 sparse file here
3533 case CTarEntryInfo::eHardLink:
3534 case CTarEntryInfo::eFile:
3535 {{
3536 _ASSERT(!dst->Exists());
3537 // Create base directory
3538 CDir dir(dst->GetDir());
3539 if (/*dir.GetPath() != "." && */!dir.CreatePath()) {
3540 int x_errno = errno;
3541 TAR_THROW(this, eCreate,
3542 "Cannot create directory '" + dir.GetPath() + '\''
3543 + s_OSReason(x_errno));
3544 }
3545
3546 if (type == CTarEntryInfo::eHardLink) {
3547 if (!src) {
3548 src_ptr.reset(new CDirEntry
3549 (s_ToFilesystemPath
3550 (m_BaseDir, m_Current.GetLinkName(),
3551 !(m_Flags & fKeepAbsolutePath))));
3552 src = src_ptr.get();
3553 }
3554 if (src->GetType() == CDirEntry::eUnknown && size) {
3555 // Looks like a dangling hard link but luckily we have
3556 // the actual file data (POSIX extension) so use it here.
3557 type = CTarEntryInfo::eFile;
3558 }
3559 }
3560
3561 if (type == CTarEntryInfo::eHardLink) {
3562 _ASSERT(src);
3563 #ifdef NCBI_OS_UNIX
3564 if (link(src->GetPath().c_str(), dst->GetPath().c_str()) == 0){
3565 if (m_Flags & fPreserveAll) {
3566 x_RestoreAttrs(m_Current, m_Flags, dst);
3567 }
3568 break;
3569 }
3570 int x_errno = errno;
3571 TAR_POST(10, Warning,
3572 "Cannot hard-link '" + src->GetPath()
3573 + "' and '" + dst->GetPath() + '\''
3574 + s_OSReason(x_errno) + ", trying to copy");
3575 #endif //NCBI_OS_UNIX
3576 if (!src->Copy(dst->GetPath(),
3577 CDirEntry::fCF_Overwrite |
3578 CDirEntry::fCF_PreserveAll)) {
3579 TAR_POST(11, Error,
3580 "Cannot hard-link '" + src->GetPath()
3581 + "' and '" + dst->GetPath() + "' via copy");
3582 extracted = false;
3583 break;
3584 }
3585 } else if (type == CTarEntryInfo::eSparseFile && size) {
3586 if (!(extracted = x_ExtractSparseFile(size, dst)))
3587 break;
3588 } else {
3589 x_ExtractPlainFile(size, dst);
3590 }
3591
3592 // Restore attributes
3593 if (m_Flags & fPreserveAll) {
3594 x_RestoreAttrs(m_Current, m_Flags, dst);
3595 }
3596 }}
3597 break;
3598
3599 case CTarEntryInfo::eDir:
3600 {{
3601 const CDir* dir = dynamic_cast<const CDir*>(dst);
3602 if (!dir || !dir->CreatePath()) {
3603 int x_errno = !dir ? 0 : CNcbiError::GetLast().Code();
3604 TAR_THROW(this, eCreate,
3605 "Cannot create directory '" + dst->GetPath() + '\''
3606 + (!dir
3607 ? string(": Internal error")
3608 : s_OSReason(x_errno)));
3609 }
3610 // NB: Attributes for a directory must be set only after all of its
3611 // entries have been already extracted.
3612 _ASSERT(size == 0);
3613 }}
3614 break;
3615
3616 case CTarEntryInfo::eSymLink:
3617 {{
3618 const CSymLink* symlink = dynamic_cast<const CSymLink*>(dst);
3619 if (!symlink || !symlink->Create(m_Current.GetLinkName())) {
3620 int x_errno = !symlink ? 0 : CNcbiError::GetLast().Code();
3621 string error = "Cannot create symlink '" + dst->GetPath()
3622 + "' -> '" + m_Current.GetLinkName() + '\''
3623 + (!symlink
3624 ? string(": Internal error")
3625 : s_OSReason(x_errno));
3626 if (!symlink || x_errno != ENOTSUP
3627 || !(m_Flags & fSkipUnsupported)) {
3628 TAR_THROW(this, eCreate, error);
3629 }
3630 TAR_POST(12, Error, error);
3631 extracted = false;
3632 }
3633 _ASSERT(size == 0);
3634 }}
3635 break;
3636
3637 case CTarEntryInfo::ePipe:
3638 {{
3639 _ASSERT(size == 0);
3640 #ifdef NCBI_OS_UNIX
3641 umask(0);
3642 int x_errno = 0;
3643 if (mkfifo(dst->GetPath().c_str(), m_Current.GetMode())/*!= 0*/) {
3644 x_errno = errno;
3645 extracted = false;
3646 }
3647 if (extracted) {
3648 break;
3649 }
3650 string reason = s_OSReason(x_errno);
3651 #else
3652 int x_errno = ENOTSUP;
3653 string reason = ": Feature not supported by host OS";
3654 extracted = false;
3655 #endif //NCBI_OS_UNIX
3656 string error
3657 = "Cannot create FIFO '" + dst->GetPath() + '\'' + reason;
3658 if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3659 TAR_THROW(this, eCreate, error);
3660 }
3661 TAR_POST(81, Error, error);
3662 }}
3663 break;
3664
3665 case CTarEntryInfo::eCharDev:
3666 case CTarEntryInfo::eBlockDev:
3667 {{
3668 _ASSERT(size == 0);
3669 #ifdef NCBI_OS_UNIX
3670 umask(0);
3671 int x_errno = 0;
3672 mode_t m = (m_Current.GetMode() |
3673 (type == CTarEntryInfo::eCharDev ? S_IFCHR : S_IFBLK));
3674 if (mknod(dst->GetPath().c_str(),m,m_Current.m_Stat.orig.st_rdev)){
3675 x_errno = errno;
3676 extracted = false;
3677 }
3678 if (extracted) {
3679 break;
3680 }
3681 string reason = s_OSReason(x_errno);
3682 #else
3683 int x_errno = ENOTSUP;
3684 string reason = ": Feature not supported by host OS";
3685 extracted = false;
3686 #endif //NCBI_OS_UNIX
3687 string error
3688 = "Cannot create " + string(type == CTarEntryInfo::eCharDev
3689 ? "character" : "block")
3690 + " device '" + dst->GetPath() + '\'' + reason;
3691 if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3692 TAR_THROW(this, eCreate, error);
3693 }
3694 TAR_POST(82, Error, error);
3695 }}
3696 break;
3697
3698 case CTarEntryInfo::eVolHeader:
3699 _ASSERT(size == 0);
3700 /*NOOP*/
3701 break;
3702
3703 case CTarEntryInfo::ePAXHeader:
3704 case CTarEntryInfo::eGNULongName:
3705 case CTarEntryInfo::eGNULongLink:
3706 // Extended headers should have already been processed and not be here
3707 _TROUBLE;
3708 /*FALLTHRU*/
3709
3710 default:
3711 TAR_POST(13, Error,
3712 "Skipping unsupported entry '" + m_Current.GetName()
3713 + "' of type #" + NStr::IntToString(int(type)));
3714 extracted = false;
3715 break;
3716 }
3717
3718 return extracted;
3719 }
3720
3721
x_ExtractPlainFile(Uint8 & size,const CDirEntry * dst)3722 void CTar::x_ExtractPlainFile(Uint8& size, const CDirEntry* dst)
3723 {
3724 // FIXME: Switch to CFileIO eventually to bypass ofstream's obscurity
3725 // w.r.t. errors, extra buffering etc.
3726 CNcbiOfstream ofs(dst->GetPath().c_str(),
3727 IOS_BASE::trunc |
3728 IOS_BASE::out |
3729 IOS_BASE::binary);
3730 if (!ofs) {
3731 int x_errno = errno;
3732 TAR_THROW(this, eCreate,
3733 "Cannot create file '" + dst->GetPath() + '\''
3734 + s_OSReason(x_errno));
3735 }
3736 if (m_Flags & fPreserveMode) { // NB: secure
3737 x_RestoreAttrs(m_Current, fPreserveMode,
3738 dst, fTarURead | fTarUWrite);
3739 }
3740
3741 bool okay = ofs.good();
3742 if (okay) while (size) {
3743 // Read from the archive
3744 size_t nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3745 const char* data = x_ReadArchive(nread);
3746 if (!data) {
3747 TAR_THROW(this, eRead,
3748 "Unexpected EOF in archive");
3749 }
3750 _ASSERT(nread && ofs.good());
3751 // Write file to disk
3752 try {
3753 okay = ofs.write(data, (streamsize) nread) ? true : false;
3754 } catch (IOS_BASE::failure&) {
3755 okay = false;
3756 }
3757 if (!okay) {
3758 break;
3759 }
3760 size -= nread;
3761 m_StreamPos += ALIGN_SIZE(nread);
3762 }
3763
3764 ofs.close();
3765 if (!okay || !ofs.good()) {
3766 int x_errno = errno;
3767 TAR_THROW(this, eWrite,
3768 "Cannot " + string(okay ? "close" : "write")
3769 + " file '" + dst->GetPath()+ '\'' + s_OSReason(x_errno));
3770 }
3771 }
3772
3773
x_ReadLine(Uint8 & size,const char * & data,size_t & nread)3774 string CTar::x_ReadLine(Uint8& size, const char*& data, size_t& nread)
3775 {
3776 string line;
3777 for (;;) {
3778 size_t n;
3779 for (n = 0; n < nread; ++n) {
3780 if (!isprint((unsigned char) data[n])) {
3781 break;
3782 }
3783 }
3784 line.append(data, n);
3785 if (n < nread) {
3786 if (data[n] == '\n') {
3787 ++n;
3788 }
3789 data += n;
3790 nread -= n;
3791 break;
3792 }
3793 if (!(nread = size < BLOCK_SIZE ? size : BLOCK_SIZE)) {
3794 break;
3795 }
3796 if (!(data = x_ReadArchive(nread))) {
3797 return kEmptyStr;
3798 }
3799 _ASSERT(nread);
3800 if (size >= nread) {
3801 size -= nread;
3802 } else {
3803 size = 0;
3804 }
3805 m_StreamPos += ALIGN_SIZE(nread);
3806 }
3807 return line;
3808 }
3809
3810
3811 template<>
3812 struct Deleter<FILE>
3813 {
DeleteDeleter3814 static void Delete(FILE* fp) { fclose(fp); }
3815 };
3816
3817
// fopen() mode for creating a write-only file:  binary on MS-Windows
#if defined(NCBI_OS_MSWIN)
#  define NCBI_FILE_WO "wb"
#else
#  define NCBI_FILE_WO "w"
#endif /*NCBI_OS_MSWIN*/
3823
x_ExtractSparseFile(Uint8 & size,const CDirEntry * dst,bool dump)3824 bool CTar::x_ExtractSparseFile(Uint8& size, const CDirEntry* dst, bool dump)
3825 {
3826 _ASSERT(size);
3827
3828 // Read sparse map first
3829 Uint8 pos = m_StreamPos;
3830 size_t nread = size < BLOCK_SIZE ? (size_t) size : BLOCK_SIZE;
3831 const char* data = x_ReadArchive(nread);
3832 if (!data) {
3833 TAR_THROW(this, eRead,
3834 "Unexpected EOF in archive");
3835 }
3836 _ASSERT(nread);
3837 if (size >= nread) {
3838 size -= nread;
3839 } else {
3840 size = 0;
3841 }
3842
3843 string num(x_ReadLine(size, data, nread)); // "numblocks"
3844 Uint8 n = NStr::StringToUInt8(num,
3845 NStr::fConvErr_NoThrow |
3846 NStr::fConvErr_NoErrMessage);
3847 if (!n) {
3848 TAR_POST(97, Error,
3849 "Cannot expand sparse file '" + dst->GetPath()
3850 + "': Region count is "
3851 + string(num.empty() ? "missing" : "invalid")
3852 + " (\"" + num + "\")");
3853 m_StreamPos += ALIGN_SIZE(nread);
3854 return false;
3855 }
3856 m_StreamPos += ALIGN_SIZE(nread);
3857 vector< pair<Uint8, Uint8> > bmap(n);
3858
3859 for (Uint8 i = 0; i < n; ++i) { // "offset numbytes" pairs
3860 Uint8 val[2];
3861 for (int k = 0; k < 2; ++k) {
3862 num = x_ReadLine(size, data, nread);
3863 try {
3864 val[k] = NStr::StringToUInt8(num);
3865 } catch (...) {
3866 TAR_POST(98, Error,
3867 "Cannot expand sparse file '" + dst->GetPath()
3868 + "': Sparse map "
3869 + string(k == 0 ? "offset" : "region size")
3870 + '[' + NStr::NumericToString(i) + "] is "
3871 + string(num.empty() ? "missing" : "invalid")
3872 + " (\"" + num + "\")");
3873 return false;
3874 }
3875 }
3876 bmap[i] = pair<Uint8, Uint8>(val[0], val[1]);
3877 }
3878 if (dump) {
3879 s_DumpSparse(m_FileName, pos, m_BufferSize, m_Current.GetName(), bmap);
3880 /* dontcare */
3881 return false;
3882 }
3883
3884 // Write the file out
3885 AutoPtr<FILE> fp(::fopen(dst->GetPath().c_str(), NCBI_FILE_WO));
3886 if (!fp) {
3887 int x_errno = errno;
3888 TAR_THROW(this, eCreate,
3889 "Cannot create file '" + dst->GetPath() + '\''
3890 + s_OSReason(x_errno));
3891 }
3892 if (m_Flags & fPreserveMode) { // NB: secure
3893 x_RestoreAttrs(m_Current, fPreserveMode,
3894 dst, fTarURead | fTarUWrite);
3895 }
3896
3897 nread = 0;
3898 Uint8 eof = 0;
3899 int x_error = 0;
3900 for (Uint8 i = 0; i < n; ++i) {
3901 Uint8 top = bmap[i].first + bmap[i].second;
3902 if (eof < top) {
3903 eof = top;
3904 }
3905 if (!bmap[i].second) {
3906 continue;
3907 }
3908 // non-empty region
3909 if (::fseek(fp.get(), (long) bmap[i].first, SEEK_SET) != 0) {
3910 x_error = errno;
3911 break;
3912 }
3913 Uint8 done = 0;
3914 do {
3915 if (!nread) {
3916 nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3917 if (!nread || !(data = x_ReadArchive(nread))) {
3918 x_error = errno;
3919 TAR_POST(99, Error,
3920 "Cannot read archive data for sparse file '"
3921 + dst->GetPath() + "', region #"
3922 + NStr::NumericToString(i)
3923 + (nread
3924 ? s_OSReason(x_error)
3925 : string(": End-of-data")));
3926 x_error = -1;
3927 eof = 0;
3928 break;
3929 }
3930 _ASSERT(nread);
3931 size -= nread;
3932 m_StreamPos += ALIGN_SIZE(nread);
3933 }
3934 size_t xread = nread;
3935 if (xread > bmap[i].second - done) {
3936 xread = (size_t)(bmap[i].second - done);
3937 }
3938 if (::fwrite(data, 1, xread, fp.get()) != xread) {
3939 if (!(x_error = errno)) {
3940 x_error = -1; // Make sure non-zero
3941 }
3942 break;
3943 }
3944 done += xread;
3945 data += xread;
3946 nread -= xread;
3947 } while (done < bmap[i].second);
3948 if (x_error) {
3949 break;
3950 }
3951 }
3952
3953 // Finalize the file
3954 bool closed = ::fclose(fp.release()) == 0 ? true : false;
3955 if (!x_error && !closed) {
3956 x_error = errno;
3957 }
3958 string reason;
3959 if (x_error) {
3960 reason = s_OSReason(x_error);
3961 } else if (eof) {
3962 x_error = s_TruncateFile(dst->GetPath(), eof);
3963 if (x_error) {
3964 #ifdef NCBI_OS_MSWIN
3965 TCHAR* str = NULL;
3966 DWORD rv = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
3967 FORMAT_MESSAGE_FROM_SYSTEM |
3968 FORMAT_MESSAGE_MAX_WIDTH_MASK |
3969 FORMAT_MESSAGE_IGNORE_INSERTS,
3970 NULL, (DWORD) x_error,
3971 MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT),
3972 (LPTSTR) &str, 0, NULL);
3973 if (str) {
3974 if (rv) {
3975 _ASSERT(*str);
3976 reason = string(": ") + _T_STDSTRING(str);
3977 }
3978 ::LocalFree((HLOCAL) str);
3979 }
3980 if (reason.empty()) {
3981 reason = ": Error 0x" + NStr::UIntToString(x_error, 0, 16);
3982 }
3983 #else
3984 reason = s_OSReason(x_error);
3985 #endif //NCBI_OS_MSWIN
3986 }
3987 }
3988 if (x_error) {
3989 _ASSERT(!reason.empty());
3990 TAR_POST(100, Error,
3991 "Cannot write sparse file '" + dst->GetPath() + '\''+ reason);
3992 dst->Remove();
3993 return false;
3994 }
3995
3996 return true;
3997 }
3998
3999
x_RestoreAttrs(const CTarEntryInfo & info,TFlags what,const CDirEntry * path,TTarMode perm) const4000 void CTar::x_RestoreAttrs(const CTarEntryInfo& info,
4001 TFlags what,
4002 const CDirEntry* path,
4003 TTarMode perm) const
4004 {
4005 unique_ptr<CDirEntry> path_ptr; // deleter
4006 if (!path) {
4007 path_ptr.reset(new CDirEntry(s_ToFilesystemPath
4008 (m_BaseDir, info.GetName(),
4009 !(m_Flags & fKeepAbsolutePath))));
4010 path = path_ptr.get();
4011 }
4012
4013 // Date/time.
4014 // Set the time before permissions because on some platforms this setting
4015 // can also affect file permissions.
4016 if (what & fPreserveTime) {
4017 CTime modification(info.GetModificationTime());
4018 CTime last_access(info.GetLastAccessTime());
4019 CTime creation(info.GetCreationTime());
4020 modification.SetNanoSecond(info.m_Stat.mtime_nsec);
4021 last_access.SetNanoSecond(info.m_Stat.atime_nsec);
4022 creation.SetNanoSecond(info.m_Stat.ctime_nsec);
4023 if (!path->SetTime(&modification, &last_access, &creation)) {
4024 int x_errno = CNcbiError::GetLast().Code();
4025 TAR_THROW(this, eRestoreAttrs,
4026 "Cannot restore date/time of '" + path->GetPath() + '\''
4027 + s_OSReason(x_errno));
4028 }
4029 }
4030
4031 // Owner.
4032 // This must precede changing permissions because on some
4033 // systems chown() clears the set[ug]id bits for non-superusers
4034 // thus resulting in incorrect permissions.
4035 if (what & fPreserveOwner) {
4036 unsigned int uid, gid;
4037 // 2-tier trial: first using the names, then using numeric IDs.
4038 // Note that it is often impossible to restore the original owner
4039 // without the super-user rights so no error checking is done here.
4040 if (!path->SetOwner(info.GetUserName(),
4041 info.GetGroupName(),
4042 eIgnoreLinks, &uid, &gid) &&
4043 !path->SetOwner(kEmptyStr, info.GetGroupName(), eIgnoreLinks)) {
4044 if (uid != info.GetUserId() || gid != info.GetGroupId()) {
4045 string user = NStr::UIntToString(info.GetUserId());
4046 string group = NStr::UIntToString(info.GetGroupId());
4047 if (!path->SetOwner(user, group, eIgnoreLinks)) {
4048 path->SetOwner(kEmptyStr, group, eIgnoreLinks);
4049 }
4050 }
4051 }
4052 }
4053
4054 // Mode.
4055 // Set them last.
4056 if ((what & fPreserveMode)
4057 && info.GetType() != CTarEntryInfo::ePipe
4058 && info.GetType() != CTarEntryInfo::eCharDev
4059 && info.GetType() != CTarEntryInfo::eBlockDev) {
4060 bool failed = false;
4061 #ifdef NCBI_OS_UNIX
4062 // We won't change permissions for sym.links because lchmod() is not
4063 // portable, and also is not implemented on majority of platforms.
4064 if (info.GetType() != CTarEntryInfo::eSymLink) {
4065 // Use raw mode here to restore most of the bits
4066 mode_t mode = s_TarToMode(perm ? perm : info.m_Stat.orig.st_mode);
4067 if (chmod(path->GetPath().c_str(), mode) != 0) {
4068 // May fail due to setuid/setgid bits -- strip'em and try again
4069 if (mode & (S_ISUID | S_ISGID)) {
4070 mode &= ~(S_ISUID | S_ISGID);
4071 failed = chmod(path->GetPath().c_str(), mode) != 0;
4072 } else {
4073 failed = true;
4074 }
4075 CNcbiError::SetFromErrno();
4076 }
4077 }
4078 #else
4079 CDirEntry::TMode user, group, other;
4080 CDirEntry::TSpecialModeBits special_bits;
4081 if (perm) {
4082 s_TarToMode(perm, &user, &group, &other, &special_bits);
4083 } else {
4084 info.GetMode(&user, &group, &other, &special_bits);
4085 }
4086 failed = !path->SetMode(user, group, other, special_bits);
4087 #endif //NCBI_OS_UNIX
4088 if (failed) {
4089 int x_errno = CNcbiError::GetLast().Code();
4090 TAR_THROW(this, eRestoreAttrs,
4091 "Cannot " + string(perm ? "change" : "restore")
4092 + " mode bits of '" + path->GetPath() + '\''
4093 + s_OSReason(x_errno));
4094 }
4095 }
4096 }
4097
4098
// Convert a directory name to the form used for a base directory:
// a file system path (see s_ToFilesystemPath()) that has only forward
// slashes in it, and always ends with one.
static string s_BaseDir(const string& dirname)
{
    string base = s_ToFilesystemPath(kEmptyStr, dirname);
#ifdef NCBI_OS_MSWIN
    // Replace backslashes with forward slashes
    NStr::ReplaceInPlace(base, "\\", "/");
#endif //NCBI_OS_MSWIN
    if (base.empty()  ||  base[base.size() - 1] != '/') {
        base.append(1, '/');
    }
    return base;
}
4110
4111
x_Append(const string & name,const TEntries * toc)4112 unique_ptr<CTar::TEntries> CTar::x_Append(const string& name,
4113 const TEntries* toc)
4114 {
4115 unique_ptr<TEntries> entries(new TEntries);
4116 unique_ptr<CDir::TEntries> dir;
4117
4118 const EFollowLinks follow_links = (m_Flags & fFollowLinks ?
4119 eFollowLinks : eIgnoreLinks);
4120 unsigned int uid = 0, gid = 0;
4121 bool update = true;
4122
4123 // Create the entry info
4124 m_Current = CTarEntryInfo(m_StreamPos);
4125
4126 // Compose entry name for relative names
4127 string path = s_ToFilesystemPath(m_BaseDir, name);
4128
4129 // Get direntry information
4130 CDirEntry entry(path);
4131 CDirEntry::SStat st;
4132 if (!entry.Stat(&st, follow_links)) {
4133 int x_errno = errno;
4134 TAR_THROW(this, eOpen,
4135 "Cannot get status of '" + path + '\''+ s_OSReason(x_errno));
4136 }
4137 CDirEntry::EType type = CDirEntry::GetType(st.orig);
4138
4139 string temp = s_ToArchiveName(m_BaseDir, path);
4140
4141 if (temp.empty()) {
4142 TAR_THROW(this, eBadName,
4143 "Empty entry name not allowed");
4144 }
4145
4146 list<CTempString> elems;
4147 NStr::Split(temp, "/", elems,
4148 NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
4149 if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4150 TAR_THROW(this, eBadName,
4151 "Name '" + temp + "' embeds parent directory (\"..\")");
4152 }
4153 if (m_Mask[eExcludeMask].mask
4154 && s_MatchExcludeMask(temp, elems,
4155 m_Mask[eExcludeMask].mask,
4156 m_Mask[eExcludeMask].acase)) {
4157 goto out;
4158 }
4159 elems.clear();
4160 if (type == CDirEntry::eDir && temp != "/") {
4161 temp += '/';
4162 }
4163
4164 m_Current.m_Name.swap(temp);
4165 m_Current.m_Type = CTarEntryInfo::EType(type);
4166 if (m_Current.GetType() == CTarEntryInfo::eSymLink) {
4167 _ASSERT(!follow_links);
4168 m_Current.m_LinkName = entry.LookupLink();
4169 if (m_Current.GetLinkName().empty()) {
4170 TAR_THROW(this, eBadName,
4171 "Empty link name not allowed");
4172 }
4173 }
4174
4175 entry.GetOwner(&m_Current.m_UserName, &m_Current.m_GroupName,
4176 follow_links, &uid, &gid);
4177 #ifdef NCBI_OS_UNIX
4178 if (NStr::UIntToString(uid) == m_Current.GetUserName()) {
4179 m_Current.m_UserName.erase();
4180 }
4181 if (NStr::UIntToString(gid) == m_Current.GetGroupName()) {
4182 m_Current.m_GroupName.erase();
4183 }
4184 #endif //NCBI_OS_UNIX
4185 #ifdef NCBI_OS_MSWIN
4186 // These are fake but we don't want to leave plain 0 (Unix root) in there
4187 st.orig.st_uid = (uid_t) uid;
4188 st.orig.st_gid = (gid_t) gid;
4189 #endif //NCBI_OS_MSWIN
4190
4191 m_Current.m_Stat = st;
4192 // Fixup for mode bits
4193 m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(st.orig.st_mode);
4194
4195 // Check if we need to update this entry in the archive
4196 if (toc) {
4197 bool found = false;
4198
4199 if (type != CDirEntry::eUnknown) {
4200 // Start searching from the end of the list, to find
4201 // the most recent entry (if any) first
4202 _ASSERT(temp.empty());
4203 REVERSE_ITERATE(TEntries, e, *toc) {
4204 if (!temp.empty()) {
4205 if (e->GetType() == CTarEntryInfo::eHardLink ||
4206 temp != s_ToFilesystemPath(m_BaseDir, e->GetName())) {
4207 continue;
4208 }
4209 } else if (path == s_ToFilesystemPath(m_BaseDir,e->GetName())){
4210 found = true;
4211 if (e->GetType() == CTarEntryInfo::eHardLink) {
4212 temp = s_ToFilesystemPath(m_BaseDir, e->GetLinkName());
4213 continue;
4214 }
4215 } else {
4216 continue;
4217 }
4218 if (m_Current.GetType() != e->GetType()) {
4219 if (m_Flags & fEqualTypes) {
4220 goto out;
4221 }
4222 } else if (m_Current.GetType() == CTarEntryInfo::eSymLink
4223 && m_Current.GetLinkName() == e->GetLinkName()) {
4224 goto out;
4225 }
4226 if (m_Current.GetModificationCTime()
4227 <= e->GetModificationCTime()) {
4228 update = false; // same(or older), no update
4229 }
4230 break;
4231 }
4232 }
4233
4234 if (!update || (!found && (m_Flags & (fUpdate & ~fOverwrite)))) {
4235 if (type != CDirEntry::eDir && type != CDirEntry::eUnknown) {
4236 goto out;
4237 }
4238 // Directories always get recursive treatment later
4239 update = false;
4240 }
4241 }
4242
4243 // Append the entry
4244 switch (type) {
4245 case CDirEntry::eFile:
4246 _ASSERT(update);
4247 if (x_AppendFile(path)) {
4248 entries->push_back(m_Current);
4249 }
4250 break;
4251
4252 case CDirEntry::eBlockSpecial:
4253 case CDirEntry::eCharSpecial:
4254 case CDirEntry::eSymLink:
4255 case CDirEntry::ePipe:
4256 _ASSERT(update);
4257 m_Current.m_Stat.orig.st_size = 0;
4258 x_WriteEntryInfo(path);
4259 entries->push_back(m_Current);
4260 break;
4261
4262 case CDirEntry::eDir:
4263 dir.reset(CDir(path).GetEntriesPtr(kEmptyStr, CDir::eIgnoreRecursive));
4264 if (!dir) {
4265 int x_errno = CNcbiError::GetLast().Code();
4266 string error =
4267 "Cannot list directory '" + path + '\'' + s_OSReason(x_errno);
4268 if (m_Flags & fIgnoreUnreadable) {
4269 TAR_POST(101, Error, error);
4270 break;
4271 }
4272 TAR_THROW(this, eRead, error);
4273 }
4274 if (update) {
4275 m_Current.m_Stat.orig.st_size = 0;
4276 x_WriteEntryInfo(path);
4277 entries->push_back(m_Current);
4278 }
4279 // Append/update all files from that directory
4280 ITERATE(CDir::TEntries, e, *dir) {
4281 unique_ptr<TEntries> add = x_Append((*e)->GetPath(), toc);
4282 entries->splice(entries->end(), *add);
4283 }
4284 break;
4285
4286 case CDirEntry::eDoor:
4287 case CDirEntry::eSocket:
4288 // Tar does not have any provisions to store this kind of entries
4289 if (!(m_Flags & fSkipUnsupported)) {
4290 TAR_POST(3, Warning,
4291 "Skipping non-archiveable "
4292 + string(type == CDirEntry::eSocket ? "socket" : "door")
4293 + " '" + path + '\'');
4294 }
4295 break;
4296
4297 case CDirEntry::eUnknown:
4298 if (!(m_Flags & fSkipUnsupported)) {
4299 TAR_THROW(this, eUnsupportedSource,
4300 "Unable to archive '" + path + '\'');
4301 }
4302 /*FALLTHRU*/
4303
4304 default:
4305 if (type != CDirEntry::eUnknown) {
4306 _TROUBLE;
4307 }
4308 TAR_POST(14, Error,
4309 "Skipping unsupported source '" + path
4310 + "' of type #" + NStr::IntToString(int(type)));
4311 break;
4312 }
4313
4314 out:
4315 return entries;
4316 }
4317
4318
x_Append(const CTarUserEntryInfo & entry,CNcbiIstream & is)4319 unique_ptr<CTar::TEntries> CTar::x_Append(const CTarUserEntryInfo& entry,
4320 CNcbiIstream& is)
4321 {
4322 unique_ptr<TEntries> entries(new TEntries);
4323
4324 // Create a temp entry info first
4325 m_Current = CTarEntryInfo(m_StreamPos);
4326
4327 string temp = s_ToArchiveName(kEmptyStr, entry.GetName());
4328
4329 while (NStr::EndsWith(temp, '/')) { // NB: directories are not allowed here
4330 temp.resize(temp.size() - 1);
4331 }
4332 if (temp.empty()) {
4333 TAR_THROW(this, eBadName,
4334 "Empty entry name not allowed");
4335 }
4336
4337 list<CTempString> elems;
4338 NStr::Split(temp, "/", elems,
4339 NStr::fSplit_MergeDelimiters | NStr::fSplit_Truncate);
4340 if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4341 TAR_THROW(this, eBadName,
4342 "Name '" + temp + "' embeds parent directory (\"..\")");
4343 }
4344 elems.clear();
4345
4346 // Recreate entry info
4347 m_Current = entry;
4348 m_Current.m_Name.swap(temp);
4349 m_Current.m_Pos = m_StreamPos;
4350 m_Current.m_Type = CTarEntryInfo::eFile;
4351
4352 if (!is.good()) {
4353 TAR_THROW(this, eRead,
4354 "Bad input file stream");
4355 }
4356
4357 CTime::GetCurrentTimeT(&m_Current.m_Stat.orig.st_ctime,
4358 &m_Current.m_Stat.ctime_nsec);
4359 m_Current.m_Stat.orig.st_mtime
4360 = m_Current.m_Stat.orig.st_atime
4361 = m_Current.m_Stat.orig.st_ctime;
4362 m_Current.m_Stat.mtime_nsec
4363 = m_Current.m_Stat.atime_nsec
4364 = m_Current.m_Stat.ctime_nsec;
4365
4366 #ifdef NCBI_OS_UNIX
4367 // use regular file mode, adjusted with umask()
4368 mode_t mode = s_TarToMode(fTarURead | fTarUWrite |
4369 fTarGRead | fTarGWrite |
4370 fTarORead | fTarOWrite);
4371 mode_t u;
4372 # ifdef HAVE_GETUMASK
4373 // NB: thread-safe
4374 u = getumask();
4375 # else
4376 u = umask(022);
4377 umask(u);
4378 # endif //HAVE_GETUMASK
4379 mode &= ~u;
4380 m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(mode);
4381
4382 m_Current.m_Stat.orig.st_uid = geteuid();
4383 m_Current.m_Stat.orig.st_gid = getegid();
4384
4385 CUnixFeature::GetUserNameByUID(m_Current.m_Stat.orig.st_uid)
4386 .swap(m_Current.m_UserName);
4387 CUnixFeature::GetGroupNameByGID(m_Current.m_Stat.orig.st_gid)
4388 .swap(m_Current.m_GroupName);
4389 #endif //NCBI_OS_UNIX
4390 #ifdef NCBI_OS_MSWIN
4391 // safe file mode
4392 m_Current.m_Stat.orig.st_mode = (fTarURead | fTarUWrite |
4393 fTarGRead | fTarORead);
4394
4395 unsigned int uid = 0, gid = 0;
4396 CWinSecurity::GetObjectOwner(CCurrentProcess::GetHandle(),
4397 SE_KERNEL_OBJECT,
4398 &m_Current.m_UserName,
4399 &m_Current.m_GroupName,
4400 &uid, &gid);
4401 // These are fake but we don't want to leave plain 0 (Unix root) in there
4402 m_Current.m_Stat.orig.st_uid = (uid_t) uid;
4403 m_Current.m_Stat.orig.st_gid = (gid_t) gid;
4404 #endif //NCBI_OS_MSWIN
4405
4406 x_AppendStream(entry.GetName(), is);
4407
4408 entries->push_back(m_Current);
4409 return entries;
4410 }
4411
4412
4413 // Regular entries only!
x_AppendStream(const string & name,CNcbiIstream & is)4414 void CTar::x_AppendStream(const string& name, CNcbiIstream& is)
4415 {
4416 _ASSERT(m_Current.GetType() == CTarEntryInfo::eFile);
4417
4418 // Write entry header
4419 x_WriteEntryInfo(name);
4420
4421 errno = 0;
4422 Uint8 size = m_Current.GetSize();
4423 while (size) {
4424 // Write file contents
4425 _ASSERT(m_BufferPos < m_BufferSize);
4426 size_t avail = m_BufferSize - m_BufferPos;
4427 if (avail > size) {
4428 avail = (size_t) size;
4429 }
4430 // Read file
4431 int x_errno = 0;
4432 streamsize xread;
4433 if (is.good()) {
4434 try {
4435 if (!is.read(m_Buffer + m_BufferPos, (streamsize) avail)) {
4436 x_errno = errno;
4437 xread = -1;
4438 } else {
4439 xread = is.gcount();
4440 }
4441 } catch (IOS_BASE::failure&) {
4442 xread = -1;
4443 }
4444 } else {
4445 xread = -1;
4446 }
4447 if (xread <= 0) {
4448 ifstream* ifs = dynamic_cast<ifstream*>(&is);
4449 TAR_THROW(this, eRead,
4450 "Cannot read "
4451 + string(ifs ? "file" : "stream")
4452 + " '" + name + '\'' + s_OSReason(x_errno));
4453 }
4454 // Write buffer to the archive
4455 avail = (size_t) xread;
4456 x_WriteArchive(avail);
4457 size -= avail;
4458 }
4459
4460 // Write zeros to get the written size a multiple of BLOCK_SIZE
4461 size_t zero = ALIGN_SIZE(m_BufferPos) - m_BufferPos;
4462 memset(m_Buffer + m_BufferPos, 0, zero);
4463 x_WriteArchive(zero);
4464 _ASSERT(!OFFSET_OF(m_BufferPos) && !OFFSET_OF(m_StreamPos));
4465 }
4466
4467
4468 // Regular files only!
x_AppendFile(const string & file)4469 bool CTar::x_AppendFile(const string& file)
4470 {
4471 _ASSERT(m_Current.GetType() == CTarEntryInfo::eFile);
4472
4473 // FIXME: Switch to CFileIO eventually to avoid ifstream's obscurity
4474 // w.r.t. errors, an extra layer of buffering etc.
4475 CNcbiIfstream ifs;
4476
4477 // Open file
4478 ifs.open(file.c_str(), IOS_BASE::binary | IOS_BASE::in);
4479 if (!ifs) {
4480 int x_errno = errno;
4481 string error
4482 = "Cannot open file '" + file + '\'' + s_OSReason(x_errno);
4483 if (m_Flags & fIgnoreUnreadable) {
4484 TAR_POST(102, Error, error);
4485 return false;
4486 }
4487 TAR_THROW(this, eOpen, error);
4488 }
4489
4490 x_AppendStream(file, ifs);
4491 return true;
4492 }
4493
4494
SetMask(CMask * mask,EOwnership own,EMaskType type,NStr::ECase acase)4495 void CTar::SetMask(CMask* mask, EOwnership own,
4496 EMaskType type, NStr::ECase acase)
4497 {
4498 int idx = int(type);
4499 if (idx < 0 || sizeof(m_Mask)/sizeof(m_Mask[0]) <= (size_t) idx){
4500 TAR_THROW(this, eMemory,
4501 "Mask type is out of range: " + NStr::IntToString(idx));
4502 }
4503 if (m_Mask[idx].owned) {
4504 delete m_Mask[idx].mask;
4505 }
4506 m_Mask[idx].mask = mask;
4507 m_Mask[idx].acase = acase;
4508 m_Mask[idx].owned = mask ? own : eNoOwnership;
4509 }
4510
4511
SetBaseDir(const string & dirname)4512 void CTar::SetBaseDir(const string& dirname)
4513 {
4514 string dir = s_BaseDir(dirname);
4515 m_BaseDir.swap(dir);
4516 }
4517
4518
EstimateArchiveSize(const TFiles & files,size_t blocking_factor,const string & base_dir)4519 Uint8 CTar::EstimateArchiveSize(const TFiles& files,
4520 size_t blocking_factor,
4521 const string& base_dir)
4522 {
4523 const size_t buffer_size = SIZE_OF(blocking_factor);
4524 string prefix = s_BaseDir(base_dir);
4525 Uint8 result = 0;
4526
4527 ITERATE(TFiles, f, files) {
4528 // Count in the file size
4529 result += BLOCK_SIZE/*header*/ + ALIGN_SIZE(f->second);
4530
4531 // Count in the long name (if any)
4532 string path = s_ToFilesystemPath(prefix, f->first);
4533 string name = s_ToArchiveName (prefix, path);
4534 size_t namelen = name.size() + 1;
4535 if (namelen > sizeof(((SHeader*) 0)->name)) {
4536 result += BLOCK_SIZE/*long name header*/ + ALIGN_SIZE(namelen);
4537 }
4538 }
4539 if (result) {
4540 result += BLOCK_SIZE << 1; // EOT
4541 Uint8 padding = result % buffer_size;
4542 if (padding) {
4543 result += buffer_size - padding;
4544 }
4545 }
4546
4547 return result;
4548 }
4549
4550
4551 class CTarReader : public IReader
4552 {
4553 public:
CTarReader(CTar * tar,EOwnership own=eNoOwnership)4554 CTarReader(CTar* tar, EOwnership own = eNoOwnership)
4555 : m_Read(0), m_Eof(false), m_Bad(false), m_Tar(tar, own)
4556 { }
4557
4558 virtual ERW_Result Read(void* buf, size_t count, size_t* bytes_read = 0);
4559 virtual ERW_Result PendingCount(size_t* count);
4560
4561 private:
4562 Uint8 m_Read;
4563 bool m_Eof;
4564 bool m_Bad;
4565 AutoPtr<CTar> m_Tar;
4566 };
4567
4568
Read(void * buf,size_t count,size_t * bytes_read)4569 ERW_Result CTarReader::Read(void* buf, size_t count, size_t* bytes_read)
4570 {
4571 if (m_Bad || !count) {
4572 if (bytes_read) {
4573 *bytes_read = 0;
4574 }
4575 return m_Bad ? eRW_Error
4576 : (m_Read < m_Tar->m_Current.GetSize() || !m_Eof) ? eRW_Success
4577 : eRW_Eof;
4578 }
4579
4580 size_t read;
4581 _ASSERT(m_Tar->m_Current.GetSize() >= m_Read);
4582 Uint8 left = m_Tar->m_Current.GetSize() - m_Read;
4583 if (!left) {
4584 m_Eof = true;
4585 read = 0;
4586 } else {
4587 if (count > left) {
4588 count = (size_t) left;
4589 }
4590
4591 size_t off = (size_t) OFFSET_OF(m_Read);
4592 if (off) {
4593 read = BLOCK_SIZE - off;
4594 if (m_Tar->m_BufferPos) {
4595 off += m_Tar->m_BufferPos - BLOCK_SIZE;
4596 } else {
4597 off += m_Tar->m_BufferSize - BLOCK_SIZE;
4598 }
4599 if (read > count) {
4600 read = count;
4601 }
4602 memcpy(buf, m_Tar->m_Buffer + off, read);
4603 m_Read += read;
4604 count -= read;
4605 if (!count) {
4606 goto out;
4607 }
4608 buf = (char*) buf + read;
4609 } else {
4610 read = 0;
4611 }
4612
4613 off = m_Tar->m_BufferPos; // NB: x_ReadArchive() changes m_BufferPos
4614 if (m_Tar->x_ReadArchive(count)) {
4615 _ASSERT(count);
4616 memcpy(buf, m_Tar->m_Buffer + off, count);
4617 m_Read += count;
4618 read += count;
4619 m_Tar->m_StreamPos += ALIGN_SIZE(count);
4620 _ASSERT(!OFFSET_OF(m_Tar->m_StreamPos));
4621 } else {
4622 m_Bad = true;
4623 _ASSERT(!m_Tar->m_Stream.good());
4624 // If we don't throw here, it may look like an ordinary EOF
4625 TAR_THROW(m_Tar, eRead,
4626 "Read error while streaming");
4627 }
4628 }
4629
4630 out:
4631 _ASSERT(!m_Bad);
4632 if (bytes_read) {
4633 *bytes_read = read;
4634 }
4635 return m_Eof ? eRW_Eof : eRW_Success;
4636 }
4637
4638
PendingCount(size_t * count)4639 ERW_Result CTarReader::PendingCount(size_t* count)
4640 {
4641 if (m_Bad) {
4642 return eRW_Error;
4643 }
4644 _ASSERT(m_Tar->m_Current.GetSize() >= m_Read);
4645 Uint8 left = m_Tar->m_Current.GetSize() - m_Read;
4646 if (!left && m_Eof) {
4647 return eRW_Eof;
4648 }
4649 size_t avail = BLOCK_SIZE - (size_t) OFFSET_OF(m_Read);
4650 _ASSERT(m_Tar->m_BufferPos < m_Tar->m_BufferSize);
4651 if (m_Tar->m_BufferPos) {
4652 avail += m_Tar->m_BufferSize - m_Tar->m_BufferPos;
4653 }
4654 if (!avail && m_Tar->m_Stream.good()) {
4655 // NB: good() subsumes there's streambuf (bad() otherwise)
4656 streamsize sb_avail = m_Tar->m_Stream.rdbuf()->in_avail();
4657 if (sb_avail != -1) {
4658 avail = (size_t) sb_avail;
4659 }
4660 }
4661 *count = avail > left ? (size_t) left : avail;
4662 return eRW_Success;
4663 }
4664
4665
Extract(CNcbiIstream & is,const string & name,CTar::TFlags flags)4666 IReader* CTar::Extract(CNcbiIstream& is,
4667 const string& name, CTar::TFlags flags)
4668 {
4669 unique_ptr<CTar> tar(new CTar(is, 1/*blocking factor*/));
4670 tar->SetFlags(flags & ~fStreamPipeThrough);
4671
4672 unique_ptr<CMaskFileName> mask(new CMaskFileName);
4673 mask->Add(name);
4674 tar->SetMask(mask.get(), eTakeOwnership);
4675 mask.release();
4676
4677 tar->x_Open(eInternal);
4678 unique_ptr<TEntries> temp = tar->x_ReadAndProcess(eInternal);
4679 _ASSERT(temp && temp->size() < 2);
4680 if (temp->size() < 1) {
4681 return 0;
4682 }
4683
4684 _ASSERT(tar->m_Current == temp->front());
4685 CTarEntryInfo::EType type = tar->m_Current.GetType();
4686 if (type != CTarEntryInfo::eFile
4687 && (type != CTarEntryInfo::eUnknown || (flags & fSkipUnsupported))){
4688 return 0;
4689 }
4690
4691 IReader* ir = new CTarReader(tar.get(), eTakeOwnership);
4692 tar.release();
4693 return ir;
4694 }
4695
4696
GetNextEntryData(void)4697 IReader* CTar::GetNextEntryData(void)
4698 {
4699 CTarEntryInfo::EType type = m_Current.GetType();
4700 return type != CTarEntryInfo::eFile
4701 && (type != CTarEntryInfo::eUnknown || (m_Flags & fSkipUnsupported))
4702 ? 0 : new CTarReader(this);
4703 }
4704
4705
4706 END_NCBI_SCOPE
4707