1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #define HANDLING_EXTENDED_HEADERS 0
27 
28 #include <kfs/extern.h>
29 #include <klib/defs.h>
30 #include <klib/rc.h>
31 #include <kfs/file.h>
32 #include <kfs/mmap.h>
33 #include <kfs/arc.h>
34 #include <kfs/toc.h>
35 #include <kfs/tar.h>
36 #include <klib/log.h>
37 #include <klib/debug.h>
38 
39 #include "toc-priv.h"
40 #include <os-native.h>
41 #include <sysalloc.h>
42 #include <strtol.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <stdio.h> /* temporary for development */
46 
47 #include <limits.h>
48 #include <sys/types.h>
49 
/* -----
 * Debug tracing helpers.
 *
 * TAR_FUNC_ENTRY() logs the name of the enclosing function and
 * TAR_DEBUG(msg) forwards a parenthesized printf-style argument list to
 * DBGMSG.  Both expand to nothing unless _DEBUGGING is non-zero.
 *
 * The test is "#if" rather than "#ifdef" so that it agrees with the
 * "#if _DEBUGGING" guards around the debug-only helper functions later in
 * this file; with "#ifdef" a build that defines _DEBUGGING as 0 would enable
 * the macros but not the helpers they may call.
 */
#if _DEBUGGING
#define TAR_FUNC_ENTRY() DBGMSG (DBG_KFS, DBG_FLAG(DBG_KFS_TARENTRY), ("Enter: %s\n", __func__))
#define TAR_DEBUG(msg) DBGMSG (DBG_KFS, DBG_FLAG(DBG_KFS_TAR), msg)
#else
#define TAR_FUNC_ENTRY()
#define TAR_DEBUG(msg)
#endif
57 
58 /* -----
 * OFFSET_OF is the count of bytes between the base of a structure and
 * a particular member of that structure
61  */
62 #ifndef OFFSET_OF
63 #define OFFSET_OF(structure,member) ((size_t)((&(((structure*)0)->member))-(0)))
64 #endif
65 
66 /* -----
67  * Hide any definition of sun that might have come from sun compilers
68  * or the like
69  */
70 #undef sun
71 #undef SUN
72 
73 
/* -----
 * Token-pasting and stringizing helpers.
 *
 * PASTE_2 / PASTE_3 glue their arguments into a single token and STRINGIFY
 * turns its argument into a string literal.  The operands of ## and # are
 * used exactly as written: they are not macro-expanded by these macros.
 * PASTE_2 and STRINGIFY are used by the type_() and link_() list macros
 * in this file.
 */
#define PASTE_2(a,b)   a##b
#define PASTE_3(a,b,c) a##b##c
#define STRINGIFY(a)   #a
77 
78 
79 /* ======================================================================
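 * return an ASCII string that describes a boolean value (get_bool_string,
 * debug builds only)
 *
 * The header-type counterpart, get_type_string, is generated from the same
 * TYPES() list as the tar_header_type enum so the two always match.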
83  */
84 #if _DEBUGGING
/* Return a printable name for a boolean value.
 *
 * The value is examined as an int: exactly false yields "false", exactly
 * true yields "true", and any other bit pattern (possible when bool is not
 * a genuine one-bit type) yields "not-false".
 */
static const char * get_bool_string (bool b)
{
    const int as_int = ( int ) b;

    if ( as_int == false )
    {
        return "false";
    }
    if ( as_int == true )
    {
        return "true";
    }
    return "not-false";
}
100 #endif
101 
102 
103 /* ======================================================================
104  * Header format structures
105  *
 * Tar (tape archiver) started possibly as a unix utility in BSD rivalling
 * the cpio from the AT&T System III.  The exact derivation is unimportant
 * and some names might be slightly misleading based on historical
 * inaccuracies but successful implementation is not dependent on such
 * accuracy in historical trivia but rather on accuracy in technical details.
111  *
112  * Posix attempted to standardize the growingly divergent variants of tar
113  * but it has led to only slightly more standardized variants with vaguely
114  * compatible extensions.
115  *
 * For the purposes of this implementation we will refer to the versions of
 * tar known to the author and include the tar-like aspects of the posix pax
 * replacement for tar.
119  *
120  * Cpio support could be added if desired with only some difficulty.
121  *
 * Supported known variants for this implementation will be:
 *  V7  - the oldest known common base definitions for a tar header
 *         block (possibly from Unix V7?)
 *  POSIX   - Posix.1-1988 initial restandardization of a header block
 *        This version introduced the ustar name for a tar header
 *        and includes that term as a "magic" constant.
 *  PAX - Posix.1-2001 headers for pax, a tar derivative that is a
 *        peaceful attempt to unify the tar and cpio formats apparently.
 *        It isn't different than POSIX in the ustar block but instead
 *        is an introduction of two new values of a link field in the
 *        tar header that defines what comes next (see headers defined
 *        below).
 *  SUN - an extension to the POSIX tar header format from SunOS 5
 *  STAR 85 - pre-POSIX extensions to tar from Joerg Schilling (ask him; it's
 *        the bestest most greatest tar until STAR94)
 *  STAR 94 - A redo of star based on the POSIX ustar tar header.  A less
 *        broken than most implementation of a POSIX/ustar tar header
 *        based tar. Schilling says it's the only real implementation of
 *        a ustar based tar but it isn't fully compliant by design.
 *  GNU 89  - a selected variant of tar from FSF/GNU that is a broken
 *        implementation of a POSIX/ustar header based tar.
 *  GNU 01  - A slight redo of the FSF/GNU tar format.  There are actually
 *        evolving variants all of which are still somewhat broken
 *        implementations of a ustar based tar header format.
146  *
 * Along with these variants of a tar/ustar semi-standard tar header there are
 * other header blocks and other significant blocks that are also tracked in
 * this implementation of a tar reader.
150  *  ZERO BLOCK - a block of 512 zero bytes that is supposed to be padding at
151  *        the end of a tar file to meet some super blocksize.  Based on
152  *        where it fits in it would be found when looking for a header
153  *        for the next file included in a tar archive.
154  *  RAW - a convention to mean a header block of a type that has not been
155  *        determined.
156  */
157 #define TYPES() \
158     type_(UNDEFINED) type_(ZERO_BLOCK) type_(CPIO) type_(V7) type_(POSIX) \
159         type_(SUN) type_(STAR_85) type_(STAR_94)  type_(GNU_89) type_(SPARSE)
160 
161 
/* expand each TYPES() entry into an enumerator named TAR_<entry> */
#define type_(e) PASTE_2(TAR_,e),

/* -----
 * The kinds of 512 byte block this reader distinguishes.
 *
 * The enumerators are generated from TYPES() in list order starting at 0
 * (TAR_UNDEFINED first).  TAR_TYPE_COUNT follows the last generated
 * enumerator, so it equals the number of entries in TYPES().
 */
typedef enum tar_header_type
{
    TYPES()
    TAR_TYPE_COUNT
} tar_header_type;
169 
170 #undef type_
171 #define type_(e) STRINGIFY(e),
/* Return the printable name of a tar_header_type.
 *
 * The name table is generated from TYPES(), so entry N is the stringized
 * form of the Nth list entry (without the TAR_ prefix).  Any value outside
 * 0 .. TAR_TYPE_COUNT-1 yields "Error".
 */
static const char * get_type_string(tar_header_type t)
{
    static const char * type_strings[] = { TYPES() };
    static const char * type_error = "Error";

    if ((t >= 0) && (t < TAR_TYPE_COUNT))
    {
        return type_strings[t];
    }
    return type_error;
}
183 #undef type_
184 #undef TYPES
185 
186 
187 /* =============================================================================
 * Tar headers are almost ASCII based but definitely byte/octet based so all
 * elements are best defined as arrays of char and use casts to signed and unsigned
 * where appropriate in interpretation.
191  *
192  * All Tar files or streams are divided into blocks of 512 bytes
193  * This is significant in the file data in that the last block
194  * of a file is supposed to be padded with NUL to fill out a block
195  * and then be followed by two blocks of all NUL bytes.
 * Headers are also 512 bytes with various but fairly consistent
 * interpretations of what is where within that block.
 *
 * Most tar utilities further define super blocks consisting of a number of blocks
 * typically 10 of them for a length of 5120 bytes.  This is irrelevant for this
 * implementation.  By definition a tar file ends with two "zero blocks" and enough
 * more after that to fill one of these super blocks.  We ignore all aspects of
 * this.
204  */
/* size in bytes of every tar block, header or data */
#define TAR_BLOCK_SIZE      (512)
/* one tar block viewed as undifferentiated bytes */
typedef char tar_raw_block [TAR_BLOCK_SIZE];
/* -----
 * Number of whole blocks needed to hold <byte_count> bytes (rounds up).
 *
 * The argument is parenthesized so that expressions containing operators of
 * lower precedence than '+' (?:, &, |, <<, comparisons ...) are evaluated as
 * a unit before the rounding is applied.  The argument is evaluated once.
 */
#define BLOCKS_FOR_BYTES(byte_count)    (((byte_count)+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)
208 
209 /* --------------------------------------------------------------------------------
210  * lengths of various tar header fields
211  */
212 
213 /* --------------------
 * Tar file names are always 100 bytes long and include preceding
 * path names.  The utilities do not preclude paths that put the
 * files outside of the "base" where the tar file was created.
217  *
218  * To handle tar files made by older tar utilities if the last
219  * character is '/' then the file should be assumed to be a directory.
220  *
221  * This 'type' is used for both the name of the object being archived
222  * and the link target if it is a hard or soft link.
223  */
224 #define TAR_NAME_LEN        (100)
225 typedef char    tar_file_name   [TAR_NAME_LEN];
226 
227 /* --------------------
228  * Tar mode strings are always 8 bytes long.
229  *
230  * 9 file access permissions bits and three execution mode bits.
231  *
 * Zero '0' not NUL pre-fill unused bytes.  A user permission of 0644 would be stored as
 * "0000644" with a NUL terminator.
234  *
235  * The format is 7 octal ASCII bytes with only the last 4 being
236  * significant.  That is the first three are always '0'.  The 8th
237  * byte is NUL.
238  *
239  * Older utilities might put preceding ' ' characters or
240  * instead of pre-fill have post fill ' ' or NUL.
241  */
242 #define TAR_MODE_LEN        (8)
243 typedef char    tar_file_mode   [TAR_MODE_LEN];
244 /* -----
245  * These are the bits if the mode is in binary (octal defines for ease of interpretation)
246  * they match the st_mode field from the stat()/fstat() struct stat.
247  *
 * GNU tar puts the file type bits from the stat structure in the mode of the tar file.
 * Other tar implementations might as well though nowhere is this defined as required or
 * recommended or even supported.
251  */
252 #define TAR_SUID_BIT    (04000) /* set UID on execution */
253 #define TAR_GUID_BIT    (02000) /* set GID on execution */
254 #define TAR_STICKY_BIT  (01000) /* save text / sticky bit */
255 /* file permissions */
256 #define TAR_MODE_OREAD  (00400) /* read by owner */
257 #define TAR_MODE_OWRITE (00200) /* write by owner */
258 #define TAR_MODE_OEXEC  (00100) /* execute by owner */
259 #define TAR_MODE_GREAD  (00040) /* read by group */
260 #define TAR_MODE_GWRITE (00020) /* write by group */
261 #define TAR_MODE_GEXEC  (00010) /* execute by group */
262 #define TAR_MODE_WREAD  (00004) /* read by other */
263 #define TAR_MODE_WWRITE (00002) /* write by other */
264 #define TAR_MODE_WEXEC  (00001) /* execute by other */
265 
/* -----
 * These are the bits once converted into ASCII.
 * Within an ASCII octal digit the low three bits are directly usable, so no
 * conversion is needed:
 * '0' = 0x30 / 060
 * '1' = 0x31 / 061
 * '2' = 0x32 / 062
 * '3' = 0x33 / 063
 * '4' = 0x34 / 064
 * '5' = 0x35 / 065
 * '6' = 0x36 / 066
 * '7' = 0x37 / 067
 *
 * In a permission digit read is 4, write is 2 and execute is 1, which is
 * the same weighting as the TAR_MODE_?READ / ?WRITE / ?EXEC octal masks
 * defined above.  In the execution mode digit set-UID is 4, set-GID is 2
 * and sticky is 1, matching TAR_SUID_BIT / TAR_GUID_BIT / TAR_STICKY_BIT.
 */
#define TAR_MODE_READ       (0x04)
#define TAR_MODE_WRITE      (0x02)
#define TAR_MODE_EXEC       (0x01)
#define TAR_MODE_STICKY     (0x01)
#define TAR_MODE_GUID       (0x02)
#define TAR_MODE_SUID       (0x04)
/* NOTE(review): index of each digit within a tar_file_mode string.  For a
 * mode stored as "0000644" index 4 holds the owner digit, index 5 the group
 * digit, index 6 the world digit and index 3 the set-UID/set-GID/sticky
 * digit, which does not obviously agree with the values below.  They are
 * left unchanged here; confirm against any users before relying on them.
 */
#define TAR_MODE_OWNER_BYTE (6)
#define TAR_MODE_GROUP_BYTE (5)
#define TAR_MODE_WORLD_BYTE (4)
#define TAR_MODE_USER_BYTE  (3)
#define TAR_MODE_EXEC_BYTE  (2)
290 /* --------------------
291  * Tar user (and group) numeric IDs are put into 8 bytes.
292  *
 * There is a '0' prefill and a terminating NUL.
294  *
295  * Older utilities might put preceding ' ' characters or
296  * instead of pre-fill have post fill ' ' or NUL.
297  */
298 #define TAR_ID_LEN      (8)
299 typedef char    tar_id      [TAR_ID_LEN];
300 
301 /* --------------------
302  * Tar file size elements are 12 bytes long with 11 used
303  * for octal characters making the maximum size of a file
304  * for pure classic or Posix tar limited to 8 GBytes.
305  * Various tar utilities handle longer files in different
306  * ways if at all.
307  *
308  * A length of 100 bytes would be stored as "00000000144".
309  *
310  * Links and some other special values are archived with a
311  * length of zero and thus no data blocks.
312  *
 * There is a '0' prefill and a terminating NUL.
314  *
315  * Older utilities might put preceding ' ' characters or
316  * instead of pre-fill have post fill ' ' or NUL.
317  *
 * GNU tar implementations use alternative interpretations of
 * this and possibly other fields using mime base 64 or
 * base 256 (big endian nonstandard sized binary)
321  */
322 #define TAR_SIZE_LEN        (12)
323 typedef char    tar_size    [TAR_SIZE_LEN];
324 #define MAX_TAR_FILE_SIZE   (077777777777)
325 
326 /* --------------------
327  * Tar file modification/access/creation times are 12 bytes
328  * long.  This holds 11 octal ASCII digits representing the
329  * number of seconds since 01/01/1970 00:00 UTC.
330  *
 * There is a '0' prefill and a terminating NUL.
332  *
333  * Older utilities might put preceding ' ' characters or
334  * instead of pre-fill have post fill ' ' or NUL.
335  */
336 #define TAR_TIME_LEN        (12)
337 typedef char    tar_time    [TAR_TIME_LEN];
338 
339 /* --------------------
340  * Tar has a weak checksum protection of part of the tar header
341  * that is 8 bytes long and again uses 7 ASCII octal digits.
342  *
 * There is a '0' prefill and a terminating NUL.
344  *
345  * Older utilities might put preceding ' ' characters or
346  * instead of pre-fill have post fill ' ' or NUL.
347  */
348 #define TAR_CSUM_LEN        (8)
349 typedef char    tar_csum    [TAR_CSUM_LEN];
350 #define csum_blanks     ("       ")
351 
352 /* --------------------
353  * The tar link is a single byte that expresses the type of file
354  * or special value represented by this entry.
355  */
356 typedef char    tar_link;
357 
358 #define LINKS()                   \
359     link_('\0',OLDNORMAL_FILE)    \
360     link_('0',NORMAL_FILE)        \
361     link_('1',HARD_LINK)          \
362     link_('2',SYMBOLIC_LINK)      \
363     link_('3',CHARACTER_SPECIAL)  \
364     link_('4',BLOCK_SPECIAL)      \
365     link_('5',DIRECTORY)          \
366     link_('6',FIFO)               \
367     link_('7',CONTIGUOUS_FILE)    \
368     link_('A',SOLARIS_ACL)        \
369     link_('D',GNU_DUMPDIR)        \
370     link_('E',SOLARIS_ACL_FILE)   \
371     link_('I',INODE_METADATA)     \
372     link_('K',NEXT_LONG_LINK)     \
373     link_('L',NEXT_LONG_NAME)     \
374     link_('M',MULTI_VOLUME)       \
375     link_('N',GNU1989_LONG_NAMES) \
376     link_('S',SPARSE)             \
377     link_('V',VOLUME_NAME)        \
378     link_('X',SUN_XHDR)           \
379     link_('g',PAX_GLOBAL_XHDR)    \
380     link_('x',PAX_XHDR)
381 
382 #define link_(v,n)      PASTE_2(LINK_,n) = v,
383 
/* -----
 * Values of the one byte link (type flag) field, generated from LINKS() so
 * that each LINK_<name> equals its character code.
 *
 * LINK_COUNT is neither a count of the link types nor a general maximum:
 * it simply follows the last enumerator, so it evaluates to
 * LINK_PAX_XHDR + 1 (that is 'x' + 1).
 */
enum e_tar_link
{
    LINKS()
    LINK_COUNT
};
390 #undef link_
391 
392 #if _DEBUGGING
393 #define link_(v,n) {STRINGIFY(n), v},
/* name / value pair: associates a link byte with its printable name
 * (used as the table element type in get_link_string) */
struct nv_pair
{
    const char * name;  /* printable name; NULL marks the end of a table */
    tar_link  link;     /* link byte value the name stands for */
};
/* Return the printable name of a link (type flag) byte.
 *
 * The lookup table is generated from LINKS() and closed by a NULL-named
 * sentinel; it is scanned front to back and the first entry whose value
 * equals <l> wins.  A byte that matches no entry yields "UNDEFINED".
 */
static const char * get_link_string(tar_link l)
{
    static const struct nv_pair pairs[] =
        {
            LINKS()
            {NULL, 0}
        };
    const struct nv_pair * entry;

    for (entry = pairs; entry->name != NULL; ++entry)
    {
        if (entry->link == l)
        {
            return entry->name;
        }
    }
    return "UNDEFINED";
}
413 #undef link_
414 #endif
415 #undef LINKS
416 
417 
418 /* --------------------
419  * The tar magic string is 6 bytes long.
420  */
421 #define TAR_MAGIC_LEN       (6)
422 typedef char    tar_magic   [TAR_MAGIC_LEN];
423 #define POSIX_MAGIC_CONST   "ustar"     /* includes terminating NUL */
424 
425 /* --------------------
426  * The tar version string is two bytes long and uses both bytes
427  */
428 #define TAR_VERSION_LEN     (2)
429 typedef char    tar_version [TAR_VERSION_LEN];
430 #define POSIX_VERSION_CONST "00"        /* does not include terminating NUL */
431 
432 /* --------------------
433  * The Posix strong user/group name is 32 bytes long
434  */
435 #define TAR_STRNAME_LEN     (32)
436 typedef char    tar_strname [TAR_STRNAME_LEN];
437 
438 /* --------------------
439  * The dev? strings are 8 bytes long
440  */
441 #define TAR_DEV_LEN     (8)
442 typedef char    tar_dev     [TAR_DEV_LEN];
443 
444 /* --------------------
 * Posix prefix is 155 bytes that can be put before the name to give a path of
 * 255 bytes instead of the smaller limit of 99.
447  */
448 #define TAR_PREFIX_LEN      (155)
449 typedef char    tar_prefix  [TAR_PREFIX_LEN];
450 
451 /*
452  * Sun extensions
453  */
454 
455 /* --------------------
456  */
457 typedef char sun_extnum;
458 #define SUN_FULLSIZE_LEN    (10)
459 typedef char    sun_fullsize    [SUN_FULLSIZE_LEN];
460 
461 /* --------------------
462  * star extensions
463  *
464  * star85 is old star from 1985
465  */
466 typedef char    star85_version;
467 #define STAR85_FILETYPE_LEN (8)
468 typedef char    star85_filetype [STAR85_FILETYPE_LEN];
469 #define STAR85_TYPE_LEN     (12)
470 typedef char    star85_type [STAR85_TYPE_LEN];
471 #define STAR85_RDEV_LEN     (12)
472 typedef char    star85_rdev [STAR85_RDEV_LEN];
473 /* ignoring the 11 byte rdev with minor bits */
474 #define STAR85_UNAME_LEN    (16)
475 typedef char    star85_uname    [STAR85_UNAME_LEN];
476 #define STAR85_GNAME_LEN    (15)
477 typedef char    star85_gname    [STAR85_GNAME_LEN];
478 #define STAR_XMAGIC_LEN     (4)
479 typedef char    star_magic  [STAR_XMAGIC_LEN];
480 #define STAR_MAGIC_CONST    ("tar")
481 #define NSTAR_PREFIX_LEN    (1)
482 typedef char    nstar_prefix    [NSTAR_PREFIX_LEN];
483 
484 /* --------------------
485  * gnu extensions
486  */
487 #define GNU89_MAGIC_LEN     (8)
488 typedef char    gnu89_magic [GNU89_MAGIC_LEN];
489 #define GNU_89_MAGIC_CONST  "ustar  "       /* includes terminating NUL */
490 #define GNU_89_GNUMAGIC_CONST   "GNUtar "       /* includes terminating NUL */
491 #define GNU89_LONGNAMES_LEN (4)
492 typedef char    gnu89_longnames [GNU89_LONGNAMES_LEN];
493 
494 
495 /* --------------------
496  * shared between the feuding star and gnu tar
497  */
498 typedef char    tar_isextended ;
/* One sparse map entry: two tar_size text fields, 24 bytes in total.
 * NOTE(review): presumably the position and length of one run of real data
 * in a sparse file; confirm against the code that parses these entries. */
typedef struct  tar_sparse
{
    tar_size    offset;
    tar_size    num_bytes;
} tar_sparse;
504 #define GNU_SPARSES_IN_EXTRA_HEADER     (16)
505 #define GNU_SPARSES_IN_OLD_HEADER       (4)
506 #define GNU_SPARSES_IN_SPARSE_HEADER        (21)
507 #define STAR_SPARSES_IN_HEADER          (4)
508 #define STAR_SPARSES_IN_EXT_HEADER      (21)
509 
510 
511 /* ----------------------------------------
512  * The various tar header formats
513  *
 * Note that in all the tar, posix and pax formats the first 257 bytes are
 * the same.  In all posix and almost compliant formats the first
 * 345 bytes are the same (except GNU 89).  It is abuse of the 155
 * bytes of the prefix that make star and gnu truly not posix compliant.
 * Sun used the 12 bytes after the prefix so is still posix compliant.
519  */
/* -----
 * Classic (V7) tar header: the 257 bytes of fields that the posix, sun,
 * star and gnu header layouts below all begin with.
 */
typedef struct  tar_v7_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    /* end of header                    // 257 */
} tar_v7_header;
534 
/* -----
 * POSIX (ustar) header: the V7 fields followed by magic, version, owner and
 * group names, device numbers and the 155 byte name prefix, 500 bytes in
 * all.  The same layout is also exposed under the name tar_pax_header.
 */
typedef struct tar_posix_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    tar_magic       magic;              /* 257 */
    tar_version     version;            /* 263 */
    tar_strname     uname;              /* 265 */
    tar_strname     gname;              /* 297 */
    tar_dev     devmajor;           /* 329 */
    tar_dev     devminor;           /* 337 */
    tar_prefix      prefix;             /* 345 */
    /* end of header                    // 500 */
} tar_posix_header, tar_pax_header;
556 
/* -----
 * Sun tar header: identical to the POSIX layout through the prefix, then
 * uses the last 12 bytes of the block (500..511) for extnum, extcount and
 * fullsize.
 */
typedef struct tar_sun_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    tar_magic       magic;              /* 257 */
    tar_version     version;            /* 263 */
    tar_strname     uname;              /* 265 */
    tar_strname     gname;              /* 297 */
    tar_dev     devmajor;           /* 329 */
    tar_dev     devminor;           /* 337 */
    tar_prefix      prefix;             /* 345 */
    sun_extnum      extnum;             /* 500 non-conformant */
    sun_extnum      extcount;           /* 501 non-conformant */
    sun_fullsize    fullsize;           /* 502 non-conformant */
    /* end of header                    // 512 non-conformant */
} tar_sun_header;
581 
/* -----
 * star 1985 (pre-POSIX) header: the V7 fields, then star's own fields in
 * place of the ustar magic/names area, the 155 byte prefix at 345 and the
 * xmagic field in the last 4 bytes of the block.
 */
typedef struct tar_star_85_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    star85_version  starversion;            /* 257 non-conformant */
    star85_filetype starfiletype;           /* 258 internal type of file? non-conformant */
    star85_type     startype;           /* 266 type of file (UNIX)? non-conformant */
    star85_rdev     rdev;               /* 278 non-conformant */
    tar_time        atime;              /* 290 non-conformant */
    tar_time        ctime;              /* 302 non-conformant */
    star85_uname    uname;              /* 314 non-conformant */
    star85_gname    gname;              /* 330 non-conformant */
    tar_prefix      prefix;             /* 345 non-conformant */
    char        ___fill0[8];            /* 500 non-conformant */
    star_magic      xmagic;             /* 508 non-conformant */
    /* end of header                    // 512 non-conformant */
} tar_star_85_header;
/* -----
 * star 1994 (post-POSIX) header as declared here: the ustar fields through
 * devminor, a 1 byte prefix and fill, then the isextended flag, four sparse
 * entries, realsize, offset, atime and ctime, with the xmagic field in the
 * last 4 bytes of the block.
 */
typedef struct tar_star_94_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    tar_magic       magic;              /* 257 */
    tar_version     version;            /* 263 */
    tar_strname     uname;              /* 265 */
    tar_strname     gname;              /* 297 */
    tar_dev     devmajor;           /* 329 */
    tar_dev     devminor;           /* 337 */
    nstar_prefix    prefix;             /* 345 */
    char        ___fill0;           /* 346 */
    char        ___fill1[8];            /* 347 */
    tar_isextended  isextended;         /* 355 non-conformant */
    tar_sparse      sparse[STAR_SPARSES_IN_HEADER]; /* 356 non-conformant */
    tar_size        realsize;           /* 452 non-conformant */
    tar_size        offset;             /* 464 non-conformant */
    tar_time        atime;              /* 476 non-conformant */
    tar_time        ctime;              /* 488 non-conformant */
    char        ___fill2[8];            /* 500 */
    star_magic      xmagic;             /* 508 non-conformant */
    /* end of header                    // 512 */
} tar_star_94_header;
638 
/* -----
 * GNU tar (circa 1989) header: the V7 fields, an 8 byte magic that covers
 * the space POSIX splits into magic (6) and version (2), the POSIX names
 * and device numbers, and then GNU's own fields where POSIX keeps the
 * prefix.  The declared fields end at byte 495.
 */
typedef struct tar_gnu_89_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    gnu89_magic     magic;              /* 257 non-conformant */
    tar_strname     uname;              /* 265 */
    tar_strname     gname;              /* 297 */
    tar_dev     devmajor;           /* 329 */
    tar_dev     devminor;           /* 337 */
    tar_time        atime;              /* 345 non-conformant */
    tar_time        ctime;              /* 357 non-conformant */
    tar_size        offset;             /* 369 non-conformant */
    gnu89_longnames longnames;          /* 381 non-conformant */
    char        ___fill0[1];            /* 385 */
    tar_sparse      sparse  [GNU_SPARSES_IN_OLD_HEADER];/* 386 optional sparse */
    tar_isextended  isextended;         /* 482 non-conformant */
    tar_size        realsize;           /* 483 non-conformant */
    /* end of header                    // 495 */
} tar_gnu_89_header;
666 
/* -----
 * Later GNU tar header.  As declared here it is field-for-field identical
 * in type, name and offset to tar_gnu_89_header.
 */
typedef struct tar_gnu_99_header
{
    /* type     member name          and offset */
    tar_file_name   name;               /*   0 */
    tar_file_mode   mode;               /* 100 */
    tar_id      uid;                /* 108 */
    tar_id      gid;                /* 116 */
    tar_size        size;               /* 124 */
    tar_time        mtime;              /* 136 */
    tar_csum        csum;               /* 148 */
    tar_link        link;               /* 156 */
    tar_file_name   linkname;           /* 157 */
    gnu89_magic     magic;              /* 257 non-conformant */
    tar_strname     uname;              /* 265 */
    tar_strname     gname;              /* 297 */
    tar_dev     devmajor;           /* 329 */
    tar_dev     devminor;           /* 337 */
    tar_time        atime;              /* 345 non-conformant */
    tar_time        ctime;              /* 357 non-conformant */
    tar_size        offset;             /* 369 non-conformant */
    gnu89_longnames longnames;          /* 381 non-conformant */
    char        ___fill0[1];            /* 385 */
    tar_sparse      sparse  [GNU_SPARSES_IN_OLD_HEADER];/* 386 non-conformant optional sparse */
    tar_isextended  isextended;         /* 482 non-conformant */
    tar_size        realsize;           /* 483 non-conformant */
    /* end of header                    // 495 */
} tar_gnu_99_header;
694 
/* -----
 * Extended sparse block: 21 sparse entries of 24 bytes each (504 bytes)
 * followed by a one byte isextended flag.
 */
typedef struct tar_sparse_header
{
    /* type     member name          and offset */
    tar_sparse      sparse  [STAR_SPARSES_IN_EXT_HEADER];/* 0 */
    tar_isextended  isextended;         /* 504 */
} tar_sparse_header;                    /* 505 end of header */
701 
/* -----
 * Portable ASCII ("odc") cpio header, kept so that a cpio archive can be
 * recognized by its magic.
 *
 * Every field is a run of ASCII octal digits.  In the cpio interchange
 * format the file size field is 11 digits wide (like mtime), not 6, so the
 * fixed part of the header is 76 bytes and the variable length name and
 * file data start at offset 76.
 */
typedef struct tar_cpio_header
{
    /* type     member name          and offset */
    char        magic       [6];        /*   0  must be "070707" */
#define TAR_CPIO_MAGIC_CONST ("070707")
    char        dev     [6];        /*   6  (dev,ino) is unique for each file in archive */
    char        ino     [6];        /*  12  see dev */
    char        mode        [6];        /*  18 */
    char        uid     [6];        /*  24 */
    char        gif     [6];        /*  30  c_gid in the cpio format; member name kept as spelled for compatibility */
    char        nlink       [6];        /*  36 */
    char        rdev        [6];        /*  42 */
    char        mtime       [11];       /*  48 */
    char        namesize    [6];        /*  59 */
    char        filesize    [11];       /*  65 */
    char        buff        [1];        /*  76  name and file data */
} tar_cpio_header;
719 
/* -----
 * One tar block viewed through every header layout this file declares.
 * The raw member is TAR_BLOCK_SIZE bytes and no other member is larger, so
 * the union is exactly one block; which view is meaningful depends on the
 * header type that was detected for the block.
 */
typedef union tar_header
{
    /* ----------
     * The TAR header is to be zero filled by definition but some tar
     * programs use ' ' instead.
     *
     * accept zeros ('0'), spaces (' ') or NULs (0x00) as equivalent where reasonable
     *
     * This name refers to the entire header as a single undifferentiated
     * sequence of bytes.
     */
    tar_raw_block   raw;        /* tar block as an array of bytes: used for 0 blocks here */
    tar_cpio_header cpio;       /* cpio not really tar */
    tar_v7_header   tar;        /* classic header back to the beginnings of memory */
    tar_posix_header    posix;      /* ustar or Posix 1003.1 header */
    tar_sun_header  suntar;     /* Sun Microsystems tar header */
    tar_star_85_header  star_85;    /* star header from 1985 (pre-Posix) */
    tar_star_94_header  star_94;    /* star header from 1994 (post-Posix) */
    tar_gnu_89_header   gnu_89;     /* gnu header from circa 1989 (post-posix but broken) */
    tar_gnu_99_header   gnu_99;         /* gnu header from ???? */
    tar_sparse_header   sparse;     /* star / gnu extended sparses header */
} tar_header;
742 
743 
744 /* ======================================================================
745  * local module-wide function like macros
746  */
/* -----
 * OFFSET_OF is the count of bytes between the base of a structure and
 * a particular member of that structure.
 *
 * The macro is also defined (under the same guard) near the top of this
 * file; the guard here keeps the two copies from ever conflicting if one
 * of them, or a definition supplied by a header, is changed.
 */
#ifndef OFFSET_OF
#define OFFSET_OF(structure,member) ((size_t)((&(((structure*)0)->member))-(0)))
#endif
752 
753 
754 /* ======================================================================
755  * compilation unit local functions
756  */
757 
/* ======================================================================
 * return the RFC 2045 base 64 value for a byte character
 * -1 for any byte that is not part of the base 64 alphabet
 *
 * base 64 uses A-Z as 0-25, a-z as 26-51, 0-9 as 52-61, + as 62 and / as 63
 *
 * The byte is compared against ASCII code points written numerically, so
 * the result does not depend on the compiler's execution character set.
 */
static int64_t decode_base64char (uint8_t byte)
{
    /* 'A' (0x41) .. 'Z' (0x5A) map to 0 .. 25 */
    if ((byte >= 0x41) && (byte <= 0x5A))
    {
        return (int64_t)(byte - 0x41);
    }
    /* 'a' (0x61) .. 'z' (0x7A) map to 26 .. 51 */
    if ((byte >= 0x61) && (byte <= 0x7A))
    {
        return (int64_t)(byte - 0x61) + 26;
    }
    /* '0' (0x30) .. '9' (0x39) map to 52 .. 61 */
    if ((byte >= 0x30) && (byte <= 0x39))
    {
        return (int64_t)(byte - 0x30) + 52;
    }
    /* '+' (0x2B) is 62 and '/' (0x2F) is 63 */
    if (byte == 0x2B)
    {
        return 62;
    }
    if (byte == 0x2F)
    {
        return 63;
    }
    /* everything else, including '=' padding, is out of range */
    return -1;
}
794 
795 
/* ======================================================================
 * decode_base64string
 *
 * Return the RFC 2045 base 64 value of the <len> bytes starting at <str>.
 *
 * Interpreted the specification to mean that out of range characters are
 * simply ignored.  They are not treated as digits of 0, as that would
 * shift the digits that came before them.
 *
 * Each accepted character supplies 6 bits of the final value, most
 * significant digit first.  The value is accumulated as an unsigned
 * 64 bit quantity so that an over-long string drops its oldest bits off
 * the top instead of overflowing a signed integer (undefined in C); the
 * accumulated bits are then returned as an int64_t.
 *
 * An empty string, or one with no valid digits, yields 0.
 */
static int64_t decode_base64string (const uint8_t* str, size_t len)
{
    uint64_t    accum = 0;
    size_t      ix;

    for (ix = 0; ix < len; ++ix)
    {
        int64_t digit = decode_base64char(str[ix]);

        if (digit >= 0)
        {
            /* accum = accum * 64 + digit; the low six bits are free after the shift */
            accum = (accum << 6) | (uint64_t)digit;
        }
    }
    return (int64_t)accum;
}
821 
822 
823 /* ======================================================================
824  * tar_strtoll
825  *
826  * This function will convert a string in a tar header into a 64 signed
827  * integer.
828  *
829  * The original tar header used just octal numbers in ascii in fixed length
830  * fields.  As normal in the computer software world these "obviously plenty
831  * big" fields became way too small.
832  *
833  * GNU tar invented two approaches to make these numeric fields "bigger" but
834  * only prolonged the pain by squeezing the new numbers into the same fields.
835  *
836  * In the tar v7 and ustar based tar headers the fields are:
837  *  Name    Size    Octal Range Interpretation
838  *  mode    8   0-2097151   bit flags
839  *  uid 8   0-2097151
840  *  gid 8   0-2097151
841  *  size    12  0-8589934591    up to 8 Giga-Byte files
842  *  mtime   12  0-8589934591    1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
843  *
844  *  atime   12  0-8589934591    1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
845  *  ctime   12  0-8589934591    1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
846  *
847  * GNU's first and already obsolete approach was to put Mime base-64 numbers
848  * With these the first byte is '+' or '-' to signal it isn't octal ASCII.
849  *
850  * GNU's second approach is base-256 which is a big endian binary string of lengths other
851  * than the 1, 2, 4 or 8 bytes of the standard integer types.  In this approach the first
852  * byte is either 0x80 for a positive number or 0xFF for a negative number.
853  */
854 
tar_strtoll(const uint8_t * str,size_t len,bool silent)855 static int64_t tar_strtoll ( const uint8_t * str, size_t len, bool silent )
856 {
857     int64_t result = 0;
858     bool negative = false;
859     uint8_t temp_buff[24];  /* long enough to hold all octal bytes for 64 bit numbers */
860 
861     /* -----
862      * force a NUL in case the source doesn't have one; this is for
863      * strtoll() on ASCII Octal
864      */
865     if ( len >= sizeof temp_buff )
866         len = sizeof temp_buff - 1;
867     memmove (temp_buff, str, len);
868     temp_buff[ len ] = 0x00;
869 
870     /* -----
871      * Most are going to be simple ASCII octal using '0'-'7' with NUL terminator
872      * leading 0 is not required but of course is accepted to match tar
873      * header specifications
874      */
875     if (((temp_buff[0] >= '0')&&(temp_buff[0] <= '7'))||(temp_buff[0] == ' '))
876     {
877         /* use stdlib strtoll - longest expected goes a few bits into the upper longword */
878         result = strtoi64((char*)temp_buff,NULL,8);
879     }
880     /* -----
881      * "base-256" well that is "binary" big endian of some length
882      *
883      * For fields longer than eight bytes upper bytes will shift out of
884      * significance into the bit bucket.
885      */
886     else if ((temp_buff[0] == 0x80)||(temp_buff[0] == 0xFF)) /* from GNU tar */
887     {
888         unsigned int ix;
889         negative = (bool)((temp_buff[0] == 0xFF) ? true : false);
890         result = temp_buff[0] & 0x7F; /* toss first flag bit */
891         for (ix = 1; ix < len; ++ix)
892         {
893             result <<= 8; /* result *= 256; */
894             result += temp_buff[ix];
895         }
896         if (negative)
897             result = -result;
898     }
899     /* -----
900      * "base-64" an already dumped idea from GNU tar
901      */
902     else if ((temp_buff[0] == '+')||(temp_buff[0] == '-')) /* from GNU tar */
903     {
904         int64_t temp;
905         negative = (bool)((temp_buff[0] == '-') ? true : false);
906         temp = decode_base64string(temp_buff+1,len-1);
907         /* potential overflow */
908         result = negative ? -temp : temp;
909     }
910 
911     /* -----
912      * look for an empty field of all NUL
913      */
914     else if (temp_buff[0] == 0x00)
915     {
916         unsigned int ix;
917         for (ix = 1; ix < len; ++ix)
918         {
919             if (temp_buff[ix] != 0x00)
920                 goto fail;
921         }
922     }
923     /* -----
924      * no idea what it is then
925      */
926     else
927     {
928     fail:
929         result = 0; /* as good a guess as any */
930         TAR_DEBUG (("%s: unknown integer storage type %c%c%c%c%c%c%c%c\n",
931                     temp_buff[0],temp_buff[1],temp_buff[2],temp_buff[3],
932                     temp_buff[4],temp_buff[5],temp_buff[6],temp_buff[7]));
933         if ( ! silent ) {
934             PLOGMSG (klogErr, (klogErr, "unknown integer storage type "
935                                      "$(B0)$(B1)$(B2)$(B3)$(B4)$(B5)$(B6)$(B7)",
936                            "B0=%c,B1=%c,B2=%c,B3=%c,B4=%c,B5=%c,B6=%c,B7=%c",
937                            temp_buff[0],temp_buff[1],temp_buff[2],temp_buff[3],
938                            temp_buff[4],temp_buff[5],temp_buff[6],temp_buff[7]));
939         }
940     }
941     return result;
942 }
943 
944 /* ======================================================================
945  * tar_header_type
946  * determine most probable tar header block type
947  */
what_header_type(const tar_header * header)948 static tar_header_type  what_header_type(const tar_header* header)
949 {
950     /* -----
951      * we'll assume its bad until we find a better guess
952      */
953     tar_header_type type = TAR_UNDEFINED;
954 
955     /* -----
956      * look for a cpio header though we aren't expecting to support it quite yet
957      */
958     if (strncmp(header->cpio.magic, TAR_CPIO_MAGIC_CONST, sizeof(header->cpio.magic)) == 0)
959     {
960     type = TAR_CPIO;
961     }
962     /* -----
963      * look for extended headers of some type
964      *
965      * look for posix based extensions as hopefully most likely
966      */
967     else if (strcmp(header->posix.magic, POSIX_MAGIC_CONST) == 0)
968     {
969     /* -----
970      * we have a post posix standard tar header but we aren't done yet
971      *
972      * First we look for star extensions to the header
973      */
974     if (strcmp(header->star_94.xmagic, STAR_MAGIC_CONST) == 0)
975     {
976 
977         /* -----
978          * we have either an old or new star archive
979          */
980         if (strcmp(header->star_94.magic, POSIX_MAGIC_CONST) == 0)
981         {
982         /* -----
983          * we have a new star type archive
984          */
985         type = TAR_STAR_94;
986         }
987         else
988         {
989         /* -----
990          * we have a old star type archive
991          */
992         type = TAR_STAR_85;
993         }
994     }
995     /* -----
996      * Next we look for SunOS5 extensions to the header
997      */
998     else if (header->suntar.extnum &&
999          header->suntar.extcount &&
1000          header->suntar.fullsize[0])
1001     {
1002         type = TAR_SUN;
1003     }
1004     /* -----
1005      * Else it seems to be the generic Posix tar header
1006      */
1007     else
1008     {
1009         type = TAR_POSIX;
1010     }
1011     }
1012     /* -----
1013      * Not a proper magic for a POSIX ustar header so look for the improper magic of GNU's tar
1014      */
1015     else if (strcmp(header->star_94.magic, GNU_89_MAGIC_CONST) == 0)
1016     {
1017     /* we have an old GNU not really posix compliant archive type */
1018     type = TAR_GNU_89;
1019     }
1020     /* -----
1021      * If there is anything else there we don't know what to do with it
1022      */
1023     else if (header->posix.magic[0] != 0)
1024     {
1025     /* -----
1026      * Log an anomaly showing what we found in the magic field
1027      */
1028     char temp_str   [9];
1029     memset(temp_str,0,sizeof(temp_str));
1030     string_copy(temp_str, sizeof(temp_str), header->posix.magic, 8);
1031         TAR_DEBUG(("%s: unknown header type magic [%s]\n",
1032                    __func__, temp_str));
1033     type = TAR_UNDEFINED;
1034     }
1035     /* -----
1036      * not knowing what we have lets see if it is a block of all zeroes
1037      * knowing we can ignore it
1038      */
1039     else
1040     {
1041     /* -----
1042      * If this were a legitimate V7 (well just old style)
1043      * tar header the first character would be non-NUL
1044      * and the link would be on eof the old types
1045      */
1046     if ((header->raw[0]>= ' ')&&(header->raw[0] <= '~'))
1047     {
1048         switch (header->tar.link)
1049         {
1050         case LINK_OLDNORMAL_FILE:
1051         case LINK_NORMAL_FILE:
1052         case LINK_HARD_LINK:
1053         case LINK_SYMBOLIC_LINK:
1054         case LINK_CHARACTER_SPECIAL:
1055         case LINK_BLOCK_SPECIAL:
1056         case LINK_DIRECTORY:
1057         case LINK_FIFO:
1058         case LINK_CONTIGUOUS_FILE:
1059         /* GNU TAR will do this to us */
1060         case LINK_NEXT_LONG_LINK:
1061         case LINK_NEXT_LONG_NAME:
1062         case LINK_GNU1989_LONG_NAMES:
1063         type = TAR_V7;
1064         break;
1065         default:
1066         break;
1067         }
1068     }
1069     else
1070     {
1071         /* -----
1072          * look for anything not zero
1073          */
1074         unsigned int ix;
1075 
1076         for (ix = 0; ix < sizeof(tar_header); ++ix)
1077         {
1078         if (header->raw[ix])
1079         {
1080             /* -----
1081              * non-zero so quit looking
1082              */
1083             break;
1084         }
1085         }
1086         /* -----
1087          * if we got to the end we know they are all zero so say so
1088          */
1089         if (ix == sizeof(tar_header))
1090         {
1091         type = TAR_ZERO_BLOCK;
1092         }
1093     }
1094     }
1095     TAR_DEBUG (("%s: %s(%d)\n", __func__, get_type_string(type), type));
1096     return type;
1097 }
1098 
1099 
/* -----
 * sparse_data
 *
 * One node of a singly linked list of (offset, size) pairs.
 * make_chunk_list copies offset into a chunk's logical_position and
 * size into the chunk's size.
 */
typedef struct sparse_data sparse_data;
struct sparse_data
{
    sparse_data *   next;       /* following node, NULL at the end of the list */
    uint64_t        offset;
    uint64_t        size;
};
1106 /* ======================================================================
1107  */
1108 
sparse_data_make(sparse_data ** new_item,uint64_t offset,uint64_t size)1109 static rc_t sparse_data_make (sparse_data ** new_item, uint64_t offset, uint64_t size)
1110 {
1111     sparse_data * p;
1112 
1113     p = malloc (sizeof (sparse_data));
1114     if (p != NULL)
1115     {
1116         p->offset = offset;
1117         p->size = size;
1118         p->next = NULL;
1119         *new_item = p;
1120         return 0;
1121     }
1122     return -1;
1123 }
1124 
sparse_data_push(sparse_data ** q,uint64_t offset,uint64_t size)1125 static rc_t sparse_data_push(sparse_data ** q, uint64_t offset, uint64_t size)
1126 {
1127     if (q == NULL)
1128         return -1;
1129     if (*q == NULL)
1130         return sparse_data_make (q, offset, size);
1131     return sparse_data_push (&((*q)->next), offset, size);
1132 }
1133 
sparse_data_pop(sparse_data ** q,sparse_data ** item)1134 static rc_t sparse_data_pop (sparse_data ** q, sparse_data **item)
1135 {
1136     if ((q == NULL) || (item == NULL))
1137         return -1;
1138 
1139     *item = *q;
1140     *q = (*item)->next;
1141     (*item)->next = NULL;
1142     return 0;
1143 }
1144 
sparse_data_kill(sparse_data ** q)1145 static rc_t sparse_data_kill (sparse_data ** q)
1146 {
1147     if (q == NULL)
1148         return -1;
1149     if (*q == NULL)
1150         return 0;
1151     if ((*q)->next != NULL)
1152         return sparse_data_kill(&(*q)->next);
1153 
1154     free (*q);
1155     *q = NULL;
1156     return 0;
1157 }
1158 
1159 /* ======================================================================
1160  * local module-wide variables (reduction in parameter pushing)
1161  *
1162  * Module shared variables; kinda like C++ class elements
1163  */
1164 typedef struct KTarState
1165 {
1166     /* TODO: optimize chunk/sparse lists by counting as pushed */
1167     const KFile *   kfile;      /* KFS reference to the specific file being parsed */
1168     const KMMap *   kmmap;      /* KFS memory mapping for a portion of that file */
1169     const void *    map;        /* where the tar file got put by mmap */
1170     sparse_data *   sparse_q;
1171     KTocChunk *     chunks;     /* table of chunks: logical_position, source_position, size */
1172     KToc *      toc;        /* the Table of Contents we are building */
1173     size_t      tar_length; /* how long is the tar file */
1174     size_t      buffer_length;  /* how long is the window into the buffer */
1175     uint64_t        buffer_start;   /* how far into the tar file is the buffer start */
1176     uint64_t        buffer_limit;   /* how far into the tar file is the buffer end */
1177     uint32_t        num_chunks;
1178     /* -----
1179      * zero blocks are only supposed to pad out the end of a tar file to
1180      * match a larger block size.  If we find something after a zero block
1181      * it is a file error of some type
1182      */
1183     bool        found_zero_block;
1184     bool        found_second_zero_block;
1185 } KTarState;
1186 
make_chunk_list(KTarState * self,uint64_t file_offset)1187 static rc_t make_chunk_list(KTarState * self, uint64_t file_offset)
1188 {
1189     uint64_t        count = self->num_chunks;
1190     uint64_t        source_position = file_offset;
1191     uint64_t        ix;
1192     sparse_data *   psd;
1193     rc_t        ret;
1194 
1195     if (self->chunks)
1196     free (self->chunks); /* shouldn't happen */
1197     self->chunks = malloc( (size_t)( count * sizeof(KTocChunk) ) );
1198     if (self->chunks == NULL)
1199     return -1;
1200 
1201 
1202     for (ix = 0; ix < count; ++ix)
1203     {
1204     ret = sparse_data_pop (&self->sparse_q, &psd);
1205     if (ret != 0)
1206         return ret;
1207     self->chunks[ix].logical_position = psd->offset;
1208     self->chunks[ix].source_position = source_position;
1209     self->chunks[ix].size = psd->size;
1210     source_position += psd->size;
1211     sparse_data_kill(&psd);
1212     }
1213     return 0;
1214 }
1215 
/* -----
 * whack_chunk_list
 *
 * Discard the chunk table: free the array and reset both the pointer and
 * the count.
 */
static void whack_chunk_list(KTarState * self)
{
    KTocChunk * table = self->chunks;

    self->num_chunks = 0;
    self->chunks = NULL;
    free (table);
}
1222 
1223 #if HANDLING_EXTENDED_HEADERS
1224 /* ======================================================================
1225  * mini class for handling pax/posix/ustar
1226  * extended headers and global extended headers
1227  *
 * Many of the values are included to get past range limits imposed by the ustar format,
 * particularly for string length and character set or shortish integral values.  <keyword>
 * and <value> below are UTF-8.
1231  *
1232  * values in the header are string values written as with a 'printf' using the form
1233  *  printf("%d %s=%s\n",<length>,<keyword>,<value>)
1234  *
1235  * <length> is described ambiguously as
1236  *  "The <length> field shall be the decimal length of the extended header record in octets,
1237  *   including the trailing <newline>."
1238  * So does that include the length of <length>? or not?
1239  *
1240  * The field <keyword> is allowed in a pax Extended Header include but are not limited to
1241  *  atime       time_t but with fractional seconds maybe
1242  *  charset     enumeration list
1243  *  comment     human readable comments
1244  *  gid     integer gid allowing numbers greater than ustar limit of 2097151 (07777777)
1245  *  gname       over rides xhdr gid and ustar gname and gid
1246  *  linkpath
 *  mtime       time_t but with fractional seconds maybe
1248  *  path
1249  *  realtime.<ANY>
1250  *  security.<ANY>
1251  *  size
1252  *  uid     integer uid allowing numbers greater than ustar limit of 2097151 (07777777)
1253  *  uname
1254  * Any other keywords desired can be included but might not be meaningful to many applications.
1255  * Keyword can have pretty much any character in it except '='.
1256  *
1257  * <value> is a UTF-8 string that ends with the '\n'.
1258  *
1259  *
1260  * charset is limited to (omit the quotation marks)
1261  *      <value>           Formal Standard
1262  * "ISO-IR 646 1990"        ISO/IEC 646:1990
1263  * "ISO-IR 8859 1 1998"     ISO/IEC 8859-1:1998
1264  * "ISO-IR 8859 2 1999"     ISO/IEC 8859-2:1999
1265  * "ISO-IR 8859 3 1999"     ISO/IEC 8859-3:1999
1266  * "ISO-IR 8859 4 1998"     ISO/IEC 8859-4:1998
1267  * "ISO-IR 8859 5 1999"     ISO/IEC 8859-5:1999
1268  * "ISO-IR 8859 6 1999"     ISO/IEC 8859-6:1999
1269  * "ISO-IR 8859 7 1987"     ISO/IEC 8859-7:1987
1270  * "ISO-IR 8859 8 1999"     ISO/IEC 8859-8:1999
1271  * "ISO-IR 8859 9 1999"     ISO/IEC 8859-9:1999
1272  * "ISO-IR 8859 10 1998"    ISO/IEC 8859-10:1998
1273  * "ISO-IR 8859 13 1998"    ISO/IEC 8859-13:1998
1274  * "ISO-IR 8859 14 1998"    ISO/IEC 8859-14:1998
1275  * "ISO-IR 8859 15 1999"    ISO/IEC 8859-15:1999
1276  * "ISO-IR 10646 2000"      ISO/IEC 10646:2000
1277  * "ISO-IR 10646 2000 UTF-8"    ISO/IEC 10646, UTF-8 encoding
1278  * "BINARY"         None.
1279  */
/* -----
 * pax_charset
 *
 * Identifies a pax "charset" value.  The numeric value of each constant
 * is the index of the matching text in pax_charset_strings.
 */
enum pax_charset
{
    PAX_CS_NOT_SPECIFIED = 0,
    PAX_CS_ISO_IR_646_1990,
    PAX_CS_ISO_IR_8859_1_1998,
    PAX_CS_ISO_IR_8859_2_1999,
    PAX_CS_ISO_IR_8859_3_1999,
    PAX_CS_ISO_IR_8859_4_1998,
    PAX_CS_ISO_IR_8859_5_1999,
    PAX_CS_ISO_IR_8859_6_1999,
    PAX_CS_PAX_CS_ISO_IR_8859_7_1997,
    PAX_CS_ISO_IR_8859_8_1999,
    PAX_CS_ISO_IR_8859_9_1999,
    PAX_CS_ISO_IR_8859_10_1998,
    PAX_CS_ISO_IR_8859_13_1998,
    PAX_CS_ISO_IR_8859_14_1998,
    PAX_CS_ISO_IR_8859_15_1998,
    PAX_CS_ISO_IR_1064_2000,
    PAX_CS_ISO_IR_1064_2000_UTF_8,
    PAX_CS_BINARY
};
typedef enum pax_charset pax_charset;
1301 
/* -----
 * pax_charset_strings
 *
 * Text of each charset value, in the order of the pax_charset constants.
 * The table is closed by a NULL entry.
 */
static const char * pax_charset_strings[] =
{
    [0]  = "Not Specified",
    [1]  = "ISO-IR 646 1990",
    [2]  = "ISO-IR 8859 1 1998",
    [3]  = "ISO-IR 8859 2 1999",
    [4]  = "ISO-IR 8859 3 1999",
    [5]  = "ISO-IR 8859 4 1998",
    [6]  = "ISO-IR 8859 5 1999",
    [7]  = "ISO-IR 8859 6 1999",
    [8]  = "ISO-IR 8859 7 1987",
    [9]  = "ISO-IR 8859 8 1999",
    [10] = "ISO-IR 8859 9 1999",
    [11] = "ISO-IR 8859 10 1998",
    [12] = "ISO-IR 8859 13 1998",
    [13] = "ISO-IR 8859 14 1998",
    [14] = "ISO-IR 8859 15 1999",
    [15] = "ISO-IR 10646 2000",
    [16] = "ISO-IR 10646 2000 UTF-8",
    [17] = "BINARY",
    [18] = NULL
};
1324 
pax_xhdr_parse_charset_string(char * string)1325 static pax_charset pax_xhdr_parse_charset_string(char*string)
1326 {
1327     int ix;     /* index */
1328     const char * ps;    /* pointer to string */
1329 
1330     for (ps = pax_charset_strings[ix= 0]; ps; ps = pax_charset_strings[++ix])
1331     if (strcmp(ps,string) == 0)
1332         return ix;
1333     return PAX_CS_NOT_SPECIFIED;    /* matched none so set it to not specified */
1334 }
1335 
1336 typedef struct pax_xheader pax_xheader;
1337 struct pax_xheader
1338 {
1339     char *  path;       /* supercedes .posix.name */
1340     char *  linkpath;   /* supercedes .posix.linkname */
1341     char *  uname;      /* user name supercedes .posix.uname uid and .posix.uid */
1342     char *  gname;      /* group name supercedes .posix.gname gid and .posix.gid */
1343     time_t  atime;      /* supercedes .posix.atime */
1344     time_t  mtime;      /* supercedes .posix.mtime */
1345     uid_t   uid;        /* supercedes .posix.uid */
1346     uint32_t    gid;        /* supercedes .posix.gname */
1347     uint64_t    size;       /* supercedes .posix.size */
1348     pax_charset charset;    /* not supported by us? */
1349 };
1350 
pax_xhdr_create(void)1351 static pax_xheader *    pax_xhdr_create (void)
1352 {
1353     pax_xheader * pxv = malloc (sizeof(pax_xheader));
1354     memset (pxv,0,sizeof(pax_xheader));
1355     return pxv;
1356 }
1357 
/* -----
 * pax_xhdr_delete
 *
 * Free the four strings held by <self> and then <self> itself.
 * A NULL <self> is accepted and ignored, as with free().
 */
static void pax_xhdr_delete(pax_xheader * self)
{
    if (self == NULL)
        return;

    /* free (NULL) does nothing, so unset fields need no test */
    free (self->path);
    free (self->linkpath);
    free (self->uname);
    free (self->gname);
    free (self);
}
1370 
pax_xhdr_set_general_string(char ** str,const char * val)1371 static rc_t  pax_xhdr_set_general_string (char ** str, const char * val)
1372 {
1373     size_t  len;    /* how much memory */
1374     char      * nl; /* point to any new line in source */
1375     rc_t    rc = 0; /* return code; assume success */
1376 
1377     if (*str)       /* if already set, free the old value */
1378     free (*str);
1379     nl = strchr (val, '\n');
1380     if (nl)
1381     {
1382     len = nl - val + 1; /* +1 for NUL */
1383     }
1384     else
1385     {
1386         size_t size;
1387         len = string_measure(val, &size) + 1;
1388     }
1389     *str = malloc (len);
1390     if (*str)
1391     {
1392         string_copy(*str, len, val, len-1);
1393         (*str)[len-1] = '\0'; /* if it was a '\n' terminated value this is needed not worth the check if needed */
1394     }
1395     else
1396     {
1397     rc = RC(rcFS/*?*/,rcAllocating,0/*?*/,rcNoObj/*?*/,rcNull); /* well its not 0 */
1398     }
1399     return rc;
1400 }
1401 
1402 /* ----------
1403  * new_path points to a string that ends with either a '\n' or a NUL
1404  */
pax_xhdr_set_path(pax_xheader * self,char * new_path)1405 LIB_EXPORT rc_t CC pax_xhdr_set_path(pax_xheader * self, char * new_path)
1406 {
1407     return pax_xhdr_set_general_string(&(self->path),new_path);
1408 }
1409 
1410 /* ----------
1411  * new_path points to a string that ends with either a '\n' or a NUL
1412  */
pax_xhdr_set_linkpath(pax_xheader * self,char * new_path)1413 LIB_EXPORT rc_t CC pax_xhdr_set_linkpath(pax_xheader * self, char * new_path)
1414 {
1415     return pax_xhdr_set_general_string(&(self->linkpath),new_path);
1416 }
1417 
1418 /* ----------
1419  * new_name points to a string that ends with either a '\n' or a NUL
1420  */
pax_xhdr_set_uname(pax_xheader * self,char * new_name)1421 LIB_EXPORT rc_t CC pax_xhdr_set_uname(pax_xheader * self, char * new_name)
1422 {
1423     return pax_xhdr_set_general_string(&(self->uname),new_name);
1424 }
1425 
1426 /* ----------
1427  * new_name points to a string that ends with either a '\n' or a NUL
1428  */
pax_xhdr_set_gname(pax_xheader * self,char * new_name)1429 LIB_EXPORT rc_t CC pax_xhdr_set_gname(pax_xheader * self, char * new_name)
1430 {
1431     return pax_xhdr_set_general_string(&(self->gname),new_name);
1432 }
1433 
pax_xhdr_get_general_string(char ** src,char ** dst,size_t max)1434 LIB_EXPORT bool CC pax_xhdr_get_general_string (char**src, char**dst, size_t max)
1435 {
1436     size_t size;
1437     if (string_measure(*src, &size) > max-1)    /* fail if too big for target */
1438     {
1439         return false;
1440     }
1441     string_copy(*dst, max, *src, size);
1442     return true;
1443 }
1444 
pax_xhdr_get_path(pax_xheader * self,char ** path,size_t max)1445 LIB_EXPORT rc_t CC pax_xhdr_get_path(pax_xheader * self, char ** path, size_t max)
1446 {
1447     return (pax_xhdr_get_general_string(&self->path,path,max))? 0 : ~0;
1448 }
1449 
pax_xhdr_get_linkpath(pax_xheader * self,char ** path,size_t max)1450 LIB_EXPORT rc_t CC pax_xhdr_get_linkpath(pax_xheader * self, char ** path, size_t max)
1451 {
1452     return (pax_xhdr_get_general_string(&self->linkpath,path,max))? 0 : ~0;
1453 }
1454 
pax_xhdr_get_uname(pax_xheader * self,char ** name,size_t max)1455 LIB_EXPORT rc_t CC pax_xhdr_get_uname(pax_xheader * self, char ** name, size_t max)
1456 {
1457     return (pax_xhdr_get_general_string(&self->uname,name,max))? 0 : ~0;
1458 }
1459 
pax_xhdr_get_gname(pax_xheader * self,char ** name,size_t max)1460 LIB_EXPORT rc_t CC pax_xhdr_get_gname(pax_xheader * self, char ** name, size_t max)
1461 {
1462     return (pax_xhdr_get_general_string(&self->gname,name,max))? 0 : ~0;
1463 }
1464 
1465 /* ----------
1466  * The mtime and atime <value> is defined as
1467  *  The pax utility shall write an mtime record for each file in write
1468  *  or copy modes if the file's modification time cannot be represented
1469  *  exactly in the ustar header logical record described in ustar
1470  *  Interchange Format. This can occur if the time is out of ustar range,
1471  *  or if the file system of the underlying implementation supports
1472  *  non-integer time granularities and the time is not an integer. All of
1473  *  these time records shall be formatted as a decimal representation of
1474  *  the time in seconds since the Epoch. If a period ( '.' ) decimal
1475  *  point character is present, the digits to the right of the point shall
1476  *  represent the units of a subsecond timing granularity, where the first
1477  *  digit is tenths of a second and each subsequent digit is a tenth of
1478  *  the previous digit. In read or copy mode, the pax utility shall
1479  *  truncate the time of a file to the greatest value that is not greater
1480  *  than the input header file time. In write or copy mode, the pax
1481  *  utility shall output a time exactly if it can be represented exactly
1482  *  as a decimal number, and otherwise shall generate only enough digits
1483  *  so that the same time shall be recovered if the file is extracted on a
1484  *  system whose underlying implementation supports the same time
1485  *  granularity.
1486  *
1487  * So...
1488  * Looks like for our purposes we ignore anything after a possible decimal
1489  * point and just use the integer part of whatever is there and just not be
1490  * to worried about it since we aren't creating any of this, just reading ie.
1491  */
pax_xhdr_set_general_time(const char * ts,time_t * tt)1492 static rc_t pax_xhdr_set_general_time (const char * ts, time_t * tt)
1493 {
1494     uint64_t temp = strtou64(ts,NULL,10); /* tosses decimal part for us */
1495 
1496     if (temp)
1497         *tt = (time_t)temp;
1498     return temp ? 0 : ~0;
1499 }
1500 
1501 /* ======================================================================
1502  * This is the data accumulated for each entry in a tar file.
1503  */
1504 static struct   tar_entry_data
1505 {
1506     /* -----
1507      * We are not using linux (or other O/S specific) type here because the ranges
1508      * for the system creating the archive might have larger types than the system
1509      * we are running on.  We'll leave it to the outside caller of this program to
1510      * "make it fit".
1511      */
1512 
1513 
1514     entry_type  type;
1515 
1516     char *  path;
1517     char *  link;
1518 
1519     uint64_t    size;
1520     uint64_t    offset;
1521 
1522     mode_t  mode;
1523 
1524     char *  uname;
1525     uid_t   uid;
1526 
1527     char *  gname;
1528     uint32_t    gid;
1529 
1530     time_t  mtime;
1531     time_t  atime;
1532     time_t  ctime;
1533 
1534 } tar_entry_data;
1535 #endif
1536 /* ======================================================================
1537  * Mapping a memory region
1538  *
1539  * This will have two versions:
1540  *  initially for development it will make raw Linux O/S calls
1541  *  quickly it will be ported to use KFS structures instead
1542  */
/* -----
 * The map window is one gigabyte (2 to the 30th bytes).
 * Life would be really bad if a single header was within pagesize of that gigabyte.
 * This assumes that a gigabyte is a multiple of system pagesize - a very safe bet.
 */
#define MAP_WINDOW_SIZE (1 << 30)
1549 
1550 /* ----------
1551  * map_tar_file
1552  *
1553  * This function uses existing members of the ktar state structure and a single parameter
1554  * to decide what part of a file to map.
1555  *
1556  * IN: offset:  an uint64_t type of where the starting point with in the file the memory mapped region
1557  *      should start
1558  * SIDE:    side effects are a freeing of any existing memory mapped region of a file and
1559  *      if successful mappinga region it will have mapped that region and put real
1560  *      parameters describing that region in the private memory block
1561  */
1562 static
map_tar_file(KTarState * self,uint64_t requested_offset)1563 rc_t map_tar_file (KTarState * self, uint64_t requested_offset)
1564 {
1565     rc_t  ret = 0;
1566 
1567     /* -----
1568      * If we are mapping for the first time
1569      */
1570     if (self->kmmap == NULL)
1571     {
1572     ret = KMMapMakeMaxRead(&self->kmmap,self->kfile);
1573     if (ret)
1574         return ret;
1575     ret = KMMapSize(self->kmmap, &self->buffer_length);
1576     if (ret)
1577         return ret;
1578     }
1579     /* -----
1580      * if we are remapping the region
1581      */
1582     else
1583     {
1584     ret = KMMapReposition(self->kmmap, requested_offset, &self->buffer_length);
1585     if (ret)
1586         return ret;
1587     }
1588     ret = KMMapAddrRead(self->kmmap, &self->map);
1589     if (ret)
1590     return ret;
1591     ret = KMMapPosition(self->kmmap, &self->buffer_start);
1592     if (ret)
1593     return ret;
1594     ret = KMMapSize(self->kmmap, &self->buffer_length);
1595     if (ret)
1596     return ret;
1597     self->buffer_limit = self->buffer_start + self->buffer_length;
1598 
1599     return ret;
1600 }
1601 
1602 static
release_map(KTarState * self)1603 rc_t release_map (KTarState * self)
1604 {
1605     KMMapRelease (self->kmmap);
1606     self->kmmap = NULL;
1607     return 0;
1608 }
1609 
1610 /* ======================================================================
1611  *
1612  * offset is the byte position within the tar file
1613  * hard_limit is the byte position with in the tar file that is not mapped
1614  *
1615  * This is the ugliest function/method in the whole module.  The extensions
1616  * to the tar header are not done in a consistent manner so convolutions
1617  * have to be made to support all manner of extensions.
1618  */
1619 static
process_one_entry(KTarState * self,uint64_t offset,uint64_t hard_limit,bool silent)1620 uint64_t process_one_entry (KTarState * self, uint64_t offset, uint64_t hard_limit, bool silent)
1621 {
1622     /* -----
1623      * full_path will store the full path of an element which can be longer than
1624      * will fit in the standard tar header.  This will also usually be an output
1625      * to the consumer.
1626      */
1627     char full_path [ 4096 ];
1628 
1629     /* -----
1630      * full_path will store the full link (if any) of an element which can be longer than
1631      * will fit in the standard tar header.  This will also usually be an output
1632      * to the consumer.
1633      */
1634     char full_link [ 4096 ];
1635 
1636     /* -----
1637      * data_offset will index into the tar file where the data portion for the current header
1638      * lies. (Usually at the address of the header + 512)  This will also usually be an output
1639      * to the consumer.
1640      */
1641     uint64_t data_offset = 0;
1642 
1643     /* -----
1644      * data_size will hold the size of the data portion for the current header.  This will also
1645      * usually be an output to the consumer.
1646      */
1647     uint64_t data_size = 0;
1648 
1649     /* -----
1650      * virtual_data_size will hold the virtual size of a sparse file.  This will also
1651      * usually be an output to the consumer.
1652      */
1653     uint64_t virtual_data_size = 0;
1654     /* -----
1655      * type is an enumerated type that described the format of the tar header.  Its initialized
1656      * to an invalid header type.
1657      */
1658     tar_header_type type = TAR_UNDEFINED;
1659 
1660     /* -----
1661      * link is an enumerated type that described the contents of this element.
1662      */
1663     tar_link link = LINK_OLDNORMAL_FILE;
1664 
1665 #if _DEBUGGING && 0
1666 /* We are not using these components of the tar header block at this point
1667  * but with a debug build it doesn't hurt to verify we fully understand the
1668  * header.
1669  */
1670     uid_t  uid = 0;
1671     uint32_t gid = 0;
1672 #endif
1673     time_t mtime = 0;
1674     mode_t mode = 0;
1675 
1676     /* -----
1677      * current_offset is the offset of the current header which might be a different header
1678      * than the one we started with.
1679      */
1680     uint64_t current_offset = offset;
1681 
1682     /* -----
1683      * we can access the header either as a sequence of bytes or as a
1684      * header structure.  That header can further be accessed as one of
1685      * several more specific types of header.
1686      */
1687     union
1688     {
1689         const uint8_t *    b;
1690         const tar_header * h;
1691     } current_header;
1692 
1693     /* -----
1694      * done is a flag as to when we are finished processing a tar element
1695      * that might have multiple headers and other elements
1696      */
1697     bool done = false;
1698 
1699     /* -----
1700      * gnu_sparse is a flag that we are currently inside a sparse file
1701      * with in the TAR and have more sparse header elements to parse
1702      */
1703     bool gnu_sparse = false;
1704 
1705     TAR_FUNC_ENTRY();
1706 
1707     /* -----
1708      * check right away to make sure we are still in our memory mapped window
1709      */
1710     if (offset > self->buffer_limit)
1711     {
1712         /* -----
1713          * if not bail and say we couldn't consume any bytes
1714          */
1715         return 0;
1716     }
1717 
1718     /* -----
1719      * clear the full name and link name entries
1720      */
1721     memset (full_path, 0, sizeof(full_path));
1722     memset (full_link, 0, sizeof(full_link));
1723 
1724     /* -----
1725      * set the header at the current TAR block.
1726      * That is the map starts at offset buffer_start and we are at
1727      * current_offset into the file so we take the map as a pointer
1728      * and add to it the difference between our current offset and the map's
1729      * initial offset (first header is at map + 0 - 0)
1730      */
1731     current_header.b = (const uint8_t *)self->map + current_offset - self->buffer_start;
1732     /* -----
1733      * start processing
1734      */
1735     do
1736     {
1737         TAR_DEBUG (( "Processing one block at (%lu), it is sparse? %s\n",
1738                      current_offset, get_bool_string(gnu_sparse)));
1739 
1740         /* -----
1741          * what we will do depends upon the type of this block
1742          */
1743         type = gnu_sparse ? TAR_SPARSE : what_header_type(current_header.h);
1744         if (self->found_zero_block)
1745         {
1746             if (self->found_second_zero_block == true)
1747             {
1748                 type = TAR_ZERO_BLOCK; /* skip anyway */
1749             }
1750             else if (type == TAR_ZERO_BLOCK)
1751             {
1752                 self->found_second_zero_block = true;
1753             }
1754             else
1755             {
1756                 if ( ! silent )
1757                 {
1758                     PLOGMSG(klogErr,(klogErr,
1759                         "Found Extra Header after a block of zeros $(O)",
1760                         PLOG_U64(O), offset));
1761                 }
1762                 return -1;
1763             }
1764         }
1765         switch (type)
1766         {
1767         case TAR_ZERO_BLOCK:
1768         {
1769             self->found_zero_block = true;
1770             done = true;
1771             break;
1772         }
1773         case TAR_SPARSE:
1774         {
1775             /* -----
1776              * If there is an extension header we'll have different work to do
1777              */
1778 #if 0
1779             if (current_header.h->sparse.isextended)
1780             {
1781                 LOGMSG (klogDebug3,"isextended true");
1782             }
1783             else
1784             {
1785                 LOGMSG (klogDebug3,"isextended false");
1786             }
1787 #endif
1788             {
1789                 int64_t  ix;
1790                 uint64_t of;
1791                 uint64_t sz;
1792 
1793                 for (ix = 0; ix< GNU_SPARSES_IN_EXTRA_HEADER; ++ix)
1794                 {
1795                     rc_t ret;
1796                     of = tar_strtoll(
1797                      (const uint8_t*)current_header.h->sparse.sparse[ix].offset,
1798                                      TAR_SIZE_LEN, silent);
1799                     sz = tar_strtoll(
1800                   (const uint8_t*)current_header.h->sparse.sparse[ix].num_bytes,
1801                                      TAR_SIZE_LEN, silent);
1802                     if (sz == 0)
1803                     {
1804                         break;
1805                     }
1806 #if 0
1807                     PLOGMSG ((klogDebug4,
1808                               "SPARSE ext: $(count): $(offset) $(size)",
1809                               PLOG_3(PLOG_I64(count),PLOG_X64(offset),PLOG_X64(size)),
1810                               ix,
1811                               of,
1812                               sz));
1813 #endif
1814 
1815                     ret = sparse_data_push (&self->sparse_q, of, sz);
1816                     if (ret)
1817                     {
1818                         sparse_data_kill(&self->sparse_q);
1819                         return -1;
1820                     }
1821                     ++self->num_chunks;
1822                 }
1823             }
1824         }
1825         break;
1826 
1827 #if 0
1828         /*Same as the default case */
1829         case TAR_CPIO:
1830             PLOGMSG ((klogErr,
1831                       "unsupported header type $(type) $(string)",
1832                       "type=%d,string=%s",
1833                       type, get_type_string(type)));
1834             done = true;
1835             break;
1836 #endif
1837 
1838         default:
1839             if ( ! silent )
1840             {
1841                 PLOGMSG (klogErr,(klogErr,
1842                                   "unsupported header type $(type) $(string)",
1843                                   "type=%d,string=%s",
1844                                   type, get_type_string(type)));
1845             }
1846             done = true;
1847             break;
1848 
1849         case TAR_GNU_89:
1850         case TAR_V7:
1851         case TAR_POSIX:
1852             /* -----
1853              * almost anything we do will depend upon the size of the data for this block
1854              *
1855              * this will be wrong if we ever support cpio...
1856              */
1857             data_size = (uint64_t) ( tar_strtoll
1858                 ( (uint8_t*)current_header.h->tar.size,TAR_SIZE_LEN, silent ) );
1859 #if _DEBUGGING && 0
1860             uid =  ( tar_strtoll
1861                 ( (uint8_t*)current_header.h->tar.uid,TAR_ID_LEN, silent ) );
1862             gid =  ( tar_strtoll
1863                 ( (uint8_t*)current_header.h->tar.gid,TAR_ID_LEN, silent ) );
1864 #endif
1865             mtime = ( tar_strtoll
1866                 ( (uint8_t*)current_header.h->tar.mtime,TAR_TIME_LEN, silent) );
1867             mode = (uint32_t) ( tar_strtoll
1868                 ( (uint8_t*)current_header.h->tar.mode,TAR_MODE_LEN, silent) );
1869         }
1870 
1871         /* -----
1872          * Sometimes we are done just by identifying the header type.
1873          * If so we break the loop here.
1874          */
1875         if (done)
1876         {
1877             /* -----
1878              * point at the next header block
1879              * Add the size of the header itself plus enough block sizes of data to cover
1880              * any associated data.
1881              */
1882             size_t header_plus_data_block_size = (1+BLOCKS_FOR_BYTES(data_size))*TAR_BLOCK_SIZE;
1883             current_offset += (uint64_t)header_plus_data_block_size;
1884             current_header.b += header_plus_data_block_size;
1885             break;
1886         }
1887 
1888         /* -----
1889          * several extensions to USTAR/TAR format headers involve
1890          * prepending another header type to give a name longer
1891          * than will fit in tthe header itself.  If we had one of
1892          * those use that name.  But if the full path has not been
1893          * set use the path from this header.
1894          */
1895         if (full_path[0] == 0) /* if full_path wasn't filled in by an 'L' long name */
1896         {
1897             size_t len, size;
1898             /* -----
1899              * if there is a prefix (POSIX style) use it
1900              * copy the prefix and then concatenate the name field
1901              */
1902             if (current_header.h->posix.prefix[0])
1903             {
1904 #if 0
1905                 PLOGMSG ((klogDebug1,
1906                           "used a posix prefix $(prefix)",
1907                           "prefix=%s",
1908                           current_header.h->posix.prefix));
1909 #endif
1910                 /* -----
1911                  * copy in the prefix, force a NUL just in case. then add a directory divider
1912                  */
1913                 string_copy(full_path, sizeof(full_path), current_header.h->posix.prefix, TAR_PREFIX_LEN);
1914                 full_path[TAR_PREFIX_LEN] = 0x00;
1915                 strcat(full_path,"/");
1916             }
1917 
1918             strncat(full_path,current_header.h->tar.name,TAR_NAME_LEN);
1919             len = string_measure(full_path, &size);
1920             while (len > 1)
1921             {
1922                 if (full_path[len-1] == '/')
1923                     len--;
1924                 else
1925                     break;
1926             }
1927             full_path[len] = '\0';
1928         }
1929         link = current_header.h->tar.link;
1930         TAR_DEBUG(("link = %s(%c)\n",get_link_string(link),link));
1931         switch (link)
1932         {
1933         case LINK_SPARSE:
1934             /* -----
1935              * If there is an extension header we'll have different work to do
1936              */
1937             if (current_header.h->gnu_89.isextended)
1938             {
1939                 /* -----
1940                  * If we have an extended header following make sure there is room
1941                  */
1942                 if ( (uint64_t)( offset + 2 * sizeof(tar_header) ) > hard_limit )
1943                     return 0;
1944                 done = false;
1945                 gnu_sparse = true;  /* next block will be part of the header and not data */
1946                 data_offset = offset + 2 * sizeof(tar_header);
1947             }
1948             else
1949             {
1950                 done = true;
1951                 gnu_sparse = false;
1952                 data_offset = offset + sizeof(tar_header);
1953             }
1954 
1955             done = (bool)! current_header.h->gnu_89.isextended;
1956             {
1957                 int32_t ix;
1958                 rc_t    ret;
1959 
1960                 virtual_data_size = (uint64_t)(tar_strtoll(
1961                     (uint8_t*)current_header.h->gnu_89.realsize,TAR_SIZE_LEN,
1962                     silent));
1963 
1964                 for (ix = 0; ix< GNU_SPARSES_IN_OLD_HEADER; ++ix)
1965                 {
1966                     uint64_t soffset = tar_strtoll(
1967                      (const uint8_t*)current_header.h->gnu_89.sparse[ix].offset,
1968                                                    TAR_SIZE_LEN, silent);
1969                     uint64_t ssize = tar_strtoll(
1970                   (const uint8_t*)current_header.h->gnu_89.sparse[ix].num_bytes,
1971                                                  TAR_SIZE_LEN, silent);
1972                     if (ssize == 0)
1973                     {
1974                         break;
1975                     }
1976 
1977                     ret = sparse_data_push (&self->sparse_q, soffset, ssize);
1978                     if (ret)
1979                     {
1980                         sparse_data_kill(&self->sparse_q);
1981                         return -1;
1982                     }
1983                     ++self->num_chunks;
1984                 }
1985             }
1986             data_offset = current_offset + sizeof(tar_header);
1987             break;
1988 
1989         case LINK_OLDNORMAL_FILE:       /* deprecated normal file */
1990             /* -----
1991              * this should only happen with LINK_OLDNORMAL_FILE
1992              *
1993              * If the type is file but the last character in the path is "/"
1994              * treat it as a directory instead
1995              */
1996         {
1997             size_t size;
1998             if (full_path[string_measure(full_path, &size)-1] == '/')
1999             {
2000                 link = LINK_DIRECTORY;
2001             }
2002         }
2003         /* fall through */
2004         case LINK_NORMAL_FILE:
2005         case LINK_CONTIGUOUS_FILE:
2006         case LINK_DIRECTORY:
2007             data_offset = current_offset + sizeof(tar_header);
2008             done = true;
2009             break;
2010 
2011             /* since we do not extract for tar files, a symlink and a hardlink
2012              * are the same to us */
2013         case LINK_HARD_LINK:
2014         case LINK_SYMBOLIC_LINK:
2015             if (full_link[0] == 0)
2016             {
2017                 string_copy(full_link, sizeof(full_link), current_header.h->tar.linkname, TAR_NAME_LEN);
2018             }
2019             done = true;
2020             break;
2021 
2022             /* ----------
2023              * These types we ignore and they are defined to not have a data size
2024              */
2025         case LINK_CHARACTER_SPECIAL:
2026         case LINK_BLOCK_SPECIAL:
2027         case LINK_FIFO:
2028         case LINK_INODE_METADATA:
2029             /* -----
2030              * Nothing to be done
2031              */
2032             TAR_DEBUG (("%s: ignored block link type %s(%c) @ %lu\n",
2033                         __func__,
2034                         get_link_string(current_header.h->tar.link),
2035                         get_link_string(current_header.h->tar.link),
2036                         (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2037             data_size = 0; /* data size is specifically to be ignored */
2038             done = true;
2039             break;
2040 
2041             /* ----------
2042              * These types we ignore as a final block or a block unto themselves and are
2043              * not part of a series of blocks and they are defined to have a data size
2044              */
2045         case LINK_PAX_GLOBAL_XHDR:
2046         case LINK_GNU_DUMPDIR: /* we are just gonna ignore this and not treat it like LINK_DIRECTORY */
2047         case LINK_SOLARIS_ACL_FILE:
2048         case LINK_VOLUME_NAME:
2049         case LINK_MULTI_VOLUME:
2050             TAR_DEBUG (("%s: ignored block link type %s(%c) %lu @ %lu\n",
2051                         __func__,
2052                         get_link_string(current_header.h->tar.link),
2053                         get_link_string(current_header.h->tar.link),
2054                         data_size,
2055                         (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2056             done = true;
2057             break;
2058 
2059             /* ----------
2060              * These types we ignore as a block with in a series of blocks
2061              * and they are defined to have a data size
2062              *
2063              * Nothing to be done
2064              */
2065         case LINK_SOLARIS_ACL:
2066         case LINK_PAX_XHDR: /* posix extended */
2067             /* -----
2068              */
2069             TAR_DEBUG (("%s: ignored block link type %s(%c) @ %lu\n",
2070                         __func__,
2071                         get_link_string(current_header.h->tar.link),
2072                         get_link_string(current_header.h->tar.link),
2073                         (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2074             break;
2075 
2076             /* some link types we ignore this block */
2077         default:
2078             TAR_DEBUG (("%s: Ignoring block with link %s(%c/%02.2x) @ %lu\n",
2079                         __func__,
2080                         get_link_string(current_header.h->tar.link),
2081                         current_header.h->tar.link,
2082                         (unsigned)(current_header.h->tar.link),
2083                         (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2084             break;
2085 
2086         case LINK_NEXT_LONG_LINK:   /* long link name */
2087             /* -----
2088              * Long link name needs access now to its full set of data blocks, request a window shift
2089              * if it is not currently accessible
2090              */
2091             if ( (uint64_t)( offset + sizeof( tar_header ) + data_size ) > hard_limit )
2092                 return 0;
2093 
2094             string_copy(full_link, sizeof(full_link), (char*)(current_header.b + sizeof(tar_header)), data_size);
2095             break;
2096         case LINK_NEXT_LONG_NAME:   /* long path name */
2097             /* -----
2098              * Long path name needs access now to its full set of data blocks, request a window shift
2099              * if it is not currently accessible
2100              */
2101             if ( (uint64_t)( offset + sizeof( tar_header ) + data_size ) > hard_limit )
2102             {
2103                 return 0;
2104             }
2105 
2106             string_copy(full_path, sizeof(full_path), (char*)(current_header.b + sizeof(tar_header)), data_size);
2107             break;
2108         }
2109 
2110         /* -----
2111          * move the current header offset to past the data blocks
2112          */
2113         if (link == LINK_SPARSE)
2114         {
2115             current_offset += sizeof (tar_header);
2116             current_header.b += sizeof(tar_header);
2117         }
2118         else
2119         {
2120             current_offset += sizeof (tar_header)+ ((data_size+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)*TAR_BLOCK_SIZE;
2121             current_header.b += sizeof(tar_header) + ((data_size+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)*TAR_BLOCK_SIZE;
2122         }
2123         /* -----
2124          * if that is past the currently available
2125          * quit the parse of this entry asn ask for a window shift, yeah, we'll redo work
2126          * but its far simpler code to just start over than track being in the middle
2127          */
2128         if ((!done) && (current_offset >= hard_limit))
2129         {
2130             return 0;
2131         }
2132 
2133     } while (! done);
2134 
2135     /* -----
2136      * generate output for this entry
2137      */
2138     switch (type)
2139     {
2140     default:
2141         if ( ! silent )
2142         {
2143             PLOGMSG (klogErr,(klogErr,"Unhandled Header Block Type $(type):$(typeint)","type=%c,typeint=%d",type ? type : '0',type));
2144         }
2145         return RC (rcFS, rcArc, rcParsing, rcData, rcUnsupported);
2146     case TAR_ZERO_BLOCK:
2147 #if 0
2148         LOGMSG (klogDebug1, "Zero Block");
2149 #endif
2150         /* ignored */
2151         break;
2152 
2153     case TAR_CPIO:
2154     case TAR_V7:
2155     case TAR_POSIX:
2156 #if 0
2157     case TAR_PAX:
2158 #endif
2159     case TAR_SUN:
2160     case TAR_STAR_85:
2161     case TAR_STAR_94:
2162     case TAR_GNU_89:
2163 #if 0
2164     case TAR_GNU_01:
2165 #endif
2166     case TAR_SPARSE:
2167         /* -----
2168          * TODO:
2169          *  implement a header checksum routine call it here, and return -1 if the check sum fails
2170          */
2171         if (gnu_sparse)
2172         {
2173 #if 0
2174             PLOGMSG ((klogDebug3,
2175                       "Need to create a file but gnu_sparse is true $(l)",
2176                       PLOG_U8(l),
2177                       link
2178                          ));
2179 #endif
2180             link = LINK_SPARSE; /* faking it for the next section */
2181         }
2182         switch (link)
2183         {
2184         case LINK_OLDNORMAL_FILE:
2185         case LINK_NORMAL_FILE:
2186         case LINK_CONTIGUOUS_FILE:
2187 #if 0
2188             LOGMSG (klogDebug3, "KTocCreateFile");
2189 #endif
2190             TAR_DEBUG (("%s call KTocCreateFile '%c':%hhd\n", __func__, link, link));
2191             KTocCreateFile (self->toc,
2192                             data_size?data_offset:0,
2193                             data_size,
2194                             mtime,
2195                             mode,
2196                             (KCreateMode)(kcmInit|kcmParents),
2197                             full_path);
2198             /* -----
2199              * TODO:
2200              *  print something if extraneous fields found
2201              */
2202             break;
2203         case LINK_HARD_LINK:
2204             TAR_DEBUG (("%s call KTocCreateHardLink\n", __func__));
2205             KTocCreateHardLink (self->toc, mtime, mode,
2206                                 (KCreateMode)(kcmInit|kcmParents),
2207                                 full_link, full_path);
2208             /* -----
2209              * TODO:
2210              *  print something if extraneous fields found
2211              */
2212             break;
2213         case LINK_SYMBOLIC_LINK:
2214             TAR_DEBUG (("%s call KTocCreateSoftLink\n", __func__));
2215             KTocCreateSoftLink (self->toc, mtime, mode,
2216                                 (KCreateMode)(kcmInit|kcmParents),
2217                                 full_link, full_path);
2218             /* -----
2219              * TODO:
2220              *  print something if extraneous fields found
2221              */
2222             break;
2223 
2224         case LINK_SPARSE:
2225             TAR_DEBUG (("%s LINK_SPARSE current_offset %jx: data_offset %jx: data_size %jx\n",
2226                         __func__, current_offset, data_offset, data_size));
2227             make_chunk_list(self, data_offset);
2228             KTocCreateChunkedFile (self->toc,
2229                                    virtual_data_size,
2230                                    mtime, mode,
2231                                    self->num_chunks,
2232                                    self->chunks,
2233                                    (KCreateMode)(kcmInit|kcmParents),
2234                                    full_path);
2235             whack_chunk_list (self);
2236             break;
2237         case LINK_CHARACTER_SPECIAL:
2238         case LINK_BLOCK_SPECIAL:
2239         case LINK_FIFO:
2240         case LINK_VOLUME_NAME:
2241         case LINK_SOLARIS_ACL:
2242         case LINK_GNU_DUMPDIR:
2243         case LINK_SUN_XHDR:
2244         case LINK_INODE_METADATA:
2245             TAR_DEBUG ( ( "%s ignored entry type %s linktype name  %s\n",
2246                           __func__, get_link_string( link ), full_path ) );
2247             /* ignore */
2248             break;
2249         case LINK_DIRECTORY:
2250             TAR_DEBUG (("%s call KTocCreateDir\n", __func__));
2251             KTocCreateDir (self->toc, mtime, mode, (KCreateMode)(kcmOpen|kcmParents), full_path);
2252             /* -----
2253              * TODO:
2254              *  print something if extraneous fields found?
2255              */
2256             break;
2257         default:
2258             if ( ! silent )
2259             {
2260                 PLOGMSG(klogErr,(klogErr,
2261                                  " type ($(type)) name ($(name() link ($(link)) size ($(size)) offset ($(offset))",
2262                                  "type=%s,name=%s,link=%s,size=%lld,offset=%lld",
2263                                  get_type_string(type),
2264                                  full_path,
2265                                  full_link,
2266                                  data_size,
2267                                  data_offset));
2268             }
2269             break;
2270         }
2271         break;
2272     }
2273     return current_offset - offset;
2274 }
2275 
2276 
2277 static
KArcParseTAR_intern(KToc * self,const void * kvoid,bool silent)2278 rc_t KArcParseTAR_intern ( KToc * self,
2279                            const void * kvoid,
2280                            bool silent )
2281 {
2282     KTarState state;
2283     /* -----
2284      * offset is the running index into the file of where the first tar header
2285      *          for the next element should start
2286      */
2287     uint64_t offset;
2288     /* -----
2289      * used is the count of bytes used for headers, storage and padding of the last
2290      *          examined element
2291      */
2292     uint64_t used;
2293     uint64_t filesize;
2294     const KFile * kfile = kvoid;
2295     rc_t rc;
2296 
2297     TAR_FUNC_ENTRY();
2298 
2299     /* -----
2300      * save the KFS File and TOC references
2301      */
2302 
2303     if ( kfile == NULL )
2304     {
2305         if ( !silent )
2306             LOGMSG ( klogFatal, "Called with a bad KFile parameter" );
2307         return RC (rcFS, rcArc, rcParsing, rcParam, rcNull );
2308     }
2309     else if ( self == NULL )
2310     {
2311         if ( !silent )
2312             LOGMSG (klogFatal, "Called with a bad KToc parameter");
2313         return RC (rcFS, rcArc, rcParsing, rcSelf, rcNull );
2314     }
2315 
2316     memset (&state, 0, sizeof (state));
2317     state.kfile = kfile;
2318     state.toc = self;
2319 
2320     if ( ( rc = KFileSize ( state.kfile, &filesize ) ) != 0 )
2321     {
2322         if ( !silent )
2323             LOGERR ( klogFatal, rc, "Failed to get file size of tarfile" );
2324     }
2325     else if ( filesize == 0 )
2326     {
2327         rc = RC ( rcFS, rcArc, rcAccessing, rcArc, rcEmpty );
2328         if ( !silent )
2329             LOGERR( klogFatal, rc, "Empty file" );
2330     }
2331     else if ( ( rc = map_tar_file ( &state, 0 ) ) != 0 )
2332     {
2333         if ( !silent )
2334             PLOGMSG( klogFatal,
2335                     ( klogFatal, "Failed to $(operation) of size $(size)",
2336                       "operation=%s,size=%lu", "mmap", filesize ) );
2337         return rc;
2338     }
2339     else
2340     {
2341         int stalled = 0;
2342 
2343         for ( offset = 0; offset < filesize; )
2344         {
2345             /* -----
2346              * evaluate the tar file header at the current offset into the file
2347              *
2348              * the return is a positive number of bytes used
2349              * OR a negative on a file parse error
2350              * OR zero for a file that would exceed the current window
2351              */
2352             used = process_one_entry ( &state, offset, state.buffer_limit, silent );
2353 
2354             if ( used == 0 )
2355                 stalled ++;
2356             else
2357                 stalled = 0;
2358 
2359             /* kill any left over sparse data - safely handles an empty queue */
2360             sparse_data_kill ( &state.sparse_q );
2361 
2362             if ( state.found_second_zero_block )
2363             {
2364                 break;
2365             }
2366             if ( used > 0 )
2367             {
2368                 offset += used;
2369             }
2370             else if ( used == 0 )   /* if the validate returns 0 we need more buffer */
2371             {
2372                 if ( stalled == 5 ) /* sure why not 5 tries */
2373                 {
2374                     rc = RC ( rcFS, rcArc, rcParsing, rcArc, rcIncomplete );
2375                     break;
2376                 }
2377                 map_tar_file ( &state, offset ); /* slide the window up to current location */
2378                 /* -----
2379                  * TODO:
2380                  *  Handle a repeated call from the same location as a failure in file format
2381                  * (truncation) of the tar file
2382                  */
2383             }
2384             else /*if (used < 0)*/      /* if it is negative it means abort the tar file */
2385             {
2386                 rc = RC ( rcFS, rcArc, rcParsing, rcArc, rcUnexpected );
2387                 break;
2388             }
2389         }
2390         if ( offset > filesize )
2391         {
2392             rc = RC ( rcFS, rcArc, rcParsing, rcToc, rcIncomplete );
2393             TAR_DEBUG (( "%s %R File offset %ju exceeds filesize %ju\n",
2394                          __func__, rc, offset, filesize ));
2395         }
2396         release_map ( &state );
2397     }
2398     return rc;
2399 }
2400 
2401 /* ======================================================================
2402  * validating a tar file is listing the files, links and directories
2403  * in that archive.
2404  *
2405  * partial results are not to be accepted in the end but it is the responsibility
2406  * of the caller to clear them.
2407  *
2408  * returns 0 for good archive and -1 for bad archive
2409  */
KArcParseTAR(KToc * self,const void * kvoid,bool (CC * ignored)(const KDirectory *,const char *,void *),void * also_ignored)2410 LIB_EXPORT rc_t CC KArcParseTAR ( KToc * self,
2411               const void * kvoid,
2412               bool ( CC * ignored )( const KDirectory *, const char *, void * ),
2413               void *also_ignored )
2414 {
2415     return KArcParseTAR_intern ( self, kvoid, false );
2416 }
2417 
2418 
KArcParseTAR_silent(KToc * self,const void * kvoid,bool (CC * ignored)(const KDirectory *,const char *,void *),void * also_ignored)2419 LIB_EXPORT rc_t CC KArcParseTAR_silent ( KToc * self,
2420               const void * kvoid,
2421               bool ( CC * ignored )( const KDirectory *, const char *, void * ),
2422               void *also_ignored )
2423 {
2424     return KArcParseTAR_intern ( self, kvoid, true );
2425 }
2426 
2427 
KDirectoryVOpenTarArchiveRead(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * fmt,va_list args)2428 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead ( struct KDirectory const *self,
2429     struct KDirectory const **tar_dir, int chroot, const char *fmt, va_list args )
2430 {
2431     char path [ 4096 ];
2432     /*VDB-4386: cannot treat va_list as a pointer!*/
2433     int size = 0;
2434     if ( fmt != NULL ) /*(args == NULL) ? snprintf  ( path, sizeof path, "%s", fmt ) :*/
2435         size = vsnprintf ( path, sizeof path, fmt, args );
2436     if ( size < 0 || size >= ( int ) sizeof path )
2437         return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2438 
2439     /* putting off parameter validation into this call */
2440     return KDirectoryOpenArcDirRead ( self, tar_dir, false, path, tocKFile,
2441                                       KArcParseTAR, NULL, NULL );
2442 }
2443 
KDirectoryVOpenTarArchiveRead_silent(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * fmt,va_list args)2444 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead_silent ( struct KDirectory const *self,
2445     struct KDirectory const **tar_dir, int chroot, const char *fmt, va_list args )
2446 {
2447     char path [ 4096 ];
2448     /*VDB-4386: cannot treat va_list as a pointer!*/
2449     int size = 0;
2450     if ( fmt != NULL ) /*(args == NULL) ? snprintf  ( path, sizeof path, "%s", fmt ) :*/
2451         size = vsnprintf ( path, sizeof path, fmt, args );
2452     if ( size < 0 || size >= ( int ) sizeof path )
2453         return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2454 
2455     /* putting off parameter validation into this call */
2456     return KDirectoryOpenArcDirRead_silent ( self, tar_dir, false, path, tocKFile,
2457                                       KArcParseTAR_silent, NULL, NULL );
2458 }
2459 
2460 
KDirectoryVOpenTarArchiveRead_silent_preopened(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const struct KFile * f,const char * fmt,va_list args)2461 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead_silent_preopened ( struct KDirectory const *self,
2462     struct KDirectory const **tar_dir, int chroot, const struct KFile * f, const char *fmt, va_list args )
2463 {
2464     char path [ 4096 ];
2465     /*VDB-4386: cannot treat va_list as a pointer!*/
2466     int size = 0;
2467     if ( fmt != NULL ) /*(args == NULL) ? snprintf  ( path, sizeof path, "%s", fmt ) :*/
2468         size = vsnprintf ( path, sizeof path, fmt, args );
2469     if ( size < 0 || size >= ( int ) sizeof path )
2470         return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2471 
2472     /* putting off parameter validation into this call */
2473     return KDirectoryOpenArcDirRead_silent_preopened ( self, tar_dir, false, path, tocKFile,
2474                                       (void*)f, KArcParseTAR_silent, NULL, NULL );
2475 }
2476 
2477 
KDirectoryOpenTarArchiveRead(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * path,...)2478 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead ( struct KDirectory const *self,
2479     struct KDirectory const **tar_dir, int chroot, const char *path, ... )
2480 {
2481     rc_t rc;
2482     va_list args;
2483 
2484     va_start ( args, path );
2485     rc = KDirectoryVOpenTarArchiveRead ( self, tar_dir, chroot, path, args );
2486     va_end ( args );
2487 
2488     return rc;
2489 }
2490 
2491 
KDirectoryOpenTarArchiveRead_silent(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * path,...)2492 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead_silent ( struct KDirectory const *self,
2493     struct KDirectory const **tar_dir, int chroot, const char *path, ... )
2494 {
2495     rc_t rc;
2496     va_list args;
2497 
2498     va_start ( args, path );
2499     rc = KDirectoryVOpenTarArchiveRead_silent ( self, tar_dir, chroot, path, args );
2500     va_end ( args );
2501 
2502     return rc;
2503 }
2504 
2505 
KDirectoryOpenTarArchiveRead_silent_preopened(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const KFile * f,const char * path,...)2506 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead_silent_preopened ( struct KDirectory const *self,
2507     struct KDirectory const **tar_dir, int chroot, const KFile * f, const char *path, ... )
2508 {
2509     rc_t rc;
2510     va_list args;
2511 
2512     va_start ( args, path );
2513     rc = KDirectoryVOpenTarArchiveRead_silent_preopened ( self, tar_dir, chroot, f, path, args );
2514     va_end ( args );
2515 
2516     return rc;
2517 }
2518 
2519 
2520 /* ----------
2521  * Validate that the compiler packed all the character arrays into the correct sizes
2522  * to use the header.
2523  *
2524  * a bunch of magic numbers are in the function because we are making sure that
2525  * they are reached by using the structure members.  They are listed in comments
2526  * within comments in the tar_header.h file.
2527  *
2528  * No return value
2529  */
validate_header_offsets(void)2530 LIB_EXPORT bool CC validate_header_offsets( void )
2531 {
2532     bool pass_fail = true; /* assume pass */
2533 #if _DEBUGGING
2534     /* -----
2535      * checking the size entails making sure the compiler made the structure
2536      * the right length to match the series of octents in the file
2537      */
2538 #define check_size(T,V)     if( sizeof(T) != (size_t)V) { pass_fail = false; printf ("*** BAD_SIZE: %s is %u not %u\n", #T, (unsigned)sizeof(T), (unsigned)V);}
2539     /* -----
2540      * checking the offset entails making sure the compiler put the field at the
2541      * right absolute location within a structure
2542      */
2543 #define check_offset(T,M,V) if( OFFSET_OF(T,M) != (size_t)V ) { pass_fail = false; printf ("*** BAD_OFFSET: %s.%s is %u not %d\n", #T, #M, (unsigned)OFFSET_OF(T,M), V);}
2544 
2545     check_size(tar_raw_block,512);
2546     check_size(tar_v7_header,257);
2547     check_size(tar_posix_header,500);
2548     check_size(tar_sun_header,512);
2549     check_size(tar_star_85_header,512);
2550     check_size(tar_star_94_header,512);
2551     check_size(tar_gnu_89_header,495);
2552     /*check_size(tar_new_gnu_header,512); */
2553     check_size(tar_sparse_header,505);
2554 
2555 
2556     /* tar_header union */
2557     check_offset(tar_header,raw,0);
2558     check_offset(tar_header,tar,0);
2559     check_offset(tar_header,posix,0);
2560     check_offset(tar_header,suntar,0);
2561     check_offset(tar_header,star_85,0);
2562     check_offset(tar_header,star_94,0);
2563     check_offset(tar_header,gnu_89,0);
2564     /*check_offset(tar_header,new_gnu,0); */
2565     check_offset(tar_header,sparse,0);
2566 
2567     /* tar_v7_header */
2568     check_offset(tar_v7_header,name,0);
2569     check_offset(tar_v7_header,mode,100);
2570     check_offset(tar_v7_header,uid,108);
2571     check_offset(tar_v7_header,gid,116);
2572     check_offset(tar_v7_header,size,124);
2573     check_offset(tar_v7_header,mtime,136);
2574     check_offset(tar_v7_header,csum,148);
2575     check_offset(tar_v7_header,link,156);
2576     check_offset(tar_v7_header,linkname,157);
2577 
2578     /* tar_posix_header */
2579     check_offset(tar_posix_header,name,0);
2580     check_offset(tar_posix_header,mode,100);
2581     check_offset(tar_posix_header,uid,108);
2582     check_offset(tar_posix_header,gid,116);
2583     check_offset(tar_posix_header,size,124);
2584     check_offset(tar_posix_header,mtime,136);
2585     check_offset(tar_posix_header,csum,148);
2586     check_offset(tar_posix_header,link,156);
2587     check_offset(tar_posix_header,linkname,157);
2588     check_offset(tar_posix_header,magic,257);
2589     check_offset(tar_posix_header,version,263);
2590     check_offset(tar_posix_header,uname,265);
2591     check_offset(tar_posix_header,gname,297);
2592     check_offset(tar_posix_header,devmajor,329);
2593     check_offset(tar_posix_header,devminor,337);
2594     check_offset(tar_posix_header,prefix,345);
2595 
2596     /* tar_sun_header */
2597     check_offset(tar_sun_header,name,0);
2598     check_offset(tar_sun_header,mode,100);
2599     check_offset(tar_sun_header,uid,108);
2600     check_offset(tar_sun_header,gid,116);
2601     check_offset(tar_sun_header,size,124);
2602     check_offset(tar_sun_header,mtime,136);
2603     check_offset(tar_sun_header,csum,148);
2604     check_offset(tar_sun_header,link,156);
2605     check_offset(tar_sun_header,linkname,157);
2606     check_offset(tar_sun_header,magic,257);
2607     check_offset(tar_sun_header,version,263);
2608     check_offset(tar_sun_header,uname,265);
2609     check_offset(tar_sun_header,gname,297);
2610     check_offset(tar_sun_header,devmajor,329);
2611     check_offset(tar_sun_header,devminor,337);
2612     check_offset(tar_sun_header,prefix,345);
2613     check_offset(tar_sun_header,extnum,500);
2614     check_offset(tar_sun_header,extcount,501);
2615     check_offset(tar_sun_header,fullsize,502);
2616 
2617     /* tar_star_85_header */
2618     check_offset(tar_star_85_header,name,0);
2619     check_offset(tar_star_85_header,mode,100);
2620     check_offset(tar_star_85_header,uid,108);
2621     check_offset(tar_star_85_header,gid,116);
2622     check_offset(tar_star_85_header,size,124);
2623     check_offset(tar_star_85_header,mtime,136);
2624     check_offset(tar_star_85_header,csum,148);
2625     check_offset(tar_star_85_header,link,156);
2626     check_offset(tar_star_85_header,linkname,157);
2627     check_offset(tar_star_85_header,starversion,257);
2628     check_offset(tar_star_85_header,starfiletype,258);
2629     check_offset(tar_star_85_header,startype,266);
2630     check_offset(tar_star_85_header,rdev,278);
2631     check_offset(tar_star_85_header,atime,290);
2632     check_offset(tar_star_85_header,ctime,302);
2633     check_offset(tar_star_85_header,uname,314);
2634     check_offset(tar_star_85_header,gname,330);
2635     check_offset(tar_star_85_header,prefix,345);
2636     check_offset(tar_star_85_header,xmagic,508);
2637 
2638     /* tar_star_94_header */
2639     check_offset(tar_star_94_header,name,0);
2640     check_offset(tar_star_94_header,mode,100);
2641     check_offset(tar_star_94_header,uid,108);
2642     check_offset(tar_star_94_header,gid,116);
2643     check_offset(tar_star_94_header,size,124);
2644     check_offset(tar_star_94_header,mtime,136);
2645     check_offset(tar_star_94_header,csum,148);
2646     check_offset(tar_star_94_header,link,156);
2647     check_offset(tar_star_94_header,linkname,157);
2648     check_offset(tar_star_94_header,magic,257);
2649     check_offset(tar_star_94_header,version,263);
2650     check_offset(tar_star_94_header,uname,265);
2651     check_offset(tar_star_94_header,gname,297);
2652     check_offset(tar_star_94_header,devmajor,329);
2653     check_offset(tar_star_94_header,devminor,337);
2654     check_offset(tar_star_94_header,prefix,345);
2655     check_offset(tar_star_94_header,isextended,355);
2656     check_offset(tar_star_94_header,sparse,356);
2657     check_offset(tar_star_94_header,realsize,452);
2658     check_offset(tar_star_94_header,offset,464);
2659     check_offset(tar_star_94_header,atime,476);
2660     check_offset(tar_star_94_header,ctime,488);
2661     check_offset(tar_star_94_header,xmagic,508);
2662 
2663     /* tar_gnu_89_header */
2664     check_offset(tar_gnu_89_header,name,0);
2665     check_offset(tar_gnu_89_header,mode,100);
2666     check_offset(tar_gnu_89_header,uid,108);
2667     check_offset(tar_gnu_89_header,gid,116);
2668     check_offset(tar_gnu_89_header,size,124);
2669     check_offset(tar_gnu_89_header,mtime,136);
2670     check_offset(tar_gnu_89_header,csum,148);
2671     check_offset(tar_gnu_89_header,link,156);
2672     check_offset(tar_gnu_89_header,linkname,157);
2673     check_offset(tar_gnu_89_header,magic,257);
2674     check_offset(tar_gnu_89_header,uname,265);
2675     check_offset(tar_gnu_89_header,gname,297);
2676     check_offset(tar_gnu_89_header,atime,345);
2677     check_offset(tar_gnu_89_header,ctime,357);
2678     check_offset(tar_gnu_89_header,offset,369);
2679     check_offset(tar_gnu_89_header,longnames,381);
2680     check_offset(tar_gnu_89_header,sparse,386);
2681     check_offset(tar_gnu_89_header,isextended,482);
2682 
2683     /* tar_extended_header */
2684     check_offset(tar_sparse_header,sparse,0);
2685     check_offset(tar_sparse_header,isextended,504);
2686 #endif
2687     return pass_fail;
2688 }
2689 
2690 /* end of file */
2691