1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #define HANDLING_EXTENDED_HEADERS 0
27
28 #include <kfs/extern.h>
29 #include <klib/defs.h>
30 #include <klib/rc.h>
31 #include <kfs/file.h>
32 #include <kfs/mmap.h>
33 #include <kfs/arc.h>
34 #include <kfs/toc.h>
35 #include <kfs/tar.h>
36 #include <klib/log.h>
37 #include <klib/debug.h>
38
39 #include "toc-priv.h"
40 #include <os-native.h>
41 #include <sysalloc.h>
42 #include <strtol.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <stdio.h> /* temporary for development */
46
47 #include <limits.h>
48 #include <sys/types.h>
49
50 #ifdef _DEBUGGING
51 #define TAR_FUNC_ENTRY() DBGMSG (DBG_KFS, DBG_FLAG(DBG_KFS_TARENTRY), ("Enter: %s\n", __func__))
52 #define TAR_DEBUG(msg) DBGMSG (DBG_KFS, DBG_FLAG(DBG_KFS_TAR), msg)
53 #else
54 #define TAR_FUNC_ENTRY()
55 #define TAR_DEBUG(msg)
56 #endif
57
58 /* -----
59 * offset of is the count of bytes between the base of a structure and
60 * a particular member of that structure
61 */
62 #ifndef OFFSET_OF
63 #define OFFSET_OF(structure,member) ((size_t)((&(((structure*)0)->member))-(0)))
64 #endif
65
66 /* -----
67 * Hide any definition of sun that might have come from sun compilers
68 * or the like
69 */
70 #undef sun
71 #undef SUN
72
73
74 #define PASTE_2(a,b) a##b
75 #define PASTE_3(a,b,c) a##b##c
76 #define STRINGIFY(a) #a
77
78
/* ======================================================================
 * Debug-only helpers that return an ASCII string describing a value.
 *
 * The strings describing a header type (get_type_string, further below)
 * need to match enum tar_header_type.
 */
84 #if _DEBUGGING
/* -----
 * Describe a bool as text for debug messages.
 *
 * Returns "false" for false, "true" for true and "not-false" for any other
 * bit pattern that might be hiding in the bool.
 */
static const char * get_bool_string (bool b)
{
    /* widen first so that an odd "true" representation is still visible */
    const int as_int = ( int ) b;

    if ( as_int == false )
        return "false";

    if ( as_int == true )
        return "true";

    return "not-false";
}
100 #endif
101
102
103 /* ======================================================================
104 * Header format structures
105 *
 * Tar (tape archiver) started possibly as a unix utility in BSD rivalling
 * the cpio from the AT&T System III. The exact derivation is unimportant
 * and some names might be slightly misleading based on historical
 * inaccuracies, but successful implementation is not dependent on such
 * accuracy in historical trivia but rather on accuracy in technical details.
111 *
112 * Posix attempted to standardize the growingly divergent variants of tar
113 * but it has led to only slightly more standardized variants with vaguely
114 * compatible extensions.
115 *
 * For the purposes of this implementation we will refer to the versions of
 * tar known to the author and include the tar-like aspects of the posix pax
 * replacement for tar.
119 *
120 * Cpio support could be added if desired with only some difficulty.
121 *
 * Supported known variants for this implementation will be:
 *  V7      - the oldest known common base definitions for a tar header
 *            block (possibly from Unix V7?)
 *  POSIX   - Posix.1-1988 initial restandardization of a header block.
 *            This version introduced the ustar name for a tar header
 *            and includes that term as a "magic" constant.
 *  PAX     - Posix.1-2001 headers for pax, a tar derivative that is apparently
 *            a peaceful attempt to unify the tar and cpio formats.
 *            It isn't different than POSIX in the ustar block but instead
 *            is an introduction of two new values of a link field in the
 *            tar header that defines what comes next (see headers defined
 *            below).
 *  SUN     - an extension to the POSIX tar header format from SunOS 5
 *  STAR 85 - pre-POSIX extensions to tar from Joerg Schilling (ask him, it's
 *            the bestest most greatest tar until STAR94)
 *  STAR 94 - A redo of star based on the POSIX ustar tar header. A less
 *            broken than most implementation of a POSIX/ustar tar header
 *            based tar. Schilling says it's the only real implementation of
 *            a ustar based tar but it isn't fully compliant by design.
141 * GNU 89 - a selected variant of tar from FSF/GNU that is a broken
142 * implementation of a POSIX/ustar header based tar.
143 * GNU 01 - A slight redo of the FSF/GNU tar format. There are actually
144 * evolving variants all of which are still somewhat broken
145 * implementations of a ustar based tar header format.
146 *
 * Along with these variants of a tar/ustar semi-standard tar header there are
 * other header blocks and other significant blocks that are also tracked in
 * this implementation of a tar reader.
150 * ZERO BLOCK - a block of 512 zero bytes that is supposed to be padding at
151 * the end of a tar file to meet some super blocksize. Based on
152 * where it fits in it would be found when looking for a header
153 * for the next file included in a tar archive.
154 * RAW - a convention to mean a header block of a type that has not been
155 * determined.
156 */
157 #define TYPES() \
158 type_(UNDEFINED) type_(ZERO_BLOCK) type_(CPIO) type_(V7) type_(POSIX) \
159 type_(SUN) type_(STAR_85) type_(STAR_94) type_(GNU_89) type_(SPARSE)
160
161
162 #define type_(e) PASTE_2(TAR_,e),
163
164 typedef enum tar_header_type
165 {
166 TYPES()
167 TAR_TYPE_COUNT
168 } tar_header_type;
169
170 #undef type_
171 #define type_(e) STRINGIFY(e),
/* -----
 * Return the name of a tar_header_type value as an ASCII string.
 *
 * The name table is expanded from the TYPES() list with type_() defined
 * to stringify each entry. Any value outside 0 .. TAR_TYPE_COUNT-1 yields
 * "Error".
 */
static const char * get_type_string(tar_header_type t)
{
    static const char * const names[] =
    {
        TYPES()
    };

    if ((t >= 0) && (t < TAR_TYPE_COUNT))
        return names[t];

    return "Error";
}
183 #undef type_
184 #undef TYPES
185
186
187 /* =============================================================================
188 * Tar headers are almost ASCII based but definitely byte/octet based so all
189 * elements are best defined as arrays of char and use casts to signed and unsigned
190 * where appropriate in interpretation..
191 *
192 * All Tar files or streams are divided into blocks of 512 bytes
193 * This is significant in the file data in that the last block
194 * of a file is supposed to be padded with NUL to fill out a block
195 * and then be followed by two blocks of all NUL bytes.
196 * headers are also 512 bytes with various but fairly consistent
197 * interpretations of what is where with in that block
198 *
199 * Most tar utilities further define super blocks consisting of a number of blocks
200 * typically 10 of them for a length of 5120 bytes. This is irrelevant for this
201 * implmentation. By definition a tar file ends with two "zero blocks" and enough
202 * more after that to fill one of these super blocks. We ignore all aspects of
203 * this.
204 */
/* size in bytes of every tar block, header or data */
#define TAR_BLOCK_SIZE (512)

/* a tar block viewed as an undifferentiated array of bytes */
typedef char tar_raw_block [TAR_BLOCK_SIZE];

/* -----
 * number of blocks needed to hold byte_count bytes, rounding up.
 *
 * The argument is parenthesized so that an expression argument (a shift,
 * a conditional, ...) is added to as a whole rather than being split
 * apart by operator precedence.
 */
#define BLOCKS_FOR_BYTES(byte_count) (((byte_count)+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)
208
209 /* --------------------------------------------------------------------------------
210 * lengths of various tar header fields
211 */
212
213 /* --------------------
 * Tar file names are always 100 bytes long and include preceding
 * path names. The utilities do not preclude paths that put the
 * files outside of the "base" where the tar file was created.
217 *
218 * To handle tar files made by older tar utilities if the last
219 * character is '/' then the file should be assumed to be a directory.
220 *
221 * This 'type' is used for both the name of the object being archived
222 * and the link target if it is a hard or soft link.
223 */
224 #define TAR_NAME_LEN (100)
225 typedef char tar_file_name [TAR_NAME_LEN];
226
227 /* --------------------
228 * Tar mode strings are always 8 bytes long.
229 *
230 * 9 file access permissions bits and three execution mode bits.
231 *
 * Unused leading bytes are pre-filled with the digit zero '0', not NUL.
 * A user permission of 0644 would be stored as "0000644" with a NUL terminator.
234 *
235 * The format is 7 octal ASCII bytes with only the last 4 being
236 * significant. That is the first three are always '0'. The 8th
237 * byte is NUL.
238 *
239 * Older utilities might put preceding ' ' characters or
240 * instead of pre-fill have post fill ' ' or NUL.
241 */
242 #define TAR_MODE_LEN (8)
243 typedef char tar_file_mode [TAR_MODE_LEN];
244 /* -----
245 * These are the bits if the mode is in binary (octal defines for ease of interpretation)
246 * they match the st_mode field from the stat()/fstat() struct stat.
247 *
 * GNU tar puts the file type bits from the stat structure in the mode of the tar file.
 * Other tar implementations might as well, though nowhere is this defined as required,
 * recommended or even supported.
251 */
252 #define TAR_SUID_BIT (04000) /* set UID on execution */
253 #define TAR_GUID_BIT (02000) /* set GID on execution */
254 #define TAR_STICKY_BIT (01000) /* save text / sticky bit */
255 /* file permissions */
256 #define TAR_MODE_OREAD (00400) /* read by owner */
257 #define TAR_MODE_OWRITE (00200) /* write by owner */
258 #define TAR_MODE_OEXEC (00100) /* execute by owner */
259 #define TAR_MODE_GREAD (00040) /* read by group */
260 #define TAR_MODE_GWRITE (00020) /* write by group */
261 #define TAR_MODE_GEXEC (00010) /* execute by group */
262 #define TAR_MODE_WREAD (00004) /* read by other */
263 #define TAR_MODE_WWRITE (00002) /* write by other */
264 #define TAR_MODE_WEXEC (00001) /* execute by other */
265
266 /* -----
267 * These are the bits once converted into ASCII
268 * with in an ASCII byte these bits are actually usable so no conversion needed
269 * '0' = 0x30 / 060
270 * '1' = 0x31 / 061
271 * '2' = 0x32 / 062
272 * '3' = 0x33 / 063
273 * '4' = 0x34 / 064
274 * '5' = 0x35 / 065
275 * '6' = 0x36 / 066
276 * '7' = 0x37 / 067
277 */
278 #define TAR_MODE_READ (0x01)
279 #define TAR_MODE_WRITE (0x02)
280 #define TAR_MODE_EXEC (0x04)
281 #define TAR_MODE_STICKY (0x01)
282 #define TAR_MODE_GUID (0x02)
283 #define TAR_MODE_SUID (0x04)
284 #define TAR_MODE_OWNER_BYTE (6)
285 #define TAR_MODE_GROUP_BYTE (5)
286 #define TAR_MODE_WORLD_BYTE (4)
287 #define TAR_MODE_USER_BYTE (3)
288 #define TAR_MODE_EXEC_BYTE (2)
289
290 /* --------------------
291 * Tar user (and group) numeric IDs are put into 8 bytes.
292 *
293 * There is a '0' prefill and as terminating NUL.
294 *
295 * Older utilities might put preceding ' ' characters or
296 * instead of pre-fill have post fill ' ' or NUL.
297 */
298 #define TAR_ID_LEN (8)
299 typedef char tar_id [TAR_ID_LEN];
300
301 /* --------------------
302 * Tar file size elements are 12 bytes long with 11 used
303 * for octal characters making the maximum size of a file
304 * for pure classic or Posix tar limited to 8 GBytes.
305 * Various tar utilities handle longer files in different
306 * ways if at all.
307 *
308 * A length of 100 bytes would be stored as "00000000144".
309 *
310 * Links and some other special values are archived with a
311 * length of zero and thus no data blocks.
312 *
313 * There is a '0' prefill and as terminating NUL.
314 *
315 * Older utilities might put preceding ' ' characters or
316 * instead of pre-fill have post fill ' ' or NUL.
317 *
 * GNU tar implementations use alternative interpretations of
 * this and possibly other fields using mime base 64 or
 * base 256 (big endian nonstandard sized binary)
321 */
322 #define TAR_SIZE_LEN (12)
323 typedef char tar_size [TAR_SIZE_LEN];
324 #define MAX_TAR_FILE_SIZE (077777777777)
325
326 /* --------------------
327 * Tar file modification/access/creation times are 12 bytes
328 * long. This holds 11 octal ASCII digits representing the
329 * number of seconds since 01/01/1970 00:00 UTC.
330 *
331 * There is a '0' prefill and as terminating NUL.
332 *
333 * Older utilities might put preceding ' ' characters or
334 * instead of pre-fill have post fill ' ' or NUL.
335 */
336 #define TAR_TIME_LEN (12)
337 typedef char tar_time [TAR_TIME_LEN];
338
339 /* --------------------
340 * Tar has a weak checksum protection of part of the tar header
341 * that is 8 bytes long and again uses 7 ASCII octal digits.
342 *
343 * There is a '0' prefill and as terminating NUL.
344 *
345 * Older utilities might put preceding ' ' characters or
346 * instead of pre-fill have post fill ' ' or NUL.
347 */
/* width in bytes of the checksum field */
#define TAR_CSUM_LEN (8)
typedef char tar_csum [TAR_CSUM_LEN];

/* -----
 * One blank for every byte of the checksum field (TAR_CSUM_LEN of them).
 * NOTE(review): presumably substituted for the csum field while the header
 * checksum is recomputed; the use is outside this part of the file - confirm.
 */
#define csum_blanks ("        ")
351
352 /* --------------------
353 * The tar link is a single byte that expresses the type of file
354 * or special value represented by this entry.
355 */
356 typedef char tar_link;
357
358 #define LINKS() \
359 link_('\0',OLDNORMAL_FILE) \
360 link_('0',NORMAL_FILE) \
361 link_('1',HARD_LINK) \
362 link_('2',SYMBOLIC_LINK) \
363 link_('3',CHARACTER_SPECIAL) \
364 link_('4',BLOCK_SPECIAL) \
365 link_('5',DIRECTORY) \
366 link_('6',FIFO) \
367 link_('7',CONTIGUOUS_FILE) \
368 link_('A',SOLARIS_ACL) \
369 link_('D',GNU_DUMPDIR) \
370 link_('E',SOLARIS_ACL_FILE) \
371 link_('I',INODE_METADATA) \
372 link_('K',NEXT_LONG_LINK) \
373 link_('L',NEXT_LONG_NAME) \
374 link_('M',MULTI_VOLUME) \
375 link_('N',GNU1989_LONG_NAMES) \
376 link_('S',SPARSE) \
377 link_('V',VOLUME_NAME) \
378 link_('X',SUN_XHDR) \
379 link_('g',PAX_GLOBAL_XHDR) \
380 link_('x',PAX_XHDR)
381
382 #define link_(v,n) PASTE_2(LINK_,n) = v,
383
/* LINK_COUNT is not a count of the entries: it follows LINK_PAX_XHDR ('x'),
 * so its value is one more than the largest link value */
385 enum e_tar_link
386 {
387 LINKS()
388 LINK_COUNT
389 };
390 #undef link_
391
392 #if _DEBUGGING
393 #define link_(v,n) {STRINGIFY(n), v},
/* a link type name paired with the link byte it describes */
struct nv_pair
{
    const char * name;
    tar_link link;
};

/* -----
 * Return the name of a tar link byte as an ASCII string for debug output.
 *
 * The table is expanded from the LINKS() list and closed by an entry with
 * a NULL name. A link byte that is not in the table yields "UNDEFINED".
 */
static const char * get_link_string(tar_link l)
{
    static const struct nv_pair table[] =
    {
        LINKS()
        {NULL, 0}
    };
    const struct nv_pair * entry;

    /* the NULL name, not the link value, marks the end of the table */
    for (entry = table; entry->name != NULL; ++entry)
    {
        if (entry->link == l)
            return entry->name;
    }
    return "UNDEFINED";
}
413 #undef link_
414 #endif
415 #undef LINKS
416
417
418 /* --------------------
419 * The tar magic string is 6 bytes long.
420 */
421 #define TAR_MAGIC_LEN (6)
422 typedef char tar_magic [TAR_MAGIC_LEN];
423 #define POSIX_MAGIC_CONST "ustar" /* includes terminating NUL */
424
425 /* --------------------
426 * The tar version string is two bytes long and uses both bytes
427 */
428 #define TAR_VERSION_LEN (2)
429 typedef char tar_version [TAR_VERSION_LEN];
430 #define POSIX_VERSION_CONST "00" /* does not include terminating NUL */
431
432 /* --------------------
433 * The Posix strong user/group name is 32 bytes long
434 */
435 #define TAR_STRNAME_LEN (32)
436 typedef char tar_strname [TAR_STRNAME_LEN];
437
438 /* --------------------
439 * The dev? strings are 8 bytes long
440 */
441 #define TAR_DEV_LEN (8)
442 typedef char tar_dev [TAR_DEV_LEN];
443
444 /* --------------------
 * Posix prefix is 155 bytes that can be put before the name to give a path of
 * 255 bytes instead of the smaller limit of 99.
447 */
448 #define TAR_PREFIX_LEN (155)
449 typedef char tar_prefix [TAR_PREFIX_LEN];
450
451 /*
452 * Sun extensions
453 */
454
455 /* --------------------
456 */
457 typedef char sun_extnum;
458 #define SUN_FULLSIZE_LEN (10)
459 typedef char sun_fullsize [SUN_FULLSIZE_LEN];
460
461 /* --------------------
462 * star extensions
463 *
464 * star85 is old star from 1985
465 */
466 typedef char star85_version;
467 #define STAR85_FILETYPE_LEN (8)
468 typedef char star85_filetype [STAR85_FILETYPE_LEN];
469 #define STAR85_TYPE_LEN (12)
470 typedef char star85_type [STAR85_TYPE_LEN];
471 #define STAR85_RDEV_LEN (12)
472 typedef char star85_rdev [STAR85_RDEV_LEN];
473 /* ignoring the 11 byte rdev with minor bits */
474 #define STAR85_UNAME_LEN (16)
475 typedef char star85_uname [STAR85_UNAME_LEN];
476 #define STAR85_GNAME_LEN (15)
477 typedef char star85_gname [STAR85_GNAME_LEN];
478 #define STAR_XMAGIC_LEN (4)
479 typedef char star_magic [STAR_XMAGIC_LEN];
480 #define STAR_MAGIC_CONST ("tar")
481 #define NSTAR_PREFIX_LEN (1)
482 typedef char nstar_prefix [NSTAR_PREFIX_LEN];
483
484 /* --------------------
485 * gnu extensions
486 */
/* the old GNU header merges the magic and version fields into 8 bytes */
#define GNU89_MAGIC_LEN (8)
typedef char gnu89_magic [GNU89_MAGIC_LEN];

/* "ustar", two blanks and the terminating NUL: GNU89_MAGIC_LEN bytes */
#define GNU_89_MAGIC_CONST "ustar  "
/* "GNUtar", one blank and the terminating NUL: GNU89_MAGIC_LEN bytes */
#define GNU_89_GNUMAGIC_CONST "GNUtar "

#define GNU89_LONGNAMES_LEN (4)
typedef char gnu89_longnames [GNU89_LONGNAMES_LEN];
493
494
495 /* --------------------
496 * shared between the feuding star and gnu tar
497 */
498 typedef char tar_isextended ;
499 typedef struct tar_sparse
500 {
501 tar_size offset;
502 tar_size num_bytes;
503 } tar_sparse;
504 #define GNU_SPARSES_IN_EXTRA_HEADER (16)
505 #define GNU_SPARSES_IN_OLD_HEADER (4)
506 #define GNU_SPARSES_IN_SPARSE_HEADER (21)
507 #define STAR_SPARSES_IN_HEADER (4)
508 #define STAR_SPARSES_IN_EXT_HEADER (21)
509
510
511 /* ----------------------------------------
512 * The various tar header formats
513 *
 * Note that in all the tar, posix and pax formats the first 257 bytes are
 * the same. In all posix and almost compliant formats the first
 * 345 bytes are the same (except GNU 89). It is abuse of the 155
 * bytes of the prefix that makes star and gnu truly not posix compliant.
 * Sun used the 12 bytes after the prefix so is still posix compliant.
519 */
520 typedef struct tar_v7_header
521 {
522 /* type member name and offset */
523 tar_file_name name; /* 0 */
524 tar_file_mode mode; /* 100 */
525 tar_id uid; /* 108 */
526 tar_id gid; /* 116 */
527 tar_size size; /* 124 */
528 tar_time mtime; /* 136 */
529 tar_csum csum; /* 148 */
530 tar_link link; /* 156 */
531 tar_file_name linkname; /* 157 */
532 /* end of header // 257 */
533 } tar_v7_header;
534
535 typedef struct tar_posix_header
536 {
537 /* type member name and offset */
538 tar_file_name name; /* 0 */
539 tar_file_mode mode; /* 100 */
540 tar_id uid; /* 108 */
541 tar_id gid; /* 116 */
542 tar_size size; /* 124 */
543 tar_time mtime; /* 136 */
544 tar_csum csum; /* 148 */
545 tar_link link; /* 156 */
546 tar_file_name linkname; /* 157 */
547 tar_magic magic; /* 257 */
548 tar_version version; /* 263 */
549 tar_strname uname; /* 265 */
550 tar_strname gname; /* 297 */
551 tar_dev devmajor; /* 329 */
552 tar_dev devminor; /* 337 */
553 tar_prefix prefix; /* 345 */
554 /* end of header // 500 */
555 } tar_posix_header, tar_pax_header;
556
557 typedef struct tar_sun_header
558 {
559 /* type member name and offset */
560 tar_file_name name; /* 0 */
561 tar_file_mode mode; /* 100 */
562 tar_id uid; /* 108 */
563 tar_id gid; /* 116 */
564 tar_size size; /* 124 */
565 tar_time mtime; /* 136 */
566 tar_csum csum; /* 148 */
567 tar_link link; /* 156 */
568 tar_file_name linkname; /* 157 */
569 tar_magic magic; /* 257 */
570 tar_version version; /* 263 */
571 tar_strname uname; /* 265 */
572 tar_strname gname; /* 297 */
573 tar_dev devmajor; /* 329 */
574 tar_dev devminor; /* 337 */
575 tar_prefix prefix; /* 345 */
576 sun_extnum extnum; /* 500 non-conformant */
577 sun_extnum extcount; /* 501 non-conformant */
578 sun_fullsize fullsize; /* 502 non-conformant */
579 /* end of header // 512 non-conformant */
580 } tar_sun_header;
581
582 typedef struct tar_star_85_header
583 {
584 /* type member name and offset */
585 tar_file_name name; /* 0 */
586 tar_file_mode mode; /* 100 */
587 tar_id uid; /* 108 */
588 tar_id gid; /* 116 */
589 tar_size size; /* 124 */
590 tar_time mtime; /* 136 */
591 tar_csum csum; /* 148 */
592 tar_link link; /* 156 */
593 tar_file_name linkname; /* 157 */
594 star85_version starversion; /* 257 non-conformant */
595 star85_filetype starfiletype; /* 258 internal type of file? non-conformant */
596 star85_type startype; /* 266 type of file (UNIX)? non-conformant */
597 star85_rdev rdev; /* 278 non-conformant */
598 tar_time atime; /* 290 non-conformant */
599 tar_time ctime; /* 302 non-conformant */
600 star85_uname uname; /* 314 non-conformant */
601 star85_gname gname; /* 330 non-conformant */
602 tar_prefix prefix; /* 345 non-conformant */
603 char ___fill0[8]; /* 500 non-conformant */
604 star_magic xmagic; /* 508 non-conformant */
605 /* end of header // 512 non-conformant */
606 } tar_star_85_header;
607 typedef struct tar_star_94_header
608 {
609 /* type member name and offset */
610 tar_file_name name; /* 0 */
611 tar_file_mode mode; /* 100 */
612 tar_id uid; /* 108 */
613 tar_id gid; /* 116 */
614 tar_size size; /* 124 */
615 tar_time mtime; /* 136 */
616 tar_csum csum; /* 148 */
617 tar_link link; /* 156 */
618 tar_file_name linkname; /* 157 */
619 tar_magic magic; /* 257 */
620 tar_version version; /* 263 */
621 tar_strname uname; /* 265 */
622 tar_strname gname; /* 297 */
623 tar_dev devmajor; /* 329 */
624 tar_dev devminor; /* 337 */
625 nstar_prefix prefix; /* 345 */
626 char ___fill0; /* 346 */
627 char ___fill1[8]; /* 347 */
628 tar_isextended isextended; /* 355 non-conformant */
629 tar_sparse sparse[STAR_SPARSES_IN_HEADER]; /* 356 non-conformant */
630 tar_size realsize; /* 452 non-conformant */
631 tar_size offset; /* 464 non-conformant */
632 tar_time atime; /* 476 non-conformant */
633 tar_time ctime; /* 488 non-conformant */
634 char ___fill2[8]; /* 500 */
635 star_magic xmagic; /* 508 non-conformant */
636 /* end of header // 512 */
637 } tar_star_94_header;
638
639 typedef struct tar_gnu_89_header
640 {
641 /* type member name and offset */
642 tar_file_name name; /* 0 */
643 tar_file_mode mode; /* 100 */
644 tar_id uid; /* 108 */
645 tar_id gid; /* 116 */
646 tar_size size; /* 124 */
647 tar_time mtime; /* 136 */
648 tar_csum csum; /* 148 */
649 tar_link link; /* 156 */
650 tar_file_name linkname; /* 157 */
651 gnu89_magic magic; /* 257 non-conformant */
652 tar_strname uname; /* 265 */
653 tar_strname gname; /* 297 */
654 tar_dev devmajor; /* 329 */
655 tar_dev devminor; /* 337 */
656 tar_time atime; /* 345 non-conformant */
657 tar_time ctime; /* 357 non-conformant */
658 tar_size offset; /* 369 non-conformant */
659 gnu89_longnames longnames; /* 381 non-conformant */
660 char ___fill0[1]; /* 385 */
661 tar_sparse sparse [GNU_SPARSES_IN_OLD_HEADER];/* 386 optional sparse */
662 tar_isextended isextended; /* 482 non-conformant */
663 tar_size realsize; /* 483 non-conformant */
664 /* end of header // 495 */
665 } tar_gnu_89_header;
666
667 typedef struct tar_gnu_99_header
668 {
669 /* type member name and offset */
670 tar_file_name name; /* 0 */
671 tar_file_mode mode; /* 100 */
672 tar_id uid; /* 108 */
673 tar_id gid; /* 116 */
674 tar_size size; /* 124 */
675 tar_time mtime; /* 136 */
676 tar_csum csum; /* 148 */
677 tar_link link; /* 156 */
678 tar_file_name linkname; /* 157 */
679 gnu89_magic magic; /* 257 non-conformant */
680 tar_strname uname; /* 265 */
681 tar_strname gname; /* 297 */
682 tar_dev devmajor; /* 329 */
683 tar_dev devminor; /* 337 */
684 tar_time atime; /* 345 non-conformant */
685 tar_time ctime; /* 357 non-conformant */
686 tar_size offset; /* 369 non-conformant */
687 gnu89_longnames longnames; /* 381 non-conformant */
688 char ___fill0[1]; /* 385 */
689 tar_sparse sparse [GNU_SPARSES_IN_OLD_HEADER];/* 386 non-conformant optional sparse */
690 tar_isextended isextended; /* 482 non-conformant */
691 tar_size realsize; /* 483 non-conformant */
692 /* end of header // 495 */
693 } tar_gnu_99_header;
694
695 typedef struct tar_sparse_header
696 {
697 /* type member name and offset */
698 tar_sparse sparse [STAR_SPARSES_IN_EXT_HEADER];/* 0 */
699 tar_isextended isextended; /* 504 */
700 } tar_sparse_header; /* 505 end of header */
701
/* -----
 * cpio header in the portable ASCII (odc) layout; all fields are ASCII
 * octal text with no terminating NUL.
 *
 * The file size field is 11 bytes wide in that layout, which puts the
 * start of the name / file data at offset 76.
 */
typedef struct tar_cpio_header
{
    /* type member name and offset */
    char magic [6];     /* 0 must be "070707" */
#define TAR_CPIO_MAGIC_CONST ("070707")
    char dev [6];       /* 6 (dev,ino) is unique for each file in archive */
    char ino [6];       /* 12 see dev */
    char mode [6];      /* 18 */
    char uid [6];       /* 24 */
    char gif [6];       /* 30 NOTE(review): occupies the group id slot; the name
                         * looks like a typo for gid but is kept so that any
                         * existing reference still compiles */
    char nlink [6];     /* 36 */
    char rdev [6];      /* 42 */
    char mtime [11];    /* 48 */
    char namesize [6];  /* 59 */
    char filesize [11]; /* 65 eleven octal digits, not six */
    char buff [1];      /* 76 name and file data */
} tar_cpio_header;
719
720 typedef union tar_header
721 {
722 /* ----------
723 * The TAR header is to be zero filled by definition but some tar
724 * programs use ' ' instead.
725 *
726 * accept zeros ('0'), spaces (' ') or NULs (0x00) as equivalent where reasonable
727 *
728 * This name refers to the entire header as a single undifferentiated
729 * sequence of bytes.
730 */
731 tar_raw_block raw; /* tar block as an array of bytes: used for 0 blocks here */
732 tar_cpio_header cpio; /* cpio not really tar */
733 tar_v7_header tar; /* classic header back to the beginnings of memory */
734 tar_posix_header posix; /* ustar or Posix 1003.1 header */
735 tar_sun_header suntar; /* Sun Microsystems tar header */
736 tar_star_85_header star_85; /* star header from 1985 (pre-Posix) */
737 tar_star_94_header star_94; /* star header from 1994 (post-Posix) */
738 tar_gnu_89_header gnu_89; /* gnu header from circa 1989 (post-posix but broken) */
739 tar_gnu_99_header gnu_99; /* gnu header from ???? */
740 tar_sparse_header sparse; /* star / gnu extended sparses header */
741 } tar_header;
742
743
744 /* ======================================================================
745 * local module-wide function like macros
746 */
/* -----
 * OFFSET_OF is the count of bytes between the base of a structure and
 * a particular member of that structure.
 *
 * Guarded so that a definition already in effect is reused rather than
 * redefined.
 */
#ifndef OFFSET_OF
#define OFFSET_OF(structure,member) ((size_t)((&(((structure*)0)->member))-(0)))
#endif
752
753
754 /* ======================================================================
755 * compilation unit local functions
756 */
757
758 /* ======================================================================
759 * return the RFC 2045 base 64 value for a byte character
760 * -1 for any out of range
761 *
762 * base 64 uses A-Z as 0-25, a-z as 26-51, 0-9 as 52-61, + as 62 and / as 63
763 */
/* -----
 * Map one byte to its RFC 2045 base 64 digit value.
 *
 * byte     the character code to decode
 *
 * Returns 0-63 for a base 64 digit and -1 for every other byte.
 * Character codes are spelled out in hex so the mapping does not depend
 * on the character set of the compiler.
 */
static int64_t decode_base64char (uint8_t byte)
{
    /* 'A' - 'Z' : 0 - 25 */
    if ((byte >= 0x41) && (byte <= 0x5A))
        return byte - 0x41;

    /* 'a' - 'z' : 26 - 51 */
    if ((byte >= 0x61) && (byte <= 0x7A))
        return 26 + (byte - 0x61);

    /* '0' - '9' : 52 - 61 */
    if ((byte >= 0x30) && (byte <= 0x39))
        return 52 + (byte - 0x30);

    /* '+' : 62 */
    if (byte == 0x2B)
        return 62;

    /* '/' : 63 */
    if (byte == 0x2F)
        return 63;

    return -1;
}
794
795
796 /* ======================================================================
797 * return the RFC 2045 base 64 value of a string at <str> of length <len>
798 *
799 * Interpreted the specification to mean that you simply ignore any out
800 * of range characters. They are not treated as bytes of 0 as that would
801 * mean a shift of earlier bytes.
802 *
803 * Each character is 6 bits of the final value.
804 */
/* -----
 * Decode <len> bytes at <str> as a big endian RFC 2045 base 64 number.
 *
 * str      the characters to decode; no terminator is needed
 * len      count of bytes to look at
 *
 * Each base 64 digit contributes 6 bits. Bytes that are not base 64 digits
 * are skipped without shifting the value. Returns 0 when no digit is found.
 *
 * The value is built in an unsigned accumulator because a field with more
 * than ten digits shifts bits out of the top, which is undefined for a
 * signed left shift; excess high bits are simply discarded.
 */
static int64_t decode_base64string (const uint8_t* str, size_t len)
{
    uint64_t accum = 0;
    size_t ix;

    for (ix = 0; ix < len; ++ix)
    {
        const int64_t digit = decode_base64char (str[ix]);

        if (digit < 0)
            continue;           /* not a base 64 digit: ignore it */

        accum = (accum << 6) | (uint64_t) digit;
    }
    return (int64_t) accum;
}
821
822
823 /* ======================================================================
824 * tar_strtoll
825 *
826 * This function will convert a string in a tar header into a 64 signed
827 * integer.
828 *
829 * The original tar header used just octal numbers in ascii in fixed length
830 * fields. As normal in the computer software world these "obviously plenty
831 * big" fields became way too small.
832 *
833 * GNU tar invented two approaches to make these numeric fields "bigger" but
834 * only prolonged the pain by squeezing the new numbers into the same fields.
835 *
836 * In the tar v7 and ustar based tar headers the fields are:
837 * Name Size Octal Range Interpretation
838 * mode 8 0-2097151 bit flags
839 * uid 8 0-2097151
840 * gid 8 0-2097151
841 * size 12 0-8589934591 up to 8 Giga-Byte files
842 * mtime 12 0-8589934591 1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
843 *
844 * atime 12 0-8589934591 1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
845 * ctime 12 0-8589934591 1970/01/01 00:00:00 GMT-2242/03/16 12:56:31 GMT
846 *
847 * GNU's first and already obsolete approach was to put Mime base-64 numbers
848 * With these the first byte is '+' or '-' to signal it isn't octal ASCII.
849 *
850 * GNU's second approach is base-256 which is a big endian binary string of lengths other
851 * than the 1, 2, 4 or 8 bytes of the standard integer types. In this approach the first
852 * byte is either 0x80 for a positive number or 0xFF for a negative number.
853 */
854
tar_strtoll(const uint8_t * str,size_t len,bool silent)855 static int64_t tar_strtoll ( const uint8_t * str, size_t len, bool silent )
856 {
857 int64_t result = 0;
858 bool negative = false;
859 uint8_t temp_buff[24]; /* long enough to hold all octal bytes for 64 bit numbers */
860
861 /* -----
862 * force a NUL in case the source doesn't have one; this is for
863 * strtoll() on ASCII Octal
864 */
865 if ( len >= sizeof temp_buff )
866 len = sizeof temp_buff - 1;
867 memmove (temp_buff, str, len);
868 temp_buff[ len ] = 0x00;
869
870 /* -----
871 * Most are going to be simple ASCII octal using '0'-'7' with NUL terminator
872 * leading 0 is not required but of course is accepted to match tar
873 * header specifications
874 */
875 if (((temp_buff[0] >= '0')&&(temp_buff[0] <= '7'))||(temp_buff[0] == ' '))
876 {
877 /* use stdlib strtoll - longest expected goes a few bits into the upper longword */
878 result = strtoi64((char*)temp_buff,NULL,8);
879 }
880 /* -----
881 * "base-256" well that is "binary" big endian of some length
882 *
883 * For fields longer than eight bytes upper bytes will shift out of
884 * significance into the bit bucket.
885 */
886 else if ((temp_buff[0] == 0x80)||(temp_buff[0] == 0xFF)) /* from GNU tar */
887 {
888 unsigned int ix;
889 negative = (bool)((temp_buff[0] == 0xFF) ? true : false);
890 result = temp_buff[0] & 0x7F; /* toss first flag bit */
891 for (ix = 1; ix < len; ++ix)
892 {
893 result <<= 8; /* result *= 256; */
894 result += temp_buff[ix];
895 }
896 if (negative)
897 result = -result;
898 }
899 /* -----
900 * "base-64" an already dumped idea from GNU tar
901 */
902 else if ((temp_buff[0] == '+')||(temp_buff[0] == '-')) /* from GNU tar */
903 {
904 int64_t temp;
905 negative = (bool)((temp_buff[0] == '-') ? true : false);
906 temp = decode_base64string(temp_buff+1,len-1);
907 /* potential overflow */
908 result = negative ? -temp : temp;
909 }
910
911 /* -----
912 * look for an empty field of all NUL
913 */
914 else if (temp_buff[0] == 0x00)
915 {
916 unsigned int ix;
917 for (ix = 1; ix < len; ++ix)
918 {
919 if (temp_buff[ix] != 0x00)
920 goto fail;
921 }
922 }
923 /* -----
924 * no idea what it is then
925 */
926 else
927 {
928 fail:
929 result = 0; /* as good a guess as any */
930 TAR_DEBUG (("%s: unknown integer storage type %c%c%c%c%c%c%c%c\n",
931 temp_buff[0],temp_buff[1],temp_buff[2],temp_buff[3],
932 temp_buff[4],temp_buff[5],temp_buff[6],temp_buff[7]));
933 if ( ! silent ) {
934 PLOGMSG (klogErr, (klogErr, "unknown integer storage type "
935 "$(B0)$(B1)$(B2)$(B3)$(B4)$(B5)$(B6)$(B7)",
936 "B0=%c,B1=%c,B2=%c,B3=%c,B4=%c,B5=%c,B6=%c,B7=%c",
937 temp_buff[0],temp_buff[1],temp_buff[2],temp_buff[3],
938 temp_buff[4],temp_buff[5],temp_buff[6],temp_buff[7]));
939 }
940 }
941 return result;
942 }
943
944 /* ======================================================================
945 * tar_header_type
946 * determine most probable tar header block type
947 */
what_header_type(const tar_header * header)948 static tar_header_type what_header_type(const tar_header* header)
949 {
950 /* -----
951 * we'll assume its bad until we find a better guess
952 */
953 tar_header_type type = TAR_UNDEFINED;
954
955 /* -----
956 * look for a cpio header though we aren't expecting to support it quite yet
957 */
958 if (strncmp(header->cpio.magic, TAR_CPIO_MAGIC_CONST, sizeof(header->cpio.magic)) == 0)
959 {
960 type = TAR_CPIO;
961 }
962 /* -----
963 * look for extended headers of some type
964 *
965 * look for posix based extensions as hopefully most likely
966 */
967 else if (strcmp(header->posix.magic, POSIX_MAGIC_CONST) == 0)
968 {
969 /* -----
970 * we have a post posix standard tar header but we aren't done yet
971 *
972 * First we look for star extensions to the header
973 */
974 if (strcmp(header->star_94.xmagic, STAR_MAGIC_CONST) == 0)
975 {
976
977 /* -----
978 * we have either an old or new star archive
979 */
980 if (strcmp(header->star_94.magic, POSIX_MAGIC_CONST) == 0)
981 {
982 /* -----
983 * we have a new star type archive
984 */
985 type = TAR_STAR_94;
986 }
987 else
988 {
989 /* -----
990 * we have a old star type archive
991 */
992 type = TAR_STAR_85;
993 }
994 }
995 /* -----
996 * Next we look for SunOS5 extensions to the header
997 */
998 else if (header->suntar.extnum &&
999 header->suntar.extcount &&
1000 header->suntar.fullsize[0])
1001 {
1002 type = TAR_SUN;
1003 }
1004 /* -----
1005 * Else it seems to be the generic Posix tar header
1006 */
1007 else
1008 {
1009 type = TAR_POSIX;
1010 }
1011 }
1012 /* -----
1013 * Not a proper magic for a POSIX ustar header so look for the improper magic of GNU's tar
1014 */
1015 else if (strcmp(header->star_94.magic, GNU_89_MAGIC_CONST) == 0)
1016 {
1017 /* we have an old GNU not really posix compliant archive type */
1018 type = TAR_GNU_89;
1019 }
1020 /* -----
1021 * If there is anything else there we don't know what to do with it
1022 */
1023 else if (header->posix.magic[0] != 0)
1024 {
1025 /* -----
1026 * Log an anomaly showing what we found in the magic field
1027 */
1028 char temp_str [9];
1029 memset(temp_str,0,sizeof(temp_str));
1030 string_copy(temp_str, sizeof(temp_str), header->posix.magic, 8);
1031 TAR_DEBUG(("%s: unknown header type magic [%s]\n",
1032 __func__, temp_str));
1033 type = TAR_UNDEFINED;
1034 }
1035 /* -----
1036 * not knowing what we have lets see if it is a block of all zeroes
1037 * knowing we can ignore it
1038 */
1039 else
1040 {
1041 /* -----
1042 * If this were a legitimate V7 (well just old style)
1043 * tar header the first character would be non-NUL
1044 * and the link would be on eof the old types
1045 */
1046 if ((header->raw[0]>= ' ')&&(header->raw[0] <= '~'))
1047 {
1048 switch (header->tar.link)
1049 {
1050 case LINK_OLDNORMAL_FILE:
1051 case LINK_NORMAL_FILE:
1052 case LINK_HARD_LINK:
1053 case LINK_SYMBOLIC_LINK:
1054 case LINK_CHARACTER_SPECIAL:
1055 case LINK_BLOCK_SPECIAL:
1056 case LINK_DIRECTORY:
1057 case LINK_FIFO:
1058 case LINK_CONTIGUOUS_FILE:
1059 /* GNU TAR will do this to us */
1060 case LINK_NEXT_LONG_LINK:
1061 case LINK_NEXT_LONG_NAME:
1062 case LINK_GNU1989_LONG_NAMES:
1063 type = TAR_V7;
1064 break;
1065 default:
1066 break;
1067 }
1068 }
1069 else
1070 {
1071 /* -----
1072 * look for anything not zero
1073 */
1074 unsigned int ix;
1075
1076 for (ix = 0; ix < sizeof(tar_header); ++ix)
1077 {
1078 if (header->raw[ix])
1079 {
1080 /* -----
1081 * non-zero so quit looking
1082 */
1083 break;
1084 }
1085 }
1086 /* -----
1087 * if we got to the end we know they are all zero so say so
1088 */
1089 if (ix == sizeof(tar_header))
1090 {
1091 type = TAR_ZERO_BLOCK;
1092 }
1093 }
1094 }
1095 TAR_DEBUG (("%s: %s(%d)\n", __func__, get_type_string(type), type));
1096 return type;
1097 }
1098
1099
/* ----------
 * sparse_data
 *
 * One node of a singly linked queue of (offset, size) pairs collected
 * from sparse file headers.
 */
typedef struct sparse_data sparse_data;
struct sparse_data
{
    sparse_data * next;   /* following node, NULL at the end of the queue */
    uint64_t offset;      /* becomes the chunk's logical position */
    uint64_t size;        /* number of bytes in the chunk */
};
1106 /* ======================================================================
1107 */
1108
sparse_data_make(sparse_data ** new_item,uint64_t offset,uint64_t size)1109 static rc_t sparse_data_make (sparse_data ** new_item, uint64_t offset, uint64_t size)
1110 {
1111 sparse_data * p;
1112
1113 p = malloc (sizeof (sparse_data));
1114 if (p != NULL)
1115 {
1116 p->offset = offset;
1117 p->size = size;
1118 p->next = NULL;
1119 *new_item = p;
1120 return 0;
1121 }
1122 return -1;
1123 }
1124
sparse_data_push(sparse_data ** q,uint64_t offset,uint64_t size)1125 static rc_t sparse_data_push(sparse_data ** q, uint64_t offset, uint64_t size)
1126 {
1127 if (q == NULL)
1128 return -1;
1129 if (*q == NULL)
1130 return sparse_data_make (q, offset, size);
1131 return sparse_data_push (&((*q)->next), offset, size);
1132 }
1133
sparse_data_pop(sparse_data ** q,sparse_data ** item)1134 static rc_t sparse_data_pop (sparse_data ** q, sparse_data **item)
1135 {
1136 if ((q == NULL) || (item == NULL))
1137 return -1;
1138
1139 *item = *q;
1140 *q = (*item)->next;
1141 (*item)->next = NULL;
1142 return 0;
1143 }
1144
sparse_data_kill(sparse_data ** q)1145 static rc_t sparse_data_kill (sparse_data ** q)
1146 {
1147 if (q == NULL)
1148 return -1;
1149 if (*q == NULL)
1150 return 0;
1151 if ((*q)->next != NULL)
1152 return sparse_data_kill(&(*q)->next);
1153
1154 free (*q);
1155 *q = NULL;
1156 return 0;
1157 }
1158
1159 /* ======================================================================
1160 * local module-wide variables (reduction in parameter pushing)
1161 *
1162 * Module shared variables; kinda like C++ class elements
1163 */
1164 typedef struct KTarState
1165 {
1166 /* TODO: optimize chunk/sparse lists by counting as pushed */
1167 const KFile * kfile; /* KFS reference to the specific file being parsed */
1168 const KMMap * kmmap; /* KFS memory mapping for a portion of that file */
1169 const void * map; /* where the tar file got put by mmap */
1170 sparse_data * sparse_q;
1171 KTocChunk * chunks; /* table of chunks: logical_position, source_position, size */
1172 KToc * toc; /* the Table of Contents we are building */
1173 size_t tar_length; /* how long is the tar file */
1174 size_t buffer_length; /* how long is the window into the buffer */
1175 uint64_t buffer_start; /* how far into the tar file is the buffer start */
1176 uint64_t buffer_limit; /* how far into the tar file is the buffer end */
1177 uint32_t num_chunks;
1178 /* -----
1179 * zero blocks are only supposed to pad out the end of a tar file to
1180 * match a larger block size. If we find something after a zero block
1181 * it is a file error of some type
1182 */
1183 bool found_zero_block;
1184 bool found_second_zero_block;
1185 } KTarState;
1186
make_chunk_list(KTarState * self,uint64_t file_offset)1187 static rc_t make_chunk_list(KTarState * self, uint64_t file_offset)
1188 {
1189 uint64_t count = self->num_chunks;
1190 uint64_t source_position = file_offset;
1191 uint64_t ix;
1192 sparse_data * psd;
1193 rc_t ret;
1194
1195 if (self->chunks)
1196 free (self->chunks); /* shouldn't happen */
1197 self->chunks = malloc( (size_t)( count * sizeof(KTocChunk) ) );
1198 if (self->chunks == NULL)
1199 return -1;
1200
1201
1202 for (ix = 0; ix < count; ++ix)
1203 {
1204 ret = sparse_data_pop (&self->sparse_q, &psd);
1205 if (ret != 0)
1206 return ret;
1207 self->chunks[ix].logical_position = psd->offset;
1208 self->chunks[ix].source_position = source_position;
1209 self->chunks[ix].size = psd->size;
1210 source_position += psd->size;
1211 sparse_data_kill(&psd);
1212 }
1213 return 0;
1214 }
1215
/* ----------
 * whack_chunk_list
 *
 * Release the chunk table and reset the chunk bookkeeping to "empty".
 */
static void whack_chunk_list(KTarState * self)
{
    KTocChunk * doomed = self->chunks;

    self->num_chunks = 0;
    self->chunks = NULL;
    free (doomed);
}
1222
1223 #if HANDLING_EXTENDED_HEADERS
1224 /* ======================================================================
1225 * mini class for handling pax/posix/ustar
1226 * extended headers and global extended headers
1227 *
 * Many of the values are included to get past range limits imposed by the ustar format,
 * in particular for string length and character set or shortish integral values. <keyword>
 * and <value> below are UTF-8.
1231 *
1232 * values in the header are string values written as with a 'printf' using the form
1233 * printf("%d %s=%s\n",<length>,<keyword>,<value>)
1234 *
1235 * <length> is described ambiguously as
1236 * "The <length> field shall be the decimal length of the extended header record in octets,
1237 * including the trailing <newline>."
1238 * So does that include the length of <length>? or not?
1239 *
1240 * The field <keyword> is allowed in a pax Extended Header include but are not limited to
1241 * atime time_t but with fractional seconds maybe
1242 * charset enumeration list
1243 * comment human readable comments
1244 * gid integer gid allowing numbers greater than ustar limit of 2097151 (07777777)
1245 * gname over rides xhdr gid and ustar gname and gid
1246 * linkpath
 * mtime time_t but with fractional seconds maybe
1248 * path
1249 * realtime.<ANY>
1250 * security.<ANY>
1251 * size
1252 * uid integer uid allowing numbers greater than ustar limit of 2097151 (07777777)
1253 * uname
1254 * Any other keywords desired can be included but might not be meaningful to many applications.
1255 * Keyword can have pretty much any character in it except '='.
1256 *
1257 * <value> is a UTF-8 string that ends with the '\n'.
1258 *
1259 *
1260 * charset is limited to (omit the quotation marks)
1261 * <value> Formal Standard
1262 * "ISO-IR 646 1990" ISO/IEC 646:1990
1263 * "ISO-IR 8859 1 1998" ISO/IEC 8859-1:1998
1264 * "ISO-IR 8859 2 1999" ISO/IEC 8859-2:1999
1265 * "ISO-IR 8859 3 1999" ISO/IEC 8859-3:1999
1266 * "ISO-IR 8859 4 1998" ISO/IEC 8859-4:1998
1267 * "ISO-IR 8859 5 1999" ISO/IEC 8859-5:1999
1268 * "ISO-IR 8859 6 1999" ISO/IEC 8859-6:1999
1269 * "ISO-IR 8859 7 1987" ISO/IEC 8859-7:1987
1270 * "ISO-IR 8859 8 1999" ISO/IEC 8859-8:1999
1271 * "ISO-IR 8859 9 1999" ISO/IEC 8859-9:1999
1272 * "ISO-IR 8859 10 1998" ISO/IEC 8859-10:1998
1273 * "ISO-IR 8859 13 1998" ISO/IEC 8859-13:1998
1274 * "ISO-IR 8859 14 1998" ISO/IEC 8859-14:1998
1275 * "ISO-IR 8859 15 1999" ISO/IEC 8859-15:1999
1276 * "ISO-IR 10646 2000" ISO/IEC 10646:2000
1277 * "ISO-IR 10646 2000 UTF-8" ISO/IEC 10646, UTF-8 encoding
1278 * "BINARY" None.
1279 */
/* ----------
 * pax_charset
 *
 * One value per charset name accepted in a pax extended header.  The
 * ordinals are the indexes of the matching names in pax_charset_strings,
 * so the order here must not change.
 */
typedef enum pax_charset
{
    PAX_CS_NOT_SPECIFIED             =  0,
    PAX_CS_ISO_IR_646_1990           =  1,
    PAX_CS_ISO_IR_8859_1_1998        =  2,
    PAX_CS_ISO_IR_8859_2_1999        =  3,
    PAX_CS_ISO_IR_8859_3_1999        =  4,
    PAX_CS_ISO_IR_8859_4_1998        =  5,
    PAX_CS_ISO_IR_8859_5_1999        =  6,
    PAX_CS_ISO_IR_8859_6_1999        =  7,
    PAX_CS_PAX_CS_ISO_IR_8859_7_1997 =  8,  /* name string is "ISO-IR 8859 7 1987" */
    PAX_CS_ISO_IR_8859_8_1999        =  9,
    PAX_CS_ISO_IR_8859_9_1999        = 10,
    PAX_CS_ISO_IR_8859_10_1998       = 11,
    PAX_CS_ISO_IR_8859_13_1998       = 12,
    PAX_CS_ISO_IR_8859_14_1998       = 13,
    PAX_CS_ISO_IR_8859_15_1998       = 14,  /* name string is "ISO-IR 8859 15 1999" */
    PAX_CS_ISO_IR_1064_2000          = 15,  /* name string is "ISO-IR 10646 2000" */
    PAX_CS_ISO_IR_1064_2000_UTF_8    = 16,  /* name string is "ISO-IR 10646 2000 UTF-8" */
    PAX_CS_BINARY                    = 17
} pax_charset;
1301
/* ----------
 * pax_charset_strings
 *
 * The charset names as they appear in a pax extended header, stored at
 * the index of the matching pax_charset ordinal.  A NULL entry ends the
 * table.
 */
static const char * pax_charset_strings[] =
{
    [ 0] = "Not Specified",
    [ 1] = "ISO-IR 646 1990",
    [ 2] = "ISO-IR 8859 1 1998",
    [ 3] = "ISO-IR 8859 2 1999",
    [ 4] = "ISO-IR 8859 3 1999",
    [ 5] = "ISO-IR 8859 4 1998",
    [ 6] = "ISO-IR 8859 5 1999",
    [ 7] = "ISO-IR 8859 6 1999",
    [ 8] = "ISO-IR 8859 7 1987",
    [ 9] = "ISO-IR 8859 8 1999",
    [10] = "ISO-IR 8859 9 1999",
    [11] = "ISO-IR 8859 10 1998",
    [12] = "ISO-IR 8859 13 1998",
    [13] = "ISO-IR 8859 14 1998",
    [14] = "ISO-IR 8859 15 1999",
    [15] = "ISO-IR 10646 2000",
    [16] = "ISO-IR 10646 2000 UTF-8",
    [17] = "BINARY",
    [18] = NULL   /* end of table marker */
};
1324
pax_xhdr_parse_charset_string(char * string)1325 static pax_charset pax_xhdr_parse_charset_string(char*string)
1326 {
1327 int ix; /* index */
1328 const char * ps; /* pointer to string */
1329
1330 for (ps = pax_charset_strings[ix= 0]; ps; ps = pax_charset_strings[++ix])
1331 if (strcmp(ps,string) == 0)
1332 return ix;
1333 return PAX_CS_NOT_SPECIFIED; /* matched none so set it to not specified */
1334 }
1335
1336 typedef struct pax_xheader pax_xheader;
1337 struct pax_xheader
1338 {
1339 char * path; /* supercedes .posix.name */
1340 char * linkpath; /* supercedes .posix.linkname */
1341 char * uname; /* user name supercedes .posix.uname uid and .posix.uid */
1342 char * gname; /* group name supercedes .posix.gname gid and .posix.gid */
1343 time_t atime; /* supercedes .posix.atime */
1344 time_t mtime; /* supercedes .posix.mtime */
1345 uid_t uid; /* supercedes .posix.uid */
1346 uint32_t gid; /* supercedes .posix.gname */
1347 uint64_t size; /* supercedes .posix.size */
1348 pax_charset charset; /* not supported by us? */
1349 };
1350
pax_xhdr_create(void)1351 static pax_xheader * pax_xhdr_create (void)
1352 {
1353 pax_xheader * pxv = malloc (sizeof(pax_xheader));
1354 memset (pxv,0,sizeof(pax_xheader));
1355 return pxv;
1356 }
1357
/* ----------
 * pax_xhdr_delete
 *
 * Release a pax_xheader together with the four strings it holds.
 * A NULL 'self' (e.g. from a failed pax_xhdr_create) is ignored.
 */
static void pax_xhdr_delete(pax_xheader * self)
{
    if (self == NULL)
        return;

    /* free() accepts NULL so unset members need no test */
    free (self->path);
    free (self->linkpath);
    free (self->uname);
    free (self->gname);
    free (self);
}
1370
pax_xhdr_set_general_string(char ** str,const char * val)1371 static rc_t pax_xhdr_set_general_string (char ** str, const char * val)
1372 {
1373 size_t len; /* how much memory */
1374 char * nl; /* point to any new line in source */
1375 rc_t rc = 0; /* return code; assume success */
1376
1377 if (*str) /* if already set, free the old value */
1378 free (*str);
1379 nl = strchr (val, '\n');
1380 if (nl)
1381 {
1382 len = nl - val + 1; /* +1 for NUL */
1383 }
1384 else
1385 {
1386 size_t size;
1387 len = string_measure(val, &size) + 1;
1388 }
1389 *str = malloc (len);
1390 if (*str)
1391 {
1392 string_copy(*str, len, val, len-1);
1393 (*str)[len-1] = '\0'; /* if it was a '\n' terminated value this is needed not worth the check if needed */
1394 }
1395 else
1396 {
1397 rc = RC(rcFS/*?*/,rcAllocating,0/*?*/,rcNoObj/*?*/,rcNull); /* well its not 0 */
1398 }
1399 return rc;
1400 }
1401
1402 /* ----------
1403 * new_path points to a string that ends with either a '\n' or a NUL
1404 */
pax_xhdr_set_path(pax_xheader * self,char * new_path)1405 LIB_EXPORT rc_t CC pax_xhdr_set_path(pax_xheader * self, char * new_path)
1406 {
1407 return pax_xhdr_set_general_string(&(self->path),new_path);
1408 }
1409
1410 /* ----------
1411 * new_path points to a string that ends with either a '\n' or a NUL
1412 */
pax_xhdr_set_linkpath(pax_xheader * self,char * new_path)1413 LIB_EXPORT rc_t CC pax_xhdr_set_linkpath(pax_xheader * self, char * new_path)
1414 {
1415 return pax_xhdr_set_general_string(&(self->linkpath),new_path);
1416 }
1417
1418 /* ----------
1419 * new_name points to a string that ends with either a '\n' or a NUL
1420 */
pax_xhdr_set_uname(pax_xheader * self,char * new_name)1421 LIB_EXPORT rc_t CC pax_xhdr_set_uname(pax_xheader * self, char * new_name)
1422 {
1423 return pax_xhdr_set_general_string(&(self->uname),new_name);
1424 }
1425
1426 /* ----------
1427 * new_name points to a string that ends with either a '\n' or a NUL
1428 */
pax_xhdr_set_gname(pax_xheader * self,char * new_name)1429 LIB_EXPORT rc_t CC pax_xhdr_set_gname(pax_xheader * self, char * new_name)
1430 {
1431 return pax_xhdr_set_general_string(&(self->gname),new_name);
1432 }
1433
pax_xhdr_get_general_string(char ** src,char ** dst,size_t max)1434 LIB_EXPORT bool CC pax_xhdr_get_general_string (char**src, char**dst, size_t max)
1435 {
1436 size_t size;
1437 if (string_measure(*src, &size) > max-1) /* fail if too big for target */
1438 {
1439 return false;
1440 }
1441 string_copy(*dst, max, *src, size);
1442 return true;
1443 }
1444
pax_xhdr_get_path(pax_xheader * self,char ** path,size_t max)1445 LIB_EXPORT rc_t CC pax_xhdr_get_path(pax_xheader * self, char ** path, size_t max)
1446 {
1447 return (pax_xhdr_get_general_string(&self->path,path,max))? 0 : ~0;
1448 }
1449
pax_xhdr_get_linkpath(pax_xheader * self,char ** path,size_t max)1450 LIB_EXPORT rc_t CC pax_xhdr_get_linkpath(pax_xheader * self, char ** path, size_t max)
1451 {
1452 return (pax_xhdr_get_general_string(&self->linkpath,path,max))? 0 : ~0;
1453 }
1454
pax_xhdr_get_uname(pax_xheader * self,char ** name,size_t max)1455 LIB_EXPORT rc_t CC pax_xhdr_get_uname(pax_xheader * self, char ** name, size_t max)
1456 {
1457 return (pax_xhdr_get_general_string(&self->uname,name,max))? 0 : ~0;
1458 }
1459
pax_xhdr_get_gname(pax_xheader * self,char ** name,size_t max)1460 LIB_EXPORT rc_t CC pax_xhdr_get_gname(pax_xheader * self, char ** name, size_t max)
1461 {
1462 return (pax_xhdr_get_general_string(&self->gname,name,max))? 0 : ~0;
1463 }
1464
1465 /* ----------
1466 * The mtime and atime <value> is defined as
1467 * The pax utility shall write an mtime record for each file in write
1468 * or copy modes if the file's modification time cannot be represented
1469 * exactly in the ustar header logical record described in ustar
1470 * Interchange Format. This can occur if the time is out of ustar range,
1471 * or if the file system of the underlying implementation supports
1472 * non-integer time granularities and the time is not an integer. All of
1473 * these time records shall be formatted as a decimal representation of
1474 * the time in seconds since the Epoch. If a period ( '.' ) decimal
1475 * point character is present, the digits to the right of the point shall
1476 * represent the units of a subsecond timing granularity, where the first
1477 * digit is tenths of a second and each subsequent digit is a tenth of
1478 * the previous digit. In read or copy mode, the pax utility shall
1479 * truncate the time of a file to the greatest value that is not greater
1480 * than the input header file time. In write or copy mode, the pax
1481 * utility shall output a time exactly if it can be represented exactly
1482 * as a decimal number, and otherwise shall generate only enough digits
1483 * so that the same time shall be recovered if the file is extracted on a
1484 * system whose underlying implementation supports the same time
1485 * granularity.
1486 *
1487 * So...
 * Looks like for our purposes we ignore anything after a possible decimal
 * point and just use the integer part of whatever is there, and not be
 * too worried about it since we aren't creating any of this, just reading it.
1491 */
pax_xhdr_set_general_time(const char * ts,time_t * tt)1492 static rc_t pax_xhdr_set_general_time (const char * ts, time_t * tt)
1493 {
1494 uint64_t temp = strtou64(ts,NULL,10); /* tosses decimal part for us */
1495
1496 if (temp)
1497 *tt = (time_t)temp;
1498 return temp ? 0 : ~0;
1499 }
1500
1501 /* ======================================================================
1502 * This is the data accumulated for each entry in a tar file.
1503 */
1504 static struct tar_entry_data
1505 {
1506 /* -----
1507 * We are not using linux (or other O/S specific) type here because the ranges
1508 * for the system creating the archive might have larger types than the system
1509 * we are running on. We'll leave it to the outside caller of this program to
1510 * "make it fit".
1511 */
1512
1513
1514 entry_type type;
1515
1516 char * path;
1517 char * link;
1518
1519 uint64_t size;
1520 uint64_t offset;
1521
1522 mode_t mode;
1523
1524 char * uname;
1525 uid_t uid;
1526
1527 char * gname;
1528 uint32_t gid;
1529
1530 time_t mtime;
1531 time_t atime;
1532 time_t ctime;
1533
1534 } tar_entry_data;
1535 #endif
1536 /* ======================================================================
1537 * Mapping a memory region
1538 *
1539 * This will have two versions:
1540 * initially for development it will make raw Linux O/S calls
1541 * quickly it will be ported to use KFS structures instead
1542 */
/* -----
 * Use a map window size of a gigabyte (2^30 bytes).
 * Life would be really bad if a single header was within pagesize of that gigabyte.
 * This assumes that a gigabyte is a multiple of system pagesize - a very safe bet.
 */
#define MAP_WINDOW_SIZE (1 << 30)
1549
1550 /* ----------
1551 * map_tar_file
1552 *
1553 * This function uses existing members of the ktar state structure and a single parameter
1554 * to decide what part of a file to map.
1555 *
1556 * IN: offset: an uint64_t type of where the starting point with in the file the memory mapped region
1557 * should start
 * SIDE: side effects are a freeing of any existing memory mapped region of a file and,
 * if successful in mapping a region, it will have mapped that region and put the real
 * parameters describing that region in the private memory block
1561 */
1562 static
map_tar_file(KTarState * self,uint64_t requested_offset)1563 rc_t map_tar_file (KTarState * self, uint64_t requested_offset)
1564 {
1565 rc_t ret = 0;
1566
1567 /* -----
1568 * If we are mapping for the first time
1569 */
1570 if (self->kmmap == NULL)
1571 {
1572 ret = KMMapMakeMaxRead(&self->kmmap,self->kfile);
1573 if (ret)
1574 return ret;
1575 ret = KMMapSize(self->kmmap, &self->buffer_length);
1576 if (ret)
1577 return ret;
1578 }
1579 /* -----
1580 * if we are remapping the region
1581 */
1582 else
1583 {
1584 ret = KMMapReposition(self->kmmap, requested_offset, &self->buffer_length);
1585 if (ret)
1586 return ret;
1587 }
1588 ret = KMMapAddrRead(self->kmmap, &self->map);
1589 if (ret)
1590 return ret;
1591 ret = KMMapPosition(self->kmmap, &self->buffer_start);
1592 if (ret)
1593 return ret;
1594 ret = KMMapSize(self->kmmap, &self->buffer_length);
1595 if (ret)
1596 return ret;
1597 self->buffer_limit = self->buffer_start + self->buffer_length;
1598
1599 return ret;
1600 }
1601
1602 static
release_map(KTarState * self)1603 rc_t release_map (KTarState * self)
1604 {
1605 KMMapRelease (self->kmmap);
1606 self->kmmap = NULL;
1607 return 0;
1608 }
1609
1610 /* ======================================================================
1611 *
1612 * offset is the byte position within the tar file
1613 * hard_limit is the byte position with in the tar file that is not mapped
1614 *
1615 * This is the ugliest function/method in the whole module. The extensions
1616 * to the tar header are not done in a consistent manner so convolutions
1617 * have to be made to support all manner of extensions.
1618 */
1619 static
process_one_entry(KTarState * self,uint64_t offset,uint64_t hard_limit,bool silent)1620 uint64_t process_one_entry (KTarState * self, uint64_t offset, uint64_t hard_limit, bool silent)
1621 {
1622 /* -----
1623 * full_path will store the full path of an element which can be longer than
1624 * will fit in the standard tar header. This will also usually be an output
1625 * to the consumer.
1626 */
1627 char full_path [ 4096 ];
1628
1629 /* -----
1630 * full_path will store the full link (if any) of an element which can be longer than
1631 * will fit in the standard tar header. This will also usually be an output
1632 * to the consumer.
1633 */
1634 char full_link [ 4096 ];
1635
1636 /* -----
1637 * data_offset will index into the tar file where the data portion for the current header
1638 * lies. (Usually at the address of the header + 512) This will also usually be an output
1639 * to the consumer.
1640 */
1641 uint64_t data_offset = 0;
1642
1643 /* -----
1644 * data_size will hold the size of the data portion for the current header. This will also
1645 * usually be an output to the consumer.
1646 */
1647 uint64_t data_size = 0;
1648
1649 /* -----
1650 * virtual_data_size will hold the virtual size of a sparse file. This will also
1651 * usually be an output to the consumer.
1652 */
1653 uint64_t virtual_data_size = 0;
1654 /* -----
1655 * type is an enumerated type that described the format of the tar header. Its initialized
1656 * to an invalid header type.
1657 */
1658 tar_header_type type = TAR_UNDEFINED;
1659
1660 /* -----
1661 * link is an enumerated type that described the contents of this element.
1662 */
1663 tar_link link = LINK_OLDNORMAL_FILE;
1664
1665 #if _DEBUGGING && 0
1666 /* We are not using these components of the tar header block at this point
1667 * but with a debug build it doesn't hurt to verify we fully understand the
1668 * header.
1669 */
1670 uid_t uid = 0;
1671 uint32_t gid = 0;
1672 #endif
1673 time_t mtime = 0;
1674 mode_t mode = 0;
1675
1676 /* -----
1677 * current_offset is the offset of the current header which might be a different header
1678 * than the one we started with.
1679 */
1680 uint64_t current_offset = offset;
1681
1682 /* -----
1683 * we can access the header either as a sequence of bytes or as a
1684 * header structure. That header can further be accessed as one of
1685 * several more specific types of header.
1686 */
1687 union
1688 {
1689 const uint8_t * b;
1690 const tar_header * h;
1691 } current_header;
1692
1693 /* -----
1694 * done is a flag as to when we are finished processing a tar element
1695 * that might have multiple headers and other elements
1696 */
1697 bool done = false;
1698
1699 /* -----
1700 * gnu_sparse is a flag that we are currently inside a sparse file
1701 * with in the TAR and have more sparse header elements to parse
1702 */
1703 bool gnu_sparse = false;
1704
1705 TAR_FUNC_ENTRY();
1706
1707 /* -----
1708 * check right away to make sure we are still in our memory mapped window
1709 */
1710 if (offset > self->buffer_limit)
1711 {
1712 /* -----
1713 * if not bail and say we couldn't consume any bytes
1714 */
1715 return 0;
1716 }
1717
1718 /* -----
1719 * clear the full name and link name entries
1720 */
1721 memset (full_path, 0, sizeof(full_path));
1722 memset (full_link, 0, sizeof(full_link));
1723
1724 /* -----
1725 * set the header at the current TAR block.
1726 * That is the map starts at offset buffer_start and we are at
1727 * current_offset into the file so we take the map as a pointer
1728 * and add to it the difference between our current offset and the map's
1729 * initial offset (first header is at map + 0 - 0)
1730 */
1731 current_header.b = (const uint8_t *)self->map + current_offset - self->buffer_start;
1732 /* -----
1733 * start processing
1734 */
1735 do
1736 {
1737 TAR_DEBUG (( "Processing one block at (%lu), it is sparse? %s\n",
1738 current_offset, get_bool_string(gnu_sparse)));
1739
1740 /* -----
1741 * what we will do depends upon the type of this block
1742 */
1743 type = gnu_sparse ? TAR_SPARSE : what_header_type(current_header.h);
1744 if (self->found_zero_block)
1745 {
1746 if (self->found_second_zero_block == true)
1747 {
1748 type = TAR_ZERO_BLOCK; /* skip anyway */
1749 }
1750 else if (type == TAR_ZERO_BLOCK)
1751 {
1752 self->found_second_zero_block = true;
1753 }
1754 else
1755 {
1756 if ( ! silent )
1757 {
1758 PLOGMSG(klogErr,(klogErr,
1759 "Found Extra Header after a block of zeros $(O)",
1760 PLOG_U64(O), offset));
1761 }
1762 return -1;
1763 }
1764 }
1765 switch (type)
1766 {
1767 case TAR_ZERO_BLOCK:
1768 {
1769 self->found_zero_block = true;
1770 done = true;
1771 break;
1772 }
1773 case TAR_SPARSE:
1774 {
1775 /* -----
1776 * If there is an extension header we'll have different work to do
1777 */
1778 #if 0
1779 if (current_header.h->sparse.isextended)
1780 {
1781 LOGMSG (klogDebug3,"isextended true");
1782 }
1783 else
1784 {
1785 LOGMSG (klogDebug3,"isextended false");
1786 }
1787 #endif
1788 {
1789 int64_t ix;
1790 uint64_t of;
1791 uint64_t sz;
1792
1793 for (ix = 0; ix< GNU_SPARSES_IN_EXTRA_HEADER; ++ix)
1794 {
1795 rc_t ret;
1796 of = tar_strtoll(
1797 (const uint8_t*)current_header.h->sparse.sparse[ix].offset,
1798 TAR_SIZE_LEN, silent);
1799 sz = tar_strtoll(
1800 (const uint8_t*)current_header.h->sparse.sparse[ix].num_bytes,
1801 TAR_SIZE_LEN, silent);
1802 if (sz == 0)
1803 {
1804 break;
1805 }
1806 #if 0
1807 PLOGMSG ((klogDebug4,
1808 "SPARSE ext: $(count): $(offset) $(size)",
1809 PLOG_3(PLOG_I64(count),PLOG_X64(offset),PLOG_X64(size)),
1810 ix,
1811 of,
1812 sz));
1813 #endif
1814
1815 ret = sparse_data_push (&self->sparse_q, of, sz);
1816 if (ret)
1817 {
1818 sparse_data_kill(&self->sparse_q);
1819 return -1;
1820 }
1821 ++self->num_chunks;
1822 }
1823 }
1824 }
1825 break;
1826
1827 #if 0
1828 /*Same as the default case */
1829 case TAR_CPIO:
1830 PLOGMSG ((klogErr,
1831 "unsupported header type $(type) $(string)",
1832 "type=%d,string=%s",
1833 type, get_type_string(type)));
1834 done = true;
1835 break;
1836 #endif
1837
1838 default:
1839 if ( ! silent )
1840 {
1841 PLOGMSG (klogErr,(klogErr,
1842 "unsupported header type $(type) $(string)",
1843 "type=%d,string=%s",
1844 type, get_type_string(type)));
1845 }
1846 done = true;
1847 break;
1848
1849 case TAR_GNU_89:
1850 case TAR_V7:
1851 case TAR_POSIX:
1852 /* -----
1853 * almost anything we do will depend upon the size of the data for this block
1854 *
1855 * this will be wrong if we ever support cpio...
1856 */
1857 data_size = (uint64_t) ( tar_strtoll
1858 ( (uint8_t*)current_header.h->tar.size,TAR_SIZE_LEN, silent ) );
1859 #if _DEBUGGING && 0
1860 uid = ( tar_strtoll
1861 ( (uint8_t*)current_header.h->tar.uid,TAR_ID_LEN, silent ) );
1862 gid = ( tar_strtoll
1863 ( (uint8_t*)current_header.h->tar.gid,TAR_ID_LEN, silent ) );
1864 #endif
1865 mtime = ( tar_strtoll
1866 ( (uint8_t*)current_header.h->tar.mtime,TAR_TIME_LEN, silent) );
1867 mode = (uint32_t) ( tar_strtoll
1868 ( (uint8_t*)current_header.h->tar.mode,TAR_MODE_LEN, silent) );
1869 }
1870
1871 /* -----
1872 * Sometimes we are done just by identifying the header type.
1873 * If so we break the loop here.
1874 */
1875 if (done)
1876 {
1877 /* -----
1878 * point at the next header block
1879 * Add the size of the header itself plus enough block sizes of data to cover
1880 * any associated data.
1881 */
1882 size_t header_plus_data_block_size = (1+BLOCKS_FOR_BYTES(data_size))*TAR_BLOCK_SIZE;
1883 current_offset += (uint64_t)header_plus_data_block_size;
1884 current_header.b += header_plus_data_block_size;
1885 break;
1886 }
1887
1888 /* -----
1889 * several extensions to USTAR/TAR format headers involve
1890 * prepending another header type to give a name longer
1891 * than will fit in tthe header itself. If we had one of
1892 * those use that name. But if the full path has not been
1893 * set use the path from this header.
1894 */
1895 if (full_path[0] == 0) /* if full_path wasn't filled in by an 'L' long name */
1896 {
1897 size_t len, size;
1898 /* -----
1899 * if there is a prefix (POSIX style) use it
1900 * copy the prefix and then concatenate the name field
1901 */
1902 if (current_header.h->posix.prefix[0])
1903 {
1904 #if 0
1905 PLOGMSG ((klogDebug1,
1906 "used a posix prefix $(prefix)",
1907 "prefix=%s",
1908 current_header.h->posix.prefix));
1909 #endif
1910 /* -----
1911 * copy in the prefix, force a NUL just in case. then add a directory divider
1912 */
1913 string_copy(full_path, sizeof(full_path), current_header.h->posix.prefix, TAR_PREFIX_LEN);
1914 full_path[TAR_PREFIX_LEN] = 0x00;
1915 strcat(full_path,"/");
1916 }
1917
1918 strncat(full_path,current_header.h->tar.name,TAR_NAME_LEN);
1919 len = string_measure(full_path, &size);
1920 while (len > 1)
1921 {
1922 if (full_path[len-1] == '/')
1923 len--;
1924 else
1925 break;
1926 }
1927 full_path[len] = '\0';
1928 }
1929 link = current_header.h->tar.link;
1930 TAR_DEBUG(("link = %s(%c)\n",get_link_string(link),link));
1931 switch (link)
1932 {
1933 case LINK_SPARSE:
1934 /* -----
1935 * If there is an extension header we'll have different work to do
1936 */
1937 if (current_header.h->gnu_89.isextended)
1938 {
1939 /* -----
1940 * If we have an extended header following make sure there is room
1941 */
1942 if ( (uint64_t)( offset + 2 * sizeof(tar_header) ) > hard_limit )
1943 return 0;
1944 done = false;
1945 gnu_sparse = true; /* next block will be part of the header and not data */
1946 data_offset = offset + 2 * sizeof(tar_header);
1947 }
1948 else
1949 {
1950 done = true;
1951 gnu_sparse = false;
1952 data_offset = offset + sizeof(tar_header);
1953 }
1954
1955 done = (bool)! current_header.h->gnu_89.isextended;
1956 {
1957 int32_t ix;
1958 rc_t ret;
1959
1960 virtual_data_size = (uint64_t)(tar_strtoll(
1961 (uint8_t*)current_header.h->gnu_89.realsize,TAR_SIZE_LEN,
1962 silent));
1963
1964 for (ix = 0; ix< GNU_SPARSES_IN_OLD_HEADER; ++ix)
1965 {
1966 uint64_t soffset = tar_strtoll(
1967 (const uint8_t*)current_header.h->gnu_89.sparse[ix].offset,
1968 TAR_SIZE_LEN, silent);
1969 uint64_t ssize = tar_strtoll(
1970 (const uint8_t*)current_header.h->gnu_89.sparse[ix].num_bytes,
1971 TAR_SIZE_LEN, silent);
1972 if (ssize == 0)
1973 {
1974 break;
1975 }
1976
1977 ret = sparse_data_push (&self->sparse_q, soffset, ssize);
1978 if (ret)
1979 {
1980 sparse_data_kill(&self->sparse_q);
1981 return -1;
1982 }
1983 ++self->num_chunks;
1984 }
1985 }
1986 data_offset = current_offset + sizeof(tar_header);
1987 break;
1988
1989 case LINK_OLDNORMAL_FILE: /* deprecated normal file */
1990 /* -----
1991 * this should only happen with LINK_OLDNORMAL_FILE
1992 *
1993 * If the type is file but the last character in the path is "/"
1994 * treat it as a directory instead
1995 */
1996 {
1997 size_t size;
1998 if (full_path[string_measure(full_path, &size)-1] == '/')
1999 {
2000 link = LINK_DIRECTORY;
2001 }
2002 }
2003 /* fall through */
2004 case LINK_NORMAL_FILE:
2005 case LINK_CONTIGUOUS_FILE:
2006 case LINK_DIRECTORY:
2007 data_offset = current_offset + sizeof(tar_header);
2008 done = true;
2009 break;
2010
2011 /* since we do not extract for tar files, a symlink and a hardlink
2012 * are the same to us */
2013 case LINK_HARD_LINK:
2014 case LINK_SYMBOLIC_LINK:
2015 if (full_link[0] == 0)
2016 {
2017 string_copy(full_link, sizeof(full_link), current_header.h->tar.linkname, TAR_NAME_LEN);
2018 }
2019 done = true;
2020 break;
2021
2022 /* ----------
2023 * These types we ignore and they are defined to not have a data size
2024 */
2025 case LINK_CHARACTER_SPECIAL:
2026 case LINK_BLOCK_SPECIAL:
2027 case LINK_FIFO:
2028 case LINK_INODE_METADATA:
2029 /* -----
2030 * Nothing to be done
2031 */
2032 TAR_DEBUG (("%s: ignored block link type %s(%c) @ %lu\n",
2033 __func__,
2034 get_link_string(current_header.h->tar.link),
2035 get_link_string(current_header.h->tar.link),
2036 (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2037 data_size = 0; /* data size is specifically to be ignored */
2038 done = true;
2039 break;
2040
2041 /* ----------
2042 * These types we ignore as a final block or a block unto themselves and are
2043 * not part of a series of blocks and they are defined to have a data size
2044 */
2045 case LINK_PAX_GLOBAL_XHDR:
2046 case LINK_GNU_DUMPDIR: /* we are just gonna ignore this and not treat it like LINK_DIRECTORY */
2047 case LINK_SOLARIS_ACL_FILE:
2048 case LINK_VOLUME_NAME:
2049 case LINK_MULTI_VOLUME:
2050 TAR_DEBUG (("%s: ignored block link type %s(%c) %lu @ %lu\n",
2051 __func__,
2052 get_link_string(current_header.h->tar.link),
2053 get_link_string(current_header.h->tar.link),
2054 data_size,
2055 (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2056 done = true;
2057 break;
2058
2059 /* ----------
2060 * These types we ignore as a block with in a series of blocks
2061 * and they are defined to have a data size
2062 *
2063 * Nothing to be done
2064 */
2065 case LINK_SOLARIS_ACL:
2066 case LINK_PAX_XHDR: /* posix extended */
2067 /* -----
2068 */
2069 TAR_DEBUG (("%s: ignored block link type %s(%c) @ %lu\n",
2070 __func__,
2071 get_link_string(current_header.h->tar.link),
2072 get_link_string(current_header.h->tar.link),
2073 (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2074 break;
2075
2076 /* some link types we ignore this block */
2077 default:
2078 TAR_DEBUG (("%s: Ignoring block with link %s(%c/%02.2x) @ %lu\n",
2079 __func__,
2080 get_link_string(current_header.h->tar.link),
2081 current_header.h->tar.link,
2082 (unsigned)(current_header.h->tar.link),
2083 (uint64_t)(current_offset)+(uint64_t)(OFFSET_OF(tar_v7_header,link))));
2084 break;
2085
2086 case LINK_NEXT_LONG_LINK: /* long link name */
2087 /* -----
2088 * Long link name needs access now to its full set of data blocks, request a window shift
2089 * if it is not currently accessible
2090 */
2091 if ( (uint64_t)( offset + sizeof( tar_header ) + data_size ) > hard_limit )
2092 return 0;
2093
2094 string_copy(full_link, sizeof(full_link), (char*)(current_header.b + sizeof(tar_header)), data_size);
2095 break;
2096 case LINK_NEXT_LONG_NAME: /* long path name */
2097 /* -----
2098 * Long path name needs access now to its full set of data blocks, request a window shift
2099 * if it is not currently accessible
2100 */
2101 if ( (uint64_t)( offset + sizeof( tar_header ) + data_size ) > hard_limit )
2102 {
2103 return 0;
2104 }
2105
2106 string_copy(full_path, sizeof(full_path), (char*)(current_header.b + sizeof(tar_header)), data_size);
2107 break;
2108 }
2109
2110 /* -----
2111 * move the current header offset to past the data blocks
2112 */
2113 if (link == LINK_SPARSE)
2114 {
2115 current_offset += sizeof (tar_header);
2116 current_header.b += sizeof(tar_header);
2117 }
2118 else
2119 {
2120 current_offset += sizeof (tar_header)+ ((data_size+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)*TAR_BLOCK_SIZE;
2121 current_header.b += sizeof(tar_header) + ((data_size+TAR_BLOCK_SIZE-1)/TAR_BLOCK_SIZE)*TAR_BLOCK_SIZE;
2122 }
2123 /* -----
2124 * if that is past the currently available
2125 * quit the parse of this entry asn ask for a window shift, yeah, we'll redo work
2126 * but its far simpler code to just start over than track being in the middle
2127 */
2128 if ((!done) && (current_offset >= hard_limit))
2129 {
2130 return 0;
2131 }
2132
2133 } while (! done);
2134
2135 /* -----
2136 * generate output for this entry
2137 */
2138 switch (type)
2139 {
2140 default:
2141 if ( ! silent )
2142 {
2143 PLOGMSG (klogErr,(klogErr,"Unhandled Header Block Type $(type):$(typeint)","type=%c,typeint=%d",type ? type : '0',type));
2144 }
2145 return RC (rcFS, rcArc, rcParsing, rcData, rcUnsupported);
2146 case TAR_ZERO_BLOCK:
2147 #if 0
2148 LOGMSG (klogDebug1, "Zero Block");
2149 #endif
2150 /* ignored */
2151 break;
2152
2153 case TAR_CPIO:
2154 case TAR_V7:
2155 case TAR_POSIX:
2156 #if 0
2157 case TAR_PAX:
2158 #endif
2159 case TAR_SUN:
2160 case TAR_STAR_85:
2161 case TAR_STAR_94:
2162 case TAR_GNU_89:
2163 #if 0
2164 case TAR_GNU_01:
2165 #endif
2166 case TAR_SPARSE:
2167 /* -----
2168 * TODO:
2169 * implement a header checksum routine call it here, and return -1 if the check sum fails
2170 */
2171 if (gnu_sparse)
2172 {
2173 #if 0
2174 PLOGMSG ((klogDebug3,
2175 "Need to create a file but gnu_sparse is true $(l)",
2176 PLOG_U8(l),
2177 link
2178 ));
2179 #endif
2180 link = LINK_SPARSE; /* faking it for the next section */
2181 }
2182 switch (link)
2183 {
2184 case LINK_OLDNORMAL_FILE:
2185 case LINK_NORMAL_FILE:
2186 case LINK_CONTIGUOUS_FILE:
2187 #if 0
2188 LOGMSG (klogDebug3, "KTocCreateFile");
2189 #endif
2190 TAR_DEBUG (("%s call KTocCreateFile '%c':%hhd\n", __func__, link, link));
2191 KTocCreateFile (self->toc,
2192 data_size?data_offset:0,
2193 data_size,
2194 mtime,
2195 mode,
2196 (KCreateMode)(kcmInit|kcmParents),
2197 full_path);
2198 /* -----
2199 * TODO:
2200 * print something if extraneous fields found
2201 */
2202 break;
2203 case LINK_HARD_LINK:
2204 TAR_DEBUG (("%s call KTocCreateHardLink\n", __func__));
2205 KTocCreateHardLink (self->toc, mtime, mode,
2206 (KCreateMode)(kcmInit|kcmParents),
2207 full_link, full_path);
2208 /* -----
2209 * TODO:
2210 * print something if extraneous fields found
2211 */
2212 break;
2213 case LINK_SYMBOLIC_LINK:
2214 TAR_DEBUG (("%s call KTocCreateSoftLink\n", __func__));
2215 KTocCreateSoftLink (self->toc, mtime, mode,
2216 (KCreateMode)(kcmInit|kcmParents),
2217 full_link, full_path);
2218 /* -----
2219 * TODO:
2220 * print something if extraneous fields found
2221 */
2222 break;
2223
2224 case LINK_SPARSE:
2225 TAR_DEBUG (("%s LINK_SPARSE current_offset %jx: data_offset %jx: data_size %jx\n",
2226 __func__, current_offset, data_offset, data_size));
2227 make_chunk_list(self, data_offset);
2228 KTocCreateChunkedFile (self->toc,
2229 virtual_data_size,
2230 mtime, mode,
2231 self->num_chunks,
2232 self->chunks,
2233 (KCreateMode)(kcmInit|kcmParents),
2234 full_path);
2235 whack_chunk_list (self);
2236 break;
2237 case LINK_CHARACTER_SPECIAL:
2238 case LINK_BLOCK_SPECIAL:
2239 case LINK_FIFO:
2240 case LINK_VOLUME_NAME:
2241 case LINK_SOLARIS_ACL:
2242 case LINK_GNU_DUMPDIR:
2243 case LINK_SUN_XHDR:
2244 case LINK_INODE_METADATA:
2245 TAR_DEBUG ( ( "%s ignored entry type %s linktype name %s\n",
2246 __func__, get_link_string( link ), full_path ) );
2247 /* ignore */
2248 break;
2249 case LINK_DIRECTORY:
2250 TAR_DEBUG (("%s call KTocCreateDir\n", __func__));
2251 KTocCreateDir (self->toc, mtime, mode, (KCreateMode)(kcmOpen|kcmParents), full_path);
2252 /* -----
2253 * TODO:
2254 * print something if extraneous fields found?
2255 */
2256 break;
2257 default:
2258 if ( ! silent )
2259 {
2260 PLOGMSG(klogErr,(klogErr,
2261 " type ($(type)) name ($(name() link ($(link)) size ($(size)) offset ($(offset))",
2262 "type=%s,name=%s,link=%s,size=%lld,offset=%lld",
2263 get_type_string(type),
2264 full_path,
2265 full_link,
2266 data_size,
2267 data_offset));
2268 }
2269 break;
2270 }
2271 break;
2272 }
2273 return current_offset - offset;
2274 }
2275
2276
2277 static
KArcParseTAR_intern(KToc * self,const void * kvoid,bool silent)2278 rc_t KArcParseTAR_intern ( KToc * self,
2279 const void * kvoid,
2280 bool silent )
2281 {
2282 KTarState state;
2283 /* -----
2284 * offset is the running index into the file of where the first tar header
2285 * for the next element should start
2286 */
2287 uint64_t offset;
2288 /* -----
2289 * used is the count of bytes used for headers, storage and padding of the last
2290 * examined element
2291 */
2292 uint64_t used;
2293 uint64_t filesize;
2294 const KFile * kfile = kvoid;
2295 rc_t rc;
2296
2297 TAR_FUNC_ENTRY();
2298
2299 /* -----
2300 * save the KFS File and TOC references
2301 */
2302
2303 if ( kfile == NULL )
2304 {
2305 if ( !silent )
2306 LOGMSG ( klogFatal, "Called with a bad KFile parameter" );
2307 return RC (rcFS, rcArc, rcParsing, rcParam, rcNull );
2308 }
2309 else if ( self == NULL )
2310 {
2311 if ( !silent )
2312 LOGMSG (klogFatal, "Called with a bad KToc parameter");
2313 return RC (rcFS, rcArc, rcParsing, rcSelf, rcNull );
2314 }
2315
2316 memset (&state, 0, sizeof (state));
2317 state.kfile = kfile;
2318 state.toc = self;
2319
2320 if ( ( rc = KFileSize ( state.kfile, &filesize ) ) != 0 )
2321 {
2322 if ( !silent )
2323 LOGERR ( klogFatal, rc, "Failed to get file size of tarfile" );
2324 }
2325 else if ( filesize == 0 )
2326 {
2327 rc = RC ( rcFS, rcArc, rcAccessing, rcArc, rcEmpty );
2328 if ( !silent )
2329 LOGERR( klogFatal, rc, "Empty file" );
2330 }
2331 else if ( ( rc = map_tar_file ( &state, 0 ) ) != 0 )
2332 {
2333 if ( !silent )
2334 PLOGMSG( klogFatal,
2335 ( klogFatal, "Failed to $(operation) of size $(size)",
2336 "operation=%s,size=%lu", "mmap", filesize ) );
2337 return rc;
2338 }
2339 else
2340 {
2341 int stalled = 0;
2342
2343 for ( offset = 0; offset < filesize; )
2344 {
2345 /* -----
2346 * evaluate the tar file header at the current offset into the file
2347 *
2348 * the return is a positive number of bytes used
2349 * OR a negative on a file parse error
2350 * OR zero for a file that would exceed the current window
2351 */
2352 used = process_one_entry ( &state, offset, state.buffer_limit, silent );
2353
2354 if ( used == 0 )
2355 stalled ++;
2356 else
2357 stalled = 0;
2358
2359 /* kill any left over sparse data - safely handles an empty queue */
2360 sparse_data_kill ( &state.sparse_q );
2361
2362 if ( state.found_second_zero_block )
2363 {
2364 break;
2365 }
2366 if ( used > 0 )
2367 {
2368 offset += used;
2369 }
2370 else if ( used == 0 ) /* if the validate returns 0 we need more buffer */
2371 {
2372 if ( stalled == 5 ) /* sure why not 5 tries */
2373 {
2374 rc = RC ( rcFS, rcArc, rcParsing, rcArc, rcIncomplete );
2375 break;
2376 }
2377 map_tar_file ( &state, offset ); /* slide the window up to current location */
2378 /* -----
2379 * TODO:
2380 * Handle a repeated call from the same location as a failure in file format
2381 * (truncation) of the tar file
2382 */
2383 }
2384 else /*if (used < 0)*/ /* if it is negative it means abort the tar file */
2385 {
2386 rc = RC ( rcFS, rcArc, rcParsing, rcArc, rcUnexpected );
2387 break;
2388 }
2389 }
2390 if ( offset > filesize )
2391 {
2392 rc = RC ( rcFS, rcArc, rcParsing, rcToc, rcIncomplete );
2393 TAR_DEBUG (( "%s %R File offset %ju exceeds filesize %ju\n",
2394 __func__, rc, offset, filesize ));
2395 }
2396 release_map ( &state );
2397 }
2398 return rc;
2399 }
2400
2401 /* ======================================================================
2402 * validating a tar file is listing the files, links and directories
2403 * in that archive.
2404 *
2405 * partial results are not to be accepted in the end but it is the responsibility
2406 * of the caller to clear them.
2407 *
2408 * returns 0 for good archive and -1 for bad archive
2409 */
KArcParseTAR(KToc * self,const void * kvoid,bool (CC * ignored)(const KDirectory *,const char *,void *),void * also_ignored)2410 LIB_EXPORT rc_t CC KArcParseTAR ( KToc * self,
2411 const void * kvoid,
2412 bool ( CC * ignored )( const KDirectory *, const char *, void * ),
2413 void *also_ignored )
2414 {
2415 return KArcParseTAR_intern ( self, kvoid, false );
2416 }
2417
2418
KArcParseTAR_silent(KToc * self,const void * kvoid,bool (CC * ignored)(const KDirectory *,const char *,void *),void * also_ignored)2419 LIB_EXPORT rc_t CC KArcParseTAR_silent ( KToc * self,
2420 const void * kvoid,
2421 bool ( CC * ignored )( const KDirectory *, const char *, void * ),
2422 void *also_ignored )
2423 {
2424 return KArcParseTAR_intern ( self, kvoid, true );
2425 }
2426
2427
KDirectoryVOpenTarArchiveRead(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * fmt,va_list args)2428 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead ( struct KDirectory const *self,
2429 struct KDirectory const **tar_dir, int chroot, const char *fmt, va_list args )
2430 {
2431 char path [ 4096 ];
2432 /*VDB-4386: cannot treat va_list as a pointer!*/
2433 int size = 0;
2434 if ( fmt != NULL ) /*(args == NULL) ? snprintf ( path, sizeof path, "%s", fmt ) :*/
2435 size = vsnprintf ( path, sizeof path, fmt, args );
2436 if ( size < 0 || size >= ( int ) sizeof path )
2437 return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2438
2439 /* putting off parameter validation into this call */
2440 return KDirectoryOpenArcDirRead ( self, tar_dir, false, path, tocKFile,
2441 KArcParseTAR, NULL, NULL );
2442 }
2443
KDirectoryVOpenTarArchiveRead_silent(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * fmt,va_list args)2444 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead_silent ( struct KDirectory const *self,
2445 struct KDirectory const **tar_dir, int chroot, const char *fmt, va_list args )
2446 {
2447 char path [ 4096 ];
2448 /*VDB-4386: cannot treat va_list as a pointer!*/
2449 int size = 0;
2450 if ( fmt != NULL ) /*(args == NULL) ? snprintf ( path, sizeof path, "%s", fmt ) :*/
2451 size = vsnprintf ( path, sizeof path, fmt, args );
2452 if ( size < 0 || size >= ( int ) sizeof path )
2453 return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2454
2455 /* putting off parameter validation into this call */
2456 return KDirectoryOpenArcDirRead_silent ( self, tar_dir, false, path, tocKFile,
2457 KArcParseTAR_silent, NULL, NULL );
2458 }
2459
2460
KDirectoryVOpenTarArchiveRead_silent_preopened(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const struct KFile * f,const char * fmt,va_list args)2461 LIB_EXPORT int CC KDirectoryVOpenTarArchiveRead_silent_preopened ( struct KDirectory const *self,
2462 struct KDirectory const **tar_dir, int chroot, const struct KFile * f, const char *fmt, va_list args )
2463 {
2464 char path [ 4096 ];
2465 /*VDB-4386: cannot treat va_list as a pointer!*/
2466 int size = 0;
2467 if ( fmt != NULL ) /*(args == NULL) ? snprintf ( path, sizeof path, "%s", fmt ) :*/
2468 size = vsnprintf ( path, sizeof path, fmt, args );
2469 if ( size < 0 || size >= ( int ) sizeof path )
2470 return RC ( rcFS, rcDirectory, rcOpening, rcPath, rcExcessive );
2471
2472 /* putting off parameter validation into this call */
2473 return KDirectoryOpenArcDirRead_silent_preopened ( self, tar_dir, false, path, tocKFile,
2474 (void*)f, KArcParseTAR_silent, NULL, NULL );
2475 }
2476
2477
KDirectoryOpenTarArchiveRead(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * path,...)2478 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead ( struct KDirectory const *self,
2479 struct KDirectory const **tar_dir, int chroot, const char *path, ... )
2480 {
2481 rc_t rc;
2482 va_list args;
2483
2484 va_start ( args, path );
2485 rc = KDirectoryVOpenTarArchiveRead ( self, tar_dir, chroot, path, args );
2486 va_end ( args );
2487
2488 return rc;
2489 }
2490
2491
KDirectoryOpenTarArchiveRead_silent(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const char * path,...)2492 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead_silent ( struct KDirectory const *self,
2493 struct KDirectory const **tar_dir, int chroot, const char *path, ... )
2494 {
2495 rc_t rc;
2496 va_list args;
2497
2498 va_start ( args, path );
2499 rc = KDirectoryVOpenTarArchiveRead_silent ( self, tar_dir, chroot, path, args );
2500 va_end ( args );
2501
2502 return rc;
2503 }
2504
2505
KDirectoryOpenTarArchiveRead_silent_preopened(struct KDirectory const * self,struct KDirectory const ** tar_dir,int chroot,const KFile * f,const char * path,...)2506 LIB_EXPORT int CC KDirectoryOpenTarArchiveRead_silent_preopened ( struct KDirectory const *self,
2507 struct KDirectory const **tar_dir, int chroot, const KFile * f, const char *path, ... )
2508 {
2509 rc_t rc;
2510 va_list args;
2511
2512 va_start ( args, path );
2513 rc = KDirectoryVOpenTarArchiveRead_silent_preopened ( self, tar_dir, chroot, f, path, args );
2514 va_end ( args );
2515
2516 return rc;
2517 }
2518
2519
2520 /* ----------
2521 * Validate that the compiler packed all the character arrays into the correct sizes
2522 * to use the header.
2523 *
2524 * a bunch of magic numbers are in the function because we are making sure that
2525 * they are reached by using the structure members. They are listed in comments
2526 * within comments in the tar_header.h file.
2527 *
2528 * No return value
2529 */
validate_header_offsets(void)2530 LIB_EXPORT bool CC validate_header_offsets( void )
2531 {
2532 bool pass_fail = true; /* assume pass */
2533 #if _DEBUGGING
2534 /* -----
2535 * checking the size entails making sure the compiler made the structure
2536 * the right length to match the series of octents in the file
2537 */
2538 #define check_size(T,V) if( sizeof(T) != (size_t)V) { pass_fail = false; printf ("*** BAD_SIZE: %s is %u not %u\n", #T, (unsigned)sizeof(T), (unsigned)V);}
2539 /* -----
2540 * checking the offset entails making sure the compiler put the field at the
2541 * right absolute location within a structure
2542 */
2543 #define check_offset(T,M,V) if( OFFSET_OF(T,M) != (size_t)V ) { pass_fail = false; printf ("*** BAD_OFFSET: %s.%s is %u not %d\n", #T, #M, (unsigned)OFFSET_OF(T,M), V);}
2544
2545 check_size(tar_raw_block,512);
2546 check_size(tar_v7_header,257);
2547 check_size(tar_posix_header,500);
2548 check_size(tar_sun_header,512);
2549 check_size(tar_star_85_header,512);
2550 check_size(tar_star_94_header,512);
2551 check_size(tar_gnu_89_header,495);
2552 /*check_size(tar_new_gnu_header,512); */
2553 check_size(tar_sparse_header,505);
2554
2555
2556 /* tar_header union */
2557 check_offset(tar_header,raw,0);
2558 check_offset(tar_header,tar,0);
2559 check_offset(tar_header,posix,0);
2560 check_offset(tar_header,suntar,0);
2561 check_offset(tar_header,star_85,0);
2562 check_offset(tar_header,star_94,0);
2563 check_offset(tar_header,gnu_89,0);
2564 /*check_offset(tar_header,new_gnu,0); */
2565 check_offset(tar_header,sparse,0);
2566
2567 /* tar_v7_header */
2568 check_offset(tar_v7_header,name,0);
2569 check_offset(tar_v7_header,mode,100);
2570 check_offset(tar_v7_header,uid,108);
2571 check_offset(tar_v7_header,gid,116);
2572 check_offset(tar_v7_header,size,124);
2573 check_offset(tar_v7_header,mtime,136);
2574 check_offset(tar_v7_header,csum,148);
2575 check_offset(tar_v7_header,link,156);
2576 check_offset(tar_v7_header,linkname,157);
2577
2578 /* tar_posix_header */
2579 check_offset(tar_posix_header,name,0);
2580 check_offset(tar_posix_header,mode,100);
2581 check_offset(tar_posix_header,uid,108);
2582 check_offset(tar_posix_header,gid,116);
2583 check_offset(tar_posix_header,size,124);
2584 check_offset(tar_posix_header,mtime,136);
2585 check_offset(tar_posix_header,csum,148);
2586 check_offset(tar_posix_header,link,156);
2587 check_offset(tar_posix_header,linkname,157);
2588 check_offset(tar_posix_header,magic,257);
2589 check_offset(tar_posix_header,version,263);
2590 check_offset(tar_posix_header,uname,265);
2591 check_offset(tar_posix_header,gname,297);
2592 check_offset(tar_posix_header,devmajor,329);
2593 check_offset(tar_posix_header,devminor,337);
2594 check_offset(tar_posix_header,prefix,345);
2595
2596 /* tar_sun_header */
2597 check_offset(tar_sun_header,name,0);
2598 check_offset(tar_sun_header,mode,100);
2599 check_offset(tar_sun_header,uid,108);
2600 check_offset(tar_sun_header,gid,116);
2601 check_offset(tar_sun_header,size,124);
2602 check_offset(tar_sun_header,mtime,136);
2603 check_offset(tar_sun_header,csum,148);
2604 check_offset(tar_sun_header,link,156);
2605 check_offset(tar_sun_header,linkname,157);
2606 check_offset(tar_sun_header,magic,257);
2607 check_offset(tar_sun_header,version,263);
2608 check_offset(tar_sun_header,uname,265);
2609 check_offset(tar_sun_header,gname,297);
2610 check_offset(tar_sun_header,devmajor,329);
2611 check_offset(tar_sun_header,devminor,337);
2612 check_offset(tar_sun_header,prefix,345);
2613 check_offset(tar_sun_header,extnum,500);
2614 check_offset(tar_sun_header,extcount,501);
2615 check_offset(tar_sun_header,fullsize,502);
2616
2617 /* tar_star_85_header */
2618 check_offset(tar_star_85_header,name,0);
2619 check_offset(tar_star_85_header,mode,100);
2620 check_offset(tar_star_85_header,uid,108);
2621 check_offset(tar_star_85_header,gid,116);
2622 check_offset(tar_star_85_header,size,124);
2623 check_offset(tar_star_85_header,mtime,136);
2624 check_offset(tar_star_85_header,csum,148);
2625 check_offset(tar_star_85_header,link,156);
2626 check_offset(tar_star_85_header,linkname,157);
2627 check_offset(tar_star_85_header,starversion,257);
2628 check_offset(tar_star_85_header,starfiletype,258);
2629 check_offset(tar_star_85_header,startype,266);
2630 check_offset(tar_star_85_header,rdev,278);
2631 check_offset(tar_star_85_header,atime,290);
2632 check_offset(tar_star_85_header,ctime,302);
2633 check_offset(tar_star_85_header,uname,314);
2634 check_offset(tar_star_85_header,gname,330);
2635 check_offset(tar_star_85_header,prefix,345);
2636 check_offset(tar_star_85_header,xmagic,508);
2637
2638 /* tar_star_94_header */
2639 check_offset(tar_star_94_header,name,0);
2640 check_offset(tar_star_94_header,mode,100);
2641 check_offset(tar_star_94_header,uid,108);
2642 check_offset(tar_star_94_header,gid,116);
2643 check_offset(tar_star_94_header,size,124);
2644 check_offset(tar_star_94_header,mtime,136);
2645 check_offset(tar_star_94_header,csum,148);
2646 check_offset(tar_star_94_header,link,156);
2647 check_offset(tar_star_94_header,linkname,157);
2648 check_offset(tar_star_94_header,magic,257);
2649 check_offset(tar_star_94_header,version,263);
2650 check_offset(tar_star_94_header,uname,265);
2651 check_offset(tar_star_94_header,gname,297);
2652 check_offset(tar_star_94_header,devmajor,329);
2653 check_offset(tar_star_94_header,devminor,337);
2654 check_offset(tar_star_94_header,prefix,345);
2655 check_offset(tar_star_94_header,isextended,355);
2656 check_offset(tar_star_94_header,sparse,356);
2657 check_offset(tar_star_94_header,realsize,452);
2658 check_offset(tar_star_94_header,offset,464);
2659 check_offset(tar_star_94_header,atime,476);
2660 check_offset(tar_star_94_header,ctime,488);
2661 check_offset(tar_star_94_header,xmagic,508);
2662
2663 /* tar_gnu_89_header */
2664 check_offset(tar_gnu_89_header,name,0);
2665 check_offset(tar_gnu_89_header,mode,100);
2666 check_offset(tar_gnu_89_header,uid,108);
2667 check_offset(tar_gnu_89_header,gid,116);
2668 check_offset(tar_gnu_89_header,size,124);
2669 check_offset(tar_gnu_89_header,mtime,136);
2670 check_offset(tar_gnu_89_header,csum,148);
2671 check_offset(tar_gnu_89_header,link,156);
2672 check_offset(tar_gnu_89_header,linkname,157);
2673 check_offset(tar_gnu_89_header,magic,257);
2674 check_offset(tar_gnu_89_header,uname,265);
2675 check_offset(tar_gnu_89_header,gname,297);
2676 check_offset(tar_gnu_89_header,atime,345);
2677 check_offset(tar_gnu_89_header,ctime,357);
2678 check_offset(tar_gnu_89_header,offset,369);
2679 check_offset(tar_gnu_89_header,longnames,381);
2680 check_offset(tar_gnu_89_header,sparse,386);
2681 check_offset(tar_gnu_89_header,isextended,482);
2682
2683 /* tar_extended_header */
2684 check_offset(tar_sparse_header,sparse,0);
2685 check_offset(tar_sparse_header,isextended,504);
2686 #endif
2687 return pass_fail;
2688 }
2689
2690 /* end of file */
2691