1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");
29 
30 #ifdef HAVE_ERRNO_H
31 #include <errno.h>
32 #endif
33 #include <stddef.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "archive.h"
42 #include "archive_acl_private.h" /* For ACL parsing routines. */
43 #include "archive_entry.h"
44 #include "archive_entry_locale.h"
45 #include "archive_private.h"
46 #include "archive_read_private.h"
47 
/*
 * Smaller of two values.  This is a function-like macro, so whichever
 * argument is selected gets evaluated twice; do not pass expressions
 * with side effects.
 */
#define tar_min(a,b) ((a) < (b) ? (a) : (b))
49 
/*
 * Layout of POSIX 'ustar' tar header.
 *
 * Every field is a fixed-width character array, so the struct can be
 * overlaid directly on a raw header record.  The reader in this file
 * fetches header records 512 bytes at a time and casts the record
 * pointer to this type; the fields declared here add up to 500 bytes.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];	/* Validated and compared by checksum(). */
	char	typeflag[1];	/* Selects the handler in tar_read_header(). */
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};
71 
/*
 * Structure of GNU tar header
 *
 * One entry of the sparse map embedded in a GNU tar header: two
 * fixed-width character fields.  The GNU header below carries an array
 * of four of these.
 */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};
79 
/*
 * GNU tar header record.  The fields from 'name' through 'linkname'
 * have the same order and widths as in the ustar header above; the
 * 8-byte 'magic' here spans what ustar splits into magic[6] and
 * version[2].
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	/* Sparse map entries stored directly in the header. */
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
108 
/*
 * Data specific to this format.
 *
 * One node of the singly linked sparse map kept in struct tar.  The
 * data reader reports 'offset' as the position of the bytes it hands
 * out, advances 'offset' and decrements 'remaining' as it goes, and
 * frees nodes whose 'remaining' reaches zero.
 */
struct sparse_block {
	struct sparse_block	*next;
	/* Position reported to the caller for the next bytes of this block. */
	int64_t	offset;
	/* Bytes of this block not yet processed. */
	int64_t	remaining;
	/* Non-zero: block is a hole; the data reader does not return it. */
	int hole;
};
118 
/*
 * Private state of the tar reader.  It is allocated zero-filled when
 * the format is registered and released by the cleanup callback.
 */
struct tar {
	/* Body of a Solaris 'A' (ACL) header. */
	struct archive_string	 acl_text;
	struct archive_string	 entry_pathname;
	/* For "GNU.sparse.name" and other similar path extensions. */
	struct archive_string	 entry_pathname_override;
	struct archive_string	 entry_linkpath;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	/* Body of a 'K' (long link name) header. */
	struct archive_string	 longlink;
	struct archive_string	 longname;
	struct archive_string	 pax_header;
	struct archive_string	 pax_global;
	struct archive_string	 line;
	int			 pax_hdrcharset_binary;
	/* Nesting level of tar_read_header(); capped at 32. */
	int			 header_recursion_depth;
	/* Bytes of the current entry body not yet handed to the caller. */
	int64_t			 entry_bytes_remaining;
	int64_t			 entry_offset;
	/* Padding consumed once the entry body is finished or skipped. */
	int64_t			 entry_padding;
	/* Bytes handed out by the last read that still must be consumed. */
	int64_t 		 entry_bytes_unconsumed;
	/* Reported as the offset at end of entry; -1 marks "unset". */
	int64_t			 realsize;
	/* Head of the sparse map for the current entry. */
	struct sparse_block	*sparse_list;
	struct sparse_block	*sparse_last;
	int64_t			 sparse_offset;
	int64_t			 sparse_numbytes;
	int			 sparse_gnu_major;
	int			 sparse_gnu_minor;
	/* Set when a GNU sparse map still has to be read after the header. */
	char			 sparse_gnu_pending;

	struct archive_string	 localname;
	/* Conversion requested through the "hdrcharset" option, if any. */
	struct archive_string_conv *opt_sconv;
	/* Conversion in effect for the current entry. */
	struct archive_string_conv *sconv;
	/* UTF-8 conversion created on demand for Solaris ACL text. */
	struct archive_string_conv *sconv_acl;
	/* Default conversion, fetched the first time it is needed. */
	struct archive_string_conv *sconv_default;
	/* Non-zero once sconv_default needs no (further) initialization. */
	int			 init_default_conversion;
	/* "compat-2x" option. */
	int			 compat_2x;
	/* "mac-ext" option; defaults to 1 when HAVE_COPYFILE_H is defined. */
	int			 process_mac_extensions;
	/* "read_concatenated_archives" option: skip all-null blocks. */
	int			 read_concatenated_archives;
};
157 
/*
 * Forward declarations of the file-local helpers and of the callbacks
 * handed to __archive_read_register_format().
 */
static int	archive_block_is_null(const char *p);
static char	*base64_decode(const char *, size_t, size_t *);
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);

static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
			size_t *);
static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_common(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
/* Callbacks registered by archive_read_support_format_tar(). */
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int 	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, char *key, char *value);
static int 	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const char *, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);
static int	solaris_sparse_parse(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *);
/* Numeric field parsers. */
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static int	tohex(int c);
static char	*url_decode(const char *);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);
224 
225 
226 int
227 archive_read_support_format_gnutar(struct archive *a)
228 {
229 	archive_check_magic(a, ARCHIVE_READ_MAGIC,
230 	    ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
231 	return (archive_read_support_format_tar(a));
232 }
233 
234 
235 int
236 archive_read_support_format_tar(struct archive *_a)
237 {
238 	struct archive_read *a = (struct archive_read *)_a;
239 	struct tar *tar;
240 	int r;
241 
242 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
243 	    ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
244 
245 	tar = (struct tar *)calloc(1, sizeof(*tar));
246 #ifdef HAVE_COPYFILE_H
247 	/* Set this by default on Mac OS. */
248 	tar->process_mac_extensions = 1;
249 #endif
250 	if (tar == NULL) {
251 		archive_set_error(&a->archive, ENOMEM,
252 		    "Can't allocate tar data");
253 		return (ARCHIVE_FATAL);
254 	}
255 
256 	r = __archive_read_register_format(a, tar, "tar",
257 	    archive_read_format_tar_bid,
258 	    archive_read_format_tar_options,
259 	    archive_read_format_tar_read_header,
260 	    archive_read_format_tar_read_data,
261 	    archive_read_format_tar_skip,
262 	    NULL,
263 	    archive_read_format_tar_cleanup,
264 	    NULL,
265 	    NULL);
266 
267 	if (r != ARCHIVE_OK)
268 		free(tar);
269 	return (ARCHIVE_OK);
270 }
271 
272 static int
273 archive_read_format_tar_cleanup(struct archive_read *a)
274 {
275 	struct tar *tar;
276 
277 	tar = (struct tar *)(a->format->data);
278 	gnu_clear_sparse_list(tar);
279 	archive_string_free(&tar->acl_text);
280 	archive_string_free(&tar->entry_pathname);
281 	archive_string_free(&tar->entry_pathname_override);
282 	archive_string_free(&tar->entry_linkpath);
283 	archive_string_free(&tar->entry_uname);
284 	archive_string_free(&tar->entry_gname);
285 	archive_string_free(&tar->line);
286 	archive_string_free(&tar->pax_global);
287 	archive_string_free(&tar->pax_header);
288 	archive_string_free(&tar->longname);
289 	archive_string_free(&tar->longlink);
290 	archive_string_free(&tar->localname);
291 	free(tar);
292 	(a->format->data) = NULL;
293 	return (ARCHIVE_OK);
294 }
295 
296 
297 static int
298 archive_read_format_tar_bid(struct archive_read *a, int best_bid)
299 {
300 	int bid;
301 	const char *h;
302 	const struct archive_entry_header_ustar *header;
303 
304 	(void)best_bid; /* UNUSED */
305 
306 	bid = 0;
307 
308 	/* Now let's look at the actual header and see if it matches. */
309 	h = __archive_read_ahead(a, 512, NULL);
310 	if (h == NULL)
311 		return (-1);
312 
313 	/* If it's an end-of-archive mark, we can handle it. */
314 	if (h[0] == 0 && archive_block_is_null(h)) {
315 		/*
316 		 * Usually, I bid the number of bits verified, but
317 		 * in this case, 4096 seems excessive so I picked 10 as
318 		 * an arbitrary but reasonable-seeming value.
319 		 */
320 		return (10);
321 	}
322 
323 	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
324 	if (!checksum(a, h))
325 		return (0);
326 	bid += 48;  /* Checksum is usually 6 octal digits. */
327 
328 	header = (const struct archive_entry_header_ustar *)h;
329 
330 	/* Recognize POSIX formats. */
331 	if ((memcmp(header->magic, "ustar\0", 6) == 0)
332 	    && (memcmp(header->version, "00", 2) == 0))
333 		bid += 56;
334 
335 	/* Recognize GNU tar format. */
336 	if ((memcmp(header->magic, "ustar ", 6) == 0)
337 	    && (memcmp(header->version, " \0", 2) == 0))
338 		bid += 56;
339 
340 	/* Type flag must be null, digit or A-Z, a-z. */
341 	if (header->typeflag[0] != 0 &&
342 	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
343 	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
344 	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
345 		return (0);
346 	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
347 
348 	/* Sanity check: Look at first byte of mode field. */
349 	switch (255 & (unsigned)header->mode[0]) {
350 	case 0: case 255:
351 		/* Base-256 value: No further verification possible! */
352 		break;
353 	case ' ': /* Not recommended, but not illegal, either. */
354 		break;
355 	case '0': case '1': case '2': case '3':
356 	case '4': case '5': case '6': case '7':
357 		/* Octal Value. */
358 		/* TODO: Check format of remainder of this field. */
359 		break;
360 	default:
361 		/* Not a valid mode; bail out here. */
362 		return (0);
363 	}
364 	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
365 
366 	return (bid);
367 }
368 
369 static int
370 archive_read_format_tar_options(struct archive_read *a,
371     const char *key, const char *val)
372 {
373 	struct tar *tar;
374 	int ret = ARCHIVE_FAILED;
375 
376 	tar = (struct tar *)(a->format->data);
377 	if (strcmp(key, "compat-2x")  == 0) {
378 		/* Handle UTF-8 filnames as libarchive 2.x */
379 		tar->compat_2x = (val != NULL && val[0] != 0);
380 		tar->init_default_conversion = tar->compat_2x;
381 		return (ARCHIVE_OK);
382 	} else if (strcmp(key, "hdrcharset")  == 0) {
383 		if (val == NULL || val[0] == 0)
384 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
385 			    "tar: hdrcharset option needs a character-set name");
386 		else {
387 			tar->opt_sconv =
388 			    archive_string_conversion_from_charset(
389 				&a->archive, val, 0);
390 			if (tar->opt_sconv != NULL)
391 				ret = ARCHIVE_OK;
392 			else
393 				ret = ARCHIVE_FATAL;
394 		}
395 		return (ret);
396 	} else if (strcmp(key, "mac-ext") == 0) {
397 		tar->process_mac_extensions = (val != NULL && val[0] != 0);
398 		return (ARCHIVE_OK);
399 	} else if (strcmp(key, "read_concatenated_archives") == 0) {
400 		tar->read_concatenated_archives = (val != NULL && val[0] != 0);
401 		return (ARCHIVE_OK);
402 	}
403 
404 	/* Note: The "warn" return is just to inform the options
405 	 * supervisor that we didn't handle it.  It will generate
406 	 * a suitable error if no one used this option. */
407 	return (ARCHIVE_WARN);
408 }
409 
/*
 * Utility that centralizes the bookkeeping of header bytes that have
 * been examined through a read-ahead but not yet consumed.  Call it
 * before issuing another read-ahead: any outstanding count is consumed
 * and *unconsumed is reset to zero.
 *
 * Debugging aid: overwriting the claimed bytes with 0xff right before
 * consuming them exposes code that still uses a stale buffer.  At the
 * time the original note was written this did break things, so it is
 * only meant for debugging that issue.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	size_t pending = *unconsumed;

	if (pending == 0)
		return;

	__archive_read_consume(a, pending);
	*unconsumed = 0;
}
431 
432 /*
433  * The function invoked by archive_read_next_header().  This
434  * just sets up a few things and then calls the internal
435  * tar_read_header() function below.
436  */
437 static int
438 archive_read_format_tar_read_header(struct archive_read *a,
439     struct archive_entry *entry)
440 {
441 	/*
442 	 * When converting tar archives to cpio archives, it is
443 	 * essential that each distinct file have a distinct inode
444 	 * number.  To simplify this, we keep a static count here to
445 	 * assign fake dev/inode numbers to each tar entry.  Note that
446 	 * pax format archives may overwrite this with something more
447 	 * useful.
448 	 *
449 	 * Ideally, we would track every file read from the archive so
450 	 * that we could assign the same dev/ino pair to hardlinks,
451 	 * but the memory required to store a complete lookup table is
452 	 * probably not worthwhile just to support the relatively
453 	 * obscure tar->cpio conversion case.
454 	 */
455 	static int default_inode;
456 	static int default_dev;
457 	struct tar *tar;
458 	const char *p;
459 	const wchar_t *wp;
460 	int r;
461 	size_t l, unconsumed = 0;
462 
463 	/* Assign default device/inode values. */
464 	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
465 	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
466 	/* Limit generated st_ino number to 16 bits. */
467 	if (default_inode >= 0xffff) {
468 		++default_dev;
469 		default_inode = 0;
470 	}
471 
472 	tar = (struct tar *)(a->format->data);
473 	tar->entry_offset = 0;
474 	gnu_clear_sparse_list(tar);
475 	tar->realsize = -1; /* Mark this as "unset" */
476 
477 	/* Setup default string conversion. */
478 	tar->sconv = tar->opt_sconv;
479 	if (tar->sconv == NULL) {
480 		if (!tar->init_default_conversion) {
481 			tar->sconv_default =
482 			    archive_string_default_conversion_for_read(&(a->archive));
483 			tar->init_default_conversion = 1;
484 		}
485 		tar->sconv = tar->sconv_default;
486 	}
487 
488 	r = tar_read_header(a, tar, entry, &unconsumed);
489 
490 	tar_flush_unconsumed(a, &unconsumed);
491 
492 	/*
493 	 * "non-sparse" files are really just sparse files with
494 	 * a single block.
495 	 */
496 	if (tar->sparse_list == NULL) {
497 		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
498 		    != ARCHIVE_OK)
499 			return (ARCHIVE_FATAL);
500 	} else {
501 		struct sparse_block *sb;
502 
503 		for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
504 			if (!sb->hole)
505 				archive_entry_sparse_add_entry(entry,
506 				    sb->offset, sb->remaining);
507 		}
508 	}
509 
510 	if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
511 		/*
512 		 * "Regular" entry with trailing '/' is really
513 		 * directory: This is needed for certain old tar
514 		 * variants and even for some broken newer ones.
515 		 */
516 		if ((wp = archive_entry_pathname_w(entry)) != NULL) {
517 			l = wcslen(wp);
518 			if (l > 0 && wp[l - 1] == L'/') {
519 				archive_entry_set_filetype(entry, AE_IFDIR);
520 			}
521 		} else if ((p = archive_entry_pathname(entry)) != NULL) {
522 			l = strlen(p);
523 			if (l > 0 && p[l - 1] == '/') {
524 				archive_entry_set_filetype(entry, AE_IFDIR);
525 			}
526 		}
527 	}
528 	return (r);
529 }
530 
/*
 * Read-data callback: hand out the next run of bytes of the current
 * entry without copying.
 *
 * On ARCHIVE_OK, *buff points into the read-ahead buffer, *size is the
 * number of usable bytes and *offset is the position recorded in the
 * current sparse block.  The bytes are consumed on the next call (or by
 * the skip callback), not here.
 *
 * Returns ARCHIVE_EOF (with *buff NULL, *size 0 and *offset set to
 * tar->realsize) when the sparse list or the entry body is exhausted,
 * and ARCHIVE_FATAL on a read error, truncation, or a failure to
 * consume the padding.
 */
static int
archive_read_format_tar_read_data(struct archive_read *a,
    const void **buff, size_t *size, int64_t *offset)
{
	ssize_t bytes_read;
	struct tar *tar;
	struct sparse_block *p;

	tar = (struct tar *)(a->format->data);

	for (;;) {
		/* Remove exhausted entries from sparse list. */
		while (tar->sparse_list != NULL &&
		    tar->sparse_list->remaining == 0) {
			p = tar->sparse_list;
			tar->sparse_list = p->next;
			free(p);
		}

		/* Consume whatever the previous pass recorded as handed out. */
		if (tar->entry_bytes_unconsumed) {
			__archive_read_consume(a, tar->entry_bytes_unconsumed);
			tar->entry_bytes_unconsumed = 0;
		}

		/* If we're at end of file, return EOF. */
		if (tar->sparse_list == NULL ||
		    tar->entry_bytes_remaining == 0) {
			if (__archive_read_consume(a, tar->entry_padding) < 0)
				return (ARCHIVE_FATAL);
			tar->entry_padding = 0;
			*buff = NULL;
			*size = 0;
			*offset = tar->realsize;
			return (ARCHIVE_EOF);
		}

		/* Ask for at least one byte; bytes_read reports what is available. */
		*buff = __archive_read_ahead(a, 1, &bytes_read);
		if (bytes_read < 0)
			return (ARCHIVE_FATAL);
		if (*buff == NULL) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "Truncated tar archive");
			return (ARCHIVE_FATAL);
		}
		/* Never hand out more than the entry body still holds. */
		if (bytes_read > tar->entry_bytes_remaining)
			bytes_read = (ssize_t)tar->entry_bytes_remaining;
		/* Don't read more than is available in the
		 * current sparse block. */
		if (tar->sparse_list->remaining < bytes_read)
			bytes_read = (ssize_t)tar->sparse_list->remaining;
		*size = bytes_read;
		*offset = tar->sparse_list->offset;
		tar->sparse_list->remaining -= bytes_read;
		tar->sparse_list->offset += bytes_read;
		tar->entry_bytes_remaining -= bytes_read;
		tar->entry_bytes_unconsumed = bytes_read;

		if (!tar->sparse_list->hole)
			return (ARCHIVE_OK);
		/* Current is hole data and skip this. */
		/*
		 * NOTE(review): for a hole the count stored in
		 * entry_bytes_unconsumed is still consumed from the input
		 * at the top of the next iteration -- confirm that hole
		 * blocks are meant to advance the input stream.
		 */
	}
}
593 
594 static int
595 archive_read_format_tar_skip(struct archive_read *a)
596 {
597 	int64_t bytes_skipped;
598 	int64_t request;
599 	struct sparse_block *p;
600 	struct tar* tar;
601 
602 	tar = (struct tar *)(a->format->data);
603 
604 	/* Do not consume the hole of a sparse file. */
605 	request = 0;
606 	for (p = tar->sparse_list; p != NULL; p = p->next) {
607 		if (!p->hole) {
608 			if (p->remaining >= INT64_MAX - request) {
609 				return ARCHIVE_FATAL;
610 			}
611 			request += p->remaining;
612 		}
613 	}
614 	if (request > tar->entry_bytes_remaining)
615 		request = tar->entry_bytes_remaining;
616 	request += tar->entry_padding + tar->entry_bytes_unconsumed;
617 
618 	bytes_skipped = __archive_read_consume(a, request);
619 	if (bytes_skipped < 0)
620 		return (ARCHIVE_FATAL);
621 
622 	tar->entry_bytes_remaining = 0;
623 	tar->entry_bytes_unconsumed = 0;
624 	tar->entry_padding = 0;
625 
626 	/* Free the sparse list. */
627 	gnu_clear_sparse_list(tar);
628 
629 	return (ARCHIVE_OK);
630 }
631 
632 /*
633  * This function recursively interprets all of the headers associated
634  * with a single entry.
635  */
636 static int
637 tar_read_header(struct archive_read *a, struct tar *tar,
638     struct archive_entry *entry, size_t *unconsumed)
639 {
640 	ssize_t bytes;
641 	int err;
642 	const char *h;
643 	const struct archive_entry_header_ustar *header;
644 	const struct archive_entry_header_gnutar *gnuheader;
645 
646 	/* Loop until we find a workable header record. */
647 	for (;;) {
648 		tar_flush_unconsumed(a, unconsumed);
649 
650 		/* Read 512-byte header record */
651 		h = __archive_read_ahead(a, 512, &bytes);
652 		if (bytes < 0)
653 			return ((int)bytes);
654 		if (bytes == 0) { /* EOF at a block boundary. */
655 			/* Some writers do omit the block of nulls. <sigh> */
656 			return (ARCHIVE_EOF);
657 		}
658 		if (bytes < 512) {  /* Short block at EOF; this is bad. */
659 			archive_set_error(&a->archive,
660 			    ARCHIVE_ERRNO_FILE_FORMAT,
661 			    "Truncated tar archive");
662 			return (ARCHIVE_FATAL);
663 		}
664 		*unconsumed = 512;
665 
666 		/* Header is workable if it's not an end-of-archive mark. */
667 		if (h[0] != 0 || !archive_block_is_null(h))
668 			break;
669 
670 		/* Ensure format is set for archives with only null blocks. */
671 		if (a->archive.archive_format_name == NULL) {
672 			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
673 			a->archive.archive_format_name = "tar";
674 		}
675 
676 		if (!tar->read_concatenated_archives) {
677 			/* Try to consume a second all-null record, as well. */
678 			tar_flush_unconsumed(a, unconsumed);
679 			h = __archive_read_ahead(a, 512, NULL);
680 			if (h != NULL && h[0] == 0 && archive_block_is_null(h))
681 				__archive_read_consume(a, 512);
682 			archive_clear_error(&a->archive);
683 			return (ARCHIVE_EOF);
684 		}
685 
686 		/*
687 		 * We're reading concatenated archives, ignore this block and
688 		 * loop to get the next.
689 		 */
690 	}
691 
692 	/*
693 	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
694 	 * then the client is likely to just retry.  This is a very
695 	 * crude way to search for the next valid header!
696 	 *
697 	 * TODO: Improve this by implementing a real header scan.
698 	 */
699 	if (!checksum(a, h)) {
700 		tar_flush_unconsumed(a, unconsumed);
701 		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
702 		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
703 	}
704 
705 	if (++tar->header_recursion_depth > 32) {
706 		tar_flush_unconsumed(a, unconsumed);
707 		archive_set_error(&a->archive, EINVAL, "Too many special headers");
708 		return (ARCHIVE_WARN);
709 	}
710 
711 	/* Determine the format variant. */
712 	header = (const struct archive_entry_header_ustar *)h;
713 
714 	switch(header->typeflag[0]) {
715 	case 'A': /* Solaris tar ACL */
716 		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
717 		a->archive.archive_format_name = "Solaris tar";
718 		err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
719 		break;
720 	case 'g': /* POSIX-standard 'g' header. */
721 		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
722 		a->archive.archive_format_name = "POSIX pax interchange format";
723 		err = header_pax_global(a, tar, entry, h, unconsumed);
724 		if (err == ARCHIVE_EOF)
725 			return (err);
726 		break;
727 	case 'K': /* Long link name (GNU tar, others) */
728 		err = header_longlink(a, tar, entry, h, unconsumed);
729 		break;
730 	case 'L': /* Long filename (GNU tar, others) */
731 		err = header_longname(a, tar, entry, h, unconsumed);
732 		break;
733 	case 'V': /* GNU volume header */
734 		err = header_volume(a, tar, entry, h, unconsumed);
735 		break;
736 	case 'X': /* Used by SUN tar; same as 'x'. */
737 		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
738 		a->archive.archive_format_name =
739 		    "POSIX pax interchange format (Sun variant)";
740 		err = header_pax_extensions(a, tar, entry, h, unconsumed);
741 		break;
742 	case 'x': /* POSIX-standard 'x' header. */
743 		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
744 		a->archive.archive_format_name = "POSIX pax interchange format";
745 		err = header_pax_extensions(a, tar, entry, h, unconsumed);
746 		break;
747 	default:
748 		gnuheader = (const struct archive_entry_header_gnutar *)h;
749 		if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
750 			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
751 			a->archive.archive_format_name = "GNU tar format";
752 			err = header_gnutar(a, tar, entry, h, unconsumed);
753 		} else if (memcmp(header->magic, "ustar", 5) == 0) {
754 			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
755 				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
756 				a->archive.archive_format_name = "POSIX ustar format";
757 			}
758 			err = header_ustar(a, tar, entry, h);
759 		} else {
760 			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
761 			a->archive.archive_format_name = "tar (non-POSIX)";
762 			err = header_old_tar(a, tar, entry, h);
763 		}
764 	}
765 	if (err == ARCHIVE_FATAL)
766 		return (err);
767 
768 	tar_flush_unconsumed(a, unconsumed);
769 
770 	h = NULL;
771 	header = NULL;
772 
773 	--tar->header_recursion_depth;
774 	/* Yuck.  Apple's design here ends up storing long pathname
775 	 * extensions for both the AppleDouble extension entry and the
776 	 * regular entry.
777 	 */
778 	if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
779 	    tar->header_recursion_depth == 0 &&
780 	    tar->process_mac_extensions) {
781 		int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
782 		if (err2 < err)
783 			err = err2;
784 	}
785 
786 	/* We return warnings or success as-is.  Anything else is fatal. */
787 	if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
788 		if (tar->sparse_gnu_pending) {
789 			if (tar->sparse_gnu_major == 1 &&
790 			    tar->sparse_gnu_minor == 0) {
791 				ssize_t bytes_read;
792 
793 				tar->sparse_gnu_pending = 0;
794 				/* Read initial sparse map. */
795 				bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
796 				tar->entry_bytes_remaining -= bytes_read;
797 				if (bytes_read < 0)
798 					return ((int)bytes_read);
799 			} else {
800 				archive_set_error(&a->archive,
801 				    ARCHIVE_ERRNO_MISC,
802 				    "Unrecognized GNU sparse file format");
803 				return (ARCHIVE_WARN);
804 			}
805 			tar->sparse_gnu_pending = 0;
806 		}
807 		return (err);
808 	}
809 	if (err == ARCHIVE_EOF)
810 		/* EOF when recursively reading a header is bad. */
811 		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
812 	return (ARCHIVE_FATAL);
813 }
814 
815 /*
816  * Return true if block checksum is correct.
817  */
818 static int
819 checksum(struct archive_read *a, const void *h)
820 {
821 	const unsigned char *bytes;
822 	const struct archive_entry_header_ustar	*header;
823 	int check, sum;
824 	size_t i;
825 
826 	(void)a; /* UNUSED */
827 	bytes = (const unsigned char *)h;
828 	header = (const struct archive_entry_header_ustar *)h;
829 
830 	/* Checksum field must hold an octal number */
831 	for (i = 0; i < sizeof(header->checksum); ++i) {
832 		char c = header->checksum[i];
833 		if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
834 			return 0;
835 	}
836 
837 	/*
838 	 * Test the checksum.  Note that POSIX specifies _unsigned_
839 	 * bytes for this calculation.
840 	 */
841 	sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
842 	check = 0;
843 	for (i = 0; i < 148; i++)
844 		check += (unsigned char)bytes[i];
845 	for (; i < 156; i++)
846 		check += 32;
847 	for (; i < 512; i++)
848 		check += (unsigned char)bytes[i];
849 	if (sum == check)
850 		return (1);
851 
852 	/*
853 	 * Repeat test with _signed_ bytes, just in case this archive
854 	 * was created by an old BSD, Solaris, or HP-UX tar with a
855 	 * broken checksum calculation.
856 	 */
857 	check = 0;
858 	for (i = 0; i < 148; i++)
859 		check += (signed char)bytes[i];
860 	for (; i < 156; i++)
861 		check += 32;
862 	for (; i < 512; i++)
863 		check += (signed char)bytes[i];
864 	if (sum == check)
865 		return (1);
866 
867 	return (0);
868 }
869 
/*
 * Return true if this block contains only nulls.
 *
 * Exactly 512 bytes starting at p are examined; the result is 1 when
 * all of them are zero and 0 at the first non-zero byte.
 */
static int
archive_block_is_null(const char *p)
{
	const char *end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		++p;
	}
	return (1);
}
883 
884 /*
885  * Interpret 'A' Solaris ACL header
886  */
887 static int
888 header_Solaris_ACL(struct archive_read *a, struct tar *tar,
889     struct archive_entry *entry, const void *h, size_t *unconsumed)
890 {
891 	const struct archive_entry_header_ustar *header;
892 	size_t size;
893 	int err;
894 	int64_t type;
895 	char *acl, *p;
896 
897 	/*
898 	 * read_body_to_string adds a NUL terminator, but we need a little
899 	 * more to make sure that we don't overrun acl_text later.
900 	 */
901 	header = (const struct archive_entry_header_ustar *)h;
902 	size = (size_t)tar_atol(header->size, sizeof(header->size));
903 	err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
904 	if (err != ARCHIVE_OK)
905 		return (err);
906 
907 	/* Recursively read next header */
908 	err = tar_read_header(a, tar, entry, unconsumed);
909 	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
910 		return (err);
911 
912 	/* TODO: Examine the first characters to see if this
913 	 * is an AIX ACL descriptor.  We'll likely never support
914 	 * them, but it would be polite to recognize and warn when
915 	 * we do see them. */
916 
917 	/* Leading octal number indicates ACL type and number of entries. */
918 	p = acl = tar->acl_text.s;
919 	type = 0;
920 	while (*p != '\0' && p < acl + size) {
921 		if (*p < '0' || *p > '7') {
922 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
923 			    "Malformed Solaris ACL attribute (invalid digit)");
924 			return(ARCHIVE_WARN);
925 		}
926 		type <<= 3;
927 		type += *p - '0';
928 		if (type > 077777777) {
929 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
930 			    "Malformed Solaris ACL attribute (count too large)");
931 			return (ARCHIVE_WARN);
932 		}
933 		p++;
934 	}
935 	switch ((int)type & ~0777777) {
936 	case 01000000:
937 		/* POSIX.1e ACL */
938 		break;
939 	case 03000000:
940 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
941 		    "Solaris NFSv4 ACLs not supported");
942 		return (ARCHIVE_WARN);
943 	default:
944 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
945 		    "Malformed Solaris ACL attribute (unsupported type %o)",
946 		    (int)type);
947 		return (ARCHIVE_WARN);
948 	}
949 	p++;
950 
951 	if (p >= acl + size) {
952 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
953 		    "Malformed Solaris ACL attribute (body overflow)");
954 		return(ARCHIVE_WARN);
955 	}
956 
957 	/* ACL text is null-terminated; find the end. */
958 	size -= (p - acl);
959 	acl = p;
960 
961 	while (*p != '\0' && p < acl + size)
962 		p++;
963 
964 	if (tar->sconv_acl == NULL) {
965 		tar->sconv_acl = archive_string_conversion_from_charset(
966 		    &(a->archive), "UTF-8", 1);
967 		if (tar->sconv_acl == NULL)
968 			return (ARCHIVE_FATAL);
969 	}
970 	archive_strncpy(&(tar->localname), acl, p - acl);
971 	err = archive_acl_parse_l(archive_entry_acl(entry),
972 	    tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl);
973 	if (err != ARCHIVE_OK) {
974 		if (errno == ENOMEM) {
975 			archive_set_error(&a->archive, ENOMEM,
976 			    "Can't allocate memory for ACL");
977 		} else
978 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
979 			    "Malformed Solaris ACL attribute (unparsable)");
980 	}
981 	return (err);
982 }
983 
984 /*
985  * Interpret 'K' long linkname header.
986  */
987 static int
988 header_longlink(struct archive_read *a, struct tar *tar,
989     struct archive_entry *entry, const void *h, size_t *unconsumed)
990 {
991 	int err;
992 
993 	err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
994 	if (err != ARCHIVE_OK)
995 		return (err);
996 	err = tar_read_header(a, tar, entry, unconsumed);
997 	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
998 		return (err);
999 	/* Set symlink if symlink already set, else hardlink. */
1000 	archive_entry_copy_link(entry, tar->longlink.s);
1001 	return (ARCHIVE_OK);
1002 }
1003 
1004 static int
1005 set_conversion_failed_error(struct archive_read *a,
1006     struct archive_string_conv *sconv, const char *name)
1007 {
1008 	if (errno == ENOMEM) {
1009 		archive_set_error(&a->archive, ENOMEM,
1010 		    "Can't allocate memory for %s", name);
1011 		return (ARCHIVE_FATAL);
1012 	}
1013 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1014 	    "%s can't be converted from %s to current locale.",
1015 	    name, archive_string_conversion_charset_name(sconv));
1016 	return (ARCHIVE_WARN);
1017 }
1018 
1019 /*
1020  * Interpret 'L' long filename header.
1021  */
1022 static int
1023 header_longname(struct archive_read *a, struct tar *tar,
1024     struct archive_entry *entry, const void *h, size_t *unconsumed)
1025 {
1026 	int err;
1027 
1028 	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
1029 	if (err != ARCHIVE_OK)
1030 		return (err);
1031 	/* Read and parse "real" header, then override name. */
1032 	err = tar_read_header(a, tar, entry, unconsumed);
1033 	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1034 		return (err);
1035 	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
1036 	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
1037 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1038 	return (err);
1039 }
1040 
1041 
/*
 * Interpret 'V' GNU tar volume header.
 *
 * Nothing from the volume header is used; the header that follows is
 * read into 'entry' and its status is returned.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h, size_t *unconsumed)
{
	int r;

	(void)h; /* UNUSED */

	/* Move straight on to the next header. */
	r = tar_read_header(a, tar, entry, unconsumed);
	return (r);
}
1054 
1055 /*
1056  * Read body of an archive entry into an archive_string object.
1057  */
1058 static int
1059 read_body_to_string(struct archive_read *a, struct tar *tar,
1060     struct archive_string *as, const void *h, size_t *unconsumed)
1061 {
1062 	int64_t size;
1063 	const struct archive_entry_header_ustar *header;
1064 	const void *src;
1065 
1066 	(void)tar; /* UNUSED */
1067 	header = (const struct archive_entry_header_ustar *)h;
1068 	size  = tar_atol(header->size, sizeof(header->size));
1069 	if ((size > 1048576) || (size < 0)) {
1070 		archive_set_error(&a->archive, EINVAL,
1071 		    "Special header too large");
1072 		return (ARCHIVE_FATAL);
1073 	}
1074 
1075 	/* Fail if we can't make our buffer big enough. */
1076 	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1077 		archive_set_error(&a->archive, ENOMEM,
1078 		    "No memory");
1079 		return (ARCHIVE_FATAL);
1080 	}
1081 
1082 	tar_flush_unconsumed(a, unconsumed);
1083 
1084 	/* Read the body into the string. */
1085 	*unconsumed = (size_t)((size + 511) & ~ 511);
1086 	src = __archive_read_ahead(a, *unconsumed, NULL);
1087 	if (src == NULL) {
1088 		*unconsumed = 0;
1089 		return (ARCHIVE_FATAL);
1090 	}
1091 	memcpy(as->s, src, (size_t)size);
1092 	as->s[size] = '\0';
1093 	as->length = (size_t)size;
1094 	return (ARCHIVE_OK);
1095 }
1096 
1097 /*
1098  * Parse out common header elements.
1099  *
1100  * This would be the same as header_old_tar, except that the
1101  * filename is handled slightly differently for old and POSIX
1102  * entries  (POSIX entries support a 'prefix').  This factoring
1103  * allows header_old_tar and header_ustar
1104  * to handle filenames differently, while still putting most of the
1105  * common parsing into one place.
1106  */
1107 static int
1108 header_common(struct archive_read *a, struct tar *tar,
1109     struct archive_entry *entry, const void *h)
1110 {
1111 	const struct archive_entry_header_ustar	*header;
1112 	char	tartype;
1113 	int     err = ARCHIVE_OK;
1114 
1115 	header = (const struct archive_entry_header_ustar *)h;
1116 	if (header->linkname[0])
1117 		archive_strncpy(&(tar->entry_linkpath),
1118 		    header->linkname, sizeof(header->linkname));
1119 	else
1120 		archive_string_empty(&(tar->entry_linkpath));
1121 
1122 	/* Parse out the numeric fields (all are octal) */
1123 	archive_entry_set_mode(entry,
1124 		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
1125 	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1126 	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1127 	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1128 	if (tar->entry_bytes_remaining < 0) {
1129 		tar->entry_bytes_remaining = 0;
1130 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1131 		    "Tar entry has negative size?");
1132 		err = ARCHIVE_WARN;
1133 	}
1134 	tar->realsize = tar->entry_bytes_remaining;
1135 	archive_entry_set_size(entry, tar->entry_bytes_remaining);
1136 	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1137 
1138 	/* Handle the tar type flag appropriately. */
1139 	tartype = header->typeflag[0];
1140 
1141 	switch (tartype) {
1142 	case '1': /* Hard link */
1143 		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
1144 		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1145 			err = set_conversion_failed_error(a, tar->sconv,
1146 			    "Linkname");
1147 			if (err == ARCHIVE_FATAL)
1148 				return (err);
1149 		}
1150 		/*
1151 		 * The following may seem odd, but: Technically, tar
1152 		 * does not store the file type for a "hard link"
1153 		 * entry, only the fact that it is a hard link.  So, I
1154 		 * leave the type zero normally.  But, pax interchange
1155 		 * format allows hard links to have data, which
1156 		 * implies that the underlying entry is a regular
1157 		 * file.
1158 		 */
1159 		if (archive_entry_size(entry) > 0)
1160 			archive_entry_set_filetype(entry, AE_IFREG);
1161 
1162 		/*
1163 		 * A tricky point: Traditionally, tar readers have
1164 		 * ignored the size field when reading hardlink
1165 		 * entries, and some writers put non-zero sizes even
1166 		 * though the body is empty.  POSIX blessed this
1167 		 * convention in the 1988 standard, but broke with
1168 		 * this tradition in 2001 by permitting hardlink
1169 		 * entries to store valid bodies in pax interchange
1170 		 * format, but not in ustar format.  Since there is no
1171 		 * hard and fast way to distinguish pax interchange
1172 		 * from earlier archives (the 'x' and 'g' entries are
1173 		 * optional, after all), we need a heuristic.
1174 		 */
1175 		if (archive_entry_size(entry) == 0) {
1176 			/* If the size is already zero, we're done. */
1177 		}  else if (a->archive.archive_format
1178 		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1179 			/* Definitely pax extended; must obey hardlink size. */
1180 		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1181 		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1182 		{
1183 			/* Old-style or GNU tar: we must ignore the size. */
1184 			archive_entry_set_size(entry, 0);
1185 			tar->entry_bytes_remaining = 0;
1186 		} else if (archive_read_format_tar_bid(a, 50) > 50) {
1187 			/*
1188 			 * We don't know if it's pax: If the bid
1189 			 * function sees a valid ustar header
1190 			 * immediately following, then let's ignore
1191 			 * the hardlink size.
1192 			 */
1193 			archive_entry_set_size(entry, 0);
1194 			tar->entry_bytes_remaining = 0;
1195 		}
1196 		/*
1197 		 * TODO: There are still two cases I'd like to handle:
1198 		 *   = a ustar non-pax archive with a hardlink entry at
1199 		 *     end-of-archive.  (Look for block of nulls following?)
1200 		 *   = a pax archive that has not seen any pax headers
1201 		 *     and has an entry which is a hardlink entry storing
1202 		 *     a body containing an uncompressed tar archive.
1203 		 * The first is worth addressing; I don't see any reliable
1204 		 * way to deal with the second possibility.
1205 		 */
1206 		break;
1207 	case '2': /* Symlink */
1208 		archive_entry_set_filetype(entry, AE_IFLNK);
1209 		archive_entry_set_size(entry, 0);
1210 		tar->entry_bytes_remaining = 0;
1211 		if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
1212 		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1213 			err = set_conversion_failed_error(a, tar->sconv,
1214 			    "Linkname");
1215 			if (err == ARCHIVE_FATAL)
1216 				return (err);
1217 		}
1218 		break;
1219 	case '3': /* Character device */
1220 		archive_entry_set_filetype(entry, AE_IFCHR);
1221 		archive_entry_set_size(entry, 0);
1222 		tar->entry_bytes_remaining = 0;
1223 		break;
1224 	case '4': /* Block device */
1225 		archive_entry_set_filetype(entry, AE_IFBLK);
1226 		archive_entry_set_size(entry, 0);
1227 		tar->entry_bytes_remaining = 0;
1228 		break;
1229 	case '5': /* Dir */
1230 		archive_entry_set_filetype(entry, AE_IFDIR);
1231 		archive_entry_set_size(entry, 0);
1232 		tar->entry_bytes_remaining = 0;
1233 		break;
1234 	case '6': /* FIFO device */
1235 		archive_entry_set_filetype(entry, AE_IFIFO);
1236 		archive_entry_set_size(entry, 0);
1237 		tar->entry_bytes_remaining = 0;
1238 		break;
1239 	case 'D': /* GNU incremental directory type */
1240 		/*
1241 		 * No special handling is actually required here.
1242 		 * It might be nice someday to preprocess the file list and
1243 		 * provide it to the client, though.
1244 		 */
1245 		archive_entry_set_filetype(entry, AE_IFDIR);
1246 		break;
1247 	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1248 		/*
1249 		 * As far as I can tell, this is just like a regular file
1250 		 * entry, except that the contents should be _appended_ to
1251 		 * the indicated file at the indicated offset.  This may
1252 		 * require some API work to fully support.
1253 		 */
1254 		break;
1255 	case 'N': /* Old GNU "long filename" entry. */
1256 		/* The body of this entry is a script for renaming
1257 		 * previously-extracted entries.  Ugh.  It will never
1258 		 * be supported by libarchive. */
1259 		archive_entry_set_filetype(entry, AE_IFREG);
1260 		break;
1261 	case 'S': /* GNU sparse files */
1262 		/*
1263 		 * Sparse files are really just regular files with
1264 		 * sparse information in the extended area.
1265 		 */
1266 		/* FALLTHROUGH */
1267 	default: /* Regular file  and non-standard types */
1268 		/*
1269 		 * Per POSIX: non-recognized types should always be
1270 		 * treated as regular files.
1271 		 */
1272 		archive_entry_set_filetype(entry, AE_IFREG);
1273 		break;
1274 	}
1275 	return (err);
1276 }
1277 
1278 /*
1279  * Parse out header elements for "old-style" tar archives.
1280  */
1281 static int
1282 header_old_tar(struct archive_read *a, struct tar *tar,
1283     struct archive_entry *entry, const void *h)
1284 {
1285 	const struct archive_entry_header_ustar	*header;
1286 	int err = ARCHIVE_OK, err2;
1287 
1288 	/* Copy filename over (to ensure null termination). */
1289 	header = (const struct archive_entry_header_ustar *)h;
1290 	if (archive_entry_copy_pathname_l(entry,
1291 	    header->name, sizeof(header->name), tar->sconv) != 0) {
1292 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1293 		if (err == ARCHIVE_FATAL)
1294 			return (err);
1295 	}
1296 
1297 	/* Grab rest of common fields */
1298 	err2 = header_common(a, tar, entry, h);
1299 	if (err > err2)
1300 		err = err2;
1301 
1302 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1303 	return (err);
1304 }
1305 
1306 /*
1307  * Read a Mac AppleDouble-encoded blob of file metadata,
1308  * if there is one.
1309  */
1310 static int
1311 read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
1312     struct archive_entry *entry, const void *h, size_t *unconsumed)
1313 {
1314 	int64_t size;
1315 	const void *data;
1316 	const char *p, *name;
1317 	const wchar_t *wp, *wname;
1318 
1319 	(void)h; /* UNUSED */
1320 
1321 	wname = wp = archive_entry_pathname_w(entry);
1322 	if (wp != NULL) {
1323 		/* Find the last path element. */
1324 		for (; *wp != L'\0'; ++wp) {
1325 			if (wp[0] == '/' && wp[1] != L'\0')
1326 				wname = wp + 1;
1327 		}
1328 		/*
1329 		 * If last path element starts with "._", then
1330 		 * this is a Mac extension.
1331 		 */
1332 		if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
1333 			return ARCHIVE_OK;
1334 	} else {
1335 		/* Find the last path element. */
1336 		name = p = archive_entry_pathname(entry);
1337 		if (p == NULL)
1338 			return (ARCHIVE_FAILED);
1339 		for (; *p != '\0'; ++p) {
1340 			if (p[0] == '/' && p[1] != '\0')
1341 				name = p + 1;
1342 		}
1343 		/*
1344 		 * If last path element starts with "._", then
1345 		 * this is a Mac extension.
1346 		 */
1347 		if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
1348 			return ARCHIVE_OK;
1349 	}
1350 
1351  	/* Read the body as a Mac OS metadata blob. */
1352 	size = archive_entry_size(entry);
1353 
1354 	/*
1355 	 * TODO: Look beyond the body here to peek at the next header.
1356 	 * If it's a regular header (not an extension header)
1357 	 * that has the wrong name, just return the current
1358 	 * entry as-is, without consuming the body here.
1359 	 * That would reduce the risk of us mis-identifying
1360 	 * an ordinary file that just happened to have
1361 	 * a name starting with "._".
1362 	 *
1363 	 * Q: Is the above idea really possible?  Even
1364 	 * when there are GNU or pax extension entries?
1365 	 */
1366 	data = __archive_read_ahead(a, (size_t)size, NULL);
1367 	if (data == NULL) {
1368 		*unconsumed = 0;
1369 		return (ARCHIVE_FATAL);
1370 	}
1371 	archive_entry_copy_mac_metadata(entry, data, (size_t)size);
1372 	*unconsumed = (size_t)((size + 511) & ~ 511);
1373 	tar_flush_unconsumed(a, unconsumed);
1374 	return (tar_read_header(a, tar, entry, unconsumed));
1375 }
1376 
1377 /*
1378  * Parse a file header for a pax extended archive entry.
1379  */
1380 static int
1381 header_pax_global(struct archive_read *a, struct tar *tar,
1382     struct archive_entry *entry, const void *h, size_t *unconsumed)
1383 {
1384 	int err;
1385 
1386 	err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
1387 	if (err != ARCHIVE_OK)
1388 		return (err);
1389 	err = tar_read_header(a, tar, entry, unconsumed);
1390 	return (err);
1391 }
1392 
1393 static int
1394 header_pax_extensions(struct archive_read *a, struct tar *tar,
1395     struct archive_entry *entry, const void *h, size_t *unconsumed)
1396 {
1397 	int err, err2;
1398 
1399 	err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
1400 	if (err != ARCHIVE_OK)
1401 		return (err);
1402 
1403 	/* Parse the next header. */
1404 	err = tar_read_header(a, tar, entry, unconsumed);
1405 	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1406 		return (err);
1407 
1408 	/*
1409 	 * TODO: Parse global/default options into 'entry' struct here
1410 	 * before handling file-specific options.
1411 	 *
1412 	 * This design (parse standard header, then overwrite with pax
1413 	 * extended attribute data) usually works well, but isn't ideal;
1414 	 * it would be better to parse the pax extended attributes first
1415 	 * and then skip any fields in the standard header that were
1416 	 * defined in the pax header.
1417 	 */
1418 	err2 = pax_header(a, tar, entry, tar->pax_header.s);
1419 	err =  err_combine(err, err2);
1420 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1421 	return (err);
1422 }
1423 
1424 
1425 /*
1426  * Parse a file header for a Posix "ustar" archive entry.  This also
1427  * handles "pax" or "extended ustar" entries.
1428  */
1429 static int
1430 header_ustar(struct archive_read *a, struct tar *tar,
1431     struct archive_entry *entry, const void *h)
1432 {
1433 	const struct archive_entry_header_ustar	*header;
1434 	struct archive_string *as;
1435 	int err = ARCHIVE_OK, r;
1436 
1437 	header = (const struct archive_entry_header_ustar *)h;
1438 
1439 	/* Copy name into an internal buffer to ensure null-termination. */
1440 	as = &(tar->entry_pathname);
1441 	if (header->prefix[0]) {
1442 		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1443 		if (as->s[archive_strlen(as) - 1] != '/')
1444 			archive_strappend_char(as, '/');
1445 		archive_strncat(as, header->name, sizeof(header->name));
1446 	} else {
1447 		archive_strncpy(as, header->name, sizeof(header->name));
1448 	}
1449 	if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
1450 	    tar->sconv) != 0) {
1451 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1452 		if (err == ARCHIVE_FATAL)
1453 			return (err);
1454 	}
1455 
1456 	/* Handle rest of common fields. */
1457 	r = header_common(a, tar, entry, h);
1458 	if (r == ARCHIVE_FATAL)
1459 		return (r);
1460 	if (r < err)
1461 		err = r;
1462 
1463 	/* Handle POSIX ustar fields. */
1464 	if (archive_entry_copy_uname_l(entry,
1465 	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
1466 		err = set_conversion_failed_error(a, tar->sconv, "Uname");
1467 		if (err == ARCHIVE_FATAL)
1468 			return (err);
1469 	}
1470 
1471 	if (archive_entry_copy_gname_l(entry,
1472 	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
1473 		err = set_conversion_failed_error(a, tar->sconv, "Gname");
1474 		if (err == ARCHIVE_FATAL)
1475 			return (err);
1476 	}
1477 
1478 	/* Parse out device numbers only for char and block specials. */
1479 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1480 		archive_entry_set_rdevmajor(entry, (dev_t)
1481 		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1482 		archive_entry_set_rdevminor(entry, (dev_t)
1483 		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1484 	}
1485 
1486 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1487 
1488 	return (err);
1489 }
1490 
1491 
1492 /*
1493  * Parse the pax extended attributes record.
1494  *
1495  * Returns non-zero if there's an error in the data.
1496  */
1497 static int
1498 pax_header(struct archive_read *a, struct tar *tar,
1499     struct archive_entry *entry, char *attr)
1500 {
1501 	size_t attr_length, l, line_length;
1502 	char *p;
1503 	char *key, *value;
1504 	struct archive_string *as;
1505 	struct archive_string_conv *sconv;
1506 	int err, err2;
1507 
1508 	attr_length = strlen(attr);
1509 	tar->pax_hdrcharset_binary = 0;
1510 	archive_string_empty(&(tar->entry_gname));
1511 	archive_string_empty(&(tar->entry_linkpath));
1512 	archive_string_empty(&(tar->entry_pathname));
1513 	archive_string_empty(&(tar->entry_pathname_override));
1514 	archive_string_empty(&(tar->entry_uname));
1515 	err = ARCHIVE_OK;
1516 	while (attr_length > 0) {
1517 		/* Parse decimal length field at start of line. */
1518 		line_length = 0;
1519 		l = attr_length;
1520 		p = attr; /* Record start of line. */
1521 		while (l>0) {
1522 			if (*p == ' ') {
1523 				p++;
1524 				l--;
1525 				break;
1526 			}
1527 			if (*p < '0' || *p > '9') {
1528 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1529 				    "Ignoring malformed pax extended attributes");
1530 				return (ARCHIVE_WARN);
1531 			}
1532 			line_length *= 10;
1533 			line_length += *p - '0';
1534 			if (line_length > 999999) {
1535 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1536 				    "Rejecting pax extended attribute > 1MB");
1537 				return (ARCHIVE_WARN);
1538 			}
1539 			p++;
1540 			l--;
1541 		}
1542 
1543 		/*
1544 		 * Parsed length must be no bigger than available data,
1545 		 * at least 1, and the last character of the line must
1546 		 * be '\n'.
1547 		 */
1548 		if (line_length > attr_length
1549 		    || line_length < 1
1550 		    || attr[line_length - 1] != '\n')
1551 		{
1552 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1553 			    "Ignoring malformed pax extended attribute");
1554 			return (ARCHIVE_WARN);
1555 		}
1556 
1557 		/* Null-terminate the line. */
1558 		attr[line_length - 1] = '\0';
1559 
1560 		/* Find end of key and null terminate it. */
1561 		key = p;
1562 		if (key[0] == '=')
1563 			return (-1);
1564 		while (*p && *p != '=')
1565 			++p;
1566 		if (*p == '\0') {
1567 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1568 			    "Invalid pax extended attributes");
1569 			return (ARCHIVE_WARN);
1570 		}
1571 		*p = '\0';
1572 
1573 		/* Identify null-terminated 'value' portion. */
1574 		value = p + 1;
1575 
1576 		/* Identify this attribute and set it in the entry. */
1577 		err2 = pax_attribute(a, tar, entry, key, value);
1578 		if (err2 == ARCHIVE_FATAL)
1579 			return (err2);
1580 		err = err_combine(err, err2);
1581 
1582 		/* Skip to next line */
1583 		attr += line_length;
1584 		attr_length -= line_length;
1585 	}
1586 
1587 	/*
1588 	 * PAX format uses UTF-8 as default charset for its metadata
1589 	 * unless hdrcharset=BINARY is present in its header.
1590 	 * We apply the charset specified by the hdrcharset option only
1591 	 * when the hdrcharset attribute(in PAX header) is BINARY because
1592 	 * we respect the charset described in PAX header and BINARY also
1593 	 * means that metadata(filename,uname and gname) character-set
1594 	 * is unknown.
1595 	 */
1596 	if (tar->pax_hdrcharset_binary)
1597 		sconv = tar->opt_sconv;
1598 	else {
1599 		sconv = archive_string_conversion_from_charset(
1600 		    &(a->archive), "UTF-8", 1);
1601 		if (sconv == NULL)
1602 			return (ARCHIVE_FATAL);
1603 		if (tar->compat_2x)
1604 			archive_string_conversion_set_opt(sconv,
1605 			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1606 	}
1607 
1608 	if (archive_strlen(&(tar->entry_gname)) > 0) {
1609 		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
1610 		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
1611 			err = set_conversion_failed_error(a, sconv, "Gname");
1612 			if (err == ARCHIVE_FATAL)
1613 				return (err);
1614 			/* Use a converted an original name. */
1615 			archive_entry_copy_gname(entry, tar->entry_gname.s);
1616 		}
1617 	}
1618 	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1619 		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
1620 		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
1621 			err = set_conversion_failed_error(a, sconv, "Linkname");
1622 			if (err == ARCHIVE_FATAL)
1623 				return (err);
1624 			/* Use a converted an original name. */
1625 			archive_entry_copy_link(entry, tar->entry_linkpath.s);
1626 		}
1627 	}
1628 	/*
1629 	 * Some extensions (such as the GNU sparse file extensions)
1630 	 * deliberately store a synthetic name under the regular 'path'
1631 	 * attribute and the real file name under a different attribute.
1632 	 * Since we're supposed to not care about the order, we
1633 	 * have no choice but to store all of the various filenames
1634 	 * we find and figure it all out afterwards.  This is the
1635 	 * figuring out part.
1636 	 */
1637 	as = NULL;
1638 	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1639 		as = &(tar->entry_pathname_override);
1640 	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1641 		as = &(tar->entry_pathname);
1642 	if (as != NULL) {
1643 		if (archive_entry_copy_pathname_l(entry, as->s,
1644 		    archive_strlen(as), sconv) != 0) {
1645 			err = set_conversion_failed_error(a, sconv, "Pathname");
1646 			if (err == ARCHIVE_FATAL)
1647 				return (err);
1648 			/* Use a converted an original name. */
1649 			archive_entry_copy_pathname(entry, as->s);
1650 		}
1651 	}
1652 	if (archive_strlen(&(tar->entry_uname)) > 0) {
1653 		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1654 		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1655 			err = set_conversion_failed_error(a, sconv, "Uname");
1656 			if (err == ARCHIVE_FATAL)
1657 				return (err);
1658 			/* Use a converted an original name. */
1659 			archive_entry_copy_uname(entry, tar->entry_uname.s);
1660 		}
1661 	}
1662 	return (err);
1663 }
1664 
/*
 * Add one "LIBARCHIVE.xattr.<name>" pax attribute to 'entry' as an
 * extended attribute.
 *
 * The part of 'name' after the prefix is URL-decoded and 'value' is
 * base64-decoded before being handed to archive_entry_xattr_add_entry().
 *
 * Returns 0 on success, 3 if 'name' lacks the prefix or has nothing
 * after it, 2 if the name cannot be decoded, and 1 if the value cannot
 * be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	char *name, char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *xattr_name;
	void *xattr_value;
	size_t xattr_size;
	int status;

	/* Require the prefix plus at least one more character. */
	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 3;

	/* URL-decode name */
	xattr_name = url_decode(name + prefix_len);
	if (xattr_name == NULL)
		return 2;

	/* Base-64 decode value */
	xattr_value = base64_decode(value, strlen(value), &xattr_size);
	if (xattr_value != NULL) {
		archive_entry_xattr_add_entry(entry, xattr_name,
			xattr_value, xattr_size);
		free(xattr_value);
		status = 0;
	} else {
		status = 1;
	}

	/* Both decoded buffers are released before returning. */
	free(xattr_name);
	return status;
}
1697 
1698 /*
1699  * Parse a single key=value attribute.  key/value pointers are
1700  * assumed to point into reasonably long-lived storage.
1701  *
1702  * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1703  * extensions should always have keywords of the form "VENDOR.attribute"
1704  * In particular, it's quite feasible to support many different
1705  * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1706  * unique to this library.
1707  *
1708  * Investigate other vendor-specific extensions and see if
1709  * any of them look useful.
1710  */
1711 static int
1712 pax_attribute(struct archive_read *a, struct tar *tar,
1713     struct archive_entry *entry, char *key, char *value)
1714 {
1715 	int64_t s;
1716 	long n;
1717 	int err = ARCHIVE_OK, r;
1718 
1719 	if (value == NULL)
1720 		value = "";	/* Disable compiler warning; do not pass
1721 				 * NULL pointer to strlen().  */
1722 	switch (key[0]) {
1723 	case 'G':
1724 		/* GNU "0.0" sparse pax format. */
1725 		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1726 			tar->sparse_offset = -1;
1727 			tar->sparse_numbytes = -1;
1728 			tar->sparse_gnu_major = 0;
1729 			tar->sparse_gnu_minor = 0;
1730 		}
1731 		if (strcmp(key, "GNU.sparse.offset") == 0) {
1732 			tar->sparse_offset = tar_atol10(value, strlen(value));
1733 			if (tar->sparse_numbytes != -1) {
1734 				if (gnu_add_sparse_entry(a, tar,
1735 				    tar->sparse_offset, tar->sparse_numbytes)
1736 				    != ARCHIVE_OK)
1737 					return (ARCHIVE_FATAL);
1738 				tar->sparse_offset = -1;
1739 				tar->sparse_numbytes = -1;
1740 			}
1741 		}
1742 		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1743 			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1744 			if (tar->sparse_numbytes != -1) {
1745 				if (gnu_add_sparse_entry(a, tar,
1746 				    tar->sparse_offset, tar->sparse_numbytes)
1747 				    != ARCHIVE_OK)
1748 					return (ARCHIVE_FATAL);
1749 				tar->sparse_offset = -1;
1750 				tar->sparse_numbytes = -1;
1751 			}
1752 		}
1753 		if (strcmp(key, "GNU.sparse.size") == 0) {
1754 			tar->realsize = tar_atol10(value, strlen(value));
1755 			archive_entry_set_size(entry, tar->realsize);
1756 		}
1757 
1758 		/* GNU "0.1" sparse pax format. */
1759 		if (strcmp(key, "GNU.sparse.map") == 0) {
1760 			tar->sparse_gnu_major = 0;
1761 			tar->sparse_gnu_minor = 1;
1762 			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
1763 				return (ARCHIVE_WARN);
1764 		}
1765 
1766 		/* GNU "1.0" sparse pax format */
1767 		if (strcmp(key, "GNU.sparse.major") == 0) {
1768 			tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
1769 			tar->sparse_gnu_pending = 1;
1770 		}
1771 		if (strcmp(key, "GNU.sparse.minor") == 0) {
1772 			tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
1773 			tar->sparse_gnu_pending = 1;
1774 		}
1775 		if (strcmp(key, "GNU.sparse.name") == 0) {
1776 			/*
1777 			 * The real filename; when storing sparse
1778 			 * files, GNU tar puts a synthesized name into
1779 			 * the regular 'path' attribute in an attempt
1780 			 * to limit confusion. ;-)
1781 			 */
1782 			archive_strcpy(&(tar->entry_pathname_override), value);
1783 		}
1784 		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1785 			tar->realsize = tar_atol10(value, strlen(value));
1786 			archive_entry_set_size(entry, tar->realsize);
1787 		}
1788 		break;
1789 	case 'L':
1790 		/* Our extensions */
1791 /* TODO: Handle arbitrary extended attributes... */
1792 /*
1793 		if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
1794 			archive_entry_set_xxxxxx(entry, value);
1795 */
1796 		if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
1797 			pax_time(value, &s, &n);
1798 			archive_entry_set_birthtime(entry, s, n);
1799 		}
1800 		if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
1801 			pax_attribute_xattr(entry, key, value);
1802 		break;
1803 	case 'S':
1804 		/* We support some keys used by the "star" archiver */
1805 		if (strcmp(key, "SCHILY.acl.access") == 0) {
1806 			if (tar->sconv_acl == NULL) {
1807 				tar->sconv_acl =
1808 				    archive_string_conversion_from_charset(
1809 					&(a->archive), "UTF-8", 1);
1810 				if (tar->sconv_acl == NULL)
1811 					return (ARCHIVE_FATAL);
1812 			}
1813 
1814 			r = archive_acl_parse_l(archive_entry_acl(entry),
1815 			    value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
1816 			    tar->sconv_acl);
1817 			if (r != ARCHIVE_OK) {
1818 				err = r;
1819 				if (err == ARCHIVE_FATAL) {
1820 					archive_set_error(&a->archive, ENOMEM,
1821 					    "Can't allocate memory for "
1822 					    "SCHILY.acl.access");
1823 					return (err);
1824 				}
1825 				archive_set_error(&a->archive,
1826 				    ARCHIVE_ERRNO_MISC,
1827 				    "Parse error: SCHILY.acl.access");
1828 			}
1829 		} else if (strcmp(key, "SCHILY.acl.default") == 0) {
1830 			if (tar->sconv_acl == NULL) {
1831 				tar->sconv_acl =
1832 				    archive_string_conversion_from_charset(
1833 					&(a->archive), "UTF-8", 1);
1834 				if (tar->sconv_acl == NULL)
1835 					return (ARCHIVE_FATAL);
1836 			}
1837 
1838 			r = archive_acl_parse_l(archive_entry_acl(entry),
1839 			    value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT,
1840 			    tar->sconv_acl);
1841 			if (r != ARCHIVE_OK) {
1842 				err = r;
1843 				if (err == ARCHIVE_FATAL) {
1844 					archive_set_error(&a->archive, ENOMEM,
1845 					    "Can't allocate memory for "
1846 					    "SCHILY.acl.default");
1847 					return (err);
1848 				}
1849 				archive_set_error(&a->archive,
1850 				    ARCHIVE_ERRNO_MISC,
1851 				    "Parse error: SCHILY.acl.default");
1852 			}
1853 		} else if (strcmp(key, "SCHILY.devmajor") == 0) {
1854 			archive_entry_set_rdevmajor(entry,
1855 			    (dev_t)tar_atol10(value, strlen(value)));
1856 		} else if (strcmp(key, "SCHILY.devminor") == 0) {
1857 			archive_entry_set_rdevminor(entry,
1858 			    (dev_t)tar_atol10(value, strlen(value)));
1859 		} else if (strcmp(key, "SCHILY.fflags") == 0) {
1860 			archive_entry_copy_fflags_text(entry, value);
1861 		} else if (strcmp(key, "SCHILY.dev") == 0) {
1862 			archive_entry_set_dev(entry,
1863 			    (dev_t)tar_atol10(value, strlen(value)));
1864 		} else if (strcmp(key, "SCHILY.ino") == 0) {
1865 			archive_entry_set_ino(entry,
1866 			    tar_atol10(value, strlen(value)));
1867 		} else if (strcmp(key, "SCHILY.nlink") == 0) {
1868 			archive_entry_set_nlink(entry, (unsigned)
1869 			    tar_atol10(value, strlen(value)));
1870 		} else if (strcmp(key, "SCHILY.realsize") == 0) {
1871 			tar->realsize = tar_atol10(value, strlen(value));
1872 			archive_entry_set_size(entry, tar->realsize);
1873 		} else if (strcmp(key, "SUN.holesdata") == 0) {
1874 			/* A Solaris extension for sparse. */
1875 			r = solaris_sparse_parse(a, tar, entry, value);
1876 			if (r < err) {
1877 				if (r == ARCHIVE_FATAL)
1878 					return (r);
1879 				err = r;
1880 				archive_set_error(&a->archive,
1881 				    ARCHIVE_ERRNO_MISC,
1882 				    "Parse error: SUN.holesdata");
1883 			}
1884 		}
1885 		break;
1886 	case 'a':
1887 		if (strcmp(key, "atime") == 0) {
1888 			pax_time(value, &s, &n);
1889 			archive_entry_set_atime(entry, s, n);
1890 		}
1891 		break;
1892 	case 'c':
1893 		if (strcmp(key, "ctime") == 0) {
1894 			pax_time(value, &s, &n);
1895 			archive_entry_set_ctime(entry, s, n);
1896 		} else if (strcmp(key, "charset") == 0) {
1897 			/* TODO: Publish charset information in entry. */
1898 		} else if (strcmp(key, "comment") == 0) {
1899 			/* TODO: Publish comment in entry. */
1900 		}
1901 		break;
1902 	case 'g':
1903 		if (strcmp(key, "gid") == 0) {
1904 			archive_entry_set_gid(entry,
1905 			    tar_atol10(value, strlen(value)));
1906 		} else if (strcmp(key, "gname") == 0) {
1907 			archive_strcpy(&(tar->entry_gname), value);
1908 		}
1909 		break;
1910 	case 'h':
1911 		if (strcmp(key, "hdrcharset") == 0) {
1912 			if (strcmp(value, "BINARY") == 0)
1913 				/* Binary  mode. */
1914 				tar->pax_hdrcharset_binary = 1;
1915 			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1916 				tar->pax_hdrcharset_binary = 0;
1917 		}
1918 		break;
1919 	case 'l':
1920 		/* pax interchange doesn't distinguish hardlink vs. symlink. */
1921 		if (strcmp(key, "linkpath") == 0) {
1922 			archive_strcpy(&(tar->entry_linkpath), value);
1923 		}
1924 		break;
1925 	case 'm':
1926 		if (strcmp(key, "mtime") == 0) {
1927 			pax_time(value, &s, &n);
1928 			archive_entry_set_mtime(entry, s, n);
1929 		}
1930 		break;
1931 	case 'p':
1932 		if (strcmp(key, "path") == 0) {
1933 			archive_strcpy(&(tar->entry_pathname), value);
1934 		}
1935 		break;
1936 	case 'r':
1937 		/* POSIX has reserved 'realtime.*' */
1938 		break;
1939 	case 's':
1940 		/* POSIX has reserved 'security.*' */
1941 		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
1942 		if (strcmp(key, "size") == 0) {
1943 			/* "size" is the size of the data in the entry. */
1944 			tar->entry_bytes_remaining
1945 			    = tar_atol10(value, strlen(value));
1946 			/*
1947 			 * But, "size" is not necessarily the size of
1948 			 * the file on disk; if this is a sparse file,
1949 			 * the disk size may have already been set from
1950 			 * GNU.sparse.realsize or GNU.sparse.size or
1951 			 * an old GNU header field or SCHILY.realsize
1952 			 * or ....
1953 			 */
1954 			if (tar->realsize < 0) {
1955 				archive_entry_set_size(entry,
1956 				    tar->entry_bytes_remaining);
1957 				tar->realsize
1958 				    = tar->entry_bytes_remaining;
1959 			}
1960 		}
1961 		break;
1962 	case 'u':
1963 		if (strcmp(key, "uid") == 0) {
1964 			archive_entry_set_uid(entry,
1965 			    tar_atol10(value, strlen(value)));
1966 		} else if (strcmp(key, "uname") == 0) {
1967 			archive_strcpy(&(tar->entry_uname), value);
1968 		}
1969 		break;
1970 	}
1971 	return (err);
1972 }
1973 
1974 
1975 
/*
 * Parse a pax time value: an optional leading '-', a run of decimal
 * digits giving whole seconds and, optionally, a '.' followed by
 * fractional digits.
 *
 * The whole seconds (with the sign applied) are stored in *ps and the
 * fraction, scaled to nanoseconds, in *pn; at most nine fractional
 * digits are used.  If the seconds do not fit in an int64_t they are
 * clamped to INT64_MAX and parsing stops there, so a fraction that
 * follows an out-of-range value is not read and *pn is 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_before_digit = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	unsigned long scale;
	int negative = 0;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Whole seconds. */
	for (; *p >= '0' && *p <= '9'; p++) {
		int d = *p - '0';

		if (seconds > max_before_digit ||
		    (seconds == max_before_digit && d > max_last_digit)) {
			/* Clamp; p is left on the offending digit. */
			seconds = INT64_MAX;
			break;
		}
		seconds = seconds * 10 + d;
	}

	*ps = negative ? -seconds : seconds;
	*pn = 0;

	/* Nothing more to do unless a fraction follows. */
	if (*p != '.')
		return;

	/* Each successive digit is worth a tenth of the previous one. */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++p;
		if (*p < '0' || *p > '9')
			break;
		*pn += (*p - '0') * scale;
	}
}
2025 
2026 /*
2027  * Parse GNU tar header
2028  */
2029 static int
2030 header_gnutar(struct archive_read *a, struct tar *tar,
2031     struct archive_entry *entry, const void *h, size_t *unconsumed)
2032 {
2033 	const struct archive_entry_header_gnutar *header;
2034 	int64_t t;
2035 	int err = ARCHIVE_OK;
2036 
2037 	/*
2038 	 * GNU header is like POSIX ustar, except 'prefix' is
2039 	 * replaced with some other fields. This also means the
2040 	 * filename is stored as in old-style archives.
2041 	 */
2042 
2043 	/* Grab fields common to all tar variants. */
2044 	err = header_common(a, tar, entry, h);
2045 	if (err == ARCHIVE_FATAL)
2046 		return (err);
2047 
2048 	/* Copy filename over (to ensure null termination). */
2049 	header = (const struct archive_entry_header_gnutar *)h;
2050 	if (archive_entry_copy_pathname_l(entry,
2051 	    header->name, sizeof(header->name), tar->sconv) != 0) {
2052 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2053 		if (err == ARCHIVE_FATAL)
2054 			return (err);
2055 	}
2056 
2057 	/* Fields common to ustar and GNU */
2058 	/* XXX Can the following be factored out since it's common
2059 	 * to ustar and gnu tar?  Is it okay to move it down into
2060 	 * header_common, perhaps?  */
2061 	if (archive_entry_copy_uname_l(entry,
2062 	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
2063 		err = set_conversion_failed_error(a, tar->sconv, "Uname");
2064 		if (err == ARCHIVE_FATAL)
2065 			return (err);
2066 	}
2067 
2068 	if (archive_entry_copy_gname_l(entry,
2069 	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
2070 		err = set_conversion_failed_error(a, tar->sconv, "Gname");
2071 		if (err == ARCHIVE_FATAL)
2072 			return (err);
2073 	}
2074 
2075 	/* Parse out device numbers only for char and block specials */
2076 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2077 		archive_entry_set_rdevmajor(entry, (dev_t)
2078 		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2079 		archive_entry_set_rdevminor(entry, (dev_t)
2080 		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2081 	} else
2082 		archive_entry_set_rdev(entry, 0);
2083 
2084 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2085 
2086 	/* Grab GNU-specific fields. */
2087 	t = tar_atol(header->atime, sizeof(header->atime));
2088 	if (t > 0)
2089 		archive_entry_set_atime(entry, t, 0);
2090 	t = tar_atol(header->ctime, sizeof(header->ctime));
2091 	if (t > 0)
2092 		archive_entry_set_ctime(entry, t, 0);
2093 
2094 	if (header->realsize[0] != 0) {
2095 		tar->realsize
2096 		    = tar_atol(header->realsize, sizeof(header->realsize));
2097 		archive_entry_set_size(entry, tar->realsize);
2098 	}
2099 
2100 	if (header->sparse[0].offset[0] != 0) {
2101 		if (gnu_sparse_old_read(a, tar, header, unconsumed)
2102 		    != ARCHIVE_OK)
2103 			return (ARCHIVE_FATAL);
2104 	} else {
2105 		if (header->isextended[0] != 0) {
2106 			/* XXX WTF? XXX */
2107 		}
2108 	}
2109 
2110 	return (err);
2111 }
2112 
2113 static int
2114 gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2115     int64_t offset, int64_t remaining)
2116 {
2117 	struct sparse_block *p;
2118 
2119 	p = (struct sparse_block *)malloc(sizeof(*p));
2120 	if (p == NULL) {
2121 		archive_set_error(&a->archive, ENOMEM, "Out of memory");
2122 		return (ARCHIVE_FATAL);
2123 	}
2124 	memset(p, 0, sizeof(*p));
2125 	if (tar->sparse_last != NULL)
2126 		tar->sparse_last->next = p;
2127 	else
2128 		tar->sparse_list = p;
2129 	tar->sparse_last = p;
2130 	if (remaining < 0 || offset < 0) {
2131 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
2132 		return (ARCHIVE_FATAL);
2133 	}
2134 	p->offset = offset;
2135 	p->remaining = remaining;
2136 	return (ARCHIVE_OK);
2137 }
2138 
2139 static void
2140 gnu_clear_sparse_list(struct tar *tar)
2141 {
2142 	struct sparse_block *p;
2143 
2144 	while (tar->sparse_list != NULL) {
2145 		p = tar->sparse_list;
2146 		tar->sparse_list = p->next;
2147 		free(p);
2148 	}
2149 	tar->sparse_last = NULL;
2150 }
2151 
2152 /*
2153  * GNU tar old-format sparse data.
2154  *
2155  * GNU old-format sparse data is stored in a fixed-field
2156  * format.  Offset/size values are 11-byte octal fields (same
2157  * format as 'size' field in ustart header).  These are
2158  * stored in the header, allocating subsequent header blocks
2159  * as needed.  Extending the header in this way is a pretty
2160  * severe POSIX violation; this design has earned GNU tar a
2161  * lot of criticism.
2162  */
2163 
2164 static int
2165 gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2166     const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2167 {
2168 	ssize_t bytes_read;
2169 	const void *data;
2170 	struct extended {
2171 		struct gnu_sparse sparse[21];
2172 		char	isextended[1];
2173 		char	padding[7];
2174 	};
2175 	const struct extended *ext;
2176 
2177 	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2178 		return (ARCHIVE_FATAL);
2179 	if (header->isextended[0] == 0)
2180 		return (ARCHIVE_OK);
2181 
2182 	do {
2183 		tar_flush_unconsumed(a, unconsumed);
2184 		data = __archive_read_ahead(a, 512, &bytes_read);
2185 		if (bytes_read < 0)
2186 			return (ARCHIVE_FATAL);
2187 		if (bytes_read < 512) {
2188 			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2189 			    "Truncated tar archive "
2190 			    "detected while reading sparse file data");
2191 			return (ARCHIVE_FATAL);
2192 		}
2193 		*unconsumed = 512;
2194 		ext = (const struct extended *)data;
2195 		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
2196 			return (ARCHIVE_FATAL);
2197 	} while (ext->isextended[0] != 0);
2198 	if (tar->sparse_list != NULL)
2199 		tar->entry_offset = tar->sparse_list->offset;
2200 	return (ARCHIVE_OK);
2201 }
2202 
2203 static int
2204 gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
2205     const struct gnu_sparse *sparse, int length)
2206 {
2207 	while (length > 0 && sparse->offset[0] != 0) {
2208 		if (gnu_add_sparse_entry(a, tar,
2209 		    tar_atol(sparse->offset, sizeof(sparse->offset)),
2210 		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
2211 		    != ARCHIVE_OK)
2212 			return (ARCHIVE_FATAL);
2213 		sparse++;
2214 		length--;
2215 	}
2216 	return (ARCHIVE_OK);
2217 }
2218 
2219 /*
2220  * GNU tar sparse format 0.0
2221  *
2222  * Beginning with GNU tar 1.15, sparse files are stored using
2223  * information in the pax extended header.  The GNU tar maintainers
2224  * have gone through a number of variations in the process of working
2225  * out this scheme; fortunately, they're all numbered.
2226  *
2227  * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
2228  * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
2229  * store offset/size for each block.  The repeated instances of these
2230  * latter fields violate the pax specification (which frowns on
2231  * duplicate keys), so this format was quickly replaced.
2232  */
2233 
2234 /*
2235  * GNU tar sparse format 0.1
2236  *
2237  * This version replaced the offset/numbytes attributes with
2238  * a single "map" attribute that stored a list of integers.  This
2239  * format had two problems: First, the "map" attribute could be very
2240  * long, which caused problems for some implementations.  More
2241  * importantly, the sparse data was lost when extracted by archivers
2242  * that didn't recognize this extension.
2243  */
2244 
2245 static int
2246 gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
2247 {
2248 	const char *e;
2249 	int64_t offset = -1, size = -1;
2250 
2251 	for (;;) {
2252 		e = p;
2253 		while (*e != '\0' && *e != ',') {
2254 			if (*e < '0' || *e > '9')
2255 				return (ARCHIVE_WARN);
2256 			e++;
2257 		}
2258 		if (offset < 0) {
2259 			offset = tar_atol10(p, e - p);
2260 			if (offset < 0)
2261 				return (ARCHIVE_WARN);
2262 		} else {
2263 			size = tar_atol10(p, e - p);
2264 			if (size < 0)
2265 				return (ARCHIVE_WARN);
2266 			if (gnu_add_sparse_entry(a, tar, offset, size)
2267 			    != ARCHIVE_OK)
2268 				return (ARCHIVE_FATAL);
2269 			offset = -1;
2270 		}
2271 		if (*e == '\0')
2272 			return (ARCHIVE_OK);
2273 		p = e + 1;
2274 	}
2275 }
2276 
2277 /*
2278  * GNU tar sparse format 1.0
2279  *
2280  * The idea: The offset/size data is stored as a series of base-10
2281  * ASCII numbers prepended to the file data, so that dearchivers that
2282  * don't support this format will extract the block map along with the
2283  * data and a separate post-process can restore the sparseness.
2284  *
2285  * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
2286  * padding to the body of the file when using this format.  GNU tar
2287  * 1.17 corrected this bug without bumping the version number, so
2288  * it's not possible to support both variants.  This code supports
2289  * the later variant at the expense of not supporting the former.
2290  *
2291  * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
2292  * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
2293  */
2294 
2295 /*
2296  * Read the next line from the input, and parse it as a decimal
2297  * integer followed by '\n'.  Returns positive integer value or
2298  * negative on error.
2299  */
2300 static int64_t
2301 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
2302     int64_t *remaining, size_t *unconsumed)
2303 {
2304 	int64_t l, limit, last_digit_limit;
2305 	const char *p;
2306 	ssize_t bytes_read;
2307 	int base, digit;
2308 
2309 	base = 10;
2310 	limit = INT64_MAX / base;
2311 	last_digit_limit = INT64_MAX % base;
2312 
2313 	/*
2314 	 * Skip any lines starting with '#'; GNU tar specs
2315 	 * don't require this, but they should.
2316 	 */
2317 	do {
2318 		bytes_read = readline(a, tar, &p,
2319 			(ssize_t)tar_min(*remaining, 100), unconsumed);
2320 		if (bytes_read <= 0)
2321 			return (ARCHIVE_FATAL);
2322 		*remaining -= bytes_read;
2323 	} while (p[0] == '#');
2324 
2325 	l = 0;
2326 	while (bytes_read > 0) {
2327 		if (*p == '\n')
2328 			return (l);
2329 		if (*p < '0' || *p >= '0' + base)
2330 			return (ARCHIVE_WARN);
2331 		digit = *p - '0';
2332 		if (l > limit || (l == limit && digit > last_digit_limit))
2333 			l = INT64_MAX; /* Truncate on overflow. */
2334 		else
2335 			l = (l * base) + digit;
2336 		p++;
2337 		bytes_read--;
2338 	}
2339 	/* TODO: Error message. */
2340 	return (ARCHIVE_WARN);
2341 }
2342 
2343 /*
2344  * Returns length (in bytes) of the sparse data description
2345  * that was read.
2346  */
2347 static ssize_t
2348 gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
2349 {
2350 	ssize_t bytes_read;
2351 	int entries;
2352 	int64_t offset, size, to_skip, remaining;
2353 
2354 	/* Clear out the existing sparse list. */
2355 	gnu_clear_sparse_list(tar);
2356 
2357 	remaining = tar->entry_bytes_remaining;
2358 
2359 	/* Parse entries. */
2360 	entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2361 	if (entries < 0)
2362 		return (ARCHIVE_FATAL);
2363 	/* Parse the individual entries. */
2364 	while (entries-- > 0) {
2365 		/* Parse offset/size */
2366 		offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2367 		if (offset < 0)
2368 			return (ARCHIVE_FATAL);
2369 		size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2370 		if (size < 0)
2371 			return (ARCHIVE_FATAL);
2372 		/* Add a new sparse entry. */
2373 		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
2374 			return (ARCHIVE_FATAL);
2375 	}
2376 	/* Skip rest of block... */
2377 	tar_flush_unconsumed(a, unconsumed);
2378 	bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
2379 	to_skip = 0x1ff & -bytes_read;
2380 	if (to_skip != __archive_read_consume(a, to_skip))
2381 		return (ARCHIVE_FATAL);
2382 	return ((ssize_t)(bytes_read + to_skip));
2383 }
2384 
2385 /*
2386  * Solaris pax extension for a sparse file. This is recorded with the
2387  * data and hole pairs. The way recording sparse information by Solaris'
2388  * pax simply indicates where data and sparse are, so the stored contents
2389  * consist of both data and hole.
2390  */
2391 static int
2392 solaris_sparse_parse(struct archive_read *a, struct tar *tar,
2393     struct archive_entry *entry, const char *p)
2394 {
2395 	const char *e;
2396 	int64_t start, end;
2397 	int hole = 1;
2398 
2399 	(void)entry; /* UNUSED */
2400 
2401 	end = 0;
2402 	if (*p == ' ')
2403 		p++;
2404 	else
2405 		return (ARCHIVE_WARN);
2406 	for (;;) {
2407 		e = p;
2408 		while (*e != '\0' && *e != ' ') {
2409 			if (*e < '0' || *e > '9')
2410 				return (ARCHIVE_WARN);
2411 			e++;
2412 		}
2413 		start = end;
2414 		end = tar_atol10(p, e - p);
2415 		if (end < 0)
2416 			return (ARCHIVE_WARN);
2417 		if (start < end) {
2418 			if (gnu_add_sparse_entry(a, tar, start,
2419 			    end - start) != ARCHIVE_OK)
2420 				return (ARCHIVE_FATAL);
2421 			tar->sparse_last->hole = hole;
2422 		}
2423 		if (*e == '\0')
2424 			return (ARCHIVE_OK);
2425 		p = e + 1;
2426 		hole = hole == 0;
2427 	}
2428 }
2429 
/*-
 * Convert a numeric header field to an integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions.
 *
 * The field is treated as base-256 whenever the high bit of its first
 * byte is set, and as base-8 otherwise.  (Technically, GNU tar
 * considers a field to be in base-256 only if the first byte is 0xff
 * or 0x80.)
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	return ((*p & 0x80) != 0 ?
	    tar_atol256(p, char_cnt) : tar_atol8(p, char_cnt));
}
2458 
/*
 * Parse at most char_cnt characters of p as an integer in the given
 * base: optional leading spaces/tabs, an optional '-', then digits.
 * Parsing stops at the first character that is not a digit of the
 * base; with no digits at all the result is 0.
 *
 * Values outside the int64_t range are clamped: INT64_MAX for
 * positive input, INT64_MIN for negative input.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t	l, maxval, limit, last_digit_limit;
	int digit, sign;

	/*
	 * 'l' accumulates the magnitude.  Appending 'digit' reaches or
	 * passes maxval's magnitude when l > limit, or when l == limit
	 * and digit >= last_digit_limit; returning maxval at that point
	 * is exact in the "reaches" case and a clamp otherwise, and it
	 * keeps l * base + digit from ever overflowing.
	 */
	maxval = INT64_MAX;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* the pointer will not be dereferenced if char_cnt is zero
	 * due to the way the && operator is evaluated.
	 */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;

		maxval = INT64_MIN;
		limit = -(INT64_MIN / base);
		/*
		 * INT64_MIN % base is zero or negative; negate it so it
		 * can be compared against a (non-negative) digit.
		 */
		last_digit_limit = -(INT64_MIN % base);
	}

	l = 0;
	/* Test the count before every read so p[char_cnt] is never touched. */
	while (char_cnt != 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit >= last_digit_limit))
			return (maxval); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return ((sign < 0) ? -l : l);
}
2507 
/*
 * Parse up to char_cnt characters of p as a base-8 integer.
 */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal = 8;

	return (tar_atol_base_n(p, char_cnt, octal));
}
2513 
/*
 * Parse up to char_cnt characters of p as a base-10 integer.
 */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal = 10;

	return (tar_atol_base_n(p, char_cnt, decimal));
}
2519 
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * Values that do not fit in an int64_t are clamped to INT64_MAX or
 * INT64_MIN according to their sign.  An empty field (char_cnt == 0)
 * yields 0 without reading from _p.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	uint64_t l;
	const unsigned char *p = (const unsigned char *)_p;
	unsigned char c, neg;

	/*
	 * Nothing to parse.  Without this guard the first byte would be
	 * read anyway and the countdown below would wrap around.
	 */
	if (char_cnt == 0)
		return (0);

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
	c = *p;
	if (c & 0x40) {
		neg = 0xff;
		c |= 0x80;
		l = ~(uint64_t)0;
	} else {
		neg = 0;
		c &= 0x7f;
		l = 0;
	}

	/* If more than 8 bytes, check that we can ignore
	 * high-order bits without overflow. */
	while (char_cnt > sizeof(int64_t)) {
		--char_cnt;
		if (c != neg)
			return (neg ? INT64_MIN : INT64_MAX);
		c = *++p;
	}

	/* c is first byte that fits; if sign mismatch, return overflow */
	if ((c ^ neg) & 0x80) {
		return (neg ? INT64_MIN : INT64_MAX);
	}

	/* Accumulate remaining bytes. */
	while (--char_cnt > 0) {
		l = (l << 8) | c;
		c = *++p;
	}
	l = (l << 8) | c;
	/* Return signed twos-complement value. */
	return ((int64_t)(l));
}
2572 
2573 /*
2574  * Returns length of line (including trailing newline)
2575  * or negative on error.  'start' argument is updated to
2576  * point to first character of line.  This avoids copying
2577  * when possible.
2578  */
2579 static ssize_t
2580 readline(struct archive_read *a, struct tar *tar, const char **start,
2581     ssize_t limit, size_t *unconsumed)
2582 {
2583 	ssize_t bytes_read;
2584 	ssize_t total_size = 0;
2585 	const void *t;
2586 	const char *s;
2587 	void *p;
2588 
2589 	tar_flush_unconsumed(a, unconsumed);
2590 
2591 	t = __archive_read_ahead(a, 1, &bytes_read);
2592 	if (bytes_read <= 0)
2593 		return (ARCHIVE_FATAL);
2594 	s = t;  /* Start of line? */
2595 	p = memchr(t, '\n', bytes_read);
2596 	/* If we found '\n' in the read buffer, return pointer to that. */
2597 	if (p != NULL) {
2598 		bytes_read = 1 + ((const char *)p) - s;
2599 		if (bytes_read > limit) {
2600 			archive_set_error(&a->archive,
2601 			    ARCHIVE_ERRNO_FILE_FORMAT,
2602 			    "Line too long");
2603 			return (ARCHIVE_FATAL);
2604 		}
2605 		*unconsumed = bytes_read;
2606 		*start = s;
2607 		return (bytes_read);
2608 	}
2609 	*unconsumed = bytes_read;
2610 	/* Otherwise, we need to accumulate in a line buffer. */
2611 	for (;;) {
2612 		if (total_size + bytes_read > limit) {
2613 			archive_set_error(&a->archive,
2614 			    ARCHIVE_ERRNO_FILE_FORMAT,
2615 			    "Line too long");
2616 			return (ARCHIVE_FATAL);
2617 		}
2618 		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2619 			archive_set_error(&a->archive, ENOMEM,
2620 			    "Can't allocate working buffer");
2621 			return (ARCHIVE_FATAL);
2622 		}
2623 		memcpy(tar->line.s + total_size, t, bytes_read);
2624 		tar_flush_unconsumed(a, unconsumed);
2625 		total_size += bytes_read;
2626 		/* If we found '\n', clean up and return. */
2627 		if (p != NULL) {
2628 			*start = tar->line.s;
2629 			return (total_size);
2630 		}
2631 		/* Read some more. */
2632 		t = __archive_read_ahead(a, 1, &bytes_read);
2633 		if (bytes_read <= 0)
2634 			return (ARCHIVE_FATAL);
2635 		s = t;  /* Start of line? */
2636 		p = memchr(t, '\n', bytes_read);
2637 		/* If we found '\n', trim the read. */
2638 		if (p != NULL) {
2639 			bytes_read = 1 + ((const char *)p) - s;
2640 		}
2641 		*unconsumed = bytes_read;
2642 	}
2643 }
2644 
/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * Decodes the first 'len' bytes of 's'.  Characters outside the
 * base-64 alphabet are skipped, and the first '=' or '_' ends the
 * input.  The result is a malloc()ed buffer with its length stored in
 * *out_len; no terminating NUL is written.  On allocation failure
 * NULL is returned and *out_len is set to 0.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	static unsigned char decode_table[128];
	const unsigned char *src = (const unsigned char *)s;
	char *out, *d;
	unsigned i;

	/* Build the reverse lookup table the first time through. */
	if (decode_table[digits[1]] != 1) {
		memset(decode_table, 0xff, sizeof(decode_table));
		for (i = 0; i < sizeof(digits); i++)
			decode_table[digits[i]] = i;
	}

	/*
	 * Three output bytes per four input characters is the most that
	 * can be produced; this allocation is at least that large, and
	 * part of it may go unused.
	 */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}

	d = out;
	while (len > 0) {
		int v = 0;
		int group_size = 0;

		/* Collect the next group of (up to) four characters. */
		for (; group_size < 4 && len > 0; src++, len--) {
			unsigned char c = *src;

			/* '=' or '_' padding indicates final group. */
			if (c == '=' || c == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (c < 32 || c > 127 || decode_table[c] == 0xff)
				continue;
			v = (v << 6) | decode_table[c];
			group_size++;
		}

		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);

		/* n characters yield n - 1 bytes; a lone one is invalid. */
		if (group_size >= 2)
			d[0] = (v >> 16) & 0xff;
		if (group_size >= 3)
			d[1] = (v >> 8) & 0xff;
		if (group_size == 4)
			d[2] = v & 0xff;
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
2725 
/*
 * Return a malloc()ed copy of 'in' with every "%XX" escape (two hex
 * digits) replaced by the byte it encodes.  A '%' that is not followed
 * by two hex digits is copied through unchanged.  Returns NULL if the
 * allocation fails.
 */
static char *
url_decode(const char *in)
{
	const char *src = in;
	char *out, *dst;
	int hi, lo;

	/* Decoding never makes the string longer. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);

	dst = out;
	while (*src != '\0') {
		hi = -1;
		lo = -1;
		/* Look at the two following characters only if they exist. */
		if (src[0] == '%' && src[1] != '\0' && src[2] != '\0') {
			hi = tohex(src[1]);
			lo = tohex(src[2]);
		}
		if (hi >= 0 && lo >= 0) {
			/* Valid escape: three input chars, one output byte. */
			*dst++ = ((hi << 4) | lo);
			src += 3;
		} else {
			/* Ordinary character, or a '%' treated as one. */
			*dst++ = *src++;
		}
	}
	*dst = '\0';
	return (out);
}
2754 
/*
 * Return the value (0-15) of the hexadecimal digit 'c', accepting
 * both upper- and lower-case letters, or -1 if 'c' is not one.
 */
static int
tohex(int c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'A' && c <= 'F')
		return (10 + (c - 'A'));
	if (c >= 'a' && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}
2767