1 /*-
2  * Copyright (c) 2007 Kai Wang
3  * Copyright (c) 2007 Tim Kientzle
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer
11  *    in this position and unchanged.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $");
30 
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 #ifdef HAVE_STRING_H
41 #include <string.h>
42 #endif
43 #ifdef HAVE_LIMITS_H
44 #include <limits.h>
45 #endif
46 
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_private.h"
50 #include "archive_read_private.h"
51 
/*
 * Per-archive reader state for the "ar" format.  One instance is
 * allocated by archive_read_support_format_ar() and released by
 * archive_read_format_ar_cleanup().
 */
struct ar {
	/* Bytes of the current entry body not yet handed to the client. */
	int64_t	 entry_bytes_remaining;
	/* unconsumed is purely to track data we've gotten from readahead,
	 * but haven't yet marked as consumed.  Must be paired with
	 * entry_bytes_remaining usage/modification.
	 */
	size_t   entry_bytes_unconsumed;
	/* Offset within the current entry reported with the next data block. */
	int64_t	 entry_offset;
	/* Pad bytes that follow the entry body (entry size modulo 2). */
	int64_t	 entry_padding;
	/* Copy of the GNU/SVR4 "//" filename table, or NULL if none was read. */
	char	*strtab;
	/* Size in bytes of the buffer strtab points to. */
	size_t	 strtab_size;
	/* Nonzero once the 8-byte global archive header has been consumed. */
	char	 read_global_header;
};
65 
/*
 * Define structure of the "ar" header.
 *
 * Each entry header is 60 bytes of fixed-width fields.  Every field is
 * described by its byte offset from the start of the header and its
 * width in bytes.  The last two bytes (fmag) hold the "`\n" signature
 * that is verified before the other fields are parsed.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset 16
#define AR_date_size 12
#define AR_uid_offset 28
#define AR_uid_size 6
#define AR_gid_offset 34
#define AR_gid_size 6
#define AR_mode_offset 40
#define AR_mode_size 8
#define AR_size_offset 48
#define AR_size_size 10
#define AR_fmag_offset 58
#define AR_fmag_size 2
83 
/*
 * Forward declarations: the callbacks registered with
 * __archive_read_register_format(), followed by the local helpers
 * used to parse header fields and the GNU filename table.
 */
static int	archive_read_format_ar_bid(struct archive_read *a, int);
static int	archive_read_format_ar_cleanup(struct archive_read *a);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static int	ar_parse_common_header(struct ar *ar, struct archive_entry *,
		    const char *h);
96 
97 int
98 archive_read_support_format_ar(struct archive *_a)
99 {
100 	struct archive_read *a = (struct archive_read *)_a;
101 	struct ar *ar;
102 	int r;
103 
104 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
105 	    ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
106 
107 	ar = (struct ar *)calloc(1, sizeof(*ar));
108 	if (ar == NULL) {
109 		archive_set_error(&a->archive, ENOMEM,
110 		    "Can't allocate ar data");
111 		return (ARCHIVE_FATAL);
112 	}
113 	ar->strtab = NULL;
114 
115 	r = __archive_read_register_format(a,
116 	    ar,
117 	    "ar",
118 	    archive_read_format_ar_bid,
119 	    NULL,
120 	    archive_read_format_ar_read_header,
121 	    archive_read_format_ar_read_data,
122 	    archive_read_format_ar_skip,
123 	    NULL,
124 	    archive_read_format_ar_cleanup,
125 	    NULL,
126 	    NULL);
127 
128 	if (r != ARCHIVE_OK) {
129 		free(ar);
130 		return (r);
131 	}
132 	return (ARCHIVE_OK);
133 }
134 
135 static int
136 archive_read_format_ar_cleanup(struct archive_read *a)
137 {
138 	struct ar *ar;
139 
140 	ar = (struct ar *)(a->format->data);
141 	if (ar->strtab)
142 		free(ar->strtab);
143 	free(ar);
144 	(a->format->data) = NULL;
145 	return (ARCHIVE_OK);
146 }
147 
/*
 * Bid callback: returns 64 when the next 8 bytes of input are the
 * global ar signature "!<arch>\n", and -1 otherwise (including when
 * 8 bytes cannot be read ahead).  best_bid is ignored.
 */
static int
archive_read_format_ar_bid(struct archive_read *a, int best_bid)
{
	const void *signature;

	(void)best_bid; /* UNUSED */

	/*
	 * Only the 8-byte file signature is verified.
	 * TODO: Do we need to check more than this?
	 */
	signature = __archive_read_ahead(a, 8, NULL);
	if (signature == NULL || memcmp(signature, "!<arch>\n", 8) != 0)
		return (-1);
	return (64);
}
166 
167 static int
168 _ar_read_header(struct archive_read *a, struct archive_entry *entry,
169 	struct ar *ar, const char *h, size_t *unconsumed)
170 {
171 	char filename[AR_name_size + 1];
172 	uint64_t number; /* Used to hold parsed numbers before validation. */
173 	size_t bsd_name_length, entry_size;
174 	char *p, *st;
175 	const void *b;
176 	int r;
177 
178 	/* Verify the magic signature on the file header. */
179 	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
180 		archive_set_error(&a->archive, EINVAL,
181 		    "Incorrect file header signature");
182 		return (ARCHIVE_FATAL);
183 	}
184 
185 	/* Copy filename into work buffer. */
186 	strncpy(filename, h + AR_name_offset, AR_name_size);
187 	filename[AR_name_size] = '\0';
188 
189 	/*
190 	 * Guess the format variant based on the filename.
191 	 */
192 	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
193 		/* We don't already know the variant, so let's guess. */
194 		/*
195 		 * Biggest clue is presence of '/': GNU starts special
196 		 * filenames with '/', appends '/' as terminator to
197 		 * non-special names, so anything with '/' should be
198 		 * GNU except for BSD long filenames.
199 		 */
200 		if (strncmp(filename, "#1/", 3) == 0)
201 			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
202 		else if (strchr(filename, '/') != NULL)
203 			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
204 		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
205 			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
206 		/*
207 		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
208 		 * if name exactly fills 16-byte field?  If so, we
209 		 * can't assume entries without '/' are BSD. XXX
210 		 */
211 	}
212 
213 	/* Update format name from the code. */
214 	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
215 		a->archive.archive_format_name = "ar (GNU/SVR4)";
216 	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
217 		a->archive.archive_format_name = "ar (BSD)";
218 	else
219 		a->archive.archive_format_name = "ar";
220 
221 	/*
222 	 * Remove trailing spaces from the filename.  GNU and BSD
223 	 * variants both pad filename area out with spaces.
224 	 * This will only be wrong if GNU/SVR4 'ar' implementations
225 	 * omit trailing '/' for 16-char filenames and we have
226 	 * a 16-char filename that ends in ' '.
227 	 */
228 	p = filename + AR_name_size - 1;
229 	while (p >= filename && *p == ' ') {
230 		*p = '\0';
231 		p--;
232 	}
233 
234 	/*
235 	 * Remove trailing slash unless first character is '/'.
236 	 * (BSD entries never end in '/', so this will only trim
237 	 * GNU-format entries.  GNU special entries start with '/'
238 	 * and are not terminated in '/', so we don't trim anything
239 	 * that starts with '/'.)
240 	 */
241 	if (filename[0] != '/' && p > filename && *p == '/') {
242 		*p = '\0';
243 	}
244 
245 	if (p < filename) {
246 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
247 		    "Found entry with empty filename");
248 		return (ARCHIVE_FATAL);
249 	}
250 
251 	/*
252 	 * '//' is the GNU filename table.
253 	 * Later entries can refer to names in this table.
254 	 */
255 	if (strcmp(filename, "//") == 0) {
256 		/* This must come before any call to _read_ahead. */
257 		ar_parse_common_header(ar, entry, h);
258 		archive_entry_copy_pathname(entry, filename);
259 		archive_entry_set_filetype(entry, AE_IFREG);
260 		/* Get the size of the filename table. */
261 		number = ar_atol10(h + AR_size_offset, AR_size_size);
262 		if (number > SIZE_MAX || number > 1024 * 1024 * 1024) {
263 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
264 			    "Filename table too large");
265 			return (ARCHIVE_FATAL);
266 		}
267 		entry_size = (size_t)number;
268 		if (entry_size == 0) {
269 			archive_set_error(&a->archive, EINVAL,
270 			    "Invalid string table");
271 			return (ARCHIVE_FATAL);
272 		}
273 		if (ar->strtab != NULL) {
274 			archive_set_error(&a->archive, EINVAL,
275 			    "More than one string tables exist");
276 			return (ARCHIVE_FATAL);
277 		}
278 
279 		/* Read the filename table into memory. */
280 		st = malloc(entry_size);
281 		if (st == NULL) {
282 			archive_set_error(&a->archive, ENOMEM,
283 			    "Can't allocate filename table buffer");
284 			return (ARCHIVE_FATAL);
285 		}
286 		ar->strtab = st;
287 		ar->strtab_size = entry_size;
288 
289 		if (*unconsumed) {
290 			__archive_read_consume(a, *unconsumed);
291 			*unconsumed = 0;
292 		}
293 
294 		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
295 			return (ARCHIVE_FATAL);
296 		memcpy(st, b, entry_size);
297 		__archive_read_consume(a, entry_size);
298 		/* All contents are consumed. */
299 		ar->entry_bytes_remaining = 0;
300 		archive_entry_set_size(entry, ar->entry_bytes_remaining);
301 
302 		/* Parse the filename table. */
303 		return (ar_parse_gnu_filename_table(a));
304 	}
305 
306 	/*
307 	 * GNU variant handles long filenames by storing /<number>
308 	 * to indicate a name stored in the filename table.
309 	 * XXX TODO: Verify that it's all digits... Don't be fooled
310 	 * by "/9xyz" XXX
311 	 */
312 	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
313 		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
314 		/*
315 		 * If we can't look up the real name, warn and return
316 		 * the entry with the wrong name.
317 		 */
318 		if (ar->strtab == NULL || number >= ar->strtab_size) {
319 			archive_set_error(&a->archive, EINVAL,
320 			    "Can't find long filename for GNU/SVR4 archive entry");
321 			archive_entry_copy_pathname(entry, filename);
322 			/* Parse the time, owner, mode, size fields. */
323 			ar_parse_common_header(ar, entry, h);
324 			return (ARCHIVE_FATAL);
325 		}
326 
327 		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
328 		/* Parse the time, owner, mode, size fields. */
329 		return (ar_parse_common_header(ar, entry, h));
330 	}
331 
332 	/*
333 	 * BSD handles long filenames by storing "#1/" followed by the
334 	 * length of filename as a decimal number, then prepends the
335 	 * the filename to the file contents.
336 	 */
337 	if (strncmp(filename, "#1/", 3) == 0) {
338 		/* Parse the time, owner, mode, size fields. */
339 		/* This must occur before _read_ahead is called again. */
340 		ar_parse_common_header(ar, entry, h);
341 
342 		/* Parse the size of the name, adjust the file size. */
343 		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
344 		/* Sanity check the filename length:
345 		 *   = Must be <= SIZE_MAX - 1
346 		 *   = Must be <= 1MB
347 		 *   = Cannot be bigger than the entire entry
348 		 */
349 		if (number > SIZE_MAX - 1
350 		    || number > 1024 * 1024
351 		    || (int64_t)number > ar->entry_bytes_remaining) {
352 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
353 			    "Bad input file size");
354 			return (ARCHIVE_FATAL);
355 		}
356 		bsd_name_length = (size_t)number;
357 		ar->entry_bytes_remaining -= bsd_name_length;
358 		/* Adjust file size reported to client. */
359 		archive_entry_set_size(entry, ar->entry_bytes_remaining);
360 
361 		if (*unconsumed) {
362 			__archive_read_consume(a, *unconsumed);
363 			*unconsumed = 0;
364 		}
365 
366 		/* Read the long name into memory. */
367 		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
368 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
369 			    "Truncated input file");
370 			return (ARCHIVE_FATAL);
371 		}
372 		/* Store it in the entry. */
373 		p = (char *)malloc(bsd_name_length + 1);
374 		if (p == NULL) {
375 			archive_set_error(&a->archive, ENOMEM,
376 			    "Can't allocate fname buffer");
377 			return (ARCHIVE_FATAL);
378 		}
379 		strncpy(p, b, bsd_name_length);
380 		p[bsd_name_length] = '\0';
381 
382 		__archive_read_consume(a, bsd_name_length);
383 
384 		archive_entry_copy_pathname(entry, p);
385 		free(p);
386 		return (ARCHIVE_OK);
387 	}
388 
389 	/*
390 	 * "/" is the SVR4/GNU archive symbol table.
391 	 */
392 	if (strcmp(filename, "/") == 0) {
393 		archive_entry_copy_pathname(entry, "/");
394 		/* Parse the time, owner, mode, size fields. */
395 		r = ar_parse_common_header(ar, entry, h);
396 		/* Force the file type to a regular file. */
397 		archive_entry_set_filetype(entry, AE_IFREG);
398 		return (r);
399 	}
400 
401 	/*
402 	 * "__.SYMDEF" is a BSD archive symbol table.
403 	 */
404 	if (strcmp(filename, "__.SYMDEF") == 0) {
405 		archive_entry_copy_pathname(entry, filename);
406 		/* Parse the time, owner, mode, size fields. */
407 		return (ar_parse_common_header(ar, entry, h));
408 	}
409 
410 	/*
411 	 * Otherwise, this is a standard entry.  The filename
412 	 * has already been trimmed as much as possible, based
413 	 * on our current knowledge of the format.
414 	 */
415 	archive_entry_copy_pathname(entry, filename);
416 	return (ar_parse_common_header(ar, entry, h));
417 }
418 
419 static int
420 archive_read_format_ar_read_header(struct archive_read *a,
421     struct archive_entry *entry)
422 {
423 	struct ar *ar = (struct ar*)(a->format->data);
424 	size_t unconsumed;
425 	const void *header_data;
426 	int ret;
427 
428 	if (!ar->read_global_header) {
429 		/*
430 		 * We are now at the beginning of the archive,
431 		 * so we need first consume the ar global header.
432 		 */
433 		__archive_read_consume(a, 8);
434 		ar->read_global_header = 1;
435 		/* Set a default format code for now. */
436 		a->archive.archive_format = ARCHIVE_FORMAT_AR;
437 	}
438 
439 	/* Read the header for the next file entry. */
440 	if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
441 		/* Broken header. */
442 		return (ARCHIVE_EOF);
443 
444 	unconsumed = 60;
445 
446 	ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
447 
448 	if (unconsumed)
449 		__archive_read_consume(a, unconsumed);
450 
451 	return ret;
452 }
453 
454 
455 static int
456 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
457     const char *h)
458 {
459 	uint64_t n;
460 
461 	/* Copy remaining header */
462 	archive_entry_set_mtime(entry,
463 	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
464 	archive_entry_set_uid(entry,
465 	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
466 	archive_entry_set_gid(entry,
467 	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
468 	archive_entry_set_mode(entry,
469 	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
470 	n = ar_atol10(h + AR_size_offset, AR_size_size);
471 
472 	ar->entry_offset = 0;
473 	ar->entry_padding = n % 2;
474 	archive_entry_set_size(entry, n);
475 	ar->entry_bytes_remaining = n;
476 	return (ARCHIVE_OK);
477 }
478 
479 static int
480 archive_read_format_ar_read_data(struct archive_read *a,
481     const void **buff, size_t *size, int64_t *offset)
482 {
483 	ssize_t bytes_read;
484 	struct ar *ar;
485 
486 	ar = (struct ar *)(a->format->data);
487 
488 	if (ar->entry_bytes_unconsumed) {
489 		__archive_read_consume(a, ar->entry_bytes_unconsumed);
490 		ar->entry_bytes_unconsumed = 0;
491 	}
492 
493 	if (ar->entry_bytes_remaining > 0) {
494 		*buff = __archive_read_ahead(a, 1, &bytes_read);
495 		if (bytes_read == 0) {
496 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
497 			    "Truncated ar archive");
498 			return (ARCHIVE_FATAL);
499 		}
500 		if (bytes_read < 0)
501 			return (ARCHIVE_FATAL);
502 		if (bytes_read > ar->entry_bytes_remaining)
503 			bytes_read = (ssize_t)ar->entry_bytes_remaining;
504 		*size = bytes_read;
505 		ar->entry_bytes_unconsumed = bytes_read;
506 		*offset = ar->entry_offset;
507 		ar->entry_offset += bytes_read;
508 		ar->entry_bytes_remaining -= bytes_read;
509 		return (ARCHIVE_OK);
510 	} else {
511 		int64_t skipped = __archive_read_consume(a, ar->entry_padding);
512 		if (skipped >= 0) {
513 			ar->entry_padding -= skipped;
514 		}
515 		if (ar->entry_padding) {
516 			if (skipped >= 0) {
517 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
518 					"Truncated ar archive- failed consuming padding");
519 			}
520 			return (ARCHIVE_FATAL);
521 		}
522 		*buff = NULL;
523 		*size = 0;
524 		*offset = ar->entry_offset;
525 		return (ARCHIVE_EOF);
526 	}
527 }
528 
529 static int
530 archive_read_format_ar_skip(struct archive_read *a)
531 {
532 	int64_t bytes_skipped;
533 	struct ar* ar;
534 
535 	ar = (struct ar *)(a->format->data);
536 
537 	bytes_skipped = __archive_read_consume(a,
538 	    ar->entry_bytes_remaining + ar->entry_padding
539 	    + ar->entry_bytes_unconsumed);
540 	if (bytes_skipped < 0)
541 		return (ARCHIVE_FATAL);
542 
543 	ar->entry_bytes_remaining = 0;
544 	ar->entry_bytes_unconsumed = 0;
545 	ar->entry_padding = 0;
546 
547 	return (ARCHIVE_OK);
548 }
549 
550 static int
551 ar_parse_gnu_filename_table(struct archive_read *a)
552 {
553 	struct ar *ar;
554 	char *p;
555 	size_t size;
556 
557 	ar = (struct ar*)(a->format->data);
558 	size = ar->strtab_size;
559 
560 	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
561 		if (*p == '/') {
562 			*p++ = '\0';
563 			if (*p != '\n')
564 				goto bad_string_table;
565 			*p = '\0';
566 		}
567 	}
568 	/*
569 	 * GNU ar always pads the table to an even size.
570 	 * The pad character is either '\n' or '`'.
571 	 */
572 	if (p != ar->strtab + size && *p != '\n' && *p != '`')
573 		goto bad_string_table;
574 
575 	/* Enforce zero termination. */
576 	ar->strtab[size - 1] = '\0';
577 
578 	return (ARCHIVE_OK);
579 
580 bad_string_table:
581 	archive_set_error(&a->archive, EINVAL,
582 	    "Invalid string table");
583 	free(ar->strtab);
584 	ar->strtab = NULL;
585 	return (ARCHIVE_FATAL);
586 }
587 
/*
 * Parse an unsigned octal number from a fixed-width field that is not
 * necessarily NUL-terminated.
 *
 *   p        - first byte of the field.
 *   char_cnt - width of the field; no byte at or beyond p + char_cnt
 *              is ever examined.
 *
 * Leading spaces and tabs are skipped, then octal digits are
 * accumulated until a non-octal byte or the end of the field.
 * Returns the parsed value, 0 if no digit was found, or UINT64_MAX if
 * the value does not fit in 64 bits.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/*
	 * Test the remaining width before dereferencing, so a field made
	 * up entirely of blanks never causes a read past its end.
	 */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		unsigned int digit;

		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
613 
/*
 * Parse an unsigned decimal number from a fixed-width field that is
 * not necessarily NUL-terminated.
 *
 *   p        - first byte of the field.
 *   char_cnt - width of the field; no byte at or beyond p + char_cnt
 *              is ever examined.
 *
 * Leading spaces and tabs are skipped, then decimal digits are
 * accumulated until a non-digit byte or the end of the field.
 * Returns the parsed value, 0 if no digit was found, or UINT64_MAX if
 * the value does not fit in 64 bits.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/*
	 * Test the remaining width before dereferencing, so a field made
	 * up entirely of blanks never causes a read past its end.
	 */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		unsigned int digit;

		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
638