1 /*-
2  * Copyright (c) 2007 Kai Wang
3  * Copyright (c) 2007 Tim Kientzle
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer
11  *    in this position and unchanged.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.12 2008/12/17 19:02:42 kientzle Exp $");
30 
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 #ifdef HAVE_STRING_H
41 #include <string.h>
42 #endif
43 
44 #include "archive.h"
45 #include "archive_entry.h"
46 #include "archive_private.h"
47 #include "archive_read_private.h"
48 
/*
 * Private state kept by the "ar" format reader for one archive handle.
 */
struct ar {
	/* Bytes of the current entry's body that have not been handed out. */
	off_t	 entry_bytes_remaining;

	/* Offset, within the current entry, reported for the next data block. */
	off_t	 entry_offset;

	/* Pad bytes that follow the entry body (set from "size % 2"). */
	off_t	 entry_padding;

	/* Heap copy of the GNU "//" filename table; NULL when none is loaded. */
	char	*strtab;

	/* Number of bytes allocated for strtab. */
	size_t	 strtab_size;
};
56 
/*
 * Layout of the fixed-size "ar" member header (60 bytes in total).
 * Fields are stored back to back, so each offset is expressed as the
 * end of the field that precedes it.
 */
#define AR_name_offset	0
#define AR_name_size	16
#define AR_date_offset	(AR_name_offset + AR_name_size)
#define AR_date_size	12
#define AR_uid_offset	(AR_date_offset + AR_date_size)
#define AR_uid_size	6
#define AR_gid_offset	(AR_uid_offset + AR_uid_size)
#define AR_gid_size	6
#define AR_mode_offset	(AR_gid_offset + AR_gid_size)
#define AR_mode_size	8
#define AR_size_offset	(AR_mode_offset + AR_mode_size)
#define AR_size_size	10
#define AR_fmag_offset	(AR_size_offset + AR_size_size)
#define AR_fmag_size	2
74 
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_ar_bid(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Header-parsing helpers private to this file. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
87 
88 int
89 archive_read_support_format_ar(struct archive *_a)
90 {
91 	struct archive_read *a = (struct archive_read *)_a;
92 	struct ar *ar;
93 	int r;
94 
95 	ar = (struct ar *)malloc(sizeof(*ar));
96 	if (ar == NULL) {
97 		archive_set_error(&a->archive, ENOMEM,
98 		    "Can't allocate ar data");
99 		return (ARCHIVE_FATAL);
100 	}
101 	memset(ar, 0, sizeof(*ar));
102 	ar->strtab = NULL;
103 
104 	r = __archive_read_register_format(a,
105 	    ar,
106 	    "ar",
107 	    archive_read_format_ar_bid,
108 	    NULL,
109 	    archive_read_format_ar_read_header,
110 	    archive_read_format_ar_read_data,
111 	    archive_read_format_ar_skip,
112 	    archive_read_format_ar_cleanup);
113 
114 	if (r != ARCHIVE_OK) {
115 		free(ar);
116 		return (r);
117 	}
118 	return (ARCHIVE_OK);
119 }
120 
121 static int
122 archive_read_format_ar_cleanup(struct archive_read *a)
123 {
124 	struct ar *ar;
125 
126 	ar = (struct ar *)(a->format->data);
127 	if (ar->strtab)
128 		free(ar->strtab);
129 	free(ar);
130 	(a->format->data) = NULL;
131 	return (ARCHIVE_OK);
132 }
133 
134 static int
135 archive_read_format_ar_bid(struct archive_read *a)
136 {
137 	struct ar *ar;
138 	const void *h;
139 
140 	if (a->archive.archive_format != 0 &&
141 	    (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
142 	    ARCHIVE_FORMAT_AR)
143 		return(0);
144 
145 	ar = (struct ar *)(a->format->data);
146 
147 	/*
148 	 * Verify the 8-byte file signature.
149 	 * TODO: Do we need to check more than this?
150 	 */
151 	if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
152 		return (-1);
153 	if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
154 		return (64);
155 	}
156 	return (-1);
157 }
158 
159 static int
160 archive_read_format_ar_read_header(struct archive_read *a,
161     struct archive_entry *entry)
162 {
163 	char filename[AR_name_size + 1];
164 	struct ar *ar;
165 	uint64_t number; /* Used to hold parsed numbers before validation. */
166 	ssize_t bytes_read;
167 	size_t bsd_name_length, entry_size;
168 	char *p, *st;
169 	const void *b;
170 	const char *h;
171 	int r;
172 
173 	ar = (struct ar*)(a->format->data);
174 
175 	if (a->archive.file_position == 0) {
176 		/*
177 		 * We are now at the beginning of the archive,
178 		 * so we need first consume the ar global header.
179 		 */
180 		__archive_read_consume(a, 8);
181 		/* Set a default format code for now. */
182 		a->archive.archive_format = ARCHIVE_FORMAT_AR;
183 	}
184 
185 	/* Read the header for the next file entry. */
186 	if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
187 		/* Broken header. */
188 		return (ARCHIVE_EOF);
189 	__archive_read_consume(a, 60);
190 	h = (const char *)b;
191 
192 	/* Verify the magic signature on the file header. */
193 	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
194 		archive_set_error(&a->archive, EINVAL,
195 		    "Incorrect file header signature");
196 		return (ARCHIVE_WARN);
197 	}
198 
199 	/* Copy filename into work buffer. */
200 	strncpy(filename, h + AR_name_offset, AR_name_size);
201 	filename[AR_name_size] = '\0';
202 
203 	/*
204 	 * Guess the format variant based on the filename.
205 	 */
206 	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
207 		/* We don't already know the variant, so let's guess. */
208 		/*
209 		 * Biggest clue is presence of '/': GNU starts special
210 		 * filenames with '/', appends '/' as terminator to
211 		 * non-special names, so anything with '/' should be
212 		 * GNU except for BSD long filenames.
213 		 */
214 		if (strncmp(filename, "#1/", 3) == 0)
215 			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
216 		else if (strchr(filename, '/') != NULL)
217 			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
218 		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
219 			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
220 		/*
221 		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
222 		 * if name exactly fills 16-byte field?  If so, we
223 		 * can't assume entries without '/' are BSD. XXX
224 		 */
225 	}
226 
227 	/* Update format name from the code. */
228 	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
229 		a->archive.archive_format_name = "ar (GNU/SVR4)";
230 	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
231 		a->archive.archive_format_name = "ar (BSD)";
232 	else
233 		a->archive.archive_format_name = "ar";
234 
235 	/*
236 	 * Remove trailing spaces from the filename.  GNU and BSD
237 	 * variants both pad filename area out with spaces.
238 	 * This will only be wrong if GNU/SVR4 'ar' implementations
239 	 * omit trailing '/' for 16-char filenames and we have
240 	 * a 16-char filename that ends in ' '.
241 	 */
242 	p = filename + AR_name_size - 1;
243 	while (p >= filename && *p == ' ') {
244 		*p = '\0';
245 		p--;
246 	}
247 
248 	/*
249 	 * Remove trailing slash unless first character is '/'.
250 	 * (BSD entries never end in '/', so this will only trim
251 	 * GNU-format entries.  GNU special entries start with '/'
252 	 * and are not terminated in '/', so we don't trim anything
253 	 * that starts with '/'.)
254 	 */
255 	if (filename[0] != '/' && *p == '/')
256 		*p = '\0';
257 
258 	/*
259 	 * '//' is the GNU filename table.
260 	 * Later entries can refer to names in this table.
261 	 */
262 	if (strcmp(filename, "//") == 0) {
263 		/* This must come before any call to _read_ahead. */
264 		ar_parse_common_header(ar, entry, h);
265 		archive_entry_copy_pathname(entry, filename);
266 		archive_entry_set_filetype(entry, AE_IFREG);
267 		/* Get the size of the filename table. */
268 		number = ar_atol10(h + AR_size_offset, AR_size_size);
269 		if (number > SIZE_MAX) {
270 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
271 			    "Filename table too large");
272 			return (ARCHIVE_FATAL);
273 		}
274 		entry_size = (size_t)number;
275 		if (entry_size == 0) {
276 			archive_set_error(&a->archive, EINVAL,
277 			    "Invalid string table");
278 			return (ARCHIVE_WARN);
279 		}
280 		if (ar->strtab != NULL) {
281 			archive_set_error(&a->archive, EINVAL,
282 			    "More than one string tables exist");
283 			return (ARCHIVE_WARN);
284 		}
285 
286 		/* Read the filename table into memory. */
287 		st = malloc(entry_size);
288 		if (st == NULL) {
289 			archive_set_error(&a->archive, ENOMEM,
290 			    "Can't allocate filename table buffer");
291 			return (ARCHIVE_FATAL);
292 		}
293 		ar->strtab = st;
294 		ar->strtab_size = entry_size;
295 		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
296 			return (ARCHIVE_FATAL);
297 		memcpy(st, b, entry_size);
298 		__archive_read_consume(a, entry_size);
299 		/* All contents are consumed. */
300 		ar->entry_bytes_remaining = 0;
301 		archive_entry_set_size(entry, ar->entry_bytes_remaining);
302 
303 		/* Parse the filename table. */
304 		return (ar_parse_gnu_filename_table(a));
305 	}
306 
307 	/*
308 	 * GNU variant handles long filenames by storing /<number>
309 	 * to indicate a name stored in the filename table.
310 	 * XXX TODO: Verify that it's all digits... Don't be fooled
311 	 * by "/9xyz" XXX
312 	 */
313 	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
314 		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
315 		/*
316 		 * If we can't look up the real name, warn and return
317 		 * the entry with the wrong name.
318 		 */
319 		if (ar->strtab == NULL || number > ar->strtab_size) {
320 			archive_set_error(&a->archive, EINVAL,
321 			    "Can't find long filename for entry");
322 			archive_entry_copy_pathname(entry, filename);
323 			/* Parse the time, owner, mode, size fields. */
324 			ar_parse_common_header(ar, entry, h);
325 			return (ARCHIVE_WARN);
326 		}
327 
328 		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
329 		/* Parse the time, owner, mode, size fields. */
330 		return (ar_parse_common_header(ar, entry, h));
331 	}
332 
333 	/*
334 	 * BSD handles long filenames by storing "#1/" followed by the
335 	 * length of filename as a decimal number, then prepends the
336 	 * the filename to the file contents.
337 	 */
338 	if (strncmp(filename, "#1/", 3) == 0) {
339 		/* Parse the time, owner, mode, size fields. */
340 		/* This must occur before _read_ahead is called again. */
341 		ar_parse_common_header(ar, entry, h);
342 
343 		/* Parse the size of the name, adjust the file size. */
344 		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
345 		bsd_name_length = (size_t)number;
346 		/* Guard against the filename + trailing NUL
347 		 * overflowing a size_t and against the filename size
348 		 * being larger than the entire entry. */
349 		if (number > (uint64_t)(bsd_name_length + 1)
350 		    || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
351 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
352 			    "Bad input file size");
353 			return (ARCHIVE_FATAL);
354 		}
355 		ar->entry_bytes_remaining -= bsd_name_length;
356 		/* Adjust file size reported to client. */
357 		archive_entry_set_size(entry, ar->entry_bytes_remaining);
358 
359 		/* Read the long name into memory. */
360 		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
361 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
362 			    "Truncated input file");
363 			return (ARCHIVE_FATAL);
364 		}
365 		__archive_read_consume(a, bsd_name_length);
366 
367 		/* Store it in the entry. */
368 		p = (char *)malloc(bsd_name_length + 1);
369 		if (p == NULL) {
370 			archive_set_error(&a->archive, ENOMEM,
371 			    "Can't allocate fname buffer");
372 			return (ARCHIVE_FATAL);
373 		}
374 		strncpy(p, b, bsd_name_length);
375 		p[bsd_name_length] = '\0';
376 		archive_entry_copy_pathname(entry, p);
377 		free(p);
378 		return (ARCHIVE_OK);
379 	}
380 
381 	/*
382 	 * "/" is the SVR4/GNU archive symbol table.
383 	 */
384 	if (strcmp(filename, "/") == 0) {
385 		archive_entry_copy_pathname(entry, "/");
386 		/* Parse the time, owner, mode, size fields. */
387 		r = ar_parse_common_header(ar, entry, h);
388 		/* Force the file type to a regular file. */
389 		archive_entry_set_filetype(entry, AE_IFREG);
390 		return (r);
391 	}
392 
393 	/*
394 	 * "__.SYMDEF" is a BSD archive symbol table.
395 	 */
396 	if (strcmp(filename, "__.SYMDEF") == 0) {
397 		archive_entry_copy_pathname(entry, filename);
398 		/* Parse the time, owner, mode, size fields. */
399 		return (ar_parse_common_header(ar, entry, h));
400 	}
401 
402 	/*
403 	 * Otherwise, this is a standard entry.  The filename
404 	 * has already been trimmed as much as possible, based
405 	 * on our current knowledge of the format.
406 	 */
407 	archive_entry_copy_pathname(entry, filename);
408 	return (ar_parse_common_header(ar, entry, h));
409 }
410 
411 static int
412 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
413     const char *h)
414 {
415 	uint64_t n;
416 
417 	/* Copy remaining header */
418 	archive_entry_set_mtime(entry,
419 	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
420 	archive_entry_set_uid(entry,
421 	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
422 	archive_entry_set_gid(entry,
423 	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
424 	archive_entry_set_mode(entry,
425 	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
426 	n = ar_atol10(h + AR_size_offset, AR_size_size);
427 
428 	ar->entry_offset = 0;
429 	ar->entry_padding = n % 2;
430 	archive_entry_set_size(entry, n);
431 	ar->entry_bytes_remaining = n;
432 	return (ARCHIVE_OK);
433 }
434 
435 static int
436 archive_read_format_ar_read_data(struct archive_read *a,
437     const void **buff, size_t *size, off_t *offset)
438 {
439 	ssize_t bytes_read;
440 	struct ar *ar;
441 
442 	ar = (struct ar *)(a->format->data);
443 
444 	if (ar->entry_bytes_remaining > 0) {
445 		*buff = __archive_read_ahead(a, 1, &bytes_read);
446 		if (bytes_read == 0) {
447 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
448 			    "Truncated ar archive");
449 			return (ARCHIVE_FATAL);
450 		}
451 		if (bytes_read < 0)
452 			return (ARCHIVE_FATAL);
453 		if (bytes_read > ar->entry_bytes_remaining)
454 			bytes_read = (ssize_t)ar->entry_bytes_remaining;
455 		*size = bytes_read;
456 		*offset = ar->entry_offset;
457 		ar->entry_offset += bytes_read;
458 		ar->entry_bytes_remaining -= bytes_read;
459 		__archive_read_consume(a, (size_t)bytes_read);
460 		return (ARCHIVE_OK);
461 	} else {
462 		while (ar->entry_padding > 0) {
463 			*buff = __archive_read_ahead(a, 1, &bytes_read);
464 			if (bytes_read <= 0)
465 				return (ARCHIVE_FATAL);
466 			if (bytes_read > ar->entry_padding)
467 				bytes_read = (ssize_t)ar->entry_padding;
468 			__archive_read_consume(a, (size_t)bytes_read);
469 			ar->entry_padding -= bytes_read;
470 		}
471 		*buff = NULL;
472 		*size = 0;
473 		*offset = ar->entry_offset;
474 		return (ARCHIVE_EOF);
475 	}
476 }
477 
478 static int
479 archive_read_format_ar_skip(struct archive_read *a)
480 {
481 	off_t bytes_skipped;
482 	struct ar* ar;
483 
484 	ar = (struct ar *)(a->format->data);
485 
486 	bytes_skipped = __archive_read_skip(a,
487 	    ar->entry_bytes_remaining + ar->entry_padding);
488 	if (bytes_skipped < 0)
489 		return (ARCHIVE_FATAL);
490 
491 	ar->entry_bytes_remaining = 0;
492 	ar->entry_padding = 0;
493 
494 	return (ARCHIVE_OK);
495 }
496 
497 static int
498 ar_parse_gnu_filename_table(struct archive_read *a)
499 {
500 	struct ar *ar;
501 	char *p;
502 	size_t size;
503 
504 	ar = (struct ar*)(a->format->data);
505 	size = ar->strtab_size;
506 
507 	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
508 		if (*p == '/') {
509 			*p++ = '\0';
510 			if (*p != '\n')
511 				goto bad_string_table;
512 			*p = '\0';
513 		}
514 	}
515 	/*
516 	 * GNU ar always pads the table to an even size.
517 	 * The pad character is either '\n' or '`'.
518 	 */
519 	if (p != ar->strtab + size && *p != '\n' && *p != '`')
520 		goto bad_string_table;
521 
522 	/* Enforce zero termination. */
523 	ar->strtab[size - 1] = '\0';
524 
525 	return (ARCHIVE_OK);
526 
527 bad_string_table:
528 	archive_set_error(&a->archive, EINVAL,
529 	    "Invalid string table");
530 	free(ar->strtab);
531 	ar->strtab = NULL;
532 	return (ARCHIVE_WARN);
533 }
534 
/*
 * Parse an unsigned octal number from a fixed-width, not necessarily
 * NUL-terminated, text field.
 *
 * p        - start of the field
 * char_cnt - width of the field in bytes; no byte at or beyond
 *            p + char_cnt is ever examined
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first
 * byte that is not an octal digit or when the field is exhausted.
 * Returns the parsed value, 0 if no digits are present, or UINT64_MAX
 * if the value does not fit in 64 bits.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Check the remaining width before touching *p. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
560 
/*
 * Parse an unsigned decimal number from a fixed-width, not necessarily
 * NUL-terminated, text field.
 *
 * p        - start of the field
 * char_cnt - width of the field in bytes; no byte at or beyond
 *            p + char_cnt is ever examined
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first
 * byte that is not a decimal digit or when the field is exhausted.
 * Returns the parsed value, 0 if no digits are present, or UINT64_MAX
 * if the value does not fit in 64 bits.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Check the remaining width before touching *p. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
585