1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_mtree.c 201165 2009-12-29 05:52:13Z kientzle $");
30 
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_FCNTL_H
38 #include <fcntl.h>
39 #endif
40 #include <stddef.h>
41 /* #include <stdint.h> */ /* See archive_platform.h */
42 #ifdef HAVE_STDLIB_H
43 #include <stdlib.h>
44 #endif
45 #ifdef HAVE_STRING_H
46 #include <string.h>
47 #endif
48 
49 #include "archive.h"
50 #include "archive_entry.h"
51 #include "archive_private.h"
52 #include "archive_read_private.h"
53 #include "archive_string.h"
54 #include "archive_pack_dev.h"
55 
56 #ifndef O_BINARY
57 #define	O_BINARY 0
58 #endif
59 #ifndef O_CLOEXEC
60 #define O_CLOEXEC	0
61 #endif
62 
63 #define	MTREE_HAS_DEVICE	0x0001
64 #define	MTREE_HAS_FFLAGS	0x0002
65 #define	MTREE_HAS_GID		0x0004
66 #define	MTREE_HAS_GNAME		0x0008
67 #define	MTREE_HAS_MTIME		0x0010
68 #define	MTREE_HAS_NLINK		0x0020
69 #define	MTREE_HAS_PERM		0x0040
70 #define	MTREE_HAS_SIZE		0x0080
71 #define	MTREE_HAS_TYPE		0x0100
72 #define	MTREE_HAS_UID		0x0200
73 #define	MTREE_HAS_UNAME		0x0400
74 
75 #define	MTREE_HAS_OPTIONAL	0x0800
76 #define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */
77 
78 struct mtree_option {
79 	struct mtree_option *next;
80 	char *value;
81 };
82 
83 struct mtree_entry {
84 	struct mtree_entry *next;
85 	struct mtree_option *options;
86 	char *name;
87 	char full;
88 	char used;
89 };
90 
91 struct mtree {
92 	struct archive_string	 line;
93 	size_t			 buffsize;
94 	char			*buff;
95 	int64_t			 offset;
96 	int			 fd;
97 	int			 archive_format;
98 	const char		*archive_format_name;
99 	struct mtree_entry	*entries;
100 	struct mtree_entry	*this_entry;
101 	struct archive_string	 current_dir;
102 	struct archive_string	 contents_name;
103 
104 	struct archive_entry_linkresolver *resolver;
105 
106 	int64_t			 cur_size;
107 	char checkfs;
108 };
109 
110 static int	bid_keycmp(const char *, const char *, ssize_t);
111 static int	cleanup(struct archive_read *);
112 static int	detect_form(struct archive_read *, int *);
113 static int	mtree_bid(struct archive_read *, int);
114 static int	parse_file(struct archive_read *, struct archive_entry *,
115 		    struct mtree *, struct mtree_entry *, int *);
116 static void	parse_escapes(char *, struct mtree_entry *);
117 static int	parse_line(struct archive_read *, struct archive_entry *,
118 		    struct mtree *, struct mtree_entry *, int *);
119 static int	parse_keyword(struct archive_read *, struct mtree *,
120 		    struct archive_entry *, struct mtree_option *, int *);
121 static int	read_data(struct archive_read *a,
122 		    const void **buff, size_t *size, int64_t *offset);
123 static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
124 static int	skip(struct archive_read *a);
125 static int	read_header(struct archive_read *,
126 		    struct archive_entry *);
127 static int64_t	mtree_atol10(char **);
128 static int64_t	mtree_atol8(char **);
129 static int64_t	mtree_atol(char **);
130 
131 /*
132  * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
133  * here.  TODO: Move this to configure time, but be careful
134  * about cross-compile environments.
135  */
136 static int64_t
137 get_time_t_max(void)
138 {
139 #if defined(TIME_T_MAX)
140 	return TIME_T_MAX;
141 #else
142 	/* ISO C allows time_t to be a floating-point type,
143 	   but POSIX requires an integer type.  The following
144 	   should work on any system that follows the POSIX
145 	   conventions. */
146 	if (((time_t)0) < ((time_t)-1)) {
147 		/* Time_t is unsigned */
148 		return (~(time_t)0);
149 	} else {
150 		/* Time_t is signed. */
151 		/* Assume it's the same as int64_t or int32_t */
152 		if (sizeof(time_t) == sizeof(int64_t)) {
153 			return (time_t)INT64_MAX;
154 		} else {
155 			return (time_t)INT32_MAX;
156 		}
157 	}
158 #endif
159 }
160 
161 static int64_t
162 get_time_t_min(void)
163 {
164 #if defined(TIME_T_MIN)
165 	return TIME_T_MIN;
166 #else
167 	if (((time_t)0) < ((time_t)-1)) {
168 		/* Time_t is unsigned */
169 		return (time_t)0;
170 	} else {
171 		/* Time_t is signed. */
172 		if (sizeof(time_t) == sizeof(int64_t)) {
173 			return (time_t)INT64_MIN;
174 		} else {
175 			return (time_t)INT32_MIN;
176 		}
177 	}
178 #endif
179 }
180 
181 static int
182 archive_read_format_mtree_options(struct archive_read *a,
183     const char *key, const char *val)
184 {
185 	struct mtree *mtree;
186 
187 	mtree = (struct mtree *)(a->format->data);
188 	if (strcmp(key, "checkfs")  == 0) {
189 		/* Allows to read information missing from the mtree from the file system */
190 		if (val == NULL || val[0] == 0) {
191 			mtree->checkfs = 0;
192 		} else {
193 			mtree->checkfs = 1;
194 		}
195 		return (ARCHIVE_OK);
196 	}
197 
198 	/* Note: The "warn" return is just to inform the options
199 	 * supervisor that we didn't handle it.  It will generate
200 	 * a suitable error if no one used this option. */
201 	return (ARCHIVE_WARN);
202 }
203 
204 static void
205 free_options(struct mtree_option *head)
206 {
207 	struct mtree_option *next;
208 
209 	for (; head != NULL; head = next) {
210 		next = head->next;
211 		free(head->value);
212 		free(head);
213 	}
214 }
215 
216 int
217 archive_read_support_format_mtree(struct archive *_a)
218 {
219 	struct archive_read *a = (struct archive_read *)_a;
220 	struct mtree *mtree;
221 	int r;
222 
223 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
224 	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
225 
226 	mtree = (struct mtree *)malloc(sizeof(*mtree));
227 	if (mtree == NULL) {
228 		archive_set_error(&a->archive, ENOMEM,
229 		    "Can't allocate mtree data");
230 		return (ARCHIVE_FATAL);
231 	}
232 	memset(mtree, 0, sizeof(*mtree));
233 	mtree->fd = -1;
234 
235 	r = __archive_read_register_format(a, mtree, "mtree",
236            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
237 
238 	if (r != ARCHIVE_OK)
239 		free(mtree);
240 	return (ARCHIVE_OK);
241 }
242 
243 static int
244 cleanup(struct archive_read *a)
245 {
246 	struct mtree *mtree;
247 	struct mtree_entry *p, *q;
248 
249 	mtree = (struct mtree *)(a->format->data);
250 
251 	p = mtree->entries;
252 	while (p != NULL) {
253 		q = p->next;
254 		free(p->name);
255 		free_options(p->options);
256 		free(p);
257 		p = q;
258 	}
259 	archive_string_free(&mtree->line);
260 	archive_string_free(&mtree->current_dir);
261 	archive_string_free(&mtree->contents_name);
262 	archive_entry_linkresolver_free(mtree->resolver);
263 
264 	free(mtree->buff);
265 	free(mtree);
266 	(a->format->data) = NULL;
267 	return (ARCHIVE_OK);
268 }
269 
270 static ssize_t
271 get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
272 {
273 	ssize_t len;
274 
275 	len = 0;
276 	while (len < avail) {
277 		switch (*b) {
278 		case '\0':/* Non-ascii character or control character. */
279 			if (nlsize != NULL)
280 				*nlsize = 0;
281 			return (-1);
282 		case '\r':
283 			if (avail-len > 1 && b[1] == '\n') {
284 				if (nlsize != NULL)
285 					*nlsize = 2;
286 				return (len+2);
287 			}
288 			/* FALL THROUGH */
289 		case '\n':
290 			if (nlsize != NULL)
291 				*nlsize = 1;
292 			return (len+1);
293 		default:
294 			b++;
295 			len++;
296 			break;
297 		}
298 	}
299 	if (nlsize != NULL)
300 		*nlsize = 0;
301 	return (avail);
302 }
303 
304 static ssize_t
305 next_line(struct archive_read *a,
306     const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
307 {
308 	ssize_t len;
309 	int quit;
310 
311 	quit = 0;
312 	if (*avail == 0) {
313 		*nl = 0;
314 		len = 0;
315 	} else
316 		len = get_line_size(*b, *avail, nl);
317 	/*
318 	 * Read bytes more while it does not reach the end of line.
319 	 */
320 	while (*nl == 0 && len == *avail && !quit) {
321 		ssize_t diff = *ravail - *avail;
322 		size_t nbytes_req = (*ravail+1023) & ~1023U;
323 		ssize_t tested;
324 
325 		/* Increase reading bytes if it is not enough to at least
326 		 * new two lines. */
327 		if (nbytes_req < (size_t)*ravail + 160)
328 			nbytes_req <<= 1;
329 
330 		*b = __archive_read_ahead(a, nbytes_req, avail);
331 		if (*b == NULL) {
332 			if (*ravail >= *avail)
333 				return (0);
334 			/* Reading bytes reaches the end of file. */
335 			*b = __archive_read_ahead(a, *avail, avail);
336 			quit = 1;
337 		}
338 		*ravail = *avail;
339 		*b += diff;
340 		*avail -= diff;
341 		tested = len;/* Skip some bytes we already determinated. */
342 		len = get_line_size(*b, *avail, nl);
343 		if (len >= 0)
344 			len += tested;
345 	}
346 	return (len);
347 }
348 
349 /*
350  * Compare characters with a mtree keyword.
351  * Returns the length of a mtree keyword if matched.
352  * Returns 0 if not matched.
353  */
354 static int
355 bid_keycmp(const char *p, const char *key, ssize_t len)
356 {
357 	int match_len = 0;
358 
359 	while (len > 0 && *p && *key) {
360 		if (*p == *key) {
361 			--len;
362 			++p;
363 			++key;
364 			++match_len;
365 			continue;
366 		}
367 		return (0);/* Not match */
368 	}
369 	if (*key != '\0')
370 		return (0);/* Not match */
371 
372 	/* A following character should be specified characters */
373 	if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' ||
374 	    p[0] == '\n' || p[0] == '\r' ||
375 	   (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')))
376 		return (match_len);
377 	return (0);/* Not match */
378 }
379 
380 /*
381  * Test whether the characters 'p' has is mtree keyword.
382  * Returns the length of a detected keyword.
383  * Returns 0 if any keywords were not found.
384  */
385 static int
386 bid_keyword(const char *p,  ssize_t len)
387 {
388 	static const char *keys_c[] = {
389 		"content", "contents", "cksum", NULL
390 	};
391 	static const char *keys_df[] = {
392 		"device", "flags", NULL
393 	};
394 	static const char *keys_g[] = {
395 		"gid", "gname", NULL
396 	};
397 	static const char *keys_il[] = {
398 		"ignore", "inode", "link", NULL
399 	};
400 	static const char *keys_m[] = {
401 		"md5", "md5digest", "mode", NULL
402 	};
403 	static const char *keys_no[] = {
404 		"nlink", "nochange", "optional", NULL
405 	};
406 	static const char *keys_r[] = {
407 		"resdevice", "rmd160", "rmd160digest", NULL
408 	};
409 	static const char *keys_s[] = {
410 		"sha1", "sha1digest",
411 		"sha256", "sha256digest",
412 		"sha384", "sha384digest",
413 		"sha512", "sha512digest",
414 		"size", NULL
415 	};
416 	static const char *keys_t[] = {
417 		"tags", "time", "type", NULL
418 	};
419 	static const char *keys_u[] = {
420 		"uid", "uname",	NULL
421 	};
422 	const char **keys;
423 	int i;
424 
425 	switch (*p) {
426 	case 'c': keys = keys_c; break;
427 	case 'd': case 'f': keys = keys_df; break;
428 	case 'g': keys = keys_g; break;
429 	case 'i': case 'l': keys = keys_il; break;
430 	case 'm': keys = keys_m; break;
431 	case 'n': case 'o': keys = keys_no; break;
432 	case 'r': keys = keys_r; break;
433 	case 's': keys = keys_s; break;
434 	case 't': keys = keys_t; break;
435 	case 'u': keys = keys_u; break;
436 	default: return (0);/* Unknown key */
437 	}
438 
439 	for (i = 0; keys[i] != NULL; i++) {
440 		int l = bid_keycmp(p, keys[i], len);
441 		if (l > 0)
442 			return (l);
443 	}
444 	return (0);/* Unknown key */
445 }
446 
447 /*
448  * Test whether there is a set of mtree keywords.
449  * Returns the number of keyword.
450  * Returns -1 if we got incorrect sequence.
451  * This function expects a set of "<space characters>keyword=value".
452  * When "unset" is specified, expects a set of "<space characters>keyword".
453  */
454 static int
455 bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
456 {
457 	int l;
458 	int keycnt = 0;
459 
460 	while (len > 0 && *p) {
461 		int blank = 0;
462 
463 		/* Test whether there are blank characters in the line. */
464 		while (len >0 && (*p == ' ' || *p == '\t')) {
465 			++p;
466 			--len;
467 			blank = 1;
468 		}
469 		if (*p == '\n' || *p == '\r')
470 			break;
471 		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
472 			break;
473 		if (!blank && !last_is_path) /* No blank character. */
474 			return (-1);
475 		if (last_is_path && len == 0)
476 				return (keycnt);
477 
478 		if (unset) {
479 			l = bid_keycmp(p, "all", len);
480 			if (l > 0)
481 				return (1);
482 		}
483 		/* Test whether there is a correct key in the line. */
484 		l = bid_keyword(p, len);
485 		if (l == 0)
486 			return (-1);/* Unknown keyword was found. */
487 		p += l;
488 		len -= l;
489 		keycnt++;
490 
491 		/* Skip value */
492 		if (*p == '=') {
493 			int value = 0;
494 			++p;
495 			--len;
496 			while (len > 0 && *p != ' ' && *p != '\t') {
497 				++p;
498 				--len;
499 				value = 1;
500 			}
501 			/* A keyword should have a its value unless
502 			 * "/unset" operation. */
503 			if (!unset && value == 0)
504 				return (-1);
505 		}
506 	}
507 	return (keycnt);
508 }
509 
510 static int
511 bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
512 {
513 	int f = 0;
514 	static const unsigned char safe_char[256] = {
515 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
516 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
517 		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
518 		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
519 		/* 0123456789:;<>?  EXCLUSION:(=) */
520 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
521 		/* @ABCDEFGHIJKLMNO */
522 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
523 		/* PQRSTUVWXYZ[\]^_  */
524 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
525 		/* `abcdefghijklmno */
526 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
527 		/* pqrstuvwxyz{|}~ */
528 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
529 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
530 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
531 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
532 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
533 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
534 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
535 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
536 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
537 	};
538 	ssize_t ll;
539 	const char *pp = p;
540 	const char * const pp_end = pp + len;
541 
542 	*last_is_path = 0;
543 	/*
544 	 * Skip the path-name which is quoted.
545 	 */
546 	for (;pp < pp_end; ++pp) {
547 		if (!safe_char[*(const unsigned char *)pp]) {
548 			if (*pp != ' ' && *pp != '\t' && *pp != '\r'
549 			    && *pp != '\n')
550 				f = 0;
551 			break;
552 		}
553 		f = 1;
554 	}
555 	ll = pp_end - pp;
556 
557 	/* If a path-name was not found at the first, try to check
558 	 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
559 	 * which places the path-name at the last. */
560 	if (f == 0) {
561 		const char *pb = p + len - nl;
562 		int name_len = 0;
563 		int slash;
564 
565 		/* The form D accepts only a single line for an entry. */
566 		if (pb-2 >= p &&
567 		    pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
568 			return (-1);
569 		if (pb-1 >= p && pb[-1] == '\\')
570 			return (-1);
571 
572 		slash = 0;
573 		while (p <= --pb && *pb != ' ' && *pb != '\t') {
574 			if (!safe_char[*(const unsigned char *)pb])
575 				return (-1);
576 			name_len++;
577 			/* The pathname should have a slash in this
578 			 * format. */
579 			if (*pb == '/')
580 				slash = 1;
581 		}
582 		if (name_len == 0 || slash == 0)
583 			return (-1);
584 		/* If '/' is placed at the first in this field, this is not
585 		 * a valid filename. */
586 		if (pb[1] == '/')
587 			return (-1);
588 		ll = len - nl - name_len;
589 		pp = p;
590 		*last_is_path = 1;
591 	}
592 
593 	return (bid_keyword_list(pp, ll, 0, *last_is_path));
594 }
595 
596 #define MAX_BID_ENTRY	3
597 
598 static int
599 mtree_bid(struct archive_read *a, int best_bid)
600 {
601 	const char *signature = "#mtree";
602 	const char *p;
603 
604 	(void)best_bid; /* UNUSED */
605 
606 	/* Now let's look at the actual header and see if it matches. */
607 	p = __archive_read_ahead(a, strlen(signature), NULL);
608 	if (p == NULL)
609 		return (-1);
610 
611 	if (memcmp(p, signature, strlen(signature)) == 0)
612 		return (8 * (int)strlen(signature));
613 
614 	/*
615 	 * There is not a mtree signature. Let's try to detect mtree format.
616 	 */
617 	return (detect_form(a, NULL));
618 }
619 
620 static int
621 detect_form(struct archive_read *a, int *is_form_d)
622 {
623 	const char *p;
624 	ssize_t avail, ravail;
625 	ssize_t detected_bytes = 0, len, nl;
626 	int entry_cnt = 0, multiline = 0;
627 	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
628 			* (In this source we call it `form D') . */
629 
630 	if (is_form_d != NULL)
631 		*is_form_d = 0;
632 	p = __archive_read_ahead(a, 1, &avail);
633 	if (p == NULL)
634 		return (-1);
635 	ravail = avail;
636 	for (;;) {
637 		len = next_line(a, &p, &avail, &ravail, &nl);
638 		/* The terminal character of the line should be
639 		 * a new line character, '\r\n' or '\n'. */
640 		if (len <= 0 || nl == 0)
641 			break;
642 		if (!multiline) {
643 			/* Leading whitespace is never significant,
644 			 * ignore it. */
645 			while (len > 0 && (*p == ' ' || *p == '\t')) {
646 				++p;
647 				--avail;
648 				--len;
649 			}
650 			/* Skip comment or empty line. */
651 			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
652 				p += len;
653 				avail -= len;
654 				continue;
655 			}
656 		} else {
657 			/* A continuance line; the terminal
658 			 * character of previous line was '\' character. */
659 			if (bid_keyword_list(p, len, 0, 0) <= 0)
660 				break;
661 			if (multiline == 1)
662 				detected_bytes += len;
663 			if (p[len-nl-1] != '\\') {
664 				if (multiline == 1 &&
665 				    ++entry_cnt >= MAX_BID_ENTRY)
666 					break;
667 				multiline = 0;
668 			}
669 			p += len;
670 			avail -= len;
671 			continue;
672 		}
673 		if (p[0] != '/') {
674 			int last_is_path, keywords;
675 
676 			keywords = bid_entry(p, len, nl, &last_is_path);
677 			if (keywords >= 0) {
678 				detected_bytes += len;
679 				if (form_D == 0) {
680 					if (last_is_path)
681 						form_D = 1;
682 					else if (keywords > 0)
683 						/* This line is not `form D'. */
684 						form_D = -1;
685 				} else if (form_D == 1) {
686 					if (!last_is_path && keywords > 0)
687 						/* This this is not `form D'
688 						 * and We cannot accept mixed
689 						 * format. */
690 						break;
691 				}
692 				if (!last_is_path && p[len-nl-1] == '\\')
693 					/* This line continues. */
694 					multiline = 1;
695 				else {
696 					/* We've got plenty of correct lines
697 					 * to assume that this file is a mtree
698 					 * format. */
699 					if (++entry_cnt >= MAX_BID_ENTRY)
700 						break;
701 				}
702 			} else
703 				break;
704 		} else if (strncmp(p, "/set", 4) == 0) {
705 			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
706 				break;
707 			/* This line continues. */
708 			if (p[len-nl-1] == '\\')
709 				multiline = 2;
710 		} else if (strncmp(p, "/unset", 6) == 0) {
711 			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
712 				break;
713 			/* This line continues. */
714 			if (p[len-nl-1] == '\\')
715 				multiline = 2;
716 		} else
717 			break;
718 
719 		/* Test next line. */
720 		p += len;
721 		avail -= len;
722 	}
723 	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
724 		if (is_form_d != NULL) {
725 			if (form_D == 1)
726 				*is_form_d = 1;
727 		}
728 		return (32);
729 	}
730 
731 	return (0);
732 }
733 
734 /*
735  * The extended mtree format permits multiple lines specifying
736  * attributes for each file.  For those entries, only the last line
737  * is actually used.  Practically speaking, that means we have
738  * to read the entire mtree file into memory up front.
739  *
740  * The parsing is done in two steps.  First, it is decided if a line
741  * changes the global defaults and if it is, processed accordingly.
742  * Otherwise, the options of the line are merged with the current
743  * global options.
744  */
745 static int
746 add_option(struct archive_read *a, struct mtree_option **global,
747     const char *value, size_t len)
748 {
749 	struct mtree_option *opt;
750 
751 	if ((opt = malloc(sizeof(*opt))) == NULL) {
752 		archive_set_error(&a->archive, errno, "Can't allocate memory");
753 		return (ARCHIVE_FATAL);
754 	}
755 	if ((opt->value = malloc(len + 1)) == NULL) {
756 		free(opt);
757 		archive_set_error(&a->archive, errno, "Can't allocate memory");
758 		return (ARCHIVE_FATAL);
759 	}
760 	memcpy(opt->value, value, len);
761 	opt->value[len] = '\0';
762 	opt->next = *global;
763 	*global = opt;
764 	return (ARCHIVE_OK);
765 }
766 
767 static void
768 remove_option(struct mtree_option **global, const char *value, size_t len)
769 {
770 	struct mtree_option *iter, *last;
771 
772 	last = NULL;
773 	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
774 		if (strncmp(iter->value, value, len) == 0 &&
775 		    (iter->value[len] == '\0' ||
776 		     iter->value[len] == '='))
777 			break;
778 	}
779 	if (iter == NULL)
780 		return;
781 	if (last == NULL)
782 		*global = iter->next;
783 	else
784 		last->next = iter->next;
785 
786 	free(iter->value);
787 	free(iter);
788 }
789 
790 static int
791 process_global_set(struct archive_read *a,
792     struct mtree_option **global, const char *line)
793 {
794 	const char *next, *eq;
795 	size_t len;
796 	int r;
797 
798 	line += 4;
799 	for (;;) {
800 		next = line + strspn(line, " \t\r\n");
801 		if (*next == '\0')
802 			return (ARCHIVE_OK);
803 		line = next;
804 		next = line + strcspn(line, " \t\r\n");
805 		eq = strchr(line, '=');
806 		if (eq > next)
807 			len = next - line;
808 		else
809 			len = eq - line;
810 
811 		remove_option(global, line, len);
812 		r = add_option(a, global, line, next - line);
813 		if (r != ARCHIVE_OK)
814 			return (r);
815 		line = next;
816 	}
817 }
818 
819 static int
820 process_global_unset(struct archive_read *a,
821     struct mtree_option **global, const char *line)
822 {
823 	const char *next;
824 	size_t len;
825 
826 	line += 6;
827 	if (strchr(line, '=') != NULL) {
828 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
829 		    "/unset shall not contain `='");
830 		return ARCHIVE_FATAL;
831 	}
832 
833 	for (;;) {
834 		next = line + strspn(line, " \t\r\n");
835 		if (*next == '\0')
836 			return (ARCHIVE_OK);
837 		line = next;
838 		len = strcspn(line, " \t\r\n");
839 
840 		if (len == 3 && strncmp(line, "all", 3) == 0) {
841 			free_options(*global);
842 			*global = NULL;
843 		} else {
844 			remove_option(global, line, len);
845 		}
846 
847 		line += len;
848 	}
849 }
850 
851 static int
852 process_add_entry(struct archive_read *a, struct mtree *mtree,
853     struct mtree_option **global, const char *line, ssize_t line_len,
854     struct mtree_entry **last_entry, int is_form_d)
855 {
856 	struct mtree_entry *entry;
857 	struct mtree_option *iter;
858 	const char *next, *eq, *name, *end;
859 	size_t name_len, len;
860 	int r, i;
861 
862 	if ((entry = malloc(sizeof(*entry))) == NULL) {
863 		archive_set_error(&a->archive, errno, "Can't allocate memory");
864 		return (ARCHIVE_FATAL);
865 	}
866 	entry->next = NULL;
867 	entry->options = NULL;
868 	entry->name = NULL;
869 	entry->used = 0;
870 	entry->full = 0;
871 
872 	/* Add this entry to list. */
873 	if (*last_entry == NULL)
874 		mtree->entries = entry;
875 	else
876 		(*last_entry)->next = entry;
877 	*last_entry = entry;
878 
879 	if (is_form_d) {
880 		/* Filename is last item on line. */
881 		/* Adjust line_len to trim trailing whitespace */
882 		while (line_len > 0) {
883 			char last_character = line[line_len - 1];
884 			if (last_character == '\r'
885 			    || last_character == '\n'
886 			    || last_character == '\t'
887 			    || last_character == ' ') {
888 				line_len--;
889 			} else {
890 				break;
891 			}
892 		}
893 		/* Name starts after the last whitespace separator */
894 		name = line;
895 		for (i = 0; i < line_len; i++) {
896 			if (line[i] == '\r'
897 			    || line[i] == '\n'
898 			    || line[i] == '\t'
899 			    || line[i] == ' ') {
900 				name = line + i + 1;
901 			}
902 		}
903 		name_len = line + line_len - name;
904 		end = name;
905 	} else {
906 		/* Filename is first item on line */
907 		name_len = strcspn(line, " \t\r\n");
908 		name = line;
909 		line += name_len;
910 		end = line + line_len;
911 	}
912 	/* name/name_len is the name within the line. */
913 	/* line..end brackets the entire line except the name */
914 
915 	if ((entry->name = malloc(name_len + 1)) == NULL) {
916 		archive_set_error(&a->archive, errno, "Can't allocate memory");
917 		return (ARCHIVE_FATAL);
918 	}
919 
920 	memcpy(entry->name, name, name_len);
921 	entry->name[name_len] = '\0';
922 	parse_escapes(entry->name, entry);
923 
924 	for (iter = *global; iter != NULL; iter = iter->next) {
925 		r = add_option(a, &entry->options, iter->value,
926 		    strlen(iter->value));
927 		if (r != ARCHIVE_OK)
928 			return (r);
929 	}
930 
931 	for (;;) {
932 		next = line + strspn(line, " \t\r\n");
933 		if (*next == '\0')
934 			return (ARCHIVE_OK);
935 		if (next >= end)
936 			return (ARCHIVE_OK);
937 		line = next;
938 		next = line + strcspn(line, " \t\r\n");
939 		eq = strchr(line, '=');
940 		if (eq == NULL || eq > next)
941 			len = next - line;
942 		else
943 			len = eq - line;
944 
945 		remove_option(&entry->options, line, len);
946 		r = add_option(a, &entry->options, line, next - line);
947 		if (r != ARCHIVE_OK)
948 			return (r);
949 		line = next;
950 	}
951 }
952 
953 static int
954 read_mtree(struct archive_read *a, struct mtree *mtree)
955 {
956 	ssize_t len;
957 	uintmax_t counter;
958 	char *p;
959 	struct mtree_option *global;
960 	struct mtree_entry *last_entry;
961 	int r, is_form_d;
962 
963 	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
964 	mtree->archive_format_name = "mtree";
965 
966 	global = NULL;
967 	last_entry = NULL;
968 
969 	(void)detect_form(a, &is_form_d);
970 
971 	for (counter = 1; ; ++counter) {
972 		len = readline(a, mtree, &p, 65536);
973 		if (len == 0) {
974 			mtree->this_entry = mtree->entries;
975 			free_options(global);
976 			return (ARCHIVE_OK);
977 		}
978 		if (len < 0) {
979 			free_options(global);
980 			return ((int)len);
981 		}
982 		/* Leading whitespace is never significant, ignore it. */
983 		while (*p == ' ' || *p == '\t') {
984 			++p;
985 			--len;
986 		}
987 		/* Skip content lines and blank lines. */
988 		if (*p == '#')
989 			continue;
990 		if (*p == '\r' || *p == '\n' || *p == '\0')
991 			continue;
992 		if (*p != '/') {
993 			r = process_add_entry(a, mtree, &global, p, len,
994 			    &last_entry, is_form_d);
995 		} else if (strncmp(p, "/set", 4) == 0) {
996 			if (p[4] != ' ' && p[4] != '\t')
997 				break;
998 			r = process_global_set(a, &global, p);
999 		} else if (strncmp(p, "/unset", 6) == 0) {
1000 			if (p[6] != ' ' && p[6] != '\t')
1001 				break;
1002 			r = process_global_unset(a, &global, p);
1003 		} else
1004 			break;
1005 
1006 		if (r != ARCHIVE_OK) {
1007 			free_options(global);
1008 			return r;
1009 		}
1010 	}
1011 
1012 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1013 	    "Can't parse line %ju", counter);
1014 	free_options(global);
1015 	return (ARCHIVE_FATAL);
1016 }
1017 
1018 /*
1019  * Read in the entire mtree file into memory on the first request.
1020  * Then use the next unused file to satisfy each header request.
1021  */
1022 static int
1023 read_header(struct archive_read *a, struct archive_entry *entry)
1024 {
1025 	struct mtree *mtree;
1026 	char *p;
1027 	int r, use_next;
1028 
1029 	mtree = (struct mtree *)(a->format->data);
1030 
1031 	if (mtree->fd >= 0) {
1032 		close(mtree->fd);
1033 		mtree->fd = -1;
1034 	}
1035 
1036 	if (mtree->entries == NULL) {
1037 		mtree->resolver = archive_entry_linkresolver_new();
1038 		if (mtree->resolver == NULL)
1039 			return ARCHIVE_FATAL;
1040 		archive_entry_linkresolver_set_strategy(mtree->resolver,
1041 		    ARCHIVE_FORMAT_MTREE);
1042 		r = read_mtree(a, mtree);
1043 		if (r != ARCHIVE_OK)
1044 			return (r);
1045 	}
1046 
1047 	a->archive.archive_format = mtree->archive_format;
1048 	a->archive.archive_format_name = mtree->archive_format_name;
1049 
1050 	for (;;) {
1051 		if (mtree->this_entry == NULL)
1052 			return (ARCHIVE_EOF);
1053 		if (strcmp(mtree->this_entry->name, "..") == 0) {
1054 			mtree->this_entry->used = 1;
1055 			if (archive_strlen(&mtree->current_dir) > 0) {
1056 				/* Roll back current path. */
1057 				p = mtree->current_dir.s
1058 				    + mtree->current_dir.length - 1;
1059 				while (p >= mtree->current_dir.s && *p != '/')
1060 					--p;
1061 				if (p >= mtree->current_dir.s)
1062 					--p;
1063 				mtree->current_dir.length
1064 				    = p - mtree->current_dir.s + 1;
1065 			}
1066 		}
1067 		if (!mtree->this_entry->used) {
1068 			use_next = 0;
1069 			r = parse_file(a, entry, mtree, mtree->this_entry,
1070 				&use_next);
1071 			if (use_next == 0)
1072 				return (r);
1073 		}
1074 		mtree->this_entry = mtree->this_entry->next;
1075 	}
1076 }
1077 
1078 /*
1079  * A single file can have multiple lines contribute specifications.
1080  * Parse as many lines as necessary, then pull additional information
1081  * from a backing file on disk as necessary.
1082  */
1083 static int
1084 parse_file(struct archive_read *a, struct archive_entry *entry,
1085     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1086 {
1087 	const char *path;
1088 	struct stat st_storage, *st;
1089 	struct mtree_entry *mp;
1090 	struct archive_entry *sparse_entry;
1091 	int r = ARCHIVE_OK, r1, parsed_kws;
1092 
1093 	mentry->used = 1;
1094 
1095 	/* Initialize reasonable defaults. */
1096 	archive_entry_set_filetype(entry, AE_IFREG);
1097 	archive_entry_set_size(entry, 0);
1098 	archive_string_empty(&mtree->contents_name);
1099 
1100 	/* Parse options from this line. */
1101 	parsed_kws = 0;
1102 	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1103 
1104 	if (mentry->full) {
1105 		archive_entry_copy_pathname(entry, mentry->name);
1106 		/*
1107 		 * "Full" entries are allowed to have multiple lines
1108 		 * and those lines aren't required to be adjacent.  We
1109 		 * don't support multiple lines for "relative" entries
1110 		 * nor do we make any attempt to merge data from
1111 		 * separate "relative" and "full" entries.  (Merging
1112 		 * "relative" and "full" entries would require dealing
1113 		 * with pathname canonicalization, which is a very
1114 		 * tricky subject.)
1115 		 */
1116 		for (mp = mentry->next; mp != NULL; mp = mp->next) {
1117 			if (mp->full && !mp->used
1118 			    && strcmp(mentry->name, mp->name) == 0) {
1119 				/* Later lines override earlier ones. */
1120 				mp->used = 1;
1121 				r1 = parse_line(a, entry, mtree, mp,
1122 				    &parsed_kws);
1123 				if (r1 < r)
1124 					r = r1;
1125 			}
1126 		}
1127 	} else {
1128 		/*
1129 		 * Relative entries require us to construct
1130 		 * the full path and possibly update the
1131 		 * current directory.
1132 		 */
1133 		size_t n = archive_strlen(&mtree->current_dir);
1134 		if (n > 0)
1135 			archive_strcat(&mtree->current_dir, "/");
1136 		archive_strcat(&mtree->current_dir, mentry->name);
1137 		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1138 		if (archive_entry_filetype(entry) != AE_IFDIR)
1139 			mtree->current_dir.length = n;
1140 	}
1141 
1142 	if (mtree->checkfs) {
1143 		/*
1144 		 * Try to open and stat the file to get the real size
1145 		 * and other file info.  It would be nice to avoid
1146 		 * this here so that getting a listing of an mtree
1147 		 * wouldn't require opening every referenced contents
1148 		 * file.  But then we wouldn't know the actual
1149 		 * contents size, so I don't see a really viable way
1150 		 * around this.  (Also, we may want to someday pull
1151 		 * other unspecified info from the contents file on
1152 		 * disk.)
1153 		 */
1154 		mtree->fd = -1;
1155 		if (archive_strlen(&mtree->contents_name) > 0)
1156 			path = mtree->contents_name.s;
1157 		else
1158 			path = archive_entry_pathname(entry);
1159 
1160 		if (archive_entry_filetype(entry) == AE_IFREG ||
1161 				archive_entry_filetype(entry) == AE_IFDIR) {
1162 			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1163 			__archive_ensure_cloexec_flag(mtree->fd);
1164 			if (mtree->fd == -1 &&
1165 				(errno != ENOENT ||
1166 				 archive_strlen(&mtree->contents_name) > 0)) {
1167 				archive_set_error(&a->archive, errno,
1168 						"Can't open %s", path);
1169 				r = ARCHIVE_WARN;
1170 			}
1171 		}
1172 
1173 		st = &st_storage;
1174 		if (mtree->fd >= 0) {
1175 			if (fstat(mtree->fd, st) == -1) {
1176 				archive_set_error(&a->archive, errno,
1177 						"Could not fstat %s", path);
1178 				r = ARCHIVE_WARN;
1179 				/* If we can't stat it, don't keep it open. */
1180 				close(mtree->fd);
1181 				mtree->fd = -1;
1182 				st = NULL;
1183 			}
1184 		} else if (lstat(path, st) == -1) {
1185 			st = NULL;
1186 		}
1187 
1188 		/*
1189 		 * Check for a mismatch between the type in the specification
1190 		 * and the type of the contents object on disk.
1191 		 */
1192 		if (st != NULL) {
1193 			if (((st->st_mode & S_IFMT) == S_IFREG &&
1194 			      archive_entry_filetype(entry) == AE_IFREG)
1195 #ifdef S_IFLNK
1196 			  ||((st->st_mode & S_IFMT) == S_IFLNK &&
1197 			      archive_entry_filetype(entry) == AE_IFLNK)
1198 #endif
1199 #ifdef S_IFSOCK
1200 			  ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1201 			      archive_entry_filetype(entry) == AE_IFSOCK)
1202 #endif
1203 #ifdef S_IFCHR
1204 			  ||((st->st_mode & S_IFMT) == S_IFCHR &&
1205 			      archive_entry_filetype(entry) == AE_IFCHR)
1206 #endif
1207 #ifdef S_IFBLK
1208 			  ||((st->st_mode & S_IFMT) == S_IFBLK &&
1209 			      archive_entry_filetype(entry) == AE_IFBLK)
1210 #endif
1211 			  ||((st->st_mode & S_IFMT) == S_IFDIR &&
1212 			      archive_entry_filetype(entry) == AE_IFDIR)
1213 #ifdef S_IFIFO
1214 			  ||((st->st_mode & S_IFMT) == S_IFIFO &&
1215 			      archive_entry_filetype(entry) == AE_IFIFO)
1216 #endif
1217 			) {
1218 				/* Types match. */
1219 			} else {
1220 				/* Types don't match; bail out gracefully. */
1221 				if (mtree->fd >= 0)
1222 					close(mtree->fd);
1223 				mtree->fd = -1;
1224 				if (parsed_kws & MTREE_HAS_OPTIONAL) {
1225 					/* It's not an error for an optional
1226 					 * entry to not match disk. */
1227 					*use_next = 1;
1228 				} else if (r == ARCHIVE_OK) {
1229 					archive_set_error(&a->archive,
1230 					    ARCHIVE_ERRNO_MISC,
1231 					    "mtree specification has different"
1232 					    " type for %s",
1233 					    archive_entry_pathname(entry));
1234 					r = ARCHIVE_WARN;
1235 				}
1236 				return (r);
1237 			}
1238 		}
1239 
1240 		/*
1241 		 * If there is a contents file on disk, pick some of the
1242 		 * metadata from that file.  For most of these, we only
1243 		 * set it from the contents if it wasn't already parsed
1244 		 * from the specification.
1245 		 */
1246 		if (st != NULL) {
1247 			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1248 				(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1249 				(archive_entry_filetype(entry) == AE_IFCHR ||
1250 				 archive_entry_filetype(entry) == AE_IFBLK))
1251 				archive_entry_set_rdev(entry, st->st_rdev);
1252 			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1253 				== 0 ||
1254 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1255 				archive_entry_set_gid(entry, st->st_gid);
1256 			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1257 				== 0 ||
1258 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1259 				archive_entry_set_uid(entry, st->st_uid);
1260 			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1261 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1262 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1263 				archive_entry_set_mtime(entry, st->st_mtime,
1264 						st->st_mtimespec.tv_nsec);
1265 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1266 				archive_entry_set_mtime(entry, st->st_mtime,
1267 						st->st_mtim.tv_nsec);
1268 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1269 				archive_entry_set_mtime(entry, st->st_mtime,
1270 						st->st_mtime_n);
1271 #elif HAVE_STRUCT_STAT_ST_UMTIME
1272 				archive_entry_set_mtime(entry, st->st_mtime,
1273 						st->st_umtime*1000);
1274 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1275 				archive_entry_set_mtime(entry, st->st_mtime,
1276 						st->st_mtime_usec*1000);
1277 #else
1278 				archive_entry_set_mtime(entry, st->st_mtime, 0);
1279 #endif
1280 			}
1281 			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1282 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1283 				archive_entry_set_nlink(entry, st->st_nlink);
1284 			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1285 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1286 				archive_entry_set_perm(entry, st->st_mode);
1287 			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1288 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1289 				archive_entry_set_size(entry, st->st_size);
1290 			archive_entry_set_ino(entry, st->st_ino);
1291 			archive_entry_set_dev(entry, st->st_dev);
1292 
1293 			archive_entry_linkify(mtree->resolver, &entry,
1294 				&sparse_entry);
1295 		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1296 			/*
1297 			 * Couldn't open the entry, stat it or the on-disk type
1298 			 * didn't match.  If this entry is optional, just
1299 			 * ignore it and read the next header entry.
1300 			 */
1301 			*use_next = 1;
1302 			return ARCHIVE_OK;
1303 		}
1304 	}
1305 
1306 	mtree->cur_size = archive_entry_size(entry);
1307 	mtree->offset = 0;
1308 
1309 	return r;
1310 }
1311 
1312 /*
1313  * Each line contains a sequence of keywords.
1314  */
1315 static int
1316 parse_line(struct archive_read *a, struct archive_entry *entry,
1317     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1318 {
1319 	struct mtree_option *iter;
1320 	int r = ARCHIVE_OK, r1;
1321 
1322 	for (iter = mp->options; iter != NULL; iter = iter->next) {
1323 		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1324 		if (r1 < r)
1325 			r = r1;
1326 	}
1327 	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1328 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1329 		    "Missing type keyword in mtree specification");
1330 		return (ARCHIVE_WARN);
1331 	}
1332 	return (r);
1333 }
1334 
1335 /*
1336  * Device entries have one of the following forms:
1337  *  - raw dev_t
1338  *  - format,major,minor[,subdevice]
1339  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1340  */
1341 
1342 /* strsep() is not in C90, but strcspn() is. */
1343 /* Taken from http://unixpapa.com/incnote/string.html */
1344 static char *
1345 la_strsep(char **sp, char *sep)
1346 {
1347 	char *p, *s;
1348 	if (sp == NULL || *sp == NULL || **sp == '\0')
1349 		return(NULL);
1350 	s = *sp;
1351 	p = s + strcspn(s, sep);
1352 	if (*p != '\0')
1353 		*p++ = '\0';
1354 	*sp = p;
1355 	return(s);
1356 }
1357 
1358 static int
1359 parse_device(dev_t *pdev, struct archive *a, char *val)
1360 {
1361 #define MAX_PACK_ARGS 3
1362 	unsigned long numbers[MAX_PACK_ARGS];
1363 	char *p, *dev;
1364 	int argc;
1365 	pack_t *pack;
1366 	dev_t result;
1367 	const char *error = NULL;
1368 
1369 	memset(pdev, 0, sizeof(*pdev));
1370 	if ((dev = strchr(val, ',')) != NULL) {
1371 		/*
1372 		 * Device's major/minor are given in a specified format.
1373 		 * Decode and pack it accordingly.
1374 		 */
1375 		*dev++ = '\0';
1376 		if ((pack = pack_find(val)) == NULL) {
1377 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1378 			    "Unknown format `%s'", val);
1379 			return ARCHIVE_WARN;
1380 		}
1381 		argc = 0;
1382 		while ((p = la_strsep(&dev, ",")) != NULL) {
1383 			if (*p == '\0') {
1384 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1385 				    "Missing number");
1386 				return ARCHIVE_WARN;
1387 			}
1388 			numbers[argc++] = (unsigned long)mtree_atol(&p);
1389 			if (argc > MAX_PACK_ARGS) {
1390 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1391 				    "Too many arguments");
1392 				return ARCHIVE_WARN;
1393 			}
1394 		}
1395 		if (argc < 2) {
1396 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1397 			    "Not enough arguments");
1398 			return ARCHIVE_WARN;
1399 		}
1400 		result = (*pack)(argc, numbers, &error);
1401 		if (error != NULL) {
1402 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1403 			    "%s", error);
1404 			return ARCHIVE_WARN;
1405 		}
1406 	} else {
1407 		/* file system raw value. */
1408 		result = (dev_t)mtree_atol(&val);
1409 	}
1410 	*pdev = result;
1411 	return ARCHIVE_OK;
1412 #undef MAX_PACK_ARGS
1413 }
1414 
1415 /*
1416  * Parse a single keyword and its value.
1417  */
1418 static int
1419 parse_keyword(struct archive_read *a, struct mtree *mtree,
1420     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1421 {
1422 	char *val, *key;
1423 
1424 	key = opt->value;
1425 
1426 	if (*key == '\0')
1427 		return (ARCHIVE_OK);
1428 
1429 	if (strcmp(key, "nochange") == 0) {
1430 		*parsed_kws |= MTREE_HAS_NOCHANGE;
1431 		return (ARCHIVE_OK);
1432 	}
1433 	if (strcmp(key, "optional") == 0) {
1434 		*parsed_kws |= MTREE_HAS_OPTIONAL;
1435 		return (ARCHIVE_OK);
1436 	}
1437 	if (strcmp(key, "ignore") == 0) {
1438 		/*
1439 		 * The mtree processing is not recursive, so
1440 		 * recursion will only happen for explicitly listed
1441 		 * entries.
1442 		 */
1443 		return (ARCHIVE_OK);
1444 	}
1445 
1446 	val = strchr(key, '=');
1447 	if (val == NULL) {
1448 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1449 		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1450 		return (ARCHIVE_WARN);
1451 	}
1452 
1453 	*val = '\0';
1454 	++val;
1455 
1456 	switch (key[0]) {
1457 	case 'c':
1458 		if (strcmp(key, "content") == 0
1459 		    || strcmp(key, "contents") == 0) {
1460 			parse_escapes(val, NULL);
1461 			archive_strcpy(&mtree->contents_name, val);
1462 			break;
1463 		}
1464 		if (strcmp(key, "cksum") == 0)
1465 			break;
1466 	case 'd':
1467 		if (strcmp(key, "device") == 0) {
1468 			/* stat(2) st_rdev field, e.g. the major/minor IDs
1469 			 * of a char/block special file */
1470 			int r;
1471 			dev_t dev;
1472 
1473 			*parsed_kws |= MTREE_HAS_DEVICE;
1474 			r = parse_device(&dev, &a->archive, val);
1475 			if (r == ARCHIVE_OK)
1476 				archive_entry_set_rdev(entry, dev);
1477 			return r;
1478 		}
1479 	case 'f':
1480 		if (strcmp(key, "flags") == 0) {
1481 			*parsed_kws |= MTREE_HAS_FFLAGS;
1482 			archive_entry_copy_fflags_text(entry, val);
1483 			break;
1484 		}
1485 	case 'g':
1486 		if (strcmp(key, "gid") == 0) {
1487 			*parsed_kws |= MTREE_HAS_GID;
1488 			archive_entry_set_gid(entry, mtree_atol10(&val));
1489 			break;
1490 		}
1491 		if (strcmp(key, "gname") == 0) {
1492 			*parsed_kws |= MTREE_HAS_GNAME;
1493 			archive_entry_copy_gname(entry, val);
1494 			break;
1495 		}
1496 	case 'i':
1497 		if (strcmp(key, "inode") == 0) {
1498 			archive_entry_set_ino(entry, mtree_atol10(&val));
1499 			break;
1500 		}
1501 	case 'l':
1502 		if (strcmp(key, "link") == 0) {
1503 			archive_entry_copy_symlink(entry, val);
1504 			break;
1505 		}
1506 	case 'm':
1507 		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1508 			break;
1509 		if (strcmp(key, "mode") == 0) {
1510 			if (val[0] >= '0' && val[0] <= '9') {
1511 				*parsed_kws |= MTREE_HAS_PERM;
1512 				archive_entry_set_perm(entry,
1513 				    (mode_t)mtree_atol8(&val));
1514 			} else {
1515 				archive_set_error(&a->archive,
1516 				    ARCHIVE_ERRNO_FILE_FORMAT,
1517 				    "Symbolic mode \"%s\" unsupported", val);
1518 				return ARCHIVE_WARN;
1519 			}
1520 			break;
1521 		}
1522 	case 'n':
1523 		if (strcmp(key, "nlink") == 0) {
1524 			*parsed_kws |= MTREE_HAS_NLINK;
1525 			archive_entry_set_nlink(entry,
1526 				(unsigned int)mtree_atol10(&val));
1527 			break;
1528 		}
1529 	case 'r':
1530 		if (strcmp(key, "resdevice") == 0) {
1531 			/* stat(2) st_dev field, e.g. the device ID where the
1532 			 * inode resides */
1533 			int r;
1534 			dev_t dev;
1535 
1536 			r = parse_device(&dev, &a->archive, val);
1537 			if (r == ARCHIVE_OK)
1538 				archive_entry_set_dev(entry, dev);
1539 			return r;
1540 		}
1541 		if (strcmp(key, "rmd160") == 0 ||
1542 		    strcmp(key, "rmd160digest") == 0)
1543 			break;
1544 	case 's':
1545 		if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1546 			break;
1547 		if (strcmp(key, "sha256") == 0 ||
1548 		    strcmp(key, "sha256digest") == 0)
1549 			break;
1550 		if (strcmp(key, "sha384") == 0 ||
1551 		    strcmp(key, "sha384digest") == 0)
1552 			break;
1553 		if (strcmp(key, "sha512") == 0 ||
1554 		    strcmp(key, "sha512digest") == 0)
1555 			break;
1556 		if (strcmp(key, "size") == 0) {
1557 			archive_entry_set_size(entry, mtree_atol10(&val));
1558 			break;
1559 		}
1560 	case 't':
1561 		if (strcmp(key, "tags") == 0) {
1562 			/*
1563 			 * Comma delimited list of tags.
1564 			 * Ignore the tags for now, but the interface
1565 			 * should be extended to allow inclusion/exclusion.
1566 			 */
1567 			break;
1568 		}
1569 		if (strcmp(key, "time") == 0) {
1570 			int64_t m;
1571 			int64_t my_time_t_max = get_time_t_max();
1572 			int64_t my_time_t_min = get_time_t_min();
1573 			long ns = 0;
1574 
1575 			*parsed_kws |= MTREE_HAS_MTIME;
1576 			m = mtree_atol10(&val);
1577 			/* Replicate an old mtree bug:
1578 			 * 123456789.1 represents 123456789
1579 			 * seconds and 1 nanosecond. */
1580 			if (*val == '.') {
1581 				++val;
1582 				ns = (long)mtree_atol10(&val);
1583 			} else
1584 				ns = 0;
1585 			if (m > my_time_t_max)
1586 				m = my_time_t_max;
1587 			else if (m < my_time_t_min)
1588 				m = my_time_t_min;
1589 			archive_entry_set_mtime(entry, (time_t)m, ns);
1590 			break;
1591 		}
1592 		if (strcmp(key, "type") == 0) {
1593 			switch (val[0]) {
1594 			case 'b':
1595 				if (strcmp(val, "block") == 0) {
1596 					archive_entry_set_filetype(entry, AE_IFBLK);
1597 					break;
1598 				}
1599 			case 'c':
1600 				if (strcmp(val, "char") == 0) {
1601 					archive_entry_set_filetype(entry,
1602 						AE_IFCHR);
1603 					break;
1604 				}
1605 			case 'd':
1606 				if (strcmp(val, "dir") == 0) {
1607 					archive_entry_set_filetype(entry,
1608 						AE_IFDIR);
1609 					break;
1610 				}
1611 			case 'f':
1612 				if (strcmp(val, "fifo") == 0) {
1613 					archive_entry_set_filetype(entry,
1614 						AE_IFIFO);
1615 					break;
1616 				}
1617 				if (strcmp(val, "file") == 0) {
1618 					archive_entry_set_filetype(entry,
1619 						AE_IFREG);
1620 					break;
1621 				}
1622 			case 'l':
1623 				if (strcmp(val, "link") == 0) {
1624 					archive_entry_set_filetype(entry,
1625 						AE_IFLNK);
1626 					break;
1627 				}
1628 			default:
1629 				archive_set_error(&a->archive,
1630 				    ARCHIVE_ERRNO_FILE_FORMAT,
1631 				    "Unrecognized file type \"%s\"; "
1632 				    "assuming \"file\"", val);
1633 				archive_entry_set_filetype(entry, AE_IFREG);
1634 				return (ARCHIVE_WARN);
1635 			}
1636 			*parsed_kws |= MTREE_HAS_TYPE;
1637 			break;
1638 		}
1639 	case 'u':
1640 		if (strcmp(key, "uid") == 0) {
1641 			*parsed_kws |= MTREE_HAS_UID;
1642 			archive_entry_set_uid(entry, mtree_atol10(&val));
1643 			break;
1644 		}
1645 		if (strcmp(key, "uname") == 0) {
1646 			*parsed_kws |= MTREE_HAS_UNAME;
1647 			archive_entry_copy_uname(entry, val);
1648 			break;
1649 		}
1650 	default:
1651 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1652 		    "Unrecognized key %s=%s", key, val);
1653 		return (ARCHIVE_WARN);
1654 	}
1655 	return (ARCHIVE_OK);
1656 }
1657 
1658 static int
1659 read_data(struct archive_read *a, const void **buff, size_t *size,
1660     int64_t *offset)
1661 {
1662 	size_t bytes_to_read;
1663 	ssize_t bytes_read;
1664 	struct mtree *mtree;
1665 
1666 	mtree = (struct mtree *)(a->format->data);
1667 	if (mtree->fd < 0) {
1668 		*buff = NULL;
1669 		*offset = 0;
1670 		*size = 0;
1671 		return (ARCHIVE_EOF);
1672 	}
1673 	if (mtree->buff == NULL) {
1674 		mtree->buffsize = 64 * 1024;
1675 		mtree->buff = malloc(mtree->buffsize);
1676 		if (mtree->buff == NULL) {
1677 			archive_set_error(&a->archive, ENOMEM,
1678 			    "Can't allocate memory");
1679 			return (ARCHIVE_FATAL);
1680 		}
1681 	}
1682 
1683 	*buff = mtree->buff;
1684 	*offset = mtree->offset;
1685 	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1686 		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1687 	else
1688 		bytes_to_read = mtree->buffsize;
1689 	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1690 	if (bytes_read < 0) {
1691 		archive_set_error(&a->archive, errno, "Can't read");
1692 		return (ARCHIVE_WARN);
1693 	}
1694 	if (bytes_read == 0) {
1695 		*size = 0;
1696 		return (ARCHIVE_EOF);
1697 	}
1698 	mtree->offset += bytes_read;
1699 	*size = bytes_read;
1700 	return (ARCHIVE_OK);
1701 }
1702 
1703 /* Skip does nothing except possibly close the contents file. */
1704 static int
1705 skip(struct archive_read *a)
1706 {
1707 	struct mtree *mtree;
1708 
1709 	mtree = (struct mtree *)(a->format->data);
1710 	if (mtree->fd >= 0) {
1711 		close(mtree->fd);
1712 		mtree->fd = -1;
1713 	}
1714 	return (ARCHIVE_OK);
1715 }
1716 
1717 /*
1718  * Since parsing backslash sequences always makes strings shorter,
1719  * we can always do this conversion in-place.
1720  */
1721 static void
1722 parse_escapes(char *src, struct mtree_entry *mentry)
1723 {
1724 	char *dest = src;
1725 	char c;
1726 
1727 	if (mentry != NULL && strcmp(src, ".") == 0)
1728 		mentry->full = 1;
1729 
1730 	while (*src != '\0') {
1731 		c = *src++;
1732 		if (c == '/' && mentry != NULL)
1733 			mentry->full = 1;
1734 		if (c == '\\') {
1735 			switch (src[0]) {
1736 			case '0':
1737 				if (src[1] < '0' || src[1] > '7') {
1738 					c = 0;
1739 					++src;
1740 					break;
1741 				}
1742 				/* FALLTHROUGH */
1743 			case '1':
1744 			case '2':
1745 			case '3':
1746 				if (src[1] >= '0' && src[1] <= '7' &&
1747 				    src[2] >= '0' && src[2] <= '7') {
1748 					c = (src[0] - '0') << 6;
1749 					c |= (src[1] - '0') << 3;
1750 					c |= (src[2] - '0');
1751 					src += 3;
1752 				}
1753 				break;
1754 			case 'a':
1755 				c = '\a';
1756 				++src;
1757 				break;
1758 			case 'b':
1759 				c = '\b';
1760 				++src;
1761 				break;
1762 			case 'f':
1763 				c = '\f';
1764 				++src;
1765 				break;
1766 			case 'n':
1767 				c = '\n';
1768 				++src;
1769 				break;
1770 			case 'r':
1771 				c = '\r';
1772 				++src;
1773 				break;
1774 			case 's':
1775 				c = ' ';
1776 				++src;
1777 				break;
1778 			case 't':
1779 				c = '\t';
1780 				++src;
1781 				break;
1782 			case 'v':
1783 				c = '\v';
1784 				++src;
1785 				break;
1786 			case '\\':
1787 				c = '\\';
1788 				++src;
1789 				break;
1790 			}
1791 		}
1792 		*dest++ = c;
1793 	}
1794 	*dest = '\0';
1795 }
1796 
1797 /*
1798  * Note that this implementation does not (and should not!) obey
1799  * locale settings; you cannot simply substitute strtol here, since
1800  * it does obey locale.
1801  */
1802 static int64_t
1803 mtree_atol8(char **p)
1804 {
1805 	int64_t	l, limit, last_digit_limit;
1806 	int digit, base;
1807 
1808 	base = 8;
1809 	limit = INT64_MAX / base;
1810 	last_digit_limit = INT64_MAX % base;
1811 
1812 	l = 0;
1813 	digit = **p - '0';
1814 	while (digit >= 0 && digit < base) {
1815 		if (l>limit || (l == limit && digit > last_digit_limit)) {
1816 			l = INT64_MAX; /* Truncate on overflow. */
1817 			break;
1818 		}
1819 		l = (l * base) + digit;
1820 		digit = *++(*p) - '0';
1821 	}
1822 	return (l);
1823 }
1824 
1825 /*
1826  * Note that this implementation does not (and should not!) obey
1827  * locale settings; you cannot simply substitute strtol here, since
1828  * it does obey locale.
1829  */
1830 static int64_t
1831 mtree_atol10(char **p)
1832 {
1833 	int64_t l, limit, last_digit_limit;
1834 	int base, digit, sign;
1835 
1836 	base = 10;
1837 
1838 	if (**p == '-') {
1839 		sign = -1;
1840 		limit = ((uint64_t)(INT64_MAX) + 1) / base;
1841 		last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1842 		++(*p);
1843 	} else {
1844 		sign = 1;
1845 		limit = INT64_MAX / base;
1846 		last_digit_limit = INT64_MAX % base;
1847 	}
1848 
1849 	l = 0;
1850 	digit = **p - '0';
1851 	while (digit >= 0 && digit < base) {
1852 		if (l > limit || (l == limit && digit > last_digit_limit))
1853 			return (sign < 0) ? INT64_MIN : INT64_MAX;
1854 		l = (l * base) + digit;
1855 		digit = *++(*p) - '0';
1856 	}
1857 	return (sign < 0) ? -l : l;
1858 }
1859 
1860 /* Parse a hex digit. */
1861 static int
1862 parsehex(char c)
1863 {
1864 	if (c >= '0' && c <= '9')
1865 		return c - '0';
1866 	else if (c >= 'a' && c <= 'f')
1867 		return c - 'a';
1868 	else if (c >= 'A' && c <= 'F')
1869 		return c - 'A';
1870 	else
1871 		return -1;
1872 }
1873 
1874 /*
1875  * Note that this implementation does not (and should not!) obey
1876  * locale settings; you cannot simply substitute strtol here, since
1877  * it does obey locale.
1878  */
1879 static int64_t
1880 mtree_atol16(char **p)
1881 {
1882 	int64_t l, limit, last_digit_limit;
1883 	int base, digit, sign;
1884 
1885 	base = 16;
1886 
1887 	if (**p == '-') {
1888 		sign = -1;
1889 		limit = ((uint64_t)(INT64_MAX) + 1) / base;
1890 		last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1891 		++(*p);
1892 	} else {
1893 		sign = 1;
1894 		limit = INT64_MAX / base;
1895 		last_digit_limit = INT64_MAX % base;
1896 	}
1897 
1898 	l = 0;
1899 	digit = parsehex(**p);
1900 	while (digit >= 0 && digit < base) {
1901 		if (l > limit || (l == limit && digit > last_digit_limit))
1902 			return (sign < 0) ? INT64_MIN : INT64_MAX;
1903 		l = (l * base) + digit;
1904 		digit = parsehex(*++(*p));
1905 	}
1906 	return (sign < 0) ? -l : l;
1907 }
1908 
1909 static int64_t
1910 mtree_atol(char **p)
1911 {
1912 	if (**p != '0')
1913 		return mtree_atol10(p);
1914 	if ((*p)[1] == 'x' || (*p)[1] == 'X') {
1915 		*p += 2;
1916 		return mtree_atol16(p);
1917 	}
1918 	return mtree_atol8(p);
1919 }
1920 
1921 /*
1922  * Returns length of line (including trailing newline)
1923  * or negative on error.  'start' argument is updated to
1924  * point to first character of line.
1925  */
1926 static ssize_t
1927 readline(struct archive_read *a, struct mtree *mtree, char **start,
1928     ssize_t limit)
1929 {
1930 	ssize_t bytes_read;
1931 	ssize_t total_size = 0;
1932 	ssize_t find_off = 0;
1933 	const void *t;
1934 	void *nl;
1935 	char *u;
1936 
1937 	/* Accumulate line in a line buffer. */
1938 	for (;;) {
1939 		/* Read some more. */
1940 		t = __archive_read_ahead(a, 1, &bytes_read);
1941 		if (t == NULL)
1942 			return (0);
1943 		if (bytes_read < 0)
1944 			return (ARCHIVE_FATAL);
1945 		nl = memchr(t, '\n', bytes_read);
1946 		/* If we found '\n', trim the read to end exactly there. */
1947 		if (nl != NULL) {
1948 			bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1949 		}
1950 		if (total_size + bytes_read + 1 > limit) {
1951 			archive_set_error(&a->archive,
1952 			    ARCHIVE_ERRNO_FILE_FORMAT,
1953 			    "Line too long");
1954 			return (ARCHIVE_FATAL);
1955 		}
1956 		if (archive_string_ensure(&mtree->line,
1957 			total_size + bytes_read + 1) == NULL) {
1958 			archive_set_error(&a->archive, ENOMEM,
1959 			    "Can't allocate working buffer");
1960 			return (ARCHIVE_FATAL);
1961 		}
1962 		/* Append new bytes to string. */
1963 		memcpy(mtree->line.s + total_size, t, bytes_read);
1964 		__archive_read_consume(a, bytes_read);
1965 		total_size += bytes_read;
1966 		mtree->line.s[total_size] = '\0';
1967 
1968 		for (u = mtree->line.s + find_off; *u; ++u) {
1969 			if (u[0] == '\n') {
1970 				/* Ends with unescaped newline. */
1971 				*start = mtree->line.s;
1972 				return total_size;
1973 			} else if (u[0] == '#') {
1974 				/* Ends with comment sequence #...\n */
1975 				if (nl == NULL) {
1976 					/* But we've not found the \n yet */
1977 					break;
1978 				}
1979 			} else if (u[0] == '\\') {
1980 				if (u[1] == '\n') {
1981 					/* Trim escaped newline. */
1982 					total_size -= 2;
1983 					mtree->line.s[total_size] = '\0';
1984 					break;
1985 				} else if (u[1] != '\0') {
1986 					/* Skip the two-char escape sequence */
1987 					++u;
1988 				}
1989 			}
1990 		}
1991 		find_off = u - mtree->line.s;
1992 	}
1993 }
1994