1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 
30 #ifdef HAVE_SYS_STAT_H
31 #include <sys/stat.h>
32 #endif
33 #ifdef HAVE_ERRNO_H
34 #include <errno.h>
35 #endif
36 #ifdef HAVE_FCNTL_H
37 #include <fcntl.h>
38 #endif
39 #include <stddef.h>
40 /* #include <stdint.h> */ /* See archive_platform.h */
41 #ifdef HAVE_STDLIB_H
42 #include <stdlib.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47 #ifdef HAVE_CTYPE_H
48 #include <ctype.h>
49 #endif
50 
51 #include "archive.h"
52 #include "archive_entry.h"
53 #include "archive_entry_private.h"
54 #include "archive_private.h"
55 #include "archive_rb.h"
56 #include "archive_read_private.h"
57 #include "archive_string.h"
58 #include "archive_pack_dev.h"
59 
60 #ifndef O_BINARY
61 #define	O_BINARY 0
62 #endif
63 #ifndef O_CLOEXEC
64 #define O_CLOEXEC	0
65 #endif
66 
/*
 * Bit flags; every value is a distinct single bit, so they can be
 * OR-ed together into one mask.
 * NOTE(review): judging by the names, each bit records that the
 * matching mtree keyword was seen for an entry.  The code that sets
 * and tests them is outside this part of the file -- confirm there.
 */
#define	MTREE_HAS_DEVICE	0x0001
#define	MTREE_HAS_FFLAGS	0x0002
#define	MTREE_HAS_GID		0x0004
#define	MTREE_HAS_GNAME		0x0008
#define	MTREE_HAS_MTIME		0x0010
#define	MTREE_HAS_NLINK		0x0020
#define	MTREE_HAS_PERM		0x0040
#define	MTREE_HAS_SIZE		0x0080
#define	MTREE_HAS_TYPE		0x0100
#define	MTREE_HAS_UID		0x0200
#define	MTREE_HAS_UNAME		0x0400

#define	MTREE_HAS_OPTIONAL	0x0800
#define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */

/*
 * Line-length limit (1 MiB).  next_line() gives up with -1 once it has
 * scanned this many bytes without finding a line terminator.
 */
#define	MAX_LINE_LEN		(1024 * 1024)
83 
/*
 * One keyword setting, kept as a node of a singly linked list.
 * Nodes are created by add_option() and released by remove_option()
 * or free_options().
 */
struct mtree_option {
	/* Next option in the list, or NULL at the end. */
	struct mtree_option *next;
	/* malloc'ed, NUL-terminated copy of the "keyword" or
	 * "keyword=value" text taken from the input line. */
	char *value;
};
88 
/*
 * One entry line of the mtree specification.
 */
struct mtree_entry {
	/* Red-black tree linkage.  Must stay the first member: the tree
	 * callbacks cast an archive_rb_node pointer straight to an
	 * mtree_entry pointer. */
	struct archive_rb_node rbnode;
	/* Chain of later entries whose name is already present in the
	 * tree (appended by process_add_entry()). */
	struct mtree_entry *next_dup;
	/* Next entry in input order; the list starts at mtree->entries. */
	struct mtree_entry *next;
	/* Options in effect for this entry (globals plus its own). */
	struct mtree_option *options;
	/* malloc'ed entry name, after parse_escapes() has processed it. */
	char *name;
	/* Cleared before parse_escapes() runs; when non-zero afterwards the
	 * entry is inserted into mtree->rbtree.
	 * NOTE(review): presumably marks a full path name -- confirm in
	 * parse_escapes(). */
	char full;
	/* Initialised to 0 when the entry is created; it is read and set
	 * outside this part of the file. */
	char used;
};
98 
/*
 * Per-archive state of the mtree reader; allocated zero-filled in
 * archive_read_support_format_mtree() and released by cleanup().
 */
struct mtree {
	/* NOTE(review): presumably the buffer readline() assembles a
	 * logical line in -- confirm; freed in cleanup(). */
	struct archive_string	 line;
	size_t			 buffsize;
	/* Heap buffer, freed in cleanup(); its use is outside this view. */
	char			*buff;
	int64_t			 offset;
	/* Open file descriptor, or -1 when none is open (closed and reset
	 * at the start of read_header()). */
	int			 fd;
	/* Set to ARCHIVE_FORMAT_MTREE / "mtree" by read_mtree(). */
	int			 archive_format;
	const char		*archive_format_name;
	/* Head of the list of all parsed entries, in input order. */
	struct mtree_entry	*entries;
	/* Set to `entries' once read_mtree() has parsed the whole input. */
	struct mtree_entry	*this_entry;
	/* Not referenced in the visible part of the file. */
	struct archive_rb_tree	 entry_rbtree;
	struct archive_string	 current_dir;
	struct archive_string	 contents_name;

	/* Created in read_header(), freed in cleanup(). */
	struct archive_entry_linkresolver *resolver;
	/* Name-ordered index of the entries whose `full' flag is set. */
	struct archive_rb_tree rbtree;

	int64_t			 cur_size;
	/* Value of the "checkfs" reader option: 1 = enabled, 0 = disabled. */
	char checkfs;
};
119 
120 static int	bid_keycmp(const char *, const char *, ssize_t);
121 static int	cleanup(struct archive_read *);
122 static int	detect_form(struct archive_read *, int *);
123 static int	mtree_bid(struct archive_read *, int);
124 static int	parse_file(struct archive_read *, struct archive_entry *,
125 		    struct mtree *, struct mtree_entry *, int *);
126 static void	parse_escapes(char *, struct mtree_entry *);
127 static int	parse_line(struct archive_read *, struct archive_entry *,
128 		    struct mtree *, struct mtree_entry *, int *);
129 static int	parse_keyword(struct archive_read *, struct mtree *,
130 		    struct archive_entry *, struct mtree_option *, int *);
131 static int	read_data(struct archive_read *a,
132 		    const void **buff, size_t *size, int64_t *offset);
133 static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
134 static int	skip(struct archive_read *a);
135 static int	read_header(struct archive_read *,
136 		    struct archive_entry *);
137 static int64_t	mtree_atol(char **, int base);
138 #ifndef HAVE_STRNLEN
139 static size_t	mtree_strnlen(const char *, size_t);
140 #endif
141 
142 /*
143  * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
144  * here.  TODO: Move this to configure time, but be careful
145  * about cross-compile environments.
146  */
/*
 * Largest value representable in time_t, widened to int64_t.
 */
static int64_t
get_time_t_max(void)
{
#if defined(TIME_T_MAX)
	return TIME_T_MAX;
#else
	/*
	 * No portable constant exists, so derive the limit from the
	 * properties of time_t itself.  This relies on the POSIX rule
	 * that time_t is an integer type (ISO C alone would also allow
	 * a floating-point type).
	 */
	const int time_t_is_unsigned = ((time_t)0) < ((time_t)-1);

	if (time_t_is_unsigned)
		return (~(time_t)0);	/* every bit set */

	/* Signed: assumed to be as wide as int64_t or int32_t. */
	return (sizeof(time_t) == sizeof(int64_t))
	    ? (time_t)INT64_MAX
	    : (time_t)INT32_MAX;
#endif
}
171 
/*
 * Smallest value representable in time_t, widened to int64_t.
 */
static int64_t
get_time_t_min(void)
{
#if defined(TIME_T_MIN)
	return TIME_T_MIN;
#else
	const int time_t_is_unsigned = ((time_t)0) < ((time_t)-1);

	if (time_t_is_unsigned)
		return (time_t)0;	/* an unsigned type bottoms out at 0 */

	/* Signed: assumed to be as wide as int64_t or int32_t. */
	return (sizeof(time_t) == sizeof(int64_t))
	    ? (time_t)INT64_MIN
	    : (time_t)INT32_MIN;
#endif
}
191 
192 #ifdef HAVE_STRNLEN
193 #define mtree_strnlen(a,b) strnlen(a,b)
194 #else
/*
 * Fallback used when the platform has no strnlen().
 *
 * Returns the length of `p' when its terminating NUL is found at an
 * index in [0, maxlen]; otherwise returns (size_t)-1 to flag the
 * string as too long.  Note that index `maxlen' itself is examined.
 */
static size_t
mtree_strnlen(const char *p, size_t maxlen)
{
	size_t pos = 0;

	while (pos <= maxlen) {
		if (p[pos] == 0)
			return (pos);
		pos++;
	}
	return (-1);/* invalid */
}
208 #endif
209 
210 static int
archive_read_format_mtree_options(struct archive_read * a,const char * key,const char * val)211 archive_read_format_mtree_options(struct archive_read *a,
212     const char *key, const char *val)
213 {
214 	struct mtree *mtree;
215 
216 	mtree = (struct mtree *)(a->format->data);
217 	if (strcmp(key, "checkfs")  == 0) {
218 		/* Allows to read information missing from the mtree from the file system */
219 		if (val == NULL || val[0] == 0) {
220 			mtree->checkfs = 0;
221 		} else {
222 			mtree->checkfs = 1;
223 		}
224 		return (ARCHIVE_OK);
225 	}
226 
227 	/* Note: The "warn" return is just to inform the options
228 	 * supervisor that we didn't handle it.  It will generate
229 	 * a suitable error if no one used this option. */
230 	return (ARCHIVE_WARN);
231 }
232 
233 static void
free_options(struct mtree_option * head)234 free_options(struct mtree_option *head)
235 {
236 	struct mtree_option *next;
237 
238 	for (; head != NULL; head = next) {
239 		next = head->next;
240 		free(head->value);
241 		free(head);
242 	}
243 }
244 
245 static int
mtree_cmp_node(const struct archive_rb_node * n1,const struct archive_rb_node * n2)246 mtree_cmp_node(const struct archive_rb_node *n1,
247     const struct archive_rb_node *n2)
248 {
249 	const struct mtree_entry *e1 = (const struct mtree_entry *)n1;
250 	const struct mtree_entry *e2 = (const struct mtree_entry *)n2;
251 
252 	return (strcmp(e1->name, e2->name));
253 }
254 
255 static int
mtree_cmp_key(const struct archive_rb_node * n,const void * key)256 mtree_cmp_key(const struct archive_rb_node *n, const void *key)
257 {
258 	const struct mtree_entry *e = (const struct mtree_entry *)n;
259 
260 	return (strcmp(e->name, key));
261 }
262 
263 int
archive_read_support_format_mtree(struct archive * _a)264 archive_read_support_format_mtree(struct archive *_a)
265 {
266 	static const struct archive_rb_tree_ops rb_ops = {
267 		mtree_cmp_node, mtree_cmp_key,
268 	};
269 	struct archive_read *a = (struct archive_read *)_a;
270 	struct mtree *mtree;
271 	int r;
272 
273 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
274 	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
275 
276 	mtree = (struct mtree *)calloc(1, sizeof(*mtree));
277 	if (mtree == NULL) {
278 		archive_set_error(&a->archive, ENOMEM,
279 		    "Can't allocate mtree data");
280 		return (ARCHIVE_FATAL);
281 	}
282 	mtree->checkfs = 0;
283 	mtree->fd = -1;
284 
285 	__archive_rb_tree_init(&mtree->rbtree, &rb_ops);
286 
287 	r = __archive_read_register_format(a, mtree, "mtree",
288            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
289 
290 	if (r != ARCHIVE_OK)
291 		free(mtree);
292 	return (ARCHIVE_OK);
293 }
294 
295 static int
cleanup(struct archive_read * a)296 cleanup(struct archive_read *a)
297 {
298 	struct mtree *mtree;
299 	struct mtree_entry *p, *q;
300 
301 	mtree = (struct mtree *)(a->format->data);
302 
303 	p = mtree->entries;
304 	while (p != NULL) {
305 		q = p->next;
306 		free(p->name);
307 		free_options(p->options);
308 		free(p);
309 		p = q;
310 	}
311 	archive_string_free(&mtree->line);
312 	archive_string_free(&mtree->current_dir);
313 	archive_string_free(&mtree->contents_name);
314 	archive_entry_linkresolver_free(mtree->resolver);
315 
316 	free(mtree->buff);
317 	free(mtree);
318 	(a->format->data) = NULL;
319 	return (ARCHIVE_OK);
320 }
321 
/*
 * Measure the first line in the `avail' bytes starting at `b'.
 *
 * Returns the line length including its terminator and stores the
 * terminator size in *nlsize (when nlsize is not NULL): 2 for "\r\n",
 * 1 for a lone '\r' or '\n'.  When no terminator is found, returns
 * `avail' with *nlsize = 0.  A NUL byte before any terminator makes
 * the data invalid: returns -1 with *nlsize = 0.
 */
static ssize_t
get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
{
	ssize_t pos;
	ssize_t term = 0;	/* terminator size found so far */
	ssize_t total = avail;	/* result when no terminator exists */

	for (pos = 0; pos < avail; pos++) {
		const char c = b[pos];

		if (c == '\0') {
			/* Non-ascii character or control character. */
			total = -1;
			break;
		}
		if (c == '\r' && avail - pos > 1 && b[pos + 1] == '\n') {
			term = 2;
			total = pos + 2;
			break;
		}
		if (c == '\r' || c == '\n') {
			term = 1;
			total = pos + 1;
			break;
		}
	}
	if (nlsize != NULL)
		*nlsize = term;
	return (total);
}
355 
356 /*
357  *  <---------------- ravail --------------------->
358  *  <-- diff ------> <---  avail ----------------->
359  *                   <---- len ----------->
360  * | Previous lines | line being parsed  nl extra |
361  *                  ^
362  *                  b
363  *
364  */
/*
 * Find the end of the line that starts at *b, asking the reader for
 * more data for as long as no line terminator has been seen.
 *
 *   b       in/out: start of the line being examined; re-based each
 *           time the read-ahead buffer is fetched again.
 *   avail   in/out: number of bytes available starting at *b.
 *   ravail  in/out: number of bytes available from the start of the
 *           read-ahead buffer, so *ravail - *avail is the offset of
 *           *b inside that buffer.
 *   nl      out: size of the line terminator found (0, 1 or 2).
 *
 * Returns the line length including its terminator.  Returns 0 when
 * the reader has nothing beyond what was already examined, and -1
 * when MAX_LINE_LEN bytes were scanned without a terminator or when
 * get_line_size() rejected the data.  At end of input the last line
 * may come back without a terminator (*nl == 0).
 */
static ssize_t
next_line(struct archive_read *a,
    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
{
	ssize_t len;
	int quit;

	quit = 0;
	if (*avail == 0) {
		*nl = 0;
		len = 0;
	} else
		len = get_line_size(*b, *avail, nl);
	/*
	 * Read bytes more while it does not reach the end of line.
	 */
	while (*nl == 0 && len == *avail && !quit) {
		/* Offset of the current line inside the read-ahead buffer;
		 * used below to re-base *b after the buffer is refetched. */
		ssize_t diff = *ravail - *avail;
		/* Request size: *ravail rounded up to a multiple of 1024. */
		size_t nbytes_req = (*ravail+1023) & ~1023U;
		ssize_t tested;

		/*
		 * Place an arbitrary limit on the line length.
		 * mtree is almost free-form input and without line length limits,
		 * it can consume a lot of memory.
		 */
		if (len >= MAX_LINE_LEN)
			return (-1);

		/* Increase reading bytes if it is not enough to at least
		 * new two lines. */
		if (nbytes_req < (size_t)*ravail + 160)
			nbytes_req <<= 1;

		*b = __archive_read_ahead(a, nbytes_req, avail);
		if (*b == NULL) {
			/* The full request failed; *avail now holds what the
			 * reader reported.  No growth means no more data. */
			if (*ravail >= *avail)
				return (0);
			/* Reading bytes reaches the end of file. */
			*b = __archive_read_ahead(a, *avail, avail);
			quit = 1;
		}
		*ravail = *avail;
		/* Skip the earlier lines again: back to the current line. */
		*b += diff;
		*avail -= diff;
		tested = len;/* Skip some bytes we already determined. */
		len = get_line_size(*b + len, *avail - len, nl);
		/* A negative result is an error and is passed on as is. */
		if (len >= 0)
			len += tested;
	}
	return (len);
}
417 
/*
 * Check whether the text at `p' (at most `len' bytes are compared)
 * begins with the mtree keyword `key', followed by a character that
 * may legally end a keyword: '=', blank, tab, CR, LF, or a backslash
 * directly followed by CR or LF.
 *
 * Returns the keyword length on a match, 0 otherwise.
 */
static int
bid_keycmp(const char *p, const char *key, ssize_t len)
{
	int matched;

	for (matched = 0; len > 0 && *p != '\0' && *key != '\0';
	    --len, ++p, ++key, ++matched) {
		if (*p != *key)
			return (0);/* Not match */
	}
	/* The input ended before the whole keyword was consumed. */
	if (*key != '\0')
		return (0);/* Not match */

	/* A following character should be specified characters */
	switch (p[0]) {
	case '=':
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return (matched);
	case '\\':
		if (p[1] == '\n' || p[1] == '\r')
			return (matched);
		return (0);/* Not match */
	default:
		return (0);/* Not match */
	}
}
448 
/*
 * Check whether the text at `p' starts with a known mtree keyword.
 * The candidates are grouped by first letter so only a few of them
 * have to be compared.
 *
 * Returns the length of the keyword found, or 0 if there is none.
 */
static int
bid_keyword(const char *p,  ssize_t len)
{
	static const char * const keys_c[] = {
		"content", "contents", "cksum", NULL
	};
	static const char * const keys_df[] = {
		"device", "flags", NULL
	};
	static const char * const keys_g[] = {
		"gid", "gname", NULL
	};
	static const char * const keys_il[] = {
		"ignore", "inode", "link", NULL
	};
	static const char * const keys_m[] = {
		"md5", "md5digest", "mode", NULL
	};
	static const char * const keys_no[] = {
		"nlink", "nochange", "optional", NULL
	};
	static const char * const keys_r[] = {
		"resdevice", "rmd160", "rmd160digest", NULL
	};
	static const char * const keys_s[] = {
		"sha1", "sha1digest",
		"sha256", "sha256digest",
		"sha384", "sha384digest",
		"sha512", "sha512digest",
		"size", NULL
	};
	static const char * const keys_t[] = {
		"tags", "time", "type", NULL
	};
	static const char * const keys_u[] = {
		"uid", "uname",	NULL
	};
	const char * const *candidate;

	/* Select the group that matches the first character. */
	switch (*p) {
	case 'c':
		candidate = keys_c;
		break;
	case 'd':
	case 'f':
		candidate = keys_df;
		break;
	case 'g':
		candidate = keys_g;
		break;
	case 'i':
	case 'l':
		candidate = keys_il;
		break;
	case 'm':
		candidate = keys_m;
		break;
	case 'n':
	case 'o':
		candidate = keys_no;
		break;
	case 'r':
		candidate = keys_r;
		break;
	case 's':
		candidate = keys_s;
		break;
	case 't':
		candidate = keys_t;
		break;
	case 'u':
		candidate = keys_u;
		break;
	default:
		return (0);/* Unknown key */
	}

	/* The first candidate that matches decides the result. */
	for (; *candidate != NULL; candidate++) {
		int matched = bid_keycmp(p, *candidate, len);

		if (matched > 0)
			return (matched);
	}
	return (0);/* Unknown key */
}
515 
/*
 * Test whether there is a set of mtree keywords.
 * This function expects a set of "<space characters>keyword=value".
 * When "unset" is specified, expects a set of "<space characters>keyword".
 *
 *   p, len        text to examine and the number of bytes in it.
 *   unset         non-zero for a "/unset" line: the word "all" is
 *                 accepted (the function then returns 1 at once) and
 *                 an '=' with nothing after it is tolerated.
 *   last_is_path  non-zero when the line carries its path name last
 *                 (form D): a keyword need not be preceded by a blank
 *                 and running out of input ends the scan normally.
 *
 * Returns the number of keywords.
 * Returns -1 if we got incorrect sequence.
 * Scanning stops at a NUL byte, at CR or LF, or at a backslash that
 * is directly followed by CR or LF.
 */
static int
bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
{
	int l;
	int keycnt = 0;

	while (len > 0 && *p) {
		int blank = 0;

		/* Test whether there are blank characters in the line. */
		while (len >0 && (*p == ' ' || *p == '\t')) {
			++p;
			--len;
			blank = 1;
		}
		/* End of the line. */
		if (*p == '\n' || *p == '\r')
			break;
		/* Line continuation marker. */
		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
			break;
		if (!blank && !last_is_path) /* No blank character. */
			return (-1);
		/* Form D: only the path name remains after the keywords. */
		if (last_is_path && len == 0)
				return (keycnt);

		if (unset) {
			l = bid_keycmp(p, "all", len);
			if (l > 0)
				return (1);
		}
		/* Test whether there is a correct key in the line. */
		l = bid_keyword(p, len);
		if (l == 0)
			return (-1);/* Unknown keyword was found. */
		p += l;
		len -= l;
		keycnt++;

		/* Skip value */
		if (*p == '=') {
			int value = 0;
			++p;
			--len;
			/* The value runs up to the next blank or tab. */
			while (len > 0 && *p != ' ' && *p != '\t') {
				++p;
				--len;
				value = 1;
			}
			/* A keyword should have its value unless
			 * "/unset" operation. */
			if (!unset && value == 0)
				return (-1);
		}
	}
	return (keycnt);
}
578 
/*
 * Bid on one entry line (a line that does not start with '/').
 *
 * The usual layout, "path keyword=value ...", is tried first: the
 * leading run of characters accepted by safe_char[] is taken as the
 * path name.  If the line does not begin with such a name, the layout
 * written by NetBSD's "mtree -D" (form D) is tried, where the path
 * name is the last field of the line.
 *
 *   p, len        the line; `len' includes the line terminator.
 *   nl            size of that terminator in bytes.
 *   last_is_path  out: 1 when the line was taken as form D, else 0.
 *
 * Returns what bid_keyword_list() reports for the keyword part (the
 * number of keywords, or -1), or -1 when no acceptable path name was
 * found.
 */
static int
bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
{
	int f = 0;
	/* Non-zero for every byte that may appear in an unquoted path
	 * name: printable ASCII except space, '#' and '='. */
	static const unsigned char safe_char[256] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
		/* 0123456789:;<>?  EXCLUSION:(=) */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
		/* @ABCDEFGHIJKLMNO */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
		/* PQRSTUVWXYZ[\]^_  */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
		/* `abcdefghijklmno */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
		/* pqrstuvwxyz{|}~ */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
	};
	ssize_t ll;
	const char *pp = p;
	const char * const pp_end = pp + len;

	*last_is_path = 0;
	/*
	 * Skip the path-name which is quoted.
	 * `f' ends up 1 only when at least one safe character was seen
	 * and the run was ended by white space or by the end of the line.
	 */
	for (;pp < pp_end; ++pp) {
		if (!safe_char[*(const unsigned char *)pp]) {
			if (*pp != ' ' && *pp != '\t' && *pp != '\r'
			    && *pp != '\n')
				f = 0;
			break;
		}
		f = 1;
	}
	/* Bytes left after the leading path name. */
	ll = pp_end - pp;

	/* If a path-name was not found at the first, try to check
	 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
	 * which places the path-name at the last. */
	if (f == 0) {
		/* Points at the line terminator. */
		const char *pb = p + len - nl;
		int name_len = 0;
		int slash;

		/* The form D accepts only a single line for an entry. */
		if (pb-2 >= p &&
		    pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
			return (-1);
		if (pb-1 >= p && pb[-1] == '\\')
			return (-1);

		slash = 0;
		/* Walk backwards over the last field of the line. */
		while (p <= --pb && *pb != ' ' && *pb != '\t') {
			if (!safe_char[*(const unsigned char *)pb])
				return (-1);
			name_len++;
			/* The pathname should have a slash in this
			 * format. */
			if (*pb == '/')
				slash = 1;
		}
		if (name_len == 0 || slash == 0)
			return (-1);
		/* If '/' is placed at the first in this field, this is not
		 * a valid filename. */
		if (pb[1] == '/')
			return (-1);
		/* Keywords take up everything in front of the path name. */
		ll = len - nl - name_len;
		pp = p;
		*last_is_path = 1;
	}

	return (bid_keyword_list(pp, ll, 0, *last_is_path));
}
664 
665 #define MAX_BID_ENTRY	3
666 
/*
 * Format callback: bid on the input.
 *
 * Returns -1 when the first bytes cannot be read, 8 bits per signature
 * byte when the input starts with "#mtree", and otherwise whatever
 * detect_form() concludes from the contents.
 */
static int
mtree_bid(struct archive_read *a, int best_bid)
{
	const char *signature = "#mtree";
	const size_t signature_len = strlen(signature);
	const char *head;

	(void)best_bid; /* UNUSED */

	/* Now let's look at the actual header and see if it matches. */
	head = __archive_read_ahead(a, signature_len, NULL);
	if (head == NULL)
		return (-1);

	if (memcmp(head, signature, signature_len) != 0) {
		/*
		 * There is not a mtree signature. Let's try to detect
		 * mtree format.
		 */
		return (detect_form(a, NULL));
	}
	return (8 * (int)signature_len);
}
688 
/*
 * Examine the beginning of the input line by line and decide whether
 * it looks like an mtree specification.
 *
 *   is_form_d  optional out (may be NULL): set to 1 when the entries
 *              were recognised with the path name as the last field
 *              (form D), otherwise 0.
 *
 * Returns 32 when MAX_BID_ENTRY entries were recognised, or when at
 * least one entry was recognised and next_line() then returned 0.
 * Returns -1 when no input could be read at all, and 0 in every other
 * case.
 */
static int
detect_form(struct archive_read *a, int *is_form_d)
{
	const char *p;
	ssize_t avail, ravail;
	ssize_t len, nl;
	/* multiline: 0 = not inside a continued line, 1 = continuing an
	 * entry line, 2 = continuing a "/set" or "/unset" line. */
	int entry_cnt = 0, multiline = 0;
	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
			* (In this source we call it `form D') . */

	if (is_form_d != NULL)
		*is_form_d = 0;
	p = __archive_read_ahead(a, 1, &avail);
	if (p == NULL)
		return (-1);
	ravail = avail;
	for (;;) {
		len = next_line(a, &p, &avail, &ravail, &nl);
		/* The terminal character of the line should be
		 * a new line character, '\r\n' or '\n'. */
		if (len <= 0 || nl == 0)
			break;
		if (!multiline) {
			/* Leading whitespace is never significant,
			 * ignore it. */
			while (len > 0 && (*p == ' ' || *p == '\t')) {
				++p;
				--avail;
				--len;
			}
			/* Skip comment or empty line. */
			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
				p += len;
				avail -= len;
				continue;
			}
		} else {
			/* A continuance line; the terminal
			 * character of previous line was '\' character. */
			if (bid_keyword_list(p, len, 0, 0) <= 0)
				break;
			if (p[len-nl-1] != '\\') {
				/* The continued line ends here; it counts as
				 * an entry only if it began as one. */
				if (multiline == 1 &&
				    ++entry_cnt >= MAX_BID_ENTRY)
					break;
				multiline = 0;
			}
			p += len;
			avail -= len;
			continue;
		}
		if (p[0] != '/') {
			int last_is_path, keywords;

			keywords = bid_entry(p, len, nl, &last_is_path);
			if (keywords >= 0) {
				if (form_D == 0) {
					if (last_is_path)
						form_D = 1;
					else if (keywords > 0)
						/* This line is not `form D'. */
						form_D = -1;
				} else if (form_D == 1) {
					if (!last_is_path && keywords > 0)
						/* This is not `form D'
						 * and we cannot accept mixed
						 * format. */
						break;
				}
				if (!last_is_path && p[len-nl-1] == '\\')
					/* This line continues. */
					multiline = 1;
				else {
					/* We've got plenty of correct lines
					 * to assume that this file is an mtree
					 * format. */
					if (++entry_cnt >= MAX_BID_ENTRY)
						break;
				}
			} else
				break;
		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
				break;
			/* This line continues. */
			if (p[len-nl-1] == '\\')
				multiline = 2;
		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
				break;
			/* This line continues. */
			if (p[len-nl-1] == '\\')
				multiline = 2;
		} else
			break;

		/* Test next line. */
		p += len;
		avail -= len;
	}
	/* Enough entries were seen, or the input ended (len == 0) after
	 * at least one of them. */
	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
		if (is_form_d != NULL) {
			if (form_D == 1)
				*is_form_d = 1;
		}
		return (32);
	}

	return (0);
}
799 
800 /*
801  * The extended mtree format permits multiple lines specifying
802  * attributes for each file.  For those entries, only the last line
803  * is actually used.  Practically speaking, that means we have
804  * to read the entire mtree file into memory up front.
805  *
806  * The parsing is done in two steps.  First, it is decided if a line
807  * changes the global defaults and if it is, processed accordingly.
808  * Otherwise, the options of the line are merged with the current
809  * global options.
810  */
811 static int
add_option(struct archive_read * a,struct mtree_option ** global,const char * value,size_t len)812 add_option(struct archive_read *a, struct mtree_option **global,
813     const char *value, size_t len)
814 {
815 	struct mtree_option *opt;
816 
817 	if ((opt = malloc(sizeof(*opt))) == NULL) {
818 		archive_set_error(&a->archive, errno, "Can't allocate memory");
819 		return (ARCHIVE_FATAL);
820 	}
821 	if ((opt->value = malloc(len + 1)) == NULL) {
822 		free(opt);
823 		archive_set_error(&a->archive, errno, "Can't allocate memory");
824 		return (ARCHIVE_FATAL);
825 	}
826 	memcpy(opt->value, value, len);
827 	opt->value[len] = '\0';
828 	opt->next = *global;
829 	*global = opt;
830 	return (ARCHIVE_OK);
831 }
832 
833 static void
remove_option(struct mtree_option ** global,const char * value,size_t len)834 remove_option(struct mtree_option **global, const char *value, size_t len)
835 {
836 	struct mtree_option *iter, *last;
837 
838 	last = NULL;
839 	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
840 		if (strncmp(iter->value, value, len) == 0 &&
841 		    (iter->value[len] == '\0' ||
842 		     iter->value[len] == '='))
843 			break;
844 	}
845 	if (iter == NULL)
846 		return;
847 	if (last == NULL)
848 		*global = iter->next;
849 	else
850 		last->next = iter->next;
851 
852 	free(iter->value);
853 	free(iter);
854 }
855 
856 static int
process_global_set(struct archive_read * a,struct mtree_option ** global,const char * line)857 process_global_set(struct archive_read *a,
858     struct mtree_option **global, const char *line)
859 {
860 	const char *next, *eq;
861 	size_t len;
862 	int r;
863 
864 	line += 4;
865 	for (;;) {
866 		next = line + strspn(line, " \t\r\n");
867 		if (*next == '\0')
868 			return (ARCHIVE_OK);
869 		line = next;
870 		next = line + strcspn(line, " \t\r\n");
871 		eq = strchr(line, '=');
872 		if (eq > next)
873 			len = next - line;
874 		else
875 			len = eq - line;
876 
877 		remove_option(global, line, len);
878 		r = add_option(a, global, line, next - line);
879 		if (r != ARCHIVE_OK)
880 			return (r);
881 		line = next;
882 	}
883 }
884 
885 static int
process_global_unset(struct archive_read * a,struct mtree_option ** global,const char * line)886 process_global_unset(struct archive_read *a,
887     struct mtree_option **global, const char *line)
888 {
889 	const char *next;
890 	size_t len;
891 
892 	line += 6;
893 	if (strchr(line, '=') != NULL) {
894 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
895 		    "/unset shall not contain `='");
896 		return ARCHIVE_FATAL;
897 	}
898 
899 	for (;;) {
900 		next = line + strspn(line, " \t\r\n");
901 		if (*next == '\0')
902 			return (ARCHIVE_OK);
903 		line = next;
904 		len = strcspn(line, " \t\r\n");
905 
906 		if (len == 3 && strncmp(line, "all", 3) == 0) {
907 			free_options(*global);
908 			*global = NULL;
909 		} else {
910 			remove_option(global, line, len);
911 		}
912 
913 		line += len;
914 	}
915 }
916 
/*
 * Turn one entry line into an mtree_entry and append it to the list.
 *
 *   global      current global options; they are copied into the new
 *               entry before the line's own keywords are applied.
 *   line        the entry line (leading white space already removed
 *               by the caller), `line_len' bytes long.
 *   last_entry  in/out: tail of the entry list; updated to the new
 *               entry.
 *   is_form_d   non-zero when the path name is the last field of the
 *               line instead of the first.
 *
 * The entry is linked into the list before anything else can fail, so
 * a partly built entry is still reachable through mtree->entries.
 *
 * Returns ARCHIVE_OK, or ARCHIVE_FATAL when memory cannot be
 * allocated.
 */
static int
process_add_entry(struct archive_read *a, struct mtree *mtree,
    struct mtree_option **global, const char *line, ssize_t line_len,
    struct mtree_entry **last_entry, int is_form_d)
{
	struct mtree_entry *entry;
	struct mtree_option *iter;
	const char *next, *eq, *name, *end;
	size_t name_len, len;
	int r, i;

	if ((entry = malloc(sizeof(*entry))) == NULL) {
		archive_set_error(&a->archive, errno, "Can't allocate memory");
		return (ARCHIVE_FATAL);
	}
	entry->next = NULL;
	entry->options = NULL;
	entry->name = NULL;
	entry->used = 0;
	entry->full = 0;

	/* Add this entry to list. */
	if (*last_entry == NULL)
		mtree->entries = entry;
	else
		(*last_entry)->next = entry;
	*last_entry = entry;

	if (is_form_d) {
		/* Filename is last item on line. */
		/* Adjust line_len to trim trailing whitespace */
		while (line_len > 0) {
			char last_character = line[line_len - 1];
			if (last_character == '\r'
			    || last_character == '\n'
			    || last_character == '\t'
			    || last_character == ' ') {
				line_len--;
			} else {
				break;
			}
		}
		/* Name starts after the last whitespace separator */
		name = line;
		for (i = 0; i < line_len; i++) {
			if (line[i] == '\r'
			    || line[i] == '\n'
			    || line[i] == '\t'
			    || line[i] == ' ') {
				name = line + i + 1;
			}
		}
		name_len = line + line_len - name;
		/* Keywords occupy everything in front of the name. */
		end = name;
	} else {
		/* Filename is first item on line */
		name_len = strcspn(line, " \t\r\n");
		name = line;
		line += name_len;
		/* NOTE(review): `line' has already been advanced past the
		 * name, so `end' lies name_len bytes beyond the end of the
		 * line; the NUL test in the loop below is what stops the
		 * scan in this case. */
		end = line + line_len;
	}
	/* name/name_len is the name within the line. */
	/* line..end brackets the entire line except the name */

	if ((entry->name = malloc(name_len + 1)) == NULL) {
		archive_set_error(&a->archive, errno, "Can't allocate memory");
		return (ARCHIVE_FATAL);
	}

	memcpy(entry->name, name, name_len);
	entry->name[name_len] = '\0';
	parse_escapes(entry->name, entry);

	entry->next_dup = NULL;
	if (entry->full) {
		/* Index the entry by name.  If the insert is refused, look
		 * up the entry already stored under that name and append
		 * this one to the end of its duplicate chain. */
		if (!__archive_rb_tree_insert_node(&mtree->rbtree, &entry->rbnode)) {
			struct mtree_entry *alt;
			alt = (struct mtree_entry *)__archive_rb_tree_find_node(
			    &mtree->rbtree, entry->name);
			if (alt != NULL) {
				while (alt->next_dup)
					alt = alt->next_dup;
				alt->next_dup = entry;
			}
		}
	}

	/* Start from a private copy of the global options. */
	for (iter = *global; iter != NULL; iter = iter->next) {
		r = add_option(a, &entry->options, iter->value,
		    strlen(iter->value));
		if (r != ARCHIVE_OK)
			return (r);
	}

	/* Apply the line's own keywords; each one replaces an inherited
	 * option with the same keyword. */
	for (;;) {
		next = line + strspn(line, " \t\r\n");
		if (*next == '\0')
			return (ARCHIVE_OK);
		if (next >= end)
			return (ARCHIVE_OK);
		line = next;
		next = line + strcspn(line, " \t\r\n");
		/* The keyword ends at '=' only if that '=' belongs to this
		 * item; strchr() searches the rest of the line. */
		eq = strchr(line, '=');
		if (eq == NULL || eq > next)
			len = next - line;
		else
			len = eq - line;

		remove_option(&entry->options, line, len);
		r = add_option(a, &entry->options, line, next - line);
		if (r != ARCHIVE_OK)
			return (r);
		line = next;
	}
}
1032 
/*
 * Parse the whole mtree input into the entry list of `mtree'.
 *
 * Every line obtained from readline() is handled according to how it
 * starts: "/set" and "/unset" change the global options, comment and
 * blank lines are skipped, and anything else not starting with '/' is
 * added as an entry.
 *
 * Returns ARCHIVE_OK once readline() reports the end of the input (it
 * returned 0); mtree->this_entry then points at the first entry.
 * A negative readline() result is returned unchanged.  A line that
 * cannot be parsed yields ARCHIVE_FATAL with the error
 * "Can't parse line N".  The global option list is freed on every
 * path.
 */
static int
read_mtree(struct archive_read *a, struct mtree *mtree)
{
	ssize_t len;
	uintmax_t counter;
	char *p, *s;
	struct mtree_option *global;
	struct mtree_entry *last_entry;
	int r, is_form_d;

	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
	mtree->archive_format_name = "mtree";

	global = NULL;
	last_entry = NULL;

	/* Only the form D flag is needed here; the bid is ignored. */
	(void)detect_form(a, &is_form_d);

	/* `counter' is the 1-based line number used in the error text. */
	for (counter = 1; ; ++counter) {
		r = ARCHIVE_OK;
		len = readline(a, mtree, &p, 65536);
		if (len == 0) {
			mtree->this_entry = mtree->entries;
			free_options(global);
			return (ARCHIVE_OK);
		}
		if (len < 0) {
			free_options(global);
			return ((int)len);
		}
		/* Leading whitespace is never significant, ignore it. */
		while (*p == ' ' || *p == '\t') {
			++p;
			--len;
		}
		/* Skip comment lines and blank lines. */
		if (*p == '#')
			continue;
		if (*p == '\r' || *p == '\n' || *p == '\0')
			continue;
		/* Non-printable characters are not allowed */
		/* (the last byte of the line is not checked; tabs are
		 * accepted) */
		for (s = p;s < p + len - 1; s++) {
			if (!isprint((unsigned char)*s) && *s != '\t') {
				r = ARCHIVE_FATAL;
				break;
			}
		}
		if (r != ARCHIVE_OK)
			break;
		if (*p != '/') {
			r = process_add_entry(a, mtree, &global, p, len,
			    &last_entry, is_form_d);
		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
			/* "/set" must be followed by a blank or a tab. */
			if (p[4] != ' ' && p[4] != '\t')
				break;
			r = process_global_set(a, &global, p);
		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
			/* "/unset" must be followed by a blank or a tab. */
			if (p[6] != ' ' && p[6] != '\t')
				break;
			r = process_global_unset(a, &global, p);
		} else
			break;

		if (r != ARCHIVE_OK) {
			free_options(global);
			return r;
		}
	}

	/* Reached only by `break': the current line is not valid. */
	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
	    "Can't parse line %ju", counter);
	free_options(global);
	return (ARCHIVE_FATAL);
}
1107 
1108 /*
1109  * Read in the entire mtree file into memory on the first request.
1110  * Then use the next unused file to satisfy each header request.
1111  */
1112 static int
read_header(struct archive_read * a,struct archive_entry * entry)1113 read_header(struct archive_read *a, struct archive_entry *entry)
1114 {
1115 	struct mtree *mtree;
1116 	char *p;
1117 	int r, use_next;
1118 
1119 	mtree = (struct mtree *)(a->format->data);
1120 
1121 	if (mtree->fd >= 0) {
1122 		close(mtree->fd);
1123 		mtree->fd = -1;
1124 	}
1125 
1126 	if (mtree->entries == NULL) {
1127 		mtree->resolver = archive_entry_linkresolver_new();
1128 		if (mtree->resolver == NULL)
1129 			return ARCHIVE_FATAL;
1130 		archive_entry_linkresolver_set_strategy(mtree->resolver,
1131 		    ARCHIVE_FORMAT_MTREE);
1132 		r = read_mtree(a, mtree);
1133 		if (r != ARCHIVE_OK)
1134 			return (r);
1135 	}
1136 
1137 	a->archive.archive_format = mtree->archive_format;
1138 	a->archive.archive_format_name = mtree->archive_format_name;
1139 
1140 	for (;;) {
1141 		if (mtree->this_entry == NULL)
1142 			return (ARCHIVE_EOF);
1143 		if (strcmp(mtree->this_entry->name, "..") == 0) {
1144 			mtree->this_entry->used = 1;
1145 			if (archive_strlen(&mtree->current_dir) > 0) {
1146 				/* Roll back current path. */
1147 				p = mtree->current_dir.s
1148 				    + mtree->current_dir.length - 1;
1149 				while (p >= mtree->current_dir.s && *p != '/')
1150 					--p;
1151 				if (p >= mtree->current_dir.s)
1152 					--p;
1153 				mtree->current_dir.length
1154 				    = p - mtree->current_dir.s + 1;
1155 			}
1156 		}
1157 		if (!mtree->this_entry->used) {
1158 			use_next = 0;
1159 			r = parse_file(a, entry, mtree, mtree->this_entry,
1160 				&use_next);
1161 			if (use_next == 0)
1162 				return (r);
1163 		}
1164 		mtree->this_entry = mtree->this_entry->next;
1165 	}
1166 }
1167 
1168 /*
1169  * A single file can have multiple lines contribute specifications.
1170  * Parse as many lines as necessary, then pull additional information
1171  * from a backing file on disk as necessary.
1172  */
1173 static int
parse_file(struct archive_read * a,struct archive_entry * entry,struct mtree * mtree,struct mtree_entry * mentry,int * use_next)1174 parse_file(struct archive_read *a, struct archive_entry *entry,
1175     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1176 {
1177 	const char *path;
1178 	struct stat st_storage, *st;
1179 	struct mtree_entry *mp;
1180 	struct archive_entry *sparse_entry;
1181 	int r = ARCHIVE_OK, r1, parsed_kws;
1182 
1183 	mentry->used = 1;
1184 
1185 	/* Initialize reasonable defaults. */
1186 	archive_entry_set_filetype(entry, AE_IFREG);
1187 	archive_entry_set_size(entry, 0);
1188 	archive_string_empty(&mtree->contents_name);
1189 
1190 	/* Parse options from this line. */
1191 	parsed_kws = 0;
1192 	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1193 
1194 	if (mentry->full) {
1195 		archive_entry_copy_pathname(entry, mentry->name);
1196 		/*
1197 		 * "Full" entries are allowed to have multiple lines
1198 		 * and those lines aren't required to be adjacent.  We
1199 		 * don't support multiple lines for "relative" entries
1200 		 * nor do we make any attempt to merge data from
1201 		 * separate "relative" and "full" entries.  (Merging
1202 		 * "relative" and "full" entries would require dealing
1203 		 * with pathname canonicalization, which is a very
1204 		 * tricky subject.)
1205 		 */
1206 		mp = (struct mtree_entry *)__archive_rb_tree_find_node(
1207 		    &mtree->rbtree, mentry->name);
1208 		for (; mp; mp = mp->next_dup) {
1209 			if (mp->full && !mp->used) {
1210 				/* Later lines override earlier ones. */
1211 				mp->used = 1;
1212 				r1 = parse_line(a, entry, mtree, mp, &parsed_kws);
1213 				if (r1 < r)
1214 					r = r1;
1215 			}
1216 		}
1217 	} else {
1218 		/*
1219 		 * Relative entries require us to construct
1220 		 * the full path and possibly update the
1221 		 * current directory.
1222 		 */
1223 		size_t n = archive_strlen(&mtree->current_dir);
1224 		if (n > 0)
1225 			archive_strcat(&mtree->current_dir, "/");
1226 		archive_strcat(&mtree->current_dir, mentry->name);
1227 		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1228 		if (archive_entry_filetype(entry) != AE_IFDIR)
1229 			mtree->current_dir.length = n;
1230 	}
1231 
1232 	if (mtree->checkfs) {
1233 		/*
1234 		 * Try to open and stat the file to get the real size
1235 		 * and other file info.  It would be nice to avoid
1236 		 * this here so that getting a listing of an mtree
1237 		 * wouldn't require opening every referenced contents
1238 		 * file.  But then we wouldn't know the actual
1239 		 * contents size, so I don't see a really viable way
1240 		 * around this.  (Also, we may want to someday pull
1241 		 * other unspecified info from the contents file on
1242 		 * disk.)
1243 		 */
1244 		mtree->fd = -1;
1245 		if (archive_strlen(&mtree->contents_name) > 0)
1246 			path = mtree->contents_name.s;
1247 		else
1248 			path = archive_entry_pathname(entry);
1249 
1250 		if (archive_entry_filetype(entry) == AE_IFREG ||
1251 				archive_entry_filetype(entry) == AE_IFDIR) {
1252 			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1253 			__archive_ensure_cloexec_flag(mtree->fd);
1254 			if (mtree->fd == -1 && (
1255 #if defined(_WIN32) && !defined(__CYGWIN__)
1256         /*
1257          * On Windows, attempting to open a file with an
1258          * invalid name result in EINVAL (Error 22)
1259          */
1260 				(errno != ENOENT && errno != EINVAL)
1261 #else
1262 				errno != ENOENT
1263 #endif
1264         || archive_strlen(&mtree->contents_name) > 0)) {
1265 				archive_set_error(&a->archive, errno,
1266 						"Can't open %s", path);
1267 				r = ARCHIVE_WARN;
1268 			}
1269 		}
1270 
1271 		st = &st_storage;
1272 		if (mtree->fd >= 0) {
1273 			if (fstat(mtree->fd, st) == -1) {
1274 				archive_set_error(&a->archive, errno,
1275 						"Could not fstat %s", path);
1276 				r = ARCHIVE_WARN;
1277 				/* If we can't stat it, don't keep it open. */
1278 				close(mtree->fd);
1279 				mtree->fd = -1;
1280 				st = NULL;
1281 			}
1282 		}
1283 #ifdef HAVE_LSTAT
1284 		else if (lstat(path, st) == -1)
1285 #else
1286 		else if (la_stat(path, st) == -1)
1287 #endif
1288 		{
1289 			st = NULL;
1290 		}
1291 
1292 		/*
1293 		 * Check for a mismatch between the type in the specification
1294 		 * and the type of the contents object on disk.
1295 		 */
1296 		if (st != NULL) {
1297 			if (((st->st_mode & S_IFMT) == S_IFREG &&
1298 			      archive_entry_filetype(entry) == AE_IFREG)
1299 #ifdef S_IFLNK
1300 			  ||((st->st_mode & S_IFMT) == S_IFLNK &&
1301 			      archive_entry_filetype(entry) == AE_IFLNK)
1302 #endif
1303 #ifdef S_IFSOCK
1304 			  ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1305 			      archive_entry_filetype(entry) == AE_IFSOCK)
1306 #endif
1307 #ifdef S_IFCHR
1308 			  ||((st->st_mode & S_IFMT) == S_IFCHR &&
1309 			      archive_entry_filetype(entry) == AE_IFCHR)
1310 #endif
1311 #ifdef S_IFBLK
1312 			  ||((st->st_mode & S_IFMT) == S_IFBLK &&
1313 			      archive_entry_filetype(entry) == AE_IFBLK)
1314 #endif
1315 			  ||((st->st_mode & S_IFMT) == S_IFDIR &&
1316 			      archive_entry_filetype(entry) == AE_IFDIR)
1317 #ifdef S_IFIFO
1318 			  ||((st->st_mode & S_IFMT) == S_IFIFO &&
1319 			      archive_entry_filetype(entry) == AE_IFIFO)
1320 #endif
1321 			) {
1322 				/* Types match. */
1323 			} else {
1324 				/* Types don't match; bail out gracefully. */
1325 				if (mtree->fd >= 0)
1326 					close(mtree->fd);
1327 				mtree->fd = -1;
1328 				if (parsed_kws & MTREE_HAS_OPTIONAL) {
1329 					/* It's not an error for an optional
1330 					 * entry to not match disk. */
1331 					*use_next = 1;
1332 				} else if (r == ARCHIVE_OK) {
1333 					archive_set_error(&a->archive,
1334 					    ARCHIVE_ERRNO_MISC,
1335 					    "mtree specification has different"
1336 					    " type for %s",
1337 					    archive_entry_pathname(entry));
1338 					r = ARCHIVE_WARN;
1339 				}
1340 				return (r);
1341 			}
1342 		}
1343 
1344 		/*
1345 		 * If there is a contents file on disk, pick some of the
1346 		 * metadata from that file.  For most of these, we only
1347 		 * set it from the contents if it wasn't already parsed
1348 		 * from the specification.
1349 		 */
1350 		if (st != NULL) {
1351 			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1352 				(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1353 				(archive_entry_filetype(entry) == AE_IFCHR ||
1354 				 archive_entry_filetype(entry) == AE_IFBLK))
1355 				archive_entry_set_rdev(entry, st->st_rdev);
1356 			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1357 				== 0 ||
1358 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1359 				archive_entry_set_gid(entry, st->st_gid);
1360 			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1361 				== 0 ||
1362 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1363 				archive_entry_set_uid(entry, st->st_uid);
1364 			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1365 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1366 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1367 				archive_entry_set_mtime(entry, st->st_mtime,
1368 						st->st_mtimespec.tv_nsec);
1369 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1370 				archive_entry_set_mtime(entry, st->st_mtime,
1371 						st->st_mtim.tv_nsec);
1372 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1373 				archive_entry_set_mtime(entry, st->st_mtime,
1374 						st->st_mtime_n);
1375 #elif HAVE_STRUCT_STAT_ST_UMTIME
1376 				archive_entry_set_mtime(entry, st->st_mtime,
1377 						st->st_umtime*1000);
1378 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1379 				archive_entry_set_mtime(entry, st->st_mtime,
1380 						st->st_mtime_usec*1000);
1381 #else
1382 				archive_entry_set_mtime(entry, st->st_mtime, 0);
1383 #endif
1384 			}
1385 			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1386 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1387 				archive_entry_set_nlink(entry, st->st_nlink);
1388 			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1389 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1390 				archive_entry_set_perm(entry, st->st_mode);
1391 			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1392 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1393 				archive_entry_set_size(entry, st->st_size);
1394 			archive_entry_set_ino(entry, st->st_ino);
1395 			archive_entry_set_dev(entry, st->st_dev);
1396 
1397 			archive_entry_linkify(mtree->resolver, &entry,
1398 				&sparse_entry);
1399 		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1400 			/*
1401 			 * Couldn't open the entry, stat it or the on-disk type
1402 			 * didn't match.  If this entry is optional, just
1403 			 * ignore it and read the next header entry.
1404 			 */
1405 			*use_next = 1;
1406 			return ARCHIVE_OK;
1407 		}
1408 	}
1409 
1410 	mtree->cur_size = archive_entry_size(entry);
1411 	mtree->offset = 0;
1412 
1413 	return r;
1414 }
1415 
1416 /*
1417  * Each line contains a sequence of keywords.
1418  */
1419 static int
parse_line(struct archive_read * a,struct archive_entry * entry,struct mtree * mtree,struct mtree_entry * mp,int * parsed_kws)1420 parse_line(struct archive_read *a, struct archive_entry *entry,
1421     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1422 {
1423 	struct mtree_option *iter;
1424 	int r = ARCHIVE_OK, r1;
1425 
1426 	for (iter = mp->options; iter != NULL; iter = iter->next) {
1427 		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1428 		if (r1 < r)
1429 			r = r1;
1430 	}
1431 	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1432 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1433 		    "Missing type keyword in mtree specification");
1434 		return (ARCHIVE_WARN);
1435 	}
1436 	return (r);
1437 }
1438 
1439 /*
1440  * Device entries have one of the following forms:
1441  *  - raw dev_t
1442  *  - format,major,minor[,subdevice]
1443  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1444  */
1445 
/*
 * Stand-in for strsep(), which is not in C90 (strcspn() is).
 * Taken from http://unixpapa.com/incnote/string.html
 *
 * Cuts the next token off the string *sp: the first character that
 * appears in 'sep' is overwritten with '\0' and *sp is moved just
 * past it (or onto the terminating '\0' if no separator was found).
 * Returns the start of the token, which is empty when the string
 * begins with a separator, or NULL when 'sp' or *sp is NULL or *sp is
 * an empty string.
 */
static char *
la_strsep(char **sp, const char *sep)
{
	char *token, *stop;

	if (sp == NULL)
		return (NULL);
	token = *sp;
	if (token == NULL || *token == '\0')
		return (NULL);

	stop = token + strcspn(token, sep);
	if (*stop != '\0') {
		*stop = '\0';
		++stop;
	}
	*sp = stop;
	return (token);
}
1461 
1462 static int
parse_device(dev_t * pdev,struct archive * a,char * val)1463 parse_device(dev_t *pdev, struct archive *a, char *val)
1464 {
1465 #define MAX_PACK_ARGS 3
1466 	unsigned long numbers[MAX_PACK_ARGS];
1467 	char *p, *dev;
1468 	int argc;
1469 	pack_t *pack;
1470 	dev_t result;
1471 	const char *error = NULL;
1472 
1473 	memset(pdev, 0, sizeof(*pdev));
1474 	if ((dev = strchr(val, ',')) != NULL) {
1475 		/*
1476 		 * Device's major/minor are given in a specified format.
1477 		 * Decode and pack it accordingly.
1478 		 */
1479 		*dev++ = '\0';
1480 		if ((pack = pack_find(val)) == NULL) {
1481 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1482 			    "Unknown format `%s'", val);
1483 			return ARCHIVE_WARN;
1484 		}
1485 		argc = 0;
1486 		while ((p = la_strsep(&dev, ",")) != NULL) {
1487 			if (*p == '\0') {
1488 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1489 				    "Missing number");
1490 				return ARCHIVE_WARN;
1491 			}
1492 			if (argc >= MAX_PACK_ARGS) {
1493 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1494 				    "Too many arguments");
1495 				return ARCHIVE_WARN;
1496 			}
1497 			numbers[argc++] = (unsigned long)mtree_atol(&p, 0);
1498 		}
1499 		if (argc < 2) {
1500 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1501 			    "Not enough arguments");
1502 			return ARCHIVE_WARN;
1503 		}
1504 		result = (*pack)(argc, numbers, &error);
1505 		if (error != NULL) {
1506 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1507 			    "%s", error);
1508 			return ARCHIVE_WARN;
1509 		}
1510 	} else {
1511 		/* file system raw value. */
1512 		result = (dev_t)mtree_atol(&val, 0);
1513 	}
1514 	*pdev = result;
1515 	return ARCHIVE_OK;
1516 #undef MAX_PACK_ARGS
1517 }
1518 
/*
 * Return the value (0..15) of one lowercase hexadecimal digit, or -1
 * if 'c' is not one of 0-9 or a-f.
 *
 * XXX: Uppercase A-F is not accepted; whether it should be supported
 * is an open question.
 */
static int
parse_hex_nibble(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return c - 'a' + 10;
	return -1;
}
1534 
1535 static int
parse_digest(struct archive_read * a,struct archive_entry * entry,const char * digest,int type)1536 parse_digest(struct archive_read *a, struct archive_entry *entry,
1537     const char *digest, int type)
1538 {
1539 	unsigned char digest_buf[64];
1540 	int high, low;
1541 	size_t i, j, len;
1542 
1543 	switch (type) {
1544 	case ARCHIVE_ENTRY_DIGEST_MD5:
1545 		len = sizeof(entry->digest.md5);
1546 		break;
1547 	case ARCHIVE_ENTRY_DIGEST_RMD160:
1548 		len = sizeof(entry->digest.rmd160);
1549 		break;
1550 	case ARCHIVE_ENTRY_DIGEST_SHA1:
1551 		len = sizeof(entry->digest.sha1);
1552 		break;
1553 	case ARCHIVE_ENTRY_DIGEST_SHA256:
1554 		len = sizeof(entry->digest.sha256);
1555 		break;
1556 	case ARCHIVE_ENTRY_DIGEST_SHA384:
1557 		len = sizeof(entry->digest.sha384);
1558 		break;
1559 	case ARCHIVE_ENTRY_DIGEST_SHA512:
1560 		len = sizeof(entry->digest.sha512);
1561 		break;
1562 	default:
1563 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1564 			"Internal error: Unknown digest type");
1565 		return ARCHIVE_FATAL;
1566 	}
1567 
1568 	if (len > sizeof(digest_buf)) {
1569 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1570 			"Internal error: Digest storage too large");
1571 		return ARCHIVE_FATAL;
1572 	}
1573 
1574 	len *= 2;
1575 
1576 	if (mtree_strnlen(digest, len+1) != len) {
1577 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1578 				  "incorrect digest length, ignoring");
1579 		return ARCHIVE_WARN;
1580 	}
1581 
1582 	for (i = 0, j = 0; i < len; i += 2, j++) {
1583 		high = parse_hex_nibble(digest[i]);
1584 		low = parse_hex_nibble(digest[i+1]);
1585 		if (high == -1 || low == -1) {
1586 			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1587 					  "invalid digest data, ignoring");
1588 			return ARCHIVE_WARN;
1589 		}
1590 
1591 		digest_buf[j] = high << 4 | low;
1592 	}
1593 
1594 	return archive_entry_set_digest(entry, type, digest_buf);
1595 }
1596 
1597 /*
1598  * Parse a single keyword and its value.
1599  */
1600 static int
parse_keyword(struct archive_read * a,struct mtree * mtree,struct archive_entry * entry,struct mtree_option * opt,int * parsed_kws)1601 parse_keyword(struct archive_read *a, struct mtree *mtree,
1602     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1603 {
1604 	char *val, *key;
1605 
1606 	key = opt->value;
1607 
1608 	if (*key == '\0')
1609 		return (ARCHIVE_OK);
1610 
1611 	if (strcmp(key, "nochange") == 0) {
1612 		*parsed_kws |= MTREE_HAS_NOCHANGE;
1613 		return (ARCHIVE_OK);
1614 	}
1615 	if (strcmp(key, "optional") == 0) {
1616 		*parsed_kws |= MTREE_HAS_OPTIONAL;
1617 		return (ARCHIVE_OK);
1618 	}
1619 	if (strcmp(key, "ignore") == 0) {
1620 		/*
1621 		 * The mtree processing is not recursive, so
1622 		 * recursion will only happen for explicitly listed
1623 		 * entries.
1624 		 */
1625 		return (ARCHIVE_OK);
1626 	}
1627 
1628 	val = strchr(key, '=');
1629 	if (val == NULL) {
1630 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1631 		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1632 		return (ARCHIVE_WARN);
1633 	}
1634 
1635 	*val = '\0';
1636 	++val;
1637 
1638 	switch (key[0]) {
1639 	case 'c':
1640 		if (strcmp(key, "content") == 0
1641 		    || strcmp(key, "contents") == 0) {
1642 			parse_escapes(val, NULL);
1643 			archive_strcpy(&mtree->contents_name, val);
1644 			return (ARCHIVE_OK);
1645 		}
1646 		if (strcmp(key, "cksum") == 0)
1647 			return (ARCHIVE_OK);
1648 		break;
1649 	case 'd':
1650 		if (strcmp(key, "device") == 0) {
1651 			/* stat(2) st_rdev field, e.g. the major/minor IDs
1652 			 * of a char/block special file */
1653 			int r;
1654 			dev_t dev;
1655 
1656 			*parsed_kws |= MTREE_HAS_DEVICE;
1657 			r = parse_device(&dev, &a->archive, val);
1658 			if (r == ARCHIVE_OK)
1659 				archive_entry_set_rdev(entry, dev);
1660 			return r;
1661 		}
1662 		break;
1663 	case 'f':
1664 		if (strcmp(key, "flags") == 0) {
1665 			*parsed_kws |= MTREE_HAS_FFLAGS;
1666 			archive_entry_copy_fflags_text(entry, val);
1667 			return (ARCHIVE_OK);
1668 		}
1669 		break;
1670 	case 'g':
1671 		if (strcmp(key, "gid") == 0) {
1672 			*parsed_kws |= MTREE_HAS_GID;
1673 			archive_entry_set_gid(entry, mtree_atol(&val, 10));
1674 			return (ARCHIVE_OK);
1675 		}
1676 		if (strcmp(key, "gname") == 0) {
1677 			*parsed_kws |= MTREE_HAS_GNAME;
1678 			archive_entry_copy_gname(entry, val);
1679 			return (ARCHIVE_OK);
1680 		}
1681 		break;
1682 	case 'i':
1683 		if (strcmp(key, "inode") == 0) {
1684 			archive_entry_set_ino(entry, mtree_atol(&val, 10));
1685 			return (ARCHIVE_OK);
1686 		}
1687 		break;
1688 	case 'l':
1689 		if (strcmp(key, "link") == 0) {
1690 			parse_escapes(val, NULL);
1691 			archive_entry_copy_symlink(entry, val);
1692 			return (ARCHIVE_OK);
1693 		}
1694 		break;
1695 	case 'm':
1696 		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0) {
1697 			return parse_digest(a, entry, val,
1698 			    ARCHIVE_ENTRY_DIGEST_MD5);
1699 		}
1700 		if (strcmp(key, "mode") == 0) {
1701 			if (val[0] < '0' || val[0] > '7') {
1702 				archive_set_error(&a->archive,
1703 				    ARCHIVE_ERRNO_FILE_FORMAT,
1704 				    "Symbolic or non-octal mode \"%s\" unsupported", val);
1705 				return (ARCHIVE_WARN);
1706 			}
1707 			*parsed_kws |= MTREE_HAS_PERM;
1708 			archive_entry_set_perm(entry, (mode_t)mtree_atol(&val, 8));
1709 			return (ARCHIVE_OK);
1710 		}
1711 		break;
1712 	case 'n':
1713 		if (strcmp(key, "nlink") == 0) {
1714 			*parsed_kws |= MTREE_HAS_NLINK;
1715 			archive_entry_set_nlink(entry,
1716 				(unsigned int)mtree_atol(&val, 10));
1717 			return (ARCHIVE_OK);
1718 		}
1719 		break;
1720 	case 'r':
1721 		if (strcmp(key, "resdevice") == 0) {
1722 			/* stat(2) st_dev field, e.g. the device ID where the
1723 			 * inode resides */
1724 			int r;
1725 			dev_t dev;
1726 
1727 			r = parse_device(&dev, &a->archive, val);
1728 			if (r == ARCHIVE_OK)
1729 				archive_entry_set_dev(entry, dev);
1730 			return r;
1731 		}
1732 		if (strcmp(key, "rmd160") == 0 ||
1733 		    strcmp(key, "rmd160digest") == 0) {
1734 			return parse_digest(a, entry, val,
1735 			    ARCHIVE_ENTRY_DIGEST_RMD160);
1736 		}
1737 		break;
1738 	case 's':
1739 		if (strcmp(key, "sha1") == 0 ||
1740 		    strcmp(key, "sha1digest") == 0) {
1741 			return parse_digest(a, entry, val,
1742 			    ARCHIVE_ENTRY_DIGEST_SHA1);
1743 		}
1744 		if (strcmp(key, "sha256") == 0 ||
1745 		    strcmp(key, "sha256digest") == 0) {
1746 			return parse_digest(a, entry, val,
1747 			    ARCHIVE_ENTRY_DIGEST_SHA256);
1748 		}
1749 		if (strcmp(key, "sha384") == 0 ||
1750 		    strcmp(key, "sha384digest") == 0) {
1751 			return parse_digest(a, entry, val,
1752 			    ARCHIVE_ENTRY_DIGEST_SHA384);
1753 		}
1754 		if (strcmp(key, "sha512") == 0 ||
1755 		    strcmp(key, "sha512digest") == 0) {
1756 			return parse_digest(a, entry, val,
1757 			    ARCHIVE_ENTRY_DIGEST_SHA512);
1758 		}
1759 		if (strcmp(key, "size") == 0) {
1760 			archive_entry_set_size(entry, mtree_atol(&val, 10));
1761 			return (ARCHIVE_OK);
1762 		}
1763 		break;
1764 	case 't':
1765 		if (strcmp(key, "tags") == 0) {
1766 			/*
1767 			 * Comma delimited list of tags.
1768 			 * Ignore the tags for now, but the interface
1769 			 * should be extended to allow inclusion/exclusion.
1770 			 */
1771 			return (ARCHIVE_OK);
1772 		}
1773 		if (strcmp(key, "time") == 0) {
1774 			int64_t m;
1775 			int64_t my_time_t_max = get_time_t_max();
1776 			int64_t my_time_t_min = get_time_t_min();
1777 			long ns = 0;
1778 
1779 			*parsed_kws |= MTREE_HAS_MTIME;
1780 			m = mtree_atol(&val, 10);
1781 			/* Replicate an old mtree bug:
1782 			 * 123456789.1 represents 123456789
1783 			 * seconds and 1 nanosecond. */
1784 			if (*val == '.') {
1785 				++val;
1786 				ns = (long)mtree_atol(&val, 10);
1787 				if (ns < 0)
1788 					ns = 0;
1789 				else if (ns > 999999999)
1790 					ns = 999999999;
1791 			}
1792 			if (m > my_time_t_max)
1793 				m = my_time_t_max;
1794 			else if (m < my_time_t_min)
1795 				m = my_time_t_min;
1796 			archive_entry_set_mtime(entry, (time_t)m, ns);
1797 			return (ARCHIVE_OK);
1798 		}
1799 		if (strcmp(key, "type") == 0) {
1800 			switch (val[0]) {
1801 			case 'b':
1802 				if (strcmp(val, "block") == 0) {
1803 					*parsed_kws |= MTREE_HAS_TYPE;
1804 					archive_entry_set_filetype(entry,
1805 						AE_IFBLK);
1806 					return (ARCHIVE_OK);
1807 				}
1808 				break;
1809 			case 'c':
1810 				if (strcmp(val, "char") == 0) {
1811 					*parsed_kws |= MTREE_HAS_TYPE;
1812 					archive_entry_set_filetype(entry,
1813 						AE_IFCHR);
1814 					return (ARCHIVE_OK);
1815 				}
1816 				break;
1817 			case 'd':
1818 				if (strcmp(val, "dir") == 0) {
1819 					*parsed_kws |= MTREE_HAS_TYPE;
1820 					archive_entry_set_filetype(entry,
1821 						AE_IFDIR);
1822 					return (ARCHIVE_OK);
1823 				}
1824 				break;
1825 			case 'f':
1826 				if (strcmp(val, "fifo") == 0) {
1827 					*parsed_kws |= MTREE_HAS_TYPE;
1828 					archive_entry_set_filetype(entry,
1829 						AE_IFIFO);
1830 					return (ARCHIVE_OK);
1831 				}
1832 				if (strcmp(val, "file") == 0) {
1833 					*parsed_kws |= MTREE_HAS_TYPE;
1834 					archive_entry_set_filetype(entry,
1835 						AE_IFREG);
1836 					return (ARCHIVE_OK);
1837 				}
1838 				break;
1839 			case 'l':
1840 				if (strcmp(val, "link") == 0) {
1841 					*parsed_kws |= MTREE_HAS_TYPE;
1842 					archive_entry_set_filetype(entry,
1843 						AE_IFLNK);
1844 					return (ARCHIVE_OK);
1845 				}
1846 				break;
1847 			default:
1848 				break;
1849 			}
1850 			archive_set_error(&a->archive,
1851 			    ARCHIVE_ERRNO_FILE_FORMAT,
1852 			    "Unrecognized file type \"%s\"; "
1853 			    "assuming \"file\"", val);
1854 			archive_entry_set_filetype(entry, AE_IFREG);
1855 			return (ARCHIVE_WARN);
1856 		}
1857 		break;
1858 	case 'u':
1859 		if (strcmp(key, "uid") == 0) {
1860 			*parsed_kws |= MTREE_HAS_UID;
1861 			archive_entry_set_uid(entry, mtree_atol(&val, 10));
1862 			return (ARCHIVE_OK);
1863 		}
1864 		if (strcmp(key, "uname") == 0) {
1865 			*parsed_kws |= MTREE_HAS_UNAME;
1866 			archive_entry_copy_uname(entry, val);
1867 			return (ARCHIVE_OK);
1868 		}
1869 		break;
1870 	default:
1871 		break;
1872 	}
1873 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1874 	    "Unrecognized key %s=%s", key, val);
1875 	return (ARCHIVE_WARN);
1876 }
1877 
1878 static int
read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1879 read_data(struct archive_read *a, const void **buff, size_t *size,
1880     int64_t *offset)
1881 {
1882 	size_t bytes_to_read;
1883 	ssize_t bytes_read;
1884 	struct mtree *mtree;
1885 
1886 	mtree = (struct mtree *)(a->format->data);
1887 	if (mtree->fd < 0) {
1888 		*buff = NULL;
1889 		*offset = 0;
1890 		*size = 0;
1891 		return (ARCHIVE_EOF);
1892 	}
1893 	if (mtree->buff == NULL) {
1894 		mtree->buffsize = 64 * 1024;
1895 		mtree->buff = malloc(mtree->buffsize);
1896 		if (mtree->buff == NULL) {
1897 			archive_set_error(&a->archive, ENOMEM,
1898 			    "Can't allocate memory");
1899 			return (ARCHIVE_FATAL);
1900 		}
1901 	}
1902 
1903 	*buff = mtree->buff;
1904 	*offset = mtree->offset;
1905 	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1906 		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1907 	else
1908 		bytes_to_read = mtree->buffsize;
1909 	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1910 	if (bytes_read < 0) {
1911 		archive_set_error(&a->archive, errno, "Can't read");
1912 		return (ARCHIVE_WARN);
1913 	}
1914 	if (bytes_read == 0) {
1915 		*size = 0;
1916 		return (ARCHIVE_EOF);
1917 	}
1918 	mtree->offset += bytes_read;
1919 	*size = bytes_read;
1920 	return (ARCHIVE_OK);
1921 }
1922 
1923 /* Skip does nothing except possibly close the contents file. */
1924 static int
skip(struct archive_read * a)1925 skip(struct archive_read *a)
1926 {
1927 	struct mtree *mtree;
1928 
1929 	mtree = (struct mtree *)(a->format->data);
1930 	if (mtree->fd >= 0) {
1931 		close(mtree->fd);
1932 		mtree->fd = -1;
1933 	}
1934 	return (ARCHIVE_OK);
1935 }
1936 
1937 /*
1938  * Since parsing backslash sequences always makes strings shorter,
1939  * we can always do this conversion in-place.
1940  */
1941 static void
parse_escapes(char * src,struct mtree_entry * mentry)1942 parse_escapes(char *src, struct mtree_entry *mentry)
1943 {
1944 	char *dest = src;
1945 	char c;
1946 
1947 	if (mentry != NULL && strcmp(src, ".") == 0)
1948 		mentry->full = 1;
1949 
1950 	while (*src != '\0') {
1951 		c = *src++;
1952 		if (c == '/' && mentry != NULL)
1953 			mentry->full = 1;
1954 		if (c == '\\') {
1955 			switch (src[0]) {
1956 			case '0':
1957 				if (src[1] < '0' || src[1] > '7') {
1958 					c = 0;
1959 					++src;
1960 					break;
1961 				}
1962 				/* FALLTHROUGH */
1963 			case '1':
1964 			case '2':
1965 			case '3':
1966 				if (src[1] >= '0' && src[1] <= '7' &&
1967 				    src[2] >= '0' && src[2] <= '7') {
1968 					c = (src[0] - '0') << 6;
1969 					c |= (src[1] - '0') << 3;
1970 					c |= (src[2] - '0');
1971 					src += 3;
1972 				}
1973 				break;
1974 			case 'a':
1975 				c = '\a';
1976 				++src;
1977 				break;
1978 			case 'b':
1979 				c = '\b';
1980 				++src;
1981 				break;
1982 			case 'f':
1983 				c = '\f';
1984 				++src;
1985 				break;
1986 			case 'n':
1987 				c = '\n';
1988 				++src;
1989 				break;
1990 			case 'r':
1991 				c = '\r';
1992 				++src;
1993 				break;
1994 			case 's':
1995 				c = ' ';
1996 				++src;
1997 				break;
1998 			case 't':
1999 				c = '\t';
2000 				++src;
2001 				break;
2002 			case 'v':
2003 				c = '\v';
2004 				++src;
2005 				break;
2006 			case '\\':
2007 				c = '\\';
2008 				++src;
2009 				break;
2010 			}
2011 		}
2012 		*dest++ = c;
2013 	}
2014 	*dest = '\0';
2015 }
2016 
/*
 * Parse a hex digit.
 *
 * Returns 0..9 for '0'..'9', 10..15 for 'a'..'f' or 'A'..'F', and -1
 * for any other character.  mtree_atol() compares the result against
 * the number base, so a letter has to map to a value of at least 10
 * to end a decimal or octal number instead of being taken as one of
 * its digits.
 */
static int
parsedigit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	else if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	else
		return -1;
}
2030 
/*
 * Parse an integer at *p and advance *p past the digits consumed.
 *
 * With base 0 the base is taken from the text: a leading "0x" or "0X"
 * (which is skipped) selects 16, any other leading '0' selects 8, and
 * everything else 10.  A '-' directly before the digits makes the
 * result negative.  A value that does not fit is reported as
 * INT64_MAX or INT64_MIN; in that case *p is left at the digit that
 * would have overflowed.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol(char **p, int base)
{
	int64_t value = 0, bound;
	int d, bound_digit;

	if (base == 0) {
		if (**p != '0') {
			base = 10;
		} else if ((*p)[1] == 'x' || (*p)[1] == 'X') {
			*p += 2;
			base = 16;
		} else {
			base = 8;
		}
	}

	if (**p == '-') {
		/* Accumulate downward so that INT64_MIN is reachable. */
		++(*p);
		bound = INT64_MIN / base;
		bound_digit = -(INT64_MIN % base);

		for (d = parsedigit(**p); d >= 0 && d < base;
		    d = parsedigit(*++(*p))) {
			if (value < bound ||
			    (value == bound && d >= bound_digit))
				return INT64_MIN;
			value = (value * base) - d;
		}
		return value;
	}

	bound = INT64_MAX / base;
	bound_digit = INT64_MAX % base;

	for (d = parsedigit(**p); d >= 0 && d < base;
	    d = parsedigit(*++(*p))) {
		if (value > bound || (value == bound && d > bound_digit))
			return INT64_MAX;
		value = (value * base) + d;
	}
	return value;
}
2082 
2083 /*
2084  * Returns length of line (including trailing newline)
2085  * or negative on error.  'start' argument is updated to
2086  * point to first character of line.
2087  */
2088 static ssize_t
readline(struct archive_read * a,struct mtree * mtree,char ** start,ssize_t limit)2089 readline(struct archive_read *a, struct mtree *mtree, char **start,
2090     ssize_t limit)
2091 {
2092 	ssize_t bytes_read;
2093 	ssize_t total_size = 0;
2094 	ssize_t find_off = 0;
2095 	const void *t;
2096 	void *nl;
2097 	char *u;
2098 
2099 	/* Accumulate line in a line buffer. */
2100 	for (;;) {
2101 		/* Read some more. */
2102 		t = __archive_read_ahead(a, 1, &bytes_read);
2103 		if (t == NULL)
2104 			return (0);
2105 		if (bytes_read < 0)
2106 			return (ARCHIVE_FATAL);
2107 		nl = memchr(t, '\n', bytes_read);
2108 		/* If we found '\n', trim the read to end exactly there. */
2109 		if (nl != NULL) {
2110 			bytes_read = ((const char *)nl) - ((const char *)t) + 1;
2111 		}
2112 		if (total_size + bytes_read + 1 > limit) {
2113 			archive_set_error(&a->archive,
2114 			    ARCHIVE_ERRNO_FILE_FORMAT,
2115 			    "Line too long");
2116 			return (ARCHIVE_FATAL);
2117 		}
2118 		if (archive_string_ensure(&mtree->line,
2119 			total_size + bytes_read + 1) == NULL) {
2120 			archive_set_error(&a->archive, ENOMEM,
2121 			    "Can't allocate working buffer");
2122 			return (ARCHIVE_FATAL);
2123 		}
2124 		/* Append new bytes to string. */
2125 		memcpy(mtree->line.s + total_size, t, bytes_read);
2126 		__archive_read_consume(a, bytes_read);
2127 		total_size += bytes_read;
2128 		mtree->line.s[total_size] = '\0';
2129 
2130 		for (u = mtree->line.s + find_off; *u; ++u) {
2131 			if (u[0] == '\n') {
2132 				/* Ends with unescaped newline. */
2133 				*start = mtree->line.s;
2134 				return total_size;
2135 			} else if (u[0] == '#') {
2136 				/* Ends with comment sequence #...\n */
2137 				if (nl == NULL) {
2138 					/* But we've not found the \n yet */
2139 					break;
2140 				}
2141 			} else if (u[0] == '\\') {
2142 				if (u[1] == '\n') {
2143 					/* Trim escaped newline. */
2144 					total_size -= 2;
2145 					mtree->line.s[total_size] = '\0';
2146 					break;
2147 				} else if (u[1] != '\0') {
2148 					/* Skip the two-char escape sequence */
2149 					++u;
2150 				}
2151 			}
2152 		}
2153 		find_off = u - mtree->line.s;
2154 	}
2155 }
2156