xref: /freebsd/sys/fs/tarfs/tarfs_vfsops.c (revision 9768746b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/buf.h>
34 #include <sys/conf.h>
35 #include <sys/fcntl.h>
36 #include <sys/libkern.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/vnode.h>
50 
51 #include <vm/vm_param.h>
52 
53 #include <geom/geom.h>
54 #include <geom/geom_vfs.h>
55 
56 #include <fs/tarfs/tarfs.h>
57 #include <fs/tarfs/tarfs_dbg.h>
58 
/*
 * tarfs_alloc_one() recognizes end-of-archive marker blocks by comparing
 * a TARFS_BLOCKSIZE-byte tar block against zero_region, so the zero
 * region must be large enough to cover a whole tar block.
 */
CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
60 
/*
 * On-disk layout of a tar header.  tarfs_alloc_one() overlays this
 * structure on a raw tar block, so the order and size of the fields must
 * not change.  All fields are plain byte arrays; numeric fields are
 * decoded with tarfs_str2int64().
 */
struct ustar_header {
	/* Fields shared with the "old" (pre-POSIX) tar format */
	char	name[100];		/* file name */
	char	mode[8];		/* permission bits */
	char	uid[8];			/* owner user id */
	char	gid[8];			/* owner group id */
	char	size[12];		/* payload size */
	char	mtime[12];		/* modification time */
	char	checksum[8];		/* header checksum */
	char	typeflag[1];		/* entry type, see TAR_TYPE_* */
	char	linkname[100];		/* link target */
	/* Fields added by POSIX ustar */
	char	magic[6];		/* "ustar\0" */
	char	version[2];		/* "00" */
	char	uname[32];		/* owner user name */
	char	gname[32];		/* owner group name */
	char	major[8];		/* device major number */
	char	minor[8];		/* device minor number */
	char	prefix[155];		/* path prefix */
};
79 
/*
 * Sentinel block number: tarfs_alloc_one() stores this in *blknump once
 * the end-of-archive marker has been seen.
 */
#define	TAR_EOF			((off_t)-1)

/* Entry types, compared against the first byte of the typeflag field */
#define	TAR_TYPE_FILE		'0'
#define	TAR_TYPE_HARDLINK	'1'
#define	TAR_TYPE_SYMLINK	'2'
#define	TAR_TYPE_CHAR		'3'
#define	TAR_TYPE_BLOCK		'4'
#define	TAR_TYPE_DIRECTORY	'5'
#define	TAR_TYPE_FIFO		'6'
#define	TAR_TYPE_CONTIG		'7'
#define	TAR_TYPE_GLOBAL_EXTHDR	'g'
#define	TAR_TYPE_EXTHDR		'x'
#define	TAR_TYPE_GNU_SPARSE	'S'

/*
 * Expected contents of the magic and version header fields.  These are
 * array compound literals so that sizeof() yields the field width when
 * they are used with memcmp().
 */
#define	USTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
#define	USTAR_VERSION		(uint8_t []){ '0', '0' }
#define	GNUTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
#define	GNUTAR_VERSION		(uint8_t []){ ' ', '\x0' }

/*
 * Mode given to directories that tarfs_lookup_path() creates implicitly
 * because the archive has no entry of its own for them.
 */
#define	DEFDIRMODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

/* malloc(9) types for mount and node structures */
MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");

/* VFS operations implemented in this file */
static vfs_mount_t	tarfs_mount;
static vfs_unmount_t	tarfs_unmount;
static vfs_root_t	tarfs_root;
static vfs_statfs_t	tarfs_statfs;
static vfs_fhtovp_t	tarfs_fhtovp;

/*
 * Mount options accepted by tarfs_mount(); the list is passed to
 * vfs_filteropt() there.
 */
static const char *tarfs_opts[] = {
	"from", "gid", "mode", "uid", "verify",
	NULL
};
114 
/*
 * Reads a signed octal number from the len-byte field at strp and returns
 * its value.
 *
 * Leading spaces and tabs are skipped and an optional '-' sign is
 * accepted.  Parsing stops at the first byte that is not an octal digit
 * or at the end of the field, whichever comes first; the field does not
 * have to be NUL-terminated.  An empty or all-blank field yields 0.  A
 * magnitude that does not fit in an int64_t saturates to INT64_MAX before
 * the sign is applied.
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2octal(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;
	int sign;

	/*
	 * Skip leading spaces or tabs.
	 * XXX why?  POSIX requires numeric fields to be 0-padded.
	 */
	for (idx = 0; idx < len; idx++)
		if (strp[idx] != ' ' && strp[idx] != '\t')
			break;

	if (idx == len)
		return (0);

	if (strp[idx] == '-') {
		sign = -1;
		idx++;
	} else
		sign = 1;

	val = 0;
	for (; idx < len; idx++) {
		if (strp[idx] < '0' || strp[idx] > '7')
			break;

		/*
		 * Saturate only if appending another digit would overflow.
		 * The check must precede the multiplication: as long as
		 * val <= INT64_MAX / 8, val * 8 + 7 still fits, so every
		 * representable value is returned exactly.
		 */
		if (val > INT64_MAX / 8) {
			val = INT64_MAX;
			break;
		}
		val = val * 8 + (strp[idx] - '0');
	}

	return (sign > 0) ? val : -val;
}
159 
/*
 * Reads a len-byte extended ("base-256") numeric value from strp and
 * returns it.
 *
 * Bit 7 of the first byte marks the format.  The remaining 7 bits of the
 * first byte, followed by the next (len - 1) bytes, form a big-endian
 * signed two's complement number, so bit 6 of the first byte is the sign
 * bit.  Values that do not fit in an int64_t saturate to INT64_MAX or
 * INT64_MIN.
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2base256(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;

	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));

	/*
	 * Sign-extend the first byte.  Plain arithmetic is used instead of
	 * shifts because left-shifting a negative value is undefined.
	 */
	val = strp[0] & 0x3f;
	if ((strp[0] & 0x40) != 0)
		val -= 0x40;

	/* Read subsequent bytes */
	for (idx = 1; idx < len; idx++) {
		/*
		 * Truncate on overflow and underflow.  Checking before the
		 * multiplication keeps the arithmetic in range and lets
		 * INT64_MAX and INT64_MIN themselves be decoded exactly.
		 */
		if (val > INT64_MAX / 256) {
			val = INT64_MAX;
			break;
		} else if (val < INT64_MIN / 256) {
			val = INT64_MIN;
			break;
		}
		val = val * 256 + (unsigned char)strp[idx];
	}

	return (val);
}
199 
200 /*
201  * Read a len-byte numeric field from strp.  If bit 7 of the first byte it
202  * set, assume an extended numeric value (signed two's complement);
203  * otherwise, assume a signed octal value.
204  *
205  * XXX practically no error checking or handling
206  */
207 static int64_t
208 tarfs_str2int64(const char *strp, size_t len)
209 {
210 
211 	if (len < 1)
212 		return (0);
213 
214 	if ((strp[0] & 0x80) != 0)
215 		return (tarfs_str2base256(strp, len));
216 	return (tarfs_str2octal(strp, len));
217 }
218 
219 /*
220  * Verifies the checksum of a header.  Returns true if the checksum is
221  * valid, false otherwise.
222  */
223 static boolean_t
224 tarfs_checksum(struct ustar_header *hdrp)
225 {
226 	const unsigned char *ptr;
227 	int64_t checksum, hdrsum;
228 	size_t idx;
229 
230 	hdrsum = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
231 	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, hdrsum);
232 
233 	checksum = 0;
234 	for (ptr = (const unsigned char *)hdrp;
235 	     ptr < (const unsigned char *)hdrp->checksum; ptr++)
236 		checksum += *ptr;
237 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
238 		checksum += 0x20;
239 	for (ptr = (const unsigned char *)hdrp->typeflag;
240 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
241 		checksum += *ptr;
242 	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
243 	    checksum);
244 	if (hdrsum == checksum)
245 		return (true);
246 
247 	/*
248 	 * Repeat test with signed bytes, some older formats use a broken
249 	 * form of the calculation
250 	 */
251 	checksum = 0;
252 	for (ptr = (const unsigned char *)hdrp;
253 	     ptr < (const unsigned char *)&hdrp->checksum; ptr++)
254 		checksum += *((const signed char *)ptr);
255 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
256 		checksum += 0x20;
257 	for (ptr = (const unsigned char *)&hdrp->typeflag;
258 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
259 		checksum += *((const signed char *)ptr);
260 	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
261 	    checksum);
262 	if (hdrsum == checksum)
263 		return (true);
264 
265 	return (false);
266 }
267 
268 
269 /*
270  * Looks up a path in the tarfs node tree.
271  *
272  * - If the path exists, stores a pointer to the corresponding tarfs_node
273  *   in retnode and a pointer to its parent in retparent.
274  *
275  * - If the path does not exist, but create_dirs is true, creates ancestor
276  *   directories and returns NULL in retnode and the parent in retparent.
277  *
278  * - If the path does not exist and create_dirs is false, stops at the
279  *   first missing path name component.
280  *
281  * - In all cases, on return, endp and sepp point to the beginning and
282  *   end, respectively, of the last-processed path name component.
283  *
284  * - Returns 0 if the node was found, ENOENT if it was not, and some other
285  *   positive errno value on failure.
286  */
287 static int
288 tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
289     char **endp, char **sepp, struct tarfs_node **retparent,
290     struct tarfs_node **retnode, boolean_t create_dirs)
291 {
292 	struct componentname cn = { };
293 	struct tarfs_node *parent, *tnp;
294 	char *sep;
295 	size_t len;
296 	int error;
297 	boolean_t do_lookup;
298 
299 	MPASS(name != NULL && namelen != 0);
300 
301 	do_lookup = true;
302 	error = 0;
303 	parent = tnp = tmp->root;
304 	if (tnp == NULL)
305 		panic("%s: root node not yet created", __func__);
306 
307 	TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen,
308 	    name);
309 
310 	sep = NULL;
311 	for (;;) {
312 		/* skip leading slash(es) */
313 		while (name[0] == '/' && namelen > 0)
314 			name++, namelen--;
315 
316 		/* did we reach the end? */
317 		if (namelen == 0 || name[0] == '\0') {
318 			name = do_lookup ? NULL : cn.cn_nameptr;
319 			namelen = do_lookup ? 0 : cn.cn_namelen;
320 			break;
321 		}
322 
323 		/* locate the next separator */
324 		for (sep = name, len = 0;
325 		     *sep != '\0' && *sep != '/' && len < namelen;
326 		     sep++, len++)
327 			/* nothing */ ;
328 
329 		/* check for . and .. */
330 		if (name[0] == '.' && len == 1) {
331 			name += len;
332 			namelen -= len;
333 			continue;
334 		}
335 		if (name[0] == '.' && name[1] == '.' && len == 2) {
336 			if (tnp == tmp->root) {
337 				error = EINVAL;
338 				break;
339 			}
340 			tnp = parent;
341 			parent = tnp->parent;
342 			name += len;
343 			namelen -= len;
344 			continue;
345 		}
346 
347 		/* create parent if necessary */
348 		if (!do_lookup) {
349 			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
350 			    (int)cn.cn_namelen, cn.cn_nameptr);
351 			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
352 			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
353 			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
354 			if (error != 0)
355 				break;
356 		}
357 
358 		parent = tnp;
359 		tnp = NULL;
360 		cn.cn_nameptr = name;
361 		cn.cn_namelen = len;
362 		TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__,
363 		    (int)cn.cn_namelen, cn.cn_nameptr);
364 		if (do_lookup) {
365 			tnp = tarfs_lookup_node(parent, NULL, &cn);
366 			if (tnp == NULL) {
367 				do_lookup = false;
368 				if (!create_dirs)
369 					break;
370 			}
371 		}
372 		name += cn.cn_namelen;
373 		namelen -= cn.cn_namelen;
374 	}
375 
376 	TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp);
377 
378 	if (retparent)
379 		*retparent = parent;
380 	if (retnode)
381 		*retnode = tnp;
382 	if (endp) {
383 		if (namelen > 0)
384 			*endp = name;
385 		else
386 			*endp = NULL;
387 	}
388 	if (sepp)
389 		*sepp = sep;
390 	return (error);
391 }
392 
393 /*
394  * Frees a tarfs_mount structure and everything it references.
395  */
396 static void
397 tarfs_free_mount(struct tarfs_mount *tmp)
398 {
399 	struct mount *mp;
400 	struct tarfs_node *tnp;
401 
402 	MPASS(tmp != NULL);
403 
404 	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
405 
406 	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
407 	while (!TAILQ_EMPTY(&tmp->allnodes)) {
408 		tnp = TAILQ_FIRST(&tmp->allnodes);
409 		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
410 		tarfs_free_node(tnp);
411 	}
412 
413 	(void)tarfs_io_fini(tmp);
414 
415 	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
416 	delete_unrhdr(tmp->ino_unr);
417 	mp = tmp->vfs;
418 	mp->mnt_data = NULL;
419 
420 	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
421 	free(tmp, M_TARFSMNT);
422 }
423 
424 /*
425  * Processes the tar file header at block offset blknump and allocates and
426  * populates a tarfs_node structure for the file it describes.  Updated
427  * blknump to point to the next unread tar file block, or TAR_EOF if EOF
428  * is reached.  Returns 0 on success or EOF and a positive errno value on
429  * failure.
430  */
431 static int
432 tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
433 {
434 	char block[TARFS_BLOCKSIZE];
435 	struct ustar_header *hdrp = (struct ustar_header *)block;
436 	struct sbuf *namebuf = NULL;
437 	char *exthdr = NULL, *name = NULL, *link = NULL;
438 	off_t blknum = *blknump;
439 	int64_t num;
440 	int endmarker = 0;
441 	char *namep, *sep;
442 	struct tarfs_node *parent, *tnp;
443 	size_t namelen = 0, linklen = 0, realsize = 0, sz;
444 	ssize_t res;
445 	dev_t rdev;
446 	gid_t gid;
447 	mode_t mode;
448 	time_t mtime;
449 	uid_t uid;
450 	long major = -1, minor = -1;
451 	unsigned int flags = 0;
452 	int error;
453 	boolean_t sparse = false;
454 
455 again:
456 	/* read next header */
457 	res = tarfs_io_read_buf(tmp, false, block,
458 	    TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
459 	if (res < 0) {
460 		error = -res;
461 		goto bad;
462 	} else if (res < TARFS_BLOCKSIZE) {
463 		goto eof;
464 	}
465 	blknum++;
466 
467 	/* check for end marker */
468 	if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
469 		if (endmarker++) {
470 			if (exthdr != NULL) {
471 				TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
472 				    __func__, TARFS_BLOCKSIZE * (blknum - 1));
473 				free(exthdr, M_TEMP);
474 			}
475 			TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
476 			    TARFS_BLOCKSIZE * blknum);
477 			tmp->nblocks = blknum;
478 			*blknump = TAR_EOF;
479 			return (0);
480 		}
481 		goto again;
482 	}
483 
484 	/* verify magic */
485 	if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
486 	    memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
487 		/* POSIX */
488 	} else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
489 	    memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
490 		TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
491 		    TARFS_BLOCKSIZE * (blknum - 1));
492 		error = EFTYPE;
493 		goto bad;
494 	} else {
495 		TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
496 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
497 		error = EINVAL;
498 		goto bad;
499 	}
500 
501 	/* verify checksum */
502 	if (!tarfs_checksum(hdrp)) {
503 		TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
504 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
505 		error = EINVAL;
506 		goto bad;
507 	}
508 
509 	/* get standard attributes */
510 	num = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
511 	if (num < 0 || num > ALLPERMS) {
512 		TARFS_DPF(ALLOC, "%s: invalid file mode at %zu\n",
513 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
514 		mode = S_IRUSR;
515 	} else {
516 		mode = num;
517 	}
518 	num = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
519 	if (num < 0 || num > UID_MAX) {
520 		TARFS_DPF(ALLOC, "%s: UID out of range at %zu\n",
521 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
522 		uid = tmp->root->uid;
523 		mode &= ~S_ISUID;
524 	} else {
525 		uid = num;
526 	}
527 	num = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
528 	if (num < 0 || num > GID_MAX) {
529 		TARFS_DPF(ALLOC, "%s: GID out of range at %zu\n",
530 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
531 		gid = tmp->root->gid;
532 		mode &= ~S_ISGID;
533 	} else {
534 		gid = num;
535 	}
536 	num = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
537 	if (num < 0) {
538 		TARFS_DPF(ALLOC, "%s: negative size at %zu\n",
539 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
540 		error = EINVAL;
541 		goto bad;
542 	} else {
543 		sz = num;
544 	}
545 	mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
546 	rdev = NODEV;
547 	TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
548 	    hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
549 
550 	/* extended header? */
551 	if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
552 		printf("%s: unsupported global extended header at %zu\n",
553 		    __func__, (size_t)(TARFS_BLOCKSIZE * (blknum - 1)));
554 		error = EFTYPE;
555 		goto bad;
556 	}
557 	if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
558 		if (exthdr != NULL) {
559 			TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
560 			    __func__, TARFS_BLOCKSIZE * (blknum - 1));
561 			error = EFTYPE;
562 			goto bad;
563 		}
564 		/* read the contents of the exthdr */
565 		TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
566 		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
567 		exthdr = malloc(sz, M_TEMP, M_WAITOK);
568 		res = tarfs_io_read_buf(tmp, false, exthdr,
569 		    TARFS_BLOCKSIZE * blknum, sz);
570 		if (res < 0) {
571 			error = -res;
572 			goto bad;
573 		}
574 		if (res < sz) {
575 			goto eof;
576 		}
577 		blknum += TARFS_SZ2BLKS(res);
578 		/* XXX TODO: refactor this parser */
579 		char *line = exthdr;
580 		while (line < exthdr + sz) {
581 			char *eol, *key, *value, *sep;
582 			size_t len = strtoul(line, &sep, 10);
583 			if (len == 0 || sep == line || *sep != ' ') {
584 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
585 				    __func__);
586 				error = EINVAL;
587 				goto bad;
588 			}
589 			if (line + len > exthdr + sz) {
590 				TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
591 				    __func__);
592 				error = EINVAL;
593 				goto bad;
594 			}
595 			eol = line + len - 1;
596 			*eol = '\0';
597 			line += len;
598 			key = sep + 1;
599 			sep = strchr(key, '=');
600 			if (sep == NULL) {
601 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
602 				    __func__);
603 				error = EINVAL;
604 				goto bad;
605 			}
606 			*sep = '\0';
607 			value = sep + 1;
608 			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
609 			    key, value);
610 			if (strcmp(key, "linkpath") == 0) {
611 				link = value;
612 				linklen = eol - value;
613 			} else if (strcmp(key, "GNU.sparse.major") == 0) {
614 				sparse = true;
615 				major = strtol(value, &sep, 10);
616 				if (sep != eol) {
617 					printf("exthdr syntax error\n");
618 					error = EINVAL;
619 					goto bad;
620 				}
621 			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
622 				sparse = true;
623 				minor = strtol(value, &sep, 10);
624 				if (sep != eol) {
625 					printf("exthdr syntax error\n");
626 					error = EINVAL;
627 					goto bad;
628 				}
629 			} else if (strcmp(key, "GNU.sparse.name") == 0) {
630 				sparse = true;
631 				name = value;
632 				namelen = eol - value;
633 				if (namelen == 0) {
634 					printf("exthdr syntax error\n");
635 					error = EINVAL;
636 					goto bad;
637 				}
638 			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
639 				sparse = true;
640 				realsize = strtoul(value, &sep, 10);
641 				if (sep != eol) {
642 					printf("exthdr syntax error\n");
643 					error = EINVAL;
644 					goto bad;
645 				}
646 			} else if (strcmp(key, "SCHILY.fflags") == 0) {
647 				flags |= tarfs_strtofflags(value, &sep);
648 				if (sep != eol) {
649 					printf("exthdr syntax error\n");
650 					error = EINVAL;
651 					goto bad;
652 				}
653 			}
654 		}
655 		goto again;
656 	}
657 
658 	/* sparse file consistency checks */
659 	if (sparse) {
660 		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
661 		    name, major, minor, realsize);
662 		if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
663 		    hdrp->typeflag[0] != TAR_TYPE_FILE) {
664 			TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
665 			error = EINVAL;
666 			goto bad;
667 		}
668 	}
669 
670 	/* file name */
671 	if (name == NULL) {
672 		if (hdrp->prefix[0] != '\0') {
673 			namebuf = sbuf_new_auto();
674 			sbuf_printf(namebuf, "%.*s/%.*s",
675 			    (int)sizeof(hdrp->prefix), hdrp->prefix,
676 			    (int)sizeof(hdrp->name), hdrp->name);
677 			sbuf_finish(namebuf);
678 			name = sbuf_data(namebuf);
679 			namelen = sbuf_len(namebuf);
680 		} else {
681 			name = hdrp->name;
682 			namelen = strnlen(hdrp->name, sizeof(hdrp->name));
683 		}
684 	}
685 
686 	error = tarfs_lookup_path(tmp, name, namelen, &namep,
687 	    &sep, &parent, &tnp, true);
688 	if (error != 0)
689 		goto bad;
690 	if (tnp != NULL) {
691 		if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
692 			/* XXX set attributes? */
693 			goto skip;
694 		}
695 		TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
696 		    (int)namelen, name);
697 		error = EINVAL;
698 		goto bad;
699 	}
700 	switch (hdrp->typeflag[0]) {
701 	case TAR_TYPE_DIRECTORY:
702 		error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
703 		    0, 0, mtime, uid, gid, mode, flags, NULL, 0,
704 		    parent, &tnp);
705 		break;
706 	case TAR_TYPE_FILE:
707 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
708 		    blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
709 		    flags, NULL, 0, parent, &tnp);
710 		if (error == 0 && sparse) {
711 			error = tarfs_load_blockmap(tnp, realsize);
712 		}
713 		break;
714 	case TAR_TYPE_HARDLINK:
715 		if (link == NULL) {
716 			link = hdrp->linkname;
717 			linklen = strnlen(link, sizeof(hdrp->linkname));
718 		}
719 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
720 		    0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
721 		if (error != 0) {
722 			goto bad;
723 		}
724 		error = tarfs_lookup_path(tmp, link, linklen, NULL,
725 		    NULL, NULL, &tnp->other, false);
726 		if (tnp->other == NULL ||
727 		    tnp->other->type != VREG ||
728 		    tnp->other->other != NULL) {
729 			TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
730 			    __func__, (int)namelen, name, (int)linklen, link);
731 			error = EINVAL;
732 			goto bad;
733 		}
734 		break;
735 	case TAR_TYPE_SYMLINK:
736 		if (link == NULL) {
737 			link = hdrp->linkname;
738 			linklen = strnlen(link, sizeof(hdrp->linkname));
739 		}
740 		error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
741 		    0, linklen, mtime, uid, gid, mode, flags, link, 0,
742 		    parent, &tnp);
743 		break;
744 	case TAR_TYPE_BLOCK:
745 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
746 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
747 		rdev = makedev(major, minor);
748 		error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
749 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
750 		    parent, &tnp);
751 		break;
752 	case TAR_TYPE_CHAR:
753 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
754 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
755 		rdev = makedev(major, minor);
756 		error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
757 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
758 		    parent, &tnp);
759 		break;
760 	default:
761 		TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
762 		    __func__, hdrp->typeflag[0], (int)namelen, name);
763 		error = EINVAL;
764 		break;
765 	}
766 	if (error != 0)
767 		goto bad;
768 
769 skip:
770 	blknum += TARFS_SZ2BLKS(sz);
771 	tmp->nblocks = blknum;
772 	*blknump = blknum;
773 	if (exthdr != NULL) {
774 		free(exthdr, M_TEMP);
775 	}
776 	if (namebuf != NULL) {
777 		sbuf_delete(namebuf);
778 	}
779 	return (0);
780 eof:
781 	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
782 	error = EIO;
783 	goto bad;
784 bad:
785 	if (exthdr != NULL) {
786 		free(exthdr, M_TEMP);
787 	}
788 	if (namebuf != NULL) {
789 		sbuf_delete(namebuf);
790 	}
791 	return (error);
792 }
793 
794 /*
795  * Allocates and populates the metadata structures for the tar file
796  * referenced by vp.  On success, a pointer to the tarfs_mount structure
797  * is stored in tmpp.  Returns 0 on success or a positive errno value on
798  * failure.
799  */
800 static int
801 tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
802     uid_t root_uid, gid_t root_gid, mode_t root_mode,
803     struct tarfs_mount **tmpp)
804 {
805 	struct vattr va;
806 	struct thread *td = curthread;
807 	struct tarfs_mount *tmp;
808 	struct tarfs_node *root;
809 	off_t blknum;
810 	time_t mtime;
811 	int error;
812 
813 	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
814 	ASSERT_VOP_LOCKED(vp, __func__);
815 
816 	tmp = NULL;
817 
818 	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
819 	    __func__, vp);
820 
821 	/* Get source metadata */
822 	error = VOP_GETATTR(vp, &va, td->td_ucred);
823 	if (error != 0) {
824 		return (error);
825 	}
826 	VOP_UNLOCK(vp);
827 	mtime = va.va_mtime.tv_sec;
828 
829 	/* Allocate and initialize tarfs mount structure */
830 	tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO);
831 	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
832 	mp->mnt_data = tmp;
833 
834 	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
835 	    MTX_DEF);
836 	TAILQ_INIT(&tmp->allnodes);
837 	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
838 	tmp->vp = vp;
839 	tmp->vfs = mp;
840 	tmp->mtime = mtime;
841 
842 	/*
843 	 * XXX The decompression layer passes everything through the
844 	 * buffer cache, and the buffer cache wants to know our blocksize,
845 	 * but mnt_stat normally isn't populated until after we return, so
846 	 * we have to cheat a bit.
847 	 */
848 	tmp->iosize = 1U << tarfs_ioshift;
849 	mp->mnt_stat.f_iosize = tmp->iosize;
850 
851 	/* Initialize decompression layer */
852 	error = tarfs_io_init(tmp);
853 	if (error != 0)
854 		goto bad;
855 
856 	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
857 	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
858 	if (error != 0 || root == NULL)
859 		goto bad;
860 	tmp->root = root;
861 
862 	blknum = 0;
863 	do {
864 		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
865 			goto bad;
866 		}
867 	} while (blknum != TAR_EOF);
868 
869 	*tmpp = tmp;
870 
871 	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
872 	return (0);
873 
874 bad:
875 	tarfs_free_mount(tmp);
876 	return (error);
877 }
878 
879 /*
880  * VFS Operations.
881  */
882 
883 static int
884 tarfs_mount(struct mount *mp)
885 {
886 	struct nameidata nd;
887 	struct vattr va;
888 	struct tarfs_mount *tmp = NULL;
889 	struct thread *td = curthread;
890 	struct vnode *vp;
891 	char *from;
892 	uid_t root_uid;
893 	gid_t root_gid;
894 	mode_t root_mode;
895 	int error, flags, len;
896 
897 	if (mp->mnt_flag & MNT_UPDATE)
898 		return (EOPNOTSUPP);
899 
900 	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
901 		return (EINVAL);
902 
903 	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
904 	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
905 	VOP_UNLOCK(mp->mnt_vnodecovered);
906 	if (error)
907 		return (error);
908 
909 	if (mp->mnt_cred->cr_ruid != 0 ||
910 	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
911 		root_gid = va.va_gid;
912 	if (mp->mnt_cred->cr_ruid != 0 ||
913 	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
914 		root_uid = va.va_uid;
915 	if (mp->mnt_cred->cr_ruid != 0 ||
916 	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
917 		root_mode = va.va_mode;
918 
919 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
920 	if (error != 0 || from[len - 1] != '\0')
921 		return (EINVAL);
922 
923 	/* Find the source tarball */
924 	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
925 	    from, root_uid, root_gid, root_mode);
926 	flags = FREAD;
927 	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
928 	    flags |= O_VERIFY;
929 	}
930 	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
931 	error = namei(&nd);
932 	if (error != 0)
933 		return (error);
934 	NDFREE_PNBUF(&nd);
935 	vp = nd.ni_vp;
936 	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
937 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
938 	/* vp is now held and locked */
939 
940 	/* Open the source tarball */
941 	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
942 	if (error != 0) {
943 		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
944 		    from, error);
945 		vput(vp);
946 		goto bad;
947 	}
948 	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
949 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
950 	if (vp->v_type != VREG) {
951 		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
952 		error = EOPNOTSUPP;
953 		goto bad_open_locked;
954 	}
955 	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
956 	if (error != 0) {
957 		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
958 		goto bad_open_locked;
959 	}
960 	if (flags & O_VERIFY) {
961 		mp->mnt_flag |= MNT_VERIFIED;
962 	}
963 
964 	/* Allocate the tarfs mount */
965 	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
966 	/* vp is now held but unlocked */
967 	if (error != 0) {
968 		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
969 		    from, error);
970 		goto bad_open_unlocked;
971 	}
972 	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
973 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
974 
975 	/* Unconditionally mount as read-only */
976 	MNT_ILOCK(mp);
977 	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
978 	MNT_IUNLOCK(mp);
979 
980 	vfs_getnewfsid(mp);
981 	vfs_mountedfrom(mp, "tarfs");
982 	TARFS_DPF(FS, "%s: success\n", __func__);
983 
984 	return (0);
985 
986 bad_open_locked:
987 	/* vp must be held and locked */
988 	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
989 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
990 	VOP_UNLOCK(vp);
991 bad_open_unlocked:
992 	/* vp must be held and unlocked */
993 	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
994 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
995 	(void)vn_close(vp, flags, td->td_ucred, td);
996 bad:
997 	/* vp must be released and unlocked */
998 	TARFS_DPF(FS, "%s: X: hold %u use %u lock 0x%x\n", __func__,
999 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1000 	return (error);
1001 }
1002 
1003 /*
1004  * Unmounts a tarfs filesystem.
1005  */
1006 static int
1007 tarfs_unmount(struct mount *mp, int mntflags)
1008 {
1009 	struct thread *td = curthread;
1010 	struct tarfs_mount *tmp;
1011 	struct vnode *vp;
1012 	int error;
1013 	int flags = 0;
1014 
1015 	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
1016 
1017 	/* Handle forced unmounts */
1018 	if (mntflags & MNT_FORCE)
1019 		flags |= FORCECLOSE;
1020 
1021 	/* Finalize all pending I/O */
1022 	error = vflush(mp, 0, flags, curthread);
1023 	if (error != 0)
1024 		return (error);
1025 	tmp = MP_TO_TARFS_MOUNT(mp);
1026 	vp = tmp->vp;
1027 
1028 	MPASS(vp != NULL);
1029 	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
1030 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1031 	vn_close(vp, FREAD, td->td_ucred, td);
1032 	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
1033 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1034 	tarfs_free_mount(tmp);
1035 
1036 	return (0);
1037 }
1038 
1039 /*
1040  * Gets the root of a tarfs filesystem.  Returns 0 on success or a
1041  * positive errno value on failure.
1042  */
1043 static int
1044 tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
1045 {
1046 	struct vnode *nvp;
1047 	int error;
1048 
1049 	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
1050 
1051 	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp);
1052 	if (error != 0)
1053 		return (error);
1054 
1055 	nvp->v_vflag |= VV_ROOT;
1056 	*vpp = nvp;
1057 	return (0);
1058 }
1059 
1060 /*
1061  * Gets statistics for a tarfs filesystem.  Returns 0.
1062  */
1063 static int
1064 tarfs_statfs(struct mount *mp, struct statfs *sbp)
1065 {
1066 	struct tarfs_mount *tmp;
1067 
1068 	tmp = MP_TO_TARFS_MOUNT(mp);
1069 
1070 	sbp->f_bsize = TARFS_BLOCKSIZE;
1071 	sbp->f_iosize = tmp->iosize;
1072 	sbp->f_blocks = tmp->nblocks;
1073 	sbp->f_bfree = 0;
1074 	sbp->f_bavail = 0;
1075 	sbp->f_files = tmp->nfiles;
1076 	sbp->f_ffree = 0;
1077 
1078 	return (0);
1079 }
1080 
1081 /*
1082  * Gets a vnode for the given inode.  On success, a pointer to the vnode
1083  * is stored in vpp.  Returns 0 on success or a positive errno value on
1084  * failure.
1085  */
1086 static int
1087 tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
1088 {
1089 	struct tarfs_mount *tmp;
1090 	struct tarfs_node *tnp;
1091 	struct thread *td;
1092 	struct vnode *vp;
1093 	int error;
1094 
1095 	TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
1096 	    lkflags);
1097 
1098 	td = curthread;
1099 	error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
1100 	if (error != 0)
1101 		return (error);
1102 
1103 	if (*vpp != NULL) {
1104 		TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
1105 		return (error);
1106 	}
1107 
1108 	TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
1109 
1110 	tmp = MP_TO_TARFS_MOUNT(mp);
1111 
1112 	if (ino == TARFS_ZIOINO) {
1113 		error = vget(tmp->znode, lkflags);
1114 		if (error != 0)
1115 			return (error);
1116 		*vpp = tmp->znode;
1117 		return (0);
1118 	}
1119 
1120 	/* XXX Should use hash instead? */
1121 	TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
1122 		if (tnp->ino == ino)
1123 			break;
1124 	}
1125 	TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
1126 	if (tnp == NULL)
1127 		return (ENOENT);
1128 
1129 	(void)getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
1130 	TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
1131 	vp->v_data = tnp;
1132 	vp->v_type = tnp->type;
1133 	tnp->vnode = vp;
1134 
1135 	lockmgr(vp->v_vnlock, lkflags, NULL);
1136 	error = insmntque(vp, mp);
1137 	if (error != 0)
1138 		goto bad;
1139 	TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
1140 	error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
1141 	if (error != 0 || *vpp != NULL)
1142 		return (error);
1143 
1144 	vn_set_state(vp, VSTATE_CONSTRUCTED);
1145 	*vpp = vp;
1146 	return (0);
1147 
1148 bad:
1149 	*vpp = NULLVP;
1150 	return (error);
1151 }
1152 
1153 static int
1154 tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1155 {
1156 	struct tarfs_node *tnp;
1157 	struct tarfs_fid *tfp;
1158 	struct vnode *nvp;
1159 	int error;
1160 
1161 	tfp = (struct tarfs_fid *)fhp;
1162 	MP_TO_TARFS_MOUNT(mp);
1163 	if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX)
1164 		return (ESTALE);
1165 
1166 	error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp);
1167 	if (error != 0) {
1168 		*vpp = NULLVP;
1169 		return (error);
1170 	}
1171 	tnp = VP_TO_TARFS_NODE(nvp);
1172 	if (tnp->mode == 0 ||
1173 	    tnp->gen != tfp->gen ||
1174 	    tnp->nlink <= 0) {
1175 		vput(nvp);
1176 		*vpp = NULLVP;
1177 		return (ESTALE);
1178 	}
1179 	*vpp = nvp;
1180 	return (0);
1181 }
1182 
1183 static struct vfsops tarfs_vfsops = {
1184 	.vfs_fhtovp =	tarfs_fhtovp,
1185 	.vfs_mount =	tarfs_mount,
1186 	.vfs_root =	tarfs_root,
1187 	.vfs_statfs =	tarfs_statfs,
1188 	.vfs_unmount =	tarfs_unmount,
1189 	.vfs_vget =	tarfs_vget,
1190 };
1191 VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
1192 MODULE_VERSION(tarfs, 1);
1193 MODULE_DEPEND(tarfs, xz, 1, 1, 1);
1194