1 /*	$NetBSD: union_vnops.c,v 1.63 2015/04/20 23:03:08 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 1992, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jan-Simon Pendry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35  */
36 
37 /*
38  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39  *
40  * This code is derived from software contributed to Berkeley by
41  * Jan-Simon Pendry.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the University of
54  *	California, Berkeley and its contributors.
55  * 4. Neither the name of the University nor the names of its contributors
56  *    may be used to endorse or promote products derived from this software
57  *    without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  * SUCH DAMAGE.
70  *
71  *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72  */
73 
74 #include <sys/cdefs.h>
75 __KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.63 2015/04/20 23:03:08 riastradh Exp $");
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/proc.h>
80 #include <sys/file.h>
81 #include <sys/time.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/namei.h>
86 #include <sys/malloc.h>
87 #include <sys/buf.h>
88 #include <sys/queue.h>
89 #include <sys/lock.h>
90 #include <sys/kauth.h>
91 
92 #include <fs/union/union.h>
93 #include <miscfs/genfs/genfs.h>
94 #include <miscfs/specfs/specdev.h>
95 
/*
 * Vnode operation entry points for the union filesystem.  Every one of
 * them is installed in union_vnodeop_entries[] and receives its
 * arguments as an untyped pointer to the operation's argument structure.
 */
int	union_lookup(void *v);
int	union_create(void *v);
int	union_whiteout(void *v);
int	union_mknod(void *v);
int	union_open(void *v);
int	union_close(void *v);
int	union_access(void *v);
int	union_getattr(void *v);
int	union_setattr(void *v);
int	union_read(void *v);
int	union_write(void *v);
int	union_ioctl(void *v);
int	union_poll(void *v);
int	union_revoke(void *v);
int	union_mmap(void *v);
int	union_fsync(void *v);
int	union_seek(void *v);
int	union_remove(void *v);
int	union_link(void *v);
int	union_rename(void *v);
int	union_mkdir(void *v);
int	union_rmdir(void *v);
int	union_symlink(void *v);
int	union_readdir(void *v);
int	union_readlink(void *v);
int	union_abortop(void *v);
int	union_inactive(void *v);
int	union_reclaim(void *v);
int	union_lock(void *v);
int	union_unlock(void *v);
int	union_bmap(void *v);
int	union_print(void *v);
int	union_islocked(void *v);
int	union_pathconf(void *v);
int	union_advlock(void *v);
int	union_strategy(void *v);
int	union_bwrite(void *v);
int	union_getpages(void *v);
int	union_putpages(void *v);
int	union_kqfilter(void *v);

/* Single-layer lookup helper used by union_lookup(). */
static int	union_lookup1(struct vnode *udvp, struct vnode **dvpp,
		    struct vnode **vpp, struct componentname *cnp);
139 
140 
141 /*
142  * Global vfs data structures
143  */
144 int (**union_vnodeop_p)(void *);
145 const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146 	{ &vop_default_desc, vn_default_error },
147 	{ &vop_lookup_desc, union_lookup },		/* lookup */
148 	{ &vop_create_desc, union_create },		/* create */
149 	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150 	{ &vop_mknod_desc, union_mknod },		/* mknod */
151 	{ &vop_open_desc, union_open },			/* open */
152 	{ &vop_close_desc, union_close },		/* close */
153 	{ &vop_access_desc, union_access },		/* access */
154 	{ &vop_getattr_desc, union_getattr },		/* getattr */
155 	{ &vop_setattr_desc, union_setattr },		/* setattr */
156 	{ &vop_read_desc, union_read },			/* read */
157 	{ &vop_write_desc, union_write },		/* write */
158 	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
159 	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
160 	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
161 	{ &vop_poll_desc, union_poll },			/* select */
162 	{ &vop_revoke_desc, union_revoke },		/* revoke */
163 	{ &vop_mmap_desc, union_mmap },			/* mmap */
164 	{ &vop_fsync_desc, union_fsync },		/* fsync */
165 	{ &vop_seek_desc, union_seek },			/* seek */
166 	{ &vop_remove_desc, union_remove },		/* remove */
167 	{ &vop_link_desc, union_link },			/* link */
168 	{ &vop_rename_desc, union_rename },		/* rename */
169 	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
170 	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
171 	{ &vop_symlink_desc, union_symlink },		/* symlink */
172 	{ &vop_readdir_desc, union_readdir },		/* readdir */
173 	{ &vop_readlink_desc, union_readlink },		/* readlink */
174 	{ &vop_abortop_desc, union_abortop },		/* abortop */
175 	{ &vop_inactive_desc, union_inactive },		/* inactive */
176 	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
177 	{ &vop_lock_desc, union_lock },			/* lock */
178 	{ &vop_unlock_desc, union_unlock },		/* unlock */
179 	{ &vop_bmap_desc, union_bmap },			/* bmap */
180 	{ &vop_strategy_desc, union_strategy },		/* strategy */
181 	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
182 	{ &vop_print_desc, union_print },		/* print */
183 	{ &vop_islocked_desc, union_islocked },		/* islocked */
184 	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
185 	{ &vop_advlock_desc, union_advlock },		/* advlock */
186 	{ &vop_getpages_desc, union_getpages },		/* getpages */
187 	{ &vop_putpages_desc, union_putpages },		/* putpages */
188 	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
189 	{ NULL, NULL }
190 };
191 const struct vnodeopv_desc union_vnodeop_opv_desc =
192 	{ &union_vnodeop_p, union_vnodeop_entries };
193 
/*
 * Non-zero when the vnode's type is one of VBLK, VCHR, VSOCK or VFIFO.
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define NODE_IS_SPECIAL(vp)			\
	((vp)->v_type == VBLK ||		\
	 (vp)->v_type == VCHR ||		\
	 (vp)->v_type == VSOCK ||		\
	 (vp)->v_type == VFIFO)
197 
198 static int
union_lookup1(struct vnode * udvp,struct vnode ** dvpp,struct vnode ** vpp,struct componentname * cnp)199 union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
200 	struct componentname *cnp)
201 {
202 	int error;
203 	struct vnode *tdvp;
204 	struct vnode *dvp;
205 	struct mount *mp;
206 
207 	dvp = *dvpp;
208 
209 	/*
210 	 * If stepping up the directory tree, check for going
211 	 * back across the mount point, in which case do what
212 	 * lookup would do by stepping back down the mount
213 	 * hierarchy.
214 	 */
215 	if (cnp->cn_flags & ISDOTDOT) {
216 		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
217 			/*
218 			 * Don't do the NOCROSSMOUNT check
219 			 * at this level.  By definition,
220 			 * union fs deals with namespaces, not
221 			 * filesystems.
222 			 */
223 			tdvp = dvp;
224 			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
225 			VOP_UNLOCK(tdvp);
226 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
227 		}
228 	}
229 
230         error = VOP_LOOKUP(dvp, &tdvp, cnp);
231 	if (error)
232 		return (error);
233 	if (dvp != tdvp) {
234 		if (cnp->cn_flags & ISDOTDOT)
235 			VOP_UNLOCK(dvp);
236 		error = vn_lock(tdvp, LK_EXCLUSIVE);
237 		if (cnp->cn_flags & ISDOTDOT)
238 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
239 		if (error) {
240 			vrele(tdvp);
241 			return error;
242 		}
243 		dvp = tdvp;
244 	}
245 
246 	/*
247 	 * Lastly check if the current node is a mount point in
248 	 * which case walk up the mount hierarchy making sure not to
249 	 * bump into the root of the mount tree (ie. dvp != udvp).
250 	 */
251 	while (dvp != udvp && (dvp->v_type == VDIR) &&
252 	       (mp = dvp->v_mountedhere)) {
253 		if (vfs_busy(mp, NULL))
254 			continue;
255 		vput(dvp);
256 		error = VFS_ROOT(mp, &tdvp);
257 		vfs_unbusy(mp, false, NULL);
258 		if (error) {
259 			return (error);
260 		}
261 		dvp = tdvp;
262 	}
263 
264 	*vpp = dvp;
265 	return (0);
266 }
267 
268 int
union_lookup(void * v)269 union_lookup(void *v)
270 {
271 	struct vop_lookup_v2_args /* {
272 		struct vnodeop_desc *a_desc;
273 		struct vnode *a_dvp;
274 		struct vnode **a_vpp;
275 		struct componentname *a_cnp;
276 	} */ *ap = v;
277 	int error;
278 	int uerror, lerror;
279 	struct vnode *uppervp, *lowervp;
280 	struct vnode *upperdvp, *lowerdvp;
281 	struct vnode *dvp = ap->a_dvp;
282 	struct union_node *dun = VTOUNION(dvp);
283 	struct componentname *cnp = ap->a_cnp;
284 	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
285 	kauth_cred_t saved_cred = NULL;
286 	int iswhiteout;
287 	struct vattr va;
288 
289 #ifdef notyet
290 	if (cnp->cn_namelen == 3 &&
291 			cnp->cn_nameptr[2] == '.' &&
292 			cnp->cn_nameptr[1] == '.' &&
293 			cnp->cn_nameptr[0] == '.') {
294 		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
295 		if (dvp == NULLVP)
296 			return (ENOENT);
297 		vref(dvp);
298 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
299 		return (0);
300 	}
301 #endif
302 
303 	if ((cnp->cn_flags & ISLASTCN) &&
304 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
305 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
306 		return (EROFS);
307 
308 start:
309 	upperdvp = dun->un_uppervp;
310 	lowerdvp = dun->un_lowervp;
311 	uppervp = NULLVP;
312 	lowervp = NULLVP;
313 	iswhiteout = 0;
314 
315 	/*
316 	 * do the lookup in the upper level.
317 	 * if that level comsumes additional pathnames,
318 	 * then assume that something special is going
319 	 * on and just return that vnode.
320 	 */
321 	if (upperdvp != NULLVP) {
322 		uerror = union_lookup1(um->um_uppervp, &upperdvp,
323 					&uppervp, cnp);
324 		if (cnp->cn_consume != 0) {
325 			if (uppervp != upperdvp)
326 				VOP_UNLOCK(uppervp);
327 			*ap->a_vpp = uppervp;
328 			return (uerror);
329 		}
330 		if (uerror == ENOENT || uerror == EJUSTRETURN) {
331 			if (cnp->cn_flags & ISWHITEOUT) {
332 				iswhiteout = 1;
333 			} else if (lowerdvp != NULLVP) {
334 				lerror = VOP_GETATTR(upperdvp, &va,
335 					cnp->cn_cred);
336 				if (lerror == 0 && (va.va_flags & OPAQUE))
337 					iswhiteout = 1;
338 			}
339 		}
340 	} else {
341 		uerror = ENOENT;
342 	}
343 
344 	/*
345 	 * in a similar way to the upper layer, do the lookup
346 	 * in the lower layer.   this time, if there is some
347 	 * component magic going on, then vput whatever we got
348 	 * back from the upper layer and return the lower vnode
349 	 * instead.
350 	 */
351 	if (lowerdvp != NULLVP && !iswhiteout) {
352 		int nameiop;
353 
354 		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
355 
356 		/*
357 		 * Only do a LOOKUP on the bottom node, since
358 		 * we won't be making changes to it anyway.
359 		 */
360 		nameiop = cnp->cn_nameiop;
361 		cnp->cn_nameiop = LOOKUP;
362 		if (um->um_op == UNMNT_BELOW) {
363 			saved_cred = cnp->cn_cred;
364 			cnp->cn_cred = um->um_cred;
365 		}
366 
367 		/*
368 		 * we shouldn't have to worry about locking interactions
369 		 * between the lower layer and our union layer (w.r.t.
370 		 * `..' processing) because we don't futz with lowervp
371 		 * locks in the union-node instantiation code path.
372 		 */
373 		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
374 				&lowervp, cnp);
375 		if (um->um_op == UNMNT_BELOW)
376 			cnp->cn_cred = saved_cred;
377 		cnp->cn_nameiop = nameiop;
378 
379 		if (lowervp != lowerdvp)
380 			VOP_UNLOCK(lowerdvp);
381 
382 		if (cnp->cn_consume != 0) {
383 			if (uppervp != NULLVP) {
384 				if (uppervp == upperdvp)
385 					vrele(uppervp);
386 				else
387 					vput(uppervp);
388 				uppervp = NULLVP;
389 			}
390 			*ap->a_vpp = lowervp;
391 			return (lerror);
392 		}
393 	} else {
394 		lerror = ENOENT;
395 		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
396 			lowervp = LOWERVP(dun->un_pvp);
397 			if (lowervp != NULLVP) {
398 				vref(lowervp);
399 				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
400 				lerror = 0;
401 			}
402 		}
403 	}
404 
405 	/*
406 	 * EJUSTRETURN is used by underlying filesystems to indicate that
407 	 * a directory modification op was started successfully.
408 	 * This will only happen in the upper layer, since
409 	 * the lower layer only does LOOKUPs.
410 	 * If this union is mounted read-only, bounce it now.
411 	 */
412 
413 	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
414 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
415 	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
416 		uerror = EROFS;
417 
418 	/*
419 	 * at this point, we have uerror and lerror indicating
420 	 * possible errors with the lookups in the upper and lower
421 	 * layers.  additionally, uppervp and lowervp are (locked)
422 	 * references to existing vnodes in the upper and lower layers.
423 	 *
424 	 * there are now three cases to consider.
425 	 * 1. if both layers returned an error, then return whatever
426 	 *    error the upper layer generated.
427 	 *
428 	 * 2. if the top layer failed and the bottom layer succeeded
429 	 *    then two subcases occur.
430 	 *    a.  the bottom vnode is not a directory, in which
431 	 *	  case just return a new union vnode referencing
432 	 *	  an empty top layer and the existing bottom layer.
433 	 *    b.  the bottom vnode is a directory, in which case
434 	 *	  create a new directory in the top-level and
435 	 *	  continue as in case 3.
436 	 *
437 	 * 3. if the top layer succeeded then return a new union
438 	 *    vnode referencing whatever the new top layer and
439 	 *    whatever the bottom layer returned.
440 	 */
441 
442 	*ap->a_vpp = NULLVP;
443 
444 
445 	/* case 1. */
446 	if ((uerror != 0) && (lerror != 0)) {
447 		return (uerror);
448 	}
449 
450 	/* case 2. */
451 	if (uerror != 0 /* && (lerror == 0) */ ) {
452 		if (lowervp->v_type == VDIR) { /* case 2b. */
453 			/*
454 			 * We may be racing another process to make the
455 			 * upper-level shadow directory.  Be careful with
456 			 * locks/etc!
457 			 * If we have to create a shadow directory and want
458 			 * to commit the node we have to restart the lookup
459 			 * to get the componentname right.
460 			 */
461 			if (upperdvp) {
462 				VOP_UNLOCK(upperdvp);
463 				uerror = union_mkshadow(um, upperdvp, cnp,
464 				    &uppervp);
465 				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
466 				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
467 					vrele(uppervp);
468 					if (lowervp != NULLVP)
469 						vput(lowervp);
470 					goto start;
471 				}
472 			}
473 			if (uerror) {
474 				if (lowervp != NULLVP) {
475 					vput(lowervp);
476 					lowervp = NULLVP;
477 				}
478 				return (uerror);
479 			}
480 		}
481 	} else { /* uerror == 0 */
482 		if (uppervp != upperdvp)
483 			VOP_UNLOCK(uppervp);
484 	}
485 
486 	if (lowervp != NULLVP)
487 		VOP_UNLOCK(lowervp);
488 
489 	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
490 			      uppervp, lowervp, 1);
491 
492 	if (error) {
493 		if (uppervp != NULLVP)
494 			vrele(uppervp);
495 		if (lowervp != NULLVP)
496 			vrele(lowervp);
497 		return error;
498 	}
499 
500 	return 0;
501 }
502 
503 int
union_create(void * v)504 union_create(void *v)
505 {
506 	struct vop_create_v3_args /* {
507 		struct vnode *a_dvp;
508 		struct vnode **a_vpp;
509 		struct componentname *a_cnp;
510 		struct vattr *a_vap;
511 	} */ *ap = v;
512 	struct union_node *un = VTOUNION(ap->a_dvp);
513 	struct vnode *dvp = un->un_uppervp;
514 	struct componentname *cnp = ap->a_cnp;
515 
516 	if (dvp != NULLVP) {
517 		int error;
518 		struct vnode *vp;
519 		struct mount *mp;
520 
521 		mp = ap->a_dvp->v_mount;
522 
523 		vp = NULL;
524 		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
525 		if (error)
526 			return (error);
527 
528 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
529 				NULLVP, 1);
530 		if (error)
531 			vrele(vp);
532 		return (error);
533 	}
534 
535 	return (EROFS);
536 }
537 
538 int
union_whiteout(void * v)539 union_whiteout(void *v)
540 {
541 	struct vop_whiteout_args /* {
542 		struct vnode *a_dvp;
543 		struct componentname *a_cnp;
544 		int a_flags;
545 	} */ *ap = v;
546 	struct union_node *un = VTOUNION(ap->a_dvp);
547 	struct componentname *cnp = ap->a_cnp;
548 
549 	if (un->un_uppervp == NULLVP)
550 		return (EOPNOTSUPP);
551 
552 	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
553 }
554 
555 int
union_mknod(void * v)556 union_mknod(void *v)
557 {
558 	struct vop_mknod_v3_args /* {
559 		struct vnode *a_dvp;
560 		struct vnode **a_vpp;
561 		struct componentname *a_cnp;
562 		struct vattr *a_vap;
563 	} */ *ap = v;
564 	struct union_node *un = VTOUNION(ap->a_dvp);
565 	struct vnode *dvp = un->un_uppervp;
566 	struct componentname *cnp = ap->a_cnp;
567 
568 	if (dvp != NULLVP) {
569 		int error;
570 		struct vnode *vp;
571 		struct mount *mp;
572 
573 		mp = ap->a_dvp->v_mount;
574 		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
575 		if (error)
576 			return (error);
577 
578 		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
579 				      cnp, vp, NULLVP, 1);
580 		if (error)
581 			vrele(vp);
582 		return (error);
583 	}
584 
585 	return (EROFS);
586 }
587 
588 int
union_open(void * v)589 union_open(void *v)
590 {
591 	struct vop_open_args /* {
592 		struct vnodeop_desc *a_desc;
593 		struct vnode *a_vp;
594 		int a_mode;
595 		kauth_cred_t a_cred;
596 	} */ *ap = v;
597 	struct union_node *un = VTOUNION(ap->a_vp);
598 	struct vnode *tvp;
599 	int mode = ap->a_mode;
600 	kauth_cred_t cred = ap->a_cred;
601 	struct lwp *l = curlwp;
602 	int error;
603 
604 	/*
605 	 * If there is an existing upper vp then simply open that.
606 	 */
607 	tvp = un->un_uppervp;
608 	if (tvp == NULLVP) {
609 		/*
610 		 * If the lower vnode is being opened for writing, then
611 		 * copy the file contents to the upper vnode and open that,
612 		 * otherwise can simply open the lower vnode.
613 		 */
614 		tvp = un->un_lowervp;
615 		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
616 			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
617 			if (error == 0)
618 				error = VOP_OPEN(un->un_uppervp, mode, cred);
619 			return (error);
620 		}
621 
622 		/*
623 		 * Just open the lower vnode, but check for nodev mount flag
624 		 */
625 		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
626 		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
627 			return ENXIO;
628 		un->un_openl++;
629 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
630 		error = VOP_OPEN(tvp, mode, cred);
631 		VOP_UNLOCK(tvp);
632 
633 		return (error);
634 	}
635 	/*
636 	 * Just open the upper vnode, checking for nodev mount flag first
637 	 */
638 	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
639 	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
640 		return ENXIO;
641 
642 	error = VOP_OPEN(tvp, mode, cred);
643 
644 	return (error);
645 }
646 
647 int
union_close(void * v)648 union_close(void *v)
649 {
650 	struct vop_close_args /* {
651 		struct vnode *a_vp;
652 		int  a_fflag;
653 		kauth_cred_t a_cred;
654 	} */ *ap = v;
655 	struct union_node *un = VTOUNION(ap->a_vp);
656 	struct vnode *vp;
657 	int error;
658 	bool do_lock;
659 
660 	vp = un->un_uppervp;
661 	if (vp != NULLVP) {
662 		do_lock = false;
663 	} else {
664 		KASSERT(un->un_openl > 0);
665 		--un->un_openl;
666 		vp = un->un_lowervp;
667 		do_lock = true;
668 	}
669 
670 	KASSERT(vp != NULLVP);
671 	ap->a_vp = vp;
672 	if (do_lock)
673 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
674 	error = VCALL(vp, VOFFSET(vop_close), ap);
675 	if (do_lock)
676 		VOP_UNLOCK(vp);
677 
678 	return error;
679 }
680 
681 /*
682  * Check access permission on the union vnode.
683  * The access check being enforced is to check
684  * against both the underlying vnode, and any
685  * copied vnode.  This ensures that no additional
686  * file permissions are given away simply because
687  * the user caused an implicit file copy.
688  */
689 int
union_access(void * v)690 union_access(void *v)
691 {
692 	struct vop_access_args /* {
693 		struct vnodeop_desc *a_desc;
694 		struct vnode *a_vp;
695 		int a_mode;
696 		kauth_cred_t a_cred;
697 	} */ *ap = v;
698 	struct vnode *vp = ap->a_vp;
699 	struct union_node *un = VTOUNION(vp);
700 	int error = EACCES;
701 	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
702 
703 	/*
704 	 * Disallow write attempts on read-only file systems;
705 	 * unless the file is a socket, fifo, or a block or
706 	 * character device resident on the file system.
707 	 */
708 	if (ap->a_mode & VWRITE) {
709 		switch (vp->v_type) {
710 		case VDIR:
711 		case VLNK:
712 		case VREG:
713 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
714 				return (EROFS);
715 			break;
716 		case VBAD:
717 		case VBLK:
718 		case VCHR:
719 		case VSOCK:
720 		case VFIFO:
721 		case VNON:
722 		default:
723 			break;
724 		}
725 	}
726 
727 
728 	if ((vp = un->un_uppervp) != NULLVP) {
729 		ap->a_vp = vp;
730 		return (VCALL(vp, VOFFSET(vop_access), ap));
731 	}
732 
733 	if ((vp = un->un_lowervp) != NULLVP) {
734 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
735 		ap->a_vp = vp;
736 		error = VCALL(vp, VOFFSET(vop_access), ap);
737 		if (error == 0) {
738 			if (um->um_op == UNMNT_BELOW) {
739 				ap->a_cred = um->um_cred;
740 				error = VCALL(vp, VOFFSET(vop_access), ap);
741 			}
742 		}
743 		VOP_UNLOCK(vp);
744 		if (error)
745 			return (error);
746 	}
747 
748 	return (error);
749 }
750 
751 /*
752  * We handle getattr only to change the fsid and
753  * track object sizes
754  */
755 int
union_getattr(void * v)756 union_getattr(void *v)
757 {
758 	struct vop_getattr_args /* {
759 		struct vnode *a_vp;
760 		struct vattr *a_vap;
761 		kauth_cred_t a_cred;
762 	} */ *ap = v;
763 	int error;
764 	struct union_node *un = VTOUNION(ap->a_vp);
765 	struct vnode *vp = un->un_uppervp;
766 	struct vattr *vap;
767 	struct vattr va;
768 
769 
770 	/*
771 	 * Some programs walk the filesystem hierarchy by counting
772 	 * links to directories to avoid stat'ing all the time.
773 	 * This means the link count on directories needs to be "correct".
774 	 * The only way to do that is to call getattr on both layers
775 	 * and fix up the link count.  The link count will not necessarily
776 	 * be accurate but will be large enough to defeat the tree walkers.
777 	 *
778 	 * To make life more interesting, some filesystems don't keep
779 	 * track of link counts in the expected way, and return a
780 	 * link count of `1' for those directories; if either of the
781 	 * component directories returns a link count of `1', we return a 1.
782 	 */
783 
784 	vap = ap->a_vap;
785 
786 	vp = un->un_uppervp;
787 	if (vp != NULLVP) {
788 		error = VOP_GETATTR(vp, vap, ap->a_cred);
789 		if (error)
790 			return (error);
791 		mutex_enter(&un->un_lock);
792 		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
793 	}
794 
795 	if (vp == NULLVP) {
796 		vp = un->un_lowervp;
797 	} else if (vp->v_type == VDIR) {
798 		vp = un->un_lowervp;
799 		if (vp != NULLVP)
800 			vap = &va;
801 	} else {
802 		vp = NULLVP;
803 	}
804 
805 	if (vp != NULLVP) {
806 		if (vp == un->un_lowervp)
807 			vn_lock(vp, LK_SHARED | LK_RETRY);
808 		error = VOP_GETATTR(vp, vap, ap->a_cred);
809 		if (vp == un->un_lowervp)
810 			VOP_UNLOCK(vp);
811 		if (error)
812 			return (error);
813 		mutex_enter(&un->un_lock);
814 		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
815 	}
816 
817 	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
818 		/*
819 		 * Link count manipulation:
820 		 *	- If both return "2", return 2 (no subdirs)
821 		 *	- If one or the other return "1", return "1" (ENOCLUE)
822 		 */
823 		if ((ap->a_vap->va_nlink == 2) &&
824 		    (vap->va_nlink == 2))
825 			;
826 		else if (ap->a_vap->va_nlink != 1) {
827 			if (vap->va_nlink == 1)
828 				ap->a_vap->va_nlink = 1;
829 			else
830 				ap->a_vap->va_nlink += vap->va_nlink;
831 		}
832 	}
833 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
834 	return (0);
835 }
836 
837 int
union_setattr(void * v)838 union_setattr(void *v)
839 {
840 	struct vop_setattr_args /* {
841 		struct vnode *a_vp;
842 		struct vattr *a_vap;
843 		kauth_cred_t a_cred;
844 	} */ *ap = v;
845 	struct vattr *vap = ap->a_vap;
846 	struct vnode *vp = ap->a_vp;
847 	struct union_node *un = VTOUNION(vp);
848 	bool size_only;		/* All but va_size are VNOVAL. */
849 	int error;
850 
851 	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
852 	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
853 	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
854 
855 	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
856 		return (EROFS);
857 	if (vap->va_size != VNOVAL) {
858  		switch (vp->v_type) {
859  		case VDIR:
860  			return (EISDIR);
861  		case VCHR:
862  		case VBLK:
863  		case VSOCK:
864  		case VFIFO:
865 			break;
866 		case VREG:
867 		case VLNK:
868  		default:
869 			/*
870 			 * Disallow write attempts if the filesystem is
871 			 * mounted read-only.
872 			 */
873 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
874 				return (EROFS);
875 		}
876 	}
877 
878 	/*
879 	 * Handle case of truncating lower object to zero size,
880 	 * by creating a zero length upper object.  This is to
881 	 * handle the case of open with O_TRUNC and O_CREAT.
882 	 */
883 	if ((un->un_uppervp == NULLVP) &&
884 	    /* assert(un->un_lowervp != NULLVP) */
885 	    (un->un_lowervp->v_type == VREG)) {
886 		error = union_copyup(un, (vap->va_size != 0),
887 						ap->a_cred, curlwp);
888 		if (error)
889 			return (error);
890 	}
891 
892 	/*
893 	 * Try to set attributes in upper layer, ignore size change to zero
894 	 * for devices to handle O_TRUNC and return read-only filesystem error
895 	 * otherwise.
896 	 */
897 	if (un->un_uppervp != NULLVP) {
898 		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
899 		if ((error == 0) && (vap->va_size != VNOVAL)) {
900 			mutex_enter(&un->un_lock);
901 			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
902 		}
903 	} else {
904 		KASSERT(un->un_lowervp != NULLVP);
905 		if (NODE_IS_SPECIAL(un->un_lowervp)) {
906 			if (size_only &&
907 			    (vap->va_size == 0 || vap->va_size == VNOVAL))
908 				error = 0;
909 			else
910 				error = EROFS;
911 		} else {
912 			error = EROFS;
913 		}
914 	}
915 
916 	return (error);
917 }
918 
919 int
union_read(void * v)920 union_read(void *v)
921 {
922 	struct vop_read_args /* {
923 		struct vnode *a_vp;
924 		struct uio *a_uio;
925 		int  a_ioflag;
926 		kauth_cred_t a_cred;
927 	} */ *ap = v;
928 	int error;
929 	struct vnode *vp = OTHERVP(ap->a_vp);
930 	int dolock = (vp == LOWERVP(ap->a_vp));
931 
932 	if (dolock)
933 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
934 	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
935 	if (dolock)
936 		VOP_UNLOCK(vp);
937 
938 	/*
939 	 * XXX
940 	 * perhaps the size of the underlying object has changed under
941 	 * our feet.  take advantage of the offset information present
942 	 * in the uio structure.
943 	 */
944 	if (error == 0) {
945 		struct union_node *un = VTOUNION(ap->a_vp);
946 		off_t cur = ap->a_uio->uio_offset;
947 		off_t usz = VNOVAL, lsz = VNOVAL;
948 
949 		mutex_enter(&un->un_lock);
950 		if (vp == un->un_uppervp) {
951 			if (cur > un->un_uppersz)
952 				usz = cur;
953 		} else {
954 			if (cur > un->un_lowersz)
955 				lsz = cur;
956 		}
957 
958 		if (usz != VNOVAL || lsz != VNOVAL)
959 			union_newsize(ap->a_vp, usz, lsz);
960 		else
961 			mutex_exit(&un->un_lock);
962 	}
963 
964 	return (error);
965 }
966 
967 int
union_write(void * v)968 union_write(void *v)
969 {
970 	struct vop_read_args /* {
971 		struct vnode *a_vp;
972 		struct uio *a_uio;
973 		int  a_ioflag;
974 		kauth_cred_t a_cred;
975 	} */ *ap = v;
976 	int error;
977 	struct vnode *vp;
978 	struct union_node *un = VTOUNION(ap->a_vp);
979 
980 	vp = UPPERVP(ap->a_vp);
981 	if (vp == NULLVP) {
982 		vp = LOWERVP(ap->a_vp);
983 		if (NODE_IS_SPECIAL(vp)) {
984 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
985 			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
986 			    ap->a_cred);
987 			VOP_UNLOCK(vp);
988 			return error;
989 		}
990 		panic("union: missing upper layer in write");
991 	}
992 
993 	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
994 
995 	/*
996 	 * the size of the underlying object may be changed by the
997 	 * write.
998 	 */
999 	if (error == 0) {
1000 		off_t cur = ap->a_uio->uio_offset;
1001 
1002 		mutex_enter(&un->un_lock);
1003 		if (cur > un->un_uppersz)
1004 			union_newsize(ap->a_vp, cur, VNOVAL);
1005 		else
1006 			mutex_exit(&un->un_lock);
1007 	}
1008 
1009 	return (error);
1010 }
1011 
1012 int
union_ioctl(void * v)1013 union_ioctl(void *v)
1014 {
1015 	struct vop_ioctl_args /* {
1016 		struct vnode *a_vp;
1017 		int  a_command;
1018 		void *a_data;
1019 		int  a_fflag;
1020 		kauth_cred_t a_cred;
1021 	} */ *ap = v;
1022 	struct vnode *ovp = OTHERVP(ap->a_vp);
1023 
1024 	ap->a_vp = ovp;
1025 	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1026 }
1027 
1028 int
union_poll(void * v)1029 union_poll(void *v)
1030 {
1031 	struct vop_poll_args /* {
1032 		struct vnode *a_vp;
1033 		int a_events;
1034 	} */ *ap = v;
1035 	struct vnode *ovp = OTHERVP(ap->a_vp);
1036 
1037 	ap->a_vp = ovp;
1038 	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1039 }
1040 
1041 int
union_revoke(void * v)1042 union_revoke(void *v)
1043 {
1044 	struct vop_revoke_args /* {
1045 		struct vnode *a_vp;
1046 		int a_flags;
1047 		struct proc *a_p;
1048 	} */ *ap = v;
1049 	struct vnode *vp = ap->a_vp;
1050 
1051 	if (UPPERVP(vp))
1052 		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1053 	if (LOWERVP(vp))
1054 		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1055 	vgone(vp);	/* XXXAD?? */
1056 	return (0);
1057 }
1058 
1059 int
union_mmap(void * v)1060 union_mmap(void *v)
1061 {
1062 	struct vop_mmap_args /* {
1063 		struct vnode *a_vp;
1064 		vm_prot_t a_prot;
1065 		kauth_cred_t a_cred;
1066 	} */ *ap = v;
1067 	struct vnode *ovp = OTHERVP(ap->a_vp);
1068 
1069 	ap->a_vp = ovp;
1070 	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1071 }
1072 
1073 int
union_fsync(void * v)1074 union_fsync(void *v)
1075 {
1076 	struct vop_fsync_args /* {
1077 		struct vnode *a_vp;
1078 		kauth_cred_t a_cred;
1079 		int  a_flags;
1080 		off_t offhi;
1081 		off_t offlo;
1082 	} */ *ap = v;
1083 	int error = 0;
1084 	struct vnode *targetvp;
1085 
1086 	/*
1087 	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1088 	 * bother syncing the underlying vnodes, since (a) they'll be
1089 	 * fsync'ed when reclaimed and (b) we could deadlock if
1090 	 * they're locked; otherwise, pass it through to the
1091 	 * underlying layer.
1092 	 */
1093 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1094 		error = spec_fsync(v);
1095 		if (error)
1096 			return error;
1097 	}
1098 
1099 	if (ap->a_flags & FSYNC_RECLAIM)
1100 		return 0;
1101 
1102 	targetvp = OTHERVP(ap->a_vp);
1103 	if (targetvp != NULLVP) {
1104 		int dolock = (targetvp == LOWERVP(ap->a_vp));
1105 
1106 		if (dolock)
1107 			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1108 		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1109 			    ap->a_offlo, ap->a_offhi);
1110 		if (dolock)
1111 			VOP_UNLOCK(targetvp);
1112 	}
1113 
1114 	return (error);
1115 }
1116 
1117 int
union_seek(void * v)1118 union_seek(void *v)
1119 {
1120 	struct vop_seek_args /* {
1121 		struct vnode *a_vp;
1122 		off_t  a_oldoff;
1123 		off_t  a_newoff;
1124 		kauth_cred_t a_cred;
1125 	} */ *ap = v;
1126 	struct vnode *ovp = OTHERVP(ap->a_vp);
1127 
1128 	ap->a_vp = ovp;
1129 	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1130 }
1131 
1132 int
union_remove(void * v)1133 union_remove(void *v)
1134 {
1135 	struct vop_remove_args /* {
1136 		struct vnode *a_dvp;
1137 		struct vnode *a_vp;
1138 		struct componentname *a_cnp;
1139 	} */ *ap = v;
1140 	int error;
1141 	struct union_node *dun = VTOUNION(ap->a_dvp);
1142 	struct union_node *un = VTOUNION(ap->a_vp);
1143 	struct componentname *cnp = ap->a_cnp;
1144 
1145 	if (dun->un_uppervp == NULLVP)
1146 		panic("union remove: null upper vnode");
1147 
1148 	if (un->un_uppervp != NULLVP) {
1149 		struct vnode *dvp = dun->un_uppervp;
1150 		struct vnode *vp = un->un_uppervp;
1151 
1152 		/*
1153 		 * Account for VOP_REMOVE to vrele dvp and vp.
1154 		 * Note: VOP_REMOVE will unlock dvp and vp.
1155 		 */
1156 		vref(dvp);
1157 		vref(vp);
1158 		if (union_dowhiteout(un, cnp->cn_cred))
1159 			cnp->cn_flags |= DOWHITEOUT;
1160 		error = VOP_REMOVE(dvp, vp, cnp);
1161 		if (!error)
1162 			union_removed_upper(un);
1163 		vrele(ap->a_dvp);
1164 		vrele(ap->a_vp);
1165 	} else {
1166 		error = union_mkwhiteout(
1167 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1168 			dun->un_uppervp, ap->a_cnp, un);
1169 		vput(ap->a_dvp);
1170 		vput(ap->a_vp);
1171 	}
1172 
1173 	return (error);
1174 }
1175 
1176 int
union_link(void * v)1177 union_link(void *v)
1178 {
1179 	struct vop_link_v2_args /* {
1180 		struct vnode *a_dvp;
1181 		struct vnode *a_vp;
1182 		struct componentname *a_cnp;
1183 	} */ *ap = v;
1184 	int error = 0;
1185 	struct componentname *cnp = ap->a_cnp;
1186 	struct union_node *dun;
1187 	struct vnode *vp;
1188 	struct vnode *dvp;
1189 
1190 	dun = VTOUNION(ap->a_dvp);
1191 
1192 	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1193 
1194 	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1195 		vp = ap->a_vp;
1196 	} else {
1197 		struct union_node *un = VTOUNION(ap->a_vp);
1198 		if (un->un_uppervp == NULLVP) {
1199 			const bool droplock = (dun->un_uppervp == un->un_dirvp);
1200 
1201 			/*
1202 			 * Needs to be copied before we can link it.
1203 			 */
1204 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1205 			if (droplock)
1206 				VOP_UNLOCK(dun->un_uppervp);
1207 			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1208 			if (droplock) {
1209 				vn_lock(dun->un_uppervp,
1210 				    LK_EXCLUSIVE | LK_RETRY);
1211 				/*
1212 				 * During copyup, we dropped the lock on the
1213 				 * dir and invalidated any saved namei lookup
1214 				 * state for the directory we'll be entering
1215 				 * the link in.  We need to re-run the lookup
1216 				 * in that directory to reset any state needed
1217 				 * for VOP_LINK.
1218 				 * Call relookup on the union-layer to reset
1219 				 * the state.
1220 				 */
1221 				vp  = NULLVP;
1222 				if (dun->un_uppervp == NULLVP)
1223 					 panic("union: null upperdvp?");
1224 				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1225 				if (error) {
1226 					VOP_UNLOCK(ap->a_vp);
1227 					return EROFS;	/* ? */
1228 				}
1229 				if (vp != NULLVP) {
1230 					/*
1231 					 * The name we want to create has
1232 					 * mysteriously appeared (a race?)
1233 					 */
1234 					error = EEXIST;
1235 					VOP_UNLOCK(ap->a_vp);
1236 					vput(vp);
1237 					return (error);
1238 				}
1239 			}
1240 			VOP_UNLOCK(ap->a_vp);
1241 		}
1242 		vp = un->un_uppervp;
1243 	}
1244 
1245 	dvp = dun->un_uppervp;
1246 	if (dvp == NULLVP)
1247 		error = EROFS;
1248 
1249 	if (error)
1250 		return (error);
1251 
1252 	return VOP_LINK(dvp, vp, cnp);
1253 }
1254 
/*
 * union_rename: rename within the upper layer of a union mount.
 *
 * Each union vnode argument (source directory, source, target
 * directory, optional target) is replaced by its upper vnode, on which
 * an extra reference is taken, and VOP_RENAME is then issued on those
 * upper vnodes.  If a directory or the source has no upper vnode the
 * operation fails with EXDEV.
 *
 * Returns EXDEV as described above, otherwise the result of VOP_RENAME.
 */
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	/* Working copies; replaced by upper vnodes as they are resolved. */
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/*
		 * A lower vnode also exists for the source, so ask for
		 * a whiteout to be left behind.
		 */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* The target may legitimately have no upper vnode. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

	/*
	 * Error before VOP_RENAME was called.  Every "goto bad" above
	 * happens before tdvp and tvp are substituted, so they still
	 * name the caller's vnodes here; fdvp and fvp may already be
	 * upper vnodes carrying the extra reference taken above.
	 */
bad:
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

	/*
	 * For every argument that was replaced by an upper vnode, drop
	 * a reference on the original union vnode.
	 */
out:
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1361 
1362 int
union_mkdir(void * v)1363 union_mkdir(void *v)
1364 {
1365 	struct vop_mkdir_v3_args /* {
1366 		struct vnode *a_dvp;
1367 		struct vnode **a_vpp;
1368 		struct componentname *a_cnp;
1369 		struct vattr *a_vap;
1370 	} */ *ap = v;
1371 	struct union_node *un = VTOUNION(ap->a_dvp);
1372 	struct vnode *dvp = un->un_uppervp;
1373 	struct componentname *cnp = ap->a_cnp;
1374 
1375 	if (dvp != NULLVP) {
1376 		int error;
1377 		struct vnode *vp;
1378 
1379 		vp = NULL;
1380 		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1381 		if (error) {
1382 			vrele(ap->a_dvp);
1383 			return (error);
1384 		}
1385 
1386 		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1387 				NULLVP, cnp, vp, NULLVP, 1);
1388 		if (error)
1389 			vrele(vp);
1390 		return (error);
1391 	}
1392 
1393 	return (EROFS);
1394 }
1395 
1396 int
union_rmdir(void * v)1397 union_rmdir(void *v)
1398 {
1399 	struct vop_rmdir_args /* {
1400 		struct vnode *a_dvp;
1401 		struct vnode *a_vp;
1402 		struct componentname *a_cnp;
1403 	} */ *ap = v;
1404 	int error;
1405 	struct union_node *dun = VTOUNION(ap->a_dvp);
1406 	struct union_node *un = VTOUNION(ap->a_vp);
1407 	struct componentname *cnp = ap->a_cnp;
1408 
1409 	if (dun->un_uppervp == NULLVP)
1410 		panic("union rmdir: null upper vnode");
1411 
1412 	error = union_check_rmdir(un, cnp->cn_cred);
1413 	if (error) {
1414 		vput(ap->a_dvp);
1415 		vput(ap->a_vp);
1416 		return error;
1417 	}
1418 
1419 	if (un->un_uppervp != NULLVP) {
1420 		struct vnode *dvp = dun->un_uppervp;
1421 		struct vnode *vp = un->un_uppervp;
1422 
1423 		/*
1424 		 * Account for VOP_RMDIR to vrele dvp and vp.
1425 		 * Note: VOP_RMDIR will unlock dvp and vp.
1426 		 */
1427 		vref(dvp);
1428 		vref(vp);
1429 		if (union_dowhiteout(un, cnp->cn_cred))
1430 			cnp->cn_flags |= DOWHITEOUT;
1431 		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1432 		if (!error)
1433 			union_removed_upper(un);
1434 		vrele(ap->a_dvp);
1435 		vrele(ap->a_vp);
1436 	} else {
1437 		error = union_mkwhiteout(
1438 			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1439 			dun->un_uppervp, ap->a_cnp, un);
1440 		vput(ap->a_dvp);
1441 		vput(ap->a_vp);
1442 	}
1443 
1444 	return (error);
1445 }
1446 
1447 int
union_symlink(void * v)1448 union_symlink(void *v)
1449 {
1450 	struct vop_symlink_v3_args /* {
1451 		struct vnode *a_dvp;
1452 		struct vnode **a_vpp;
1453 		struct componentname *a_cnp;
1454 		struct vattr *a_vap;
1455 		char *a_target;
1456 	} */ *ap = v;
1457 	struct union_node *un = VTOUNION(ap->a_dvp);
1458 	struct vnode *dvp = un->un_uppervp;
1459 	struct componentname *cnp = ap->a_cnp;
1460 
1461 	if (dvp != NULLVP) {
1462 		int error;
1463 
1464 		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1465 				    ap->a_target);
1466 		return (error);
1467 	}
1468 
1469 	return (EROFS);
1470 }
1471 
1472 /*
1473  * union_readdir works in concert with getdirentries and
1474  * readdir(3) to provide a list of entries in the unioned
1475  * directories.  getdirentries is responsible for walking
1476  * down the union stack.  readdir(3) is responsible for
1477  * eliminating duplicate names from the returned data stream.
1478  */
1479 int
union_readdir(void * v)1480 union_readdir(void *v)
1481 {
1482 	struct vop_readdir_args /* {
1483 		struct vnodeop_desc *a_desc;
1484 		struct vnode *a_vp;
1485 		struct uio *a_uio;
1486 		kauth_cred_t a_cred;
1487 		int *a_eofflag;
1488 		u_long *a_cookies;
1489 		int a_ncookies;
1490 	} */ *ap = v;
1491 	struct union_node *un = VTOUNION(ap->a_vp);
1492 	struct vnode *uvp = un->un_uppervp;
1493 
1494 	if (uvp == NULLVP)
1495 		return (0);
1496 
1497 	ap->a_vp = uvp;
1498 	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1499 }
1500 
1501 int
union_readlink(void * v)1502 union_readlink(void *v)
1503 {
1504 	struct vop_readlink_args /* {
1505 		struct vnode *a_vp;
1506 		struct uio *a_uio;
1507 		kauth_cred_t a_cred;
1508 	} */ *ap = v;
1509 	int error;
1510 	struct vnode *vp = OTHERVP(ap->a_vp);
1511 	int dolock = (vp == LOWERVP(ap->a_vp));
1512 
1513 	if (dolock)
1514 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1515 	ap->a_vp = vp;
1516 	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1517 	if (dolock)
1518 		VOP_UNLOCK(vp);
1519 
1520 	return (error);
1521 }
1522 
1523 int
union_abortop(void * v)1524 union_abortop(void *v)
1525 {
1526 	struct vop_abortop_args /* {
1527 		struct vnode *a_dvp;
1528 		struct componentname *a_cnp;
1529 	} */ *ap = v;
1530 
1531 	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1532 
1533 	ap->a_dvp = UPPERVP(ap->a_dvp);
1534 	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1535 }
1536 
1537 int
union_inactive(void * v)1538 union_inactive(void *v)
1539 {
1540 	struct vop_inactive_args /* {
1541 		const struct vnodeop_desc *a_desc;
1542 		struct vnode *a_vp;
1543 		bool *a_recycle;
1544 	} */ *ap = v;
1545 	struct vnode *vp = ap->a_vp;
1546 	struct union_node *un = VTOUNION(vp);
1547 	struct vnode **vpp;
1548 
1549 	/*
1550 	 * Do nothing (and _don't_ bypass).
1551 	 * Wait to vrele lowervp until reclaim,
1552 	 * so that until then our union_node is in the
1553 	 * cache and reusable.
1554 	 *
1555 	 * NEEDSWORK: Someday, consider inactive'ing
1556 	 * the lowervp and then trying to reactivate it
1557 	 * with capabilities (v_id)
1558 	 * like they do in the name lookup cache code.
1559 	 * That's too much work for now.
1560 	 */
1561 
1562 	if (un->un_dircache != 0) {
1563 		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1564 			vrele(*vpp);
1565 		free(un->un_dircache, M_TEMP);
1566 		un->un_dircache = 0;
1567 	}
1568 
1569 	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
1570 	VOP_UNLOCK(vp);
1571 
1572 	return (0);
1573 }
1574 
1575 int
union_reclaim(void * v)1576 union_reclaim(void *v)
1577 {
1578 	struct vop_reclaim_args /* {
1579 		struct vnode *a_vp;
1580 	} */ *ap = v;
1581 
1582 	union_freevp(ap->a_vp);
1583 
1584 	return (0);
1585 }
1586 
1587 static int
union_lock1(struct vnode * vp,struct vnode * lockvp,int flags)1588 union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1589 {
1590 	struct vop_lock_args ap;
1591 
1592 	if (lockvp == vp) {
1593 		ap.a_vp = vp;
1594 		ap.a_flags = flags;
1595 		return genfs_lock(&ap);
1596 	} else
1597 		return VOP_LOCK(lockvp, flags);
1598 }
1599 
1600 static int
union_unlock1(struct vnode * vp,struct vnode * lockvp)1601 union_unlock1(struct vnode *vp, struct vnode *lockvp)
1602 {
1603 	struct vop_unlock_args ap;
1604 
1605 	if (lockvp == vp) {
1606 		ap.a_vp = vp;
1607 		return genfs_unlock(&ap);
1608 	} else
1609 		return VOP_UNLOCK(lockvp);
1610 }
1611 
/*
 * union_lock: lock a union vnode.
 *
 * The lock is taken on the vnode that LOCKVP() selects for this union
 * vnode; un_lock is held while that selection is read.  After the lock
 * is obtained the union vnode is checked with vdead_check(), and the
 * lock is given back if that check fails.
 *
 * Returns 0 on success, EBUSY when LK_NOWAIT was given and a required
 * mutex could not be taken without waiting, or the error reported by
 * the lock operation or by vdead_check().
 */
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	/* Non-blocking variant: use only try-style mutex operations. */
	if ((flags & LK_NOWAIT) != 0) {
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		/* un_lock is held across the lock attempt itself. */
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		/* Back out the lock if the vnode cannot be used. */
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/*
	 * Blocking variant: un_lock is dropped while waiting for the
	 * vnode lock, so LOCKVP() is sampled again afterwards.  If it
	 * now names a different vnode, undo the lock and retry.
	 */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		/*
		 * Give the lock back, then repeat the check without
		 * VDEAD_NOWAIT; the assertion expects ENOENT from it.
		 * NOTE(review): presumably the second call waits for
		 * the vnode to finish dying -- confirm.
		 */
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1666 
1667 int
union_unlock(void * v)1668 union_unlock(void *v)
1669 {
1670 	struct vop_unlock_args /* {
1671 		struct vnode *a_vp;
1672 		int a_flags;
1673 	} */ *ap = v;
1674 	struct vnode *vp = ap->a_vp, *lockvp;
1675 
1676 	lockvp = LOCKVP(vp);
1677 	union_unlock1(vp, lockvp);
1678 
1679 	return 0;
1680 }
1681 
1682 int
union_bmap(void * v)1683 union_bmap(void *v)
1684 {
1685 	struct vop_bmap_args /* {
1686 		struct vnode *a_vp;
1687 		daddr_t  a_bn;
1688 		struct vnode **a_vpp;
1689 		daddr_t *a_bnp;
1690 		int *a_runp;
1691 	} */ *ap = v;
1692 	int error;
1693 	struct vnode *vp = OTHERVP(ap->a_vp);
1694 	int dolock = (vp == LOWERVP(ap->a_vp));
1695 
1696 	if (dolock)
1697 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1698 	ap->a_vp = vp;
1699 	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1700 	if (dolock)
1701 		VOP_UNLOCK(vp);
1702 
1703 	return (error);
1704 }
1705 
1706 int
union_print(void * v)1707 union_print(void *v)
1708 {
1709 	struct vop_print_args /* {
1710 		struct vnode *a_vp;
1711 	} */ *ap = v;
1712 	struct vnode *vp = ap->a_vp;
1713 
1714 	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1715 			vp, UPPERVP(vp), LOWERVP(vp));
1716 	if (UPPERVP(vp) != NULLVP)
1717 		vprint("union: upper", UPPERVP(vp));
1718 	if (LOWERVP(vp) != NULLVP)
1719 		vprint("union: lower", LOWERVP(vp));
1720 	if (VTOUNION(vp)->un_dircache) {
1721 		struct vnode **vpp;
1722 		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1723 			vprint("dircache:", *vpp);
1724 	}
1725 
1726 	return (0);
1727 }
1728 
1729 int
union_islocked(void * v)1730 union_islocked(void *v)
1731 {
1732 	struct vop_islocked_args /* {
1733 		struct vnode *a_vp;
1734 	} */ *ap = v;
1735 	struct vnode *vp;
1736 	struct union_node *un;
1737 
1738 	un = VTOUNION(ap->a_vp);
1739 	mutex_enter(&un->un_lock);
1740 	vp = LOCKVP(ap->a_vp);
1741 	mutex_exit(&un->un_lock);
1742 
1743 	if (vp == ap->a_vp)
1744 		return genfs_islocked(ap);
1745 	else
1746 		return VOP_ISLOCKED(vp);
1747 }
1748 
1749 int
union_pathconf(void * v)1750 union_pathconf(void *v)
1751 {
1752 	struct vop_pathconf_args /* {
1753 		struct vnode *a_vp;
1754 		int a_name;
1755 		int *a_retval;
1756 	} */ *ap = v;
1757 	int error;
1758 	struct vnode *vp = OTHERVP(ap->a_vp);
1759 	int dolock = (vp == LOWERVP(ap->a_vp));
1760 
1761 	if (dolock)
1762 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1763 	ap->a_vp = vp;
1764 	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1765 	if (dolock)
1766 		VOP_UNLOCK(vp);
1767 
1768 	return (error);
1769 }
1770 
1771 int
union_advlock(void * v)1772 union_advlock(void *v)
1773 {
1774 	struct vop_advlock_args /* {
1775 		struct vnode *a_vp;
1776 		void *a_id;
1777 		int  a_op;
1778 		struct flock *a_fl;
1779 		int  a_flags;
1780 	} */ *ap = v;
1781 	struct vnode *ovp = OTHERVP(ap->a_vp);
1782 
1783 	ap->a_vp = ovp;
1784 	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1785 }
1786 
1787 int
union_strategy(void * v)1788 union_strategy(void *v)
1789 {
1790 	struct vop_strategy_args /* {
1791 		struct vnode *a_vp;
1792 		struct buf *a_bp;
1793 	} */ *ap = v;
1794 	struct vnode *ovp = OTHERVP(ap->a_vp);
1795 	struct buf *bp = ap->a_bp;
1796 
1797 	KASSERT(ovp != NULLVP);
1798 	if (!NODE_IS_SPECIAL(ovp))
1799 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1800 
1801 	return (VOP_STRATEGY(ovp, bp));
1802 }
1803 
1804 int
union_bwrite(void * v)1805 union_bwrite(void *v)
1806 {
1807 	struct vop_bwrite_args /* {
1808 		struct vnode *a_vp;
1809 		struct buf *a_bp;
1810 	} */ *ap = v;
1811 	struct vnode *ovp = OTHERVP(ap->a_vp);
1812 	struct buf *bp = ap->a_bp;
1813 
1814 	KASSERT(ovp != NULLVP);
1815 	if (!NODE_IS_SPECIAL(ovp))
1816 		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1817 
1818 	return (VOP_BWRITE(ovp, bp));
1819 }
1820 
1821 int
union_getpages(void * v)1822 union_getpages(void *v)
1823 {
1824 	struct vop_getpages_args /* {
1825 		struct vnode *a_vp;
1826 		voff_t a_offset;
1827 		struct vm_page **a_m;
1828 		int *a_count;
1829 		int a_centeridx;
1830 		vm_prot_t a_access_type;
1831 		int a_advice;
1832 		int a_flags;
1833 	} */ *ap = v;
1834 	struct vnode *vp = ap->a_vp;
1835 
1836 	KASSERT(mutex_owned(vp->v_interlock));
1837 
1838 	if (ap->a_flags & PGO_LOCKED) {
1839 		return EBUSY;
1840 	}
1841 	ap->a_vp = OTHERVP(vp);
1842 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1843 
1844 	/* Just pass the request on to the underlying layer. */
1845 	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1846 }
1847 
1848 int
union_putpages(void * v)1849 union_putpages(void *v)
1850 {
1851 	struct vop_putpages_args /* {
1852 		struct vnode *a_vp;
1853 		voff_t a_offlo;
1854 		voff_t a_offhi;
1855 		int a_flags;
1856 	} */ *ap = v;
1857 	struct vnode *vp = ap->a_vp;
1858 
1859 	KASSERT(mutex_owned(vp->v_interlock));
1860 
1861 	ap->a_vp = OTHERVP(vp);
1862 	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1863 
1864 	if (ap->a_flags & PGO_RECLAIM) {
1865 		mutex_exit(vp->v_interlock);
1866 		return 0;
1867 	}
1868 
1869 	/* Just pass the request on to the underlying layer. */
1870 	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1871 }
1872 
1873 int
union_kqfilter(void * v)1874 union_kqfilter(void *v)
1875 {
1876 	struct vop_kqfilter_args /* {
1877 		struct vnode	*a_vp;
1878 		struct knote	*a_kn;
1879 	} */ *ap = v;
1880 	int error;
1881 
1882 	/*
1883 	 * We watch either the upper layer file (if it already exists),
1884 	 * or the lower layer one. If there is lower layer file only
1885 	 * at this moment, we will keep watching that lower layer file
1886 	 * even if upper layer file would be created later on.
1887 	 */
1888 	if (UPPERVP(ap->a_vp))
1889 		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1890 	else if (LOWERVP(ap->a_vp))
1891 		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1892 	else {
1893 		/* panic? */
1894 		error = EOPNOTSUPP;
1895 	}
1896 
1897 	return (error);
1898 }
1899