xref: /freebsd/sys/fs/unionfs/union_subr.c (revision 42249ef2)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1994 Jan-Simon Pendry
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
8  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
38  * $FreeBSD$
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/ktr.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/malloc.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/dirent.h>
53 #include <sys/fcntl.h>
54 #include <sys/filedesc.h>
55 #include <sys/stat.h>
56 #include <sys/resourcevar.h>
57 
58 #include <security/mac/mac_framework.h>
59 
60 #include <vm/uma.h>
61 
62 #include <fs/unionfs/union.h>
63 
/*
 * Size argument handed to hashinit() when the per-directory child cache
 * of a unionfs directory node is created.
 */
#define NUNIONFSNODECACHE 16

/*
 * malloc(9) types used by unionfs.  The hash table type is private to this
 * file; the node and path types are not declared static here.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
69 
/*
 * Initialize unionfs.
 *
 * Nothing is set up here; the function only emits a debug trace and
 * reports success (0).
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	int	error;

	error = 0;
	UNIONFSDEBUG("unionfs_init\n");	/* shows up while the system boots */

	return (error);
}
79 
/*
 * Uninitialize unionfs.
 *
 * There is nothing to tear down; always returns 0.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	(void)vfsp;

	return (0);
}
88 
89 static struct unionfs_node_hashhead *
90 unionfs_get_hashhead(struct vnode *dvp, char *path)
91 {
92 	int		count;
93 	char		hash;
94 	struct unionfs_node *unp;
95 
96 	hash = 0;
97 	unp = VTOUNIONFS(dvp);
98 	if (path != NULL) {
99 		for (count = 0; path[count]; count++)
100 			hash += path[count];
101 	}
102 
103 	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
104 }
105 
106 /*
107  * Get the cached vnode.
108  */
109 static struct vnode *
110 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
111 			struct vnode *dvp, char *path)
112 {
113 	struct unionfs_node_hashhead *hd;
114 	struct unionfs_node *unp;
115 	struct vnode   *vp;
116 
117 	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
118 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
119 	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
120 	    ("unionfs_get_cached_vnode: v_type != VDIR"));
121 
122 	VI_LOCK(dvp);
123 	hd = unionfs_get_hashhead(dvp, path);
124 	LIST_FOREACH(unp, hd, un_hash) {
125 		if (!strcmp(unp->un_path, path)) {
126 			vp = UNIONFSTOV(unp);
127 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
128 			VI_UNLOCK(dvp);
129 			vp->v_iflag &= ~VI_OWEINACT;
130 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
131 				VI_UNLOCK(vp);
132 				vp = NULLVP;
133 			} else
134 				VI_UNLOCK(vp);
135 			return (vp);
136 		}
137 	}
138 	VI_UNLOCK(dvp);
139 
140 	return (NULLVP);
141 }
142 
143 /*
144  * Add the new vnode into cache.
145  */
146 static struct vnode *
147 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
148 			struct vnode *dvp, char *path)
149 {
150 	struct unionfs_node_hashhead *hd;
151 	struct unionfs_node *unp;
152 	struct vnode   *vp;
153 
154 	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
155 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
156 	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
157 	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
158 
159 	VI_LOCK(dvp);
160 	hd = unionfs_get_hashhead(dvp, path);
161 	LIST_FOREACH(unp, hd, un_hash) {
162 		if (!strcmp(unp->un_path, path)) {
163 			vp = UNIONFSTOV(unp);
164 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
165 			vp->v_iflag &= ~VI_OWEINACT;
166 			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
167 				LIST_INSERT_HEAD(hd, uncp, un_hash);
168 				VI_UNLOCK(vp);
169 				vp = NULLVP;
170 			} else
171 				VI_UNLOCK(vp);
172 			VI_UNLOCK(dvp);
173 			return (vp);
174 		}
175 	}
176 
177 	LIST_INSERT_HEAD(hd, uncp, un_hash);
178 	VI_UNLOCK(dvp);
179 
180 	return (NULLVP);
181 }
182 
183 /*
184  * Remove the vnode.
185  */
186 static void
187 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
188 {
189 	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
190 	KASSERT((dvp != NULLVP),
191 	    ("unionfs_rem_cached_vnode: null parent vnode"));
192 	KASSERT((unp->un_hash.le_prev != NULL),
193 	    ("unionfs_rem_cached_vnode: null hash"));
194 
195 	VI_LOCK(dvp);
196 	LIST_REMOVE(unp, un_hash);
197 	unp->un_hash.le_next = NULL;
198 	unp->un_hash.le_prev = NULL;
199 	VI_UNLOCK(dvp);
200 }
201 
202 /*
203  * Make a new or get existing unionfs node.
204  *
205  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
206  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
207  * you should not lock plurality simultaneously.
208  */
209 int
210 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
211 		struct vnode *lowervp, struct vnode *dvp,
212 		struct vnode **vpp, struct componentname *cnp,
213 		struct thread *td)
214 {
215 	struct unionfs_mount *ump;
216 	struct unionfs_node *unp;
217 	struct vnode   *vp;
218 	int		error;
219 	int		lkflags;
220 	enum vtype	vt;
221 	char	       *path;
222 
223 	ump = MOUNTTOUNIONFSMOUNT(mp);
224 	lkflags = (cnp ? cnp->cn_lkflags : 0);
225 	path = (cnp ? cnp->cn_nameptr : NULL);
226 	*vpp = NULLVP;
227 
228 	if (uppervp == NULLVP && lowervp == NULLVP)
229 		panic("unionfs_nodeget: upper and lower is null");
230 
231 	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
232 
233 	/* If it has no ISLASTCN flag, path check is skipped. */
234 	if (cnp && !(cnp->cn_flags & ISLASTCN))
235 		path = NULL;
236 
237 	/* check the cache */
238 	if (path != NULL && dvp != NULLVP && vt == VDIR) {
239 		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
240 		if (vp != NULLVP) {
241 			vref(vp);
242 			*vpp = vp;
243 			goto unionfs_nodeget_out;
244 		}
245 	}
246 
247 	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
248 	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
249 		/* dvp will be NULLVP only in case of root vnode. */
250 		if (dvp == NULLVP)
251 			return (EINVAL);
252 	}
253 	unp = malloc(sizeof(struct unionfs_node),
254 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
255 
256 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
257 	if (error != 0) {
258 		free(unp, M_UNIONFSNODE);
259 		return (error);
260 	}
261 	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
262 	if (error != 0) {
263 		free(unp, M_UNIONFSNODE);
264 		return (error);
265 	}
266 	if (dvp != NULLVP)
267 		vref(dvp);
268 	if (uppervp != NULLVP)
269 		vref(uppervp);
270 	if (lowervp != NULLVP)
271 		vref(lowervp);
272 
273 	if (vt == VDIR)
274 		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
275 		    &(unp->un_hashmask));
276 
277 	unp->un_vnode = vp;
278 	unp->un_uppervp = uppervp;
279 	unp->un_lowervp = lowervp;
280 	unp->un_dvp = dvp;
281 	if (uppervp != NULLVP)
282 		vp->v_vnlock = uppervp->v_vnlock;
283 	else
284 		vp->v_vnlock = lowervp->v_vnlock;
285 
286 	if (path != NULL) {
287 		unp->un_path = (char *)
288 		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
289 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
290 		unp->un_path[cnp->cn_namelen] = '\0';
291 	}
292 	vp->v_type = vt;
293 	vp->v_data = unp;
294 
295 	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
296 	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
297 		vp->v_vflag |= VV_ROOT;
298 
299 	if (path != NULL && dvp != NULLVP && vt == VDIR)
300 		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
301 	if ((*vpp) != NULLVP) {
302 		if (dvp != NULLVP)
303 			vrele(dvp);
304 		if (uppervp != NULLVP)
305 			vrele(uppervp);
306 		if (lowervp != NULLVP)
307 			vrele(lowervp);
308 
309 		unp->un_uppervp = NULLVP;
310 		unp->un_lowervp = NULLVP;
311 		unp->un_dvp = NULLVP;
312 		vrele(vp);
313 		vp = *vpp;
314 		vref(vp);
315 	} else
316 		*vpp = vp;
317 
318 unionfs_nodeget_out:
319 	if (lkflags & LK_TYPE_MASK)
320 		vn_lock(vp, lkflags | LK_RETRY);
321 
322 	return (0);
323 }
324 
325 /*
326  * Clean up the unionfs node.
327  */
328 void
329 unionfs_noderem(struct vnode *vp, struct thread *td)
330 {
331 	int		count;
332 	struct unionfs_node *unp, *unp_t1, *unp_t2;
333 	struct unionfs_node_hashhead *hd;
334 	struct unionfs_node_status *unsp, *unsp_tmp;
335 	struct vnode   *lvp;
336 	struct vnode   *uvp;
337 	struct vnode   *dvp;
338 
339 	/*
340 	 * Use the interlock to protect the clearing of v_data to
341 	 * prevent faults in unionfs_lock().
342 	 */
343 	VI_LOCK(vp);
344 	unp = VTOUNIONFS(vp);
345 	lvp = unp->un_lowervp;
346 	uvp = unp->un_uppervp;
347 	dvp = unp->un_dvp;
348 	unp->un_lowervp = unp->un_uppervp = NULLVP;
349 	vp->v_vnlock = &(vp->v_lock);
350 	vp->v_data = NULL;
351 	vp->v_object = NULL;
352 	if (vp->v_writecount > 0) {
353 		if (uvp != NULL)
354 			VOP_ADD_WRITECOUNT(uvp, -vp->v_writecount);
355 		else if (lvp != NULL)
356 			VOP_ADD_WRITECOUNT(lvp, -vp->v_writecount);
357 	} else if (vp->v_writecount < 0)
358 		vp->v_writecount = 0;
359 	VI_UNLOCK(vp);
360 
361 	if (lvp != NULLVP)
362 		VOP_UNLOCK(lvp, LK_RELEASE);
363 	if (uvp != NULLVP)
364 		VOP_UNLOCK(uvp, LK_RELEASE);
365 
366 	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
367 		unionfs_rem_cached_vnode(unp, dvp);
368 
369 	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
370 		panic("the lock for deletion is unacquirable.");
371 
372 	if (lvp != NULLVP)
373 		vrele(lvp);
374 	if (uvp != NULLVP)
375 		vrele(uvp);
376 	if (dvp != NULLVP) {
377 		vrele(dvp);
378 		unp->un_dvp = NULLVP;
379 	}
380 	if (unp->un_path != NULL) {
381 		free(unp->un_path, M_UNIONFSPATH);
382 		unp->un_path = NULL;
383 	}
384 
385 	if (unp->un_hashtbl != NULL) {
386 		for (count = 0; count <= unp->un_hashmask; count++) {
387 			hd = unp->un_hashtbl + count;
388 			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
389 				LIST_REMOVE(unp_t1, un_hash);
390 				unp_t1->un_hash.le_next = NULL;
391 				unp_t1->un_hash.le_prev = NULL;
392 			}
393 		}
394 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
395 	}
396 
397 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
398 		LIST_REMOVE(unsp, uns_list);
399 		free(unsp, M_TEMP);
400 	}
401 	free(unp, M_UNIONFSNODE);
402 }
403 
404 /*
405  * Get the unionfs node status.
406  * You need exclusive lock this vnode.
407  */
408 void
409 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
410 			struct unionfs_node_status **unspp)
411 {
412 	struct unionfs_node_status *unsp;
413 	pid_t pid = td->td_proc->p_pid;
414 
415 	KASSERT(NULL != unspp, ("null pointer"));
416 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
417 
418 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
419 		if (unsp->uns_pid == pid) {
420 			*unspp = unsp;
421 			return;
422 		}
423 	}
424 
425 	/* create a new unionfs node status */
426 	unsp = malloc(sizeof(struct unionfs_node_status),
427 	    M_TEMP, M_WAITOK | M_ZERO);
428 
429 	unsp->uns_pid = pid;
430 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
431 
432 	*unspp = unsp;
433 }
434 
435 /*
436  * Remove the unionfs node status, if you can.
437  * You need exclusive lock this vnode.
438  */
439 void
440 unionfs_tryrem_node_status(struct unionfs_node *unp,
441 			   struct unionfs_node_status *unsp)
442 {
443 	KASSERT(NULL != unsp, ("null pointer"));
444 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
445 
446 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
447 		return;
448 
449 	LIST_REMOVE(unsp, uns_list);
450 	free(unsp, M_TEMP);
451 }
452 
453 /*
454  * Create upper node attr.
455  */
456 void
457 unionfs_create_uppervattr_core(struct unionfs_mount *ump,
458 			       struct vattr *lva,
459 			       struct vattr *uva,
460 			       struct thread *td)
461 {
462 	VATTR_NULL(uva);
463 	uva->va_type = lva->va_type;
464 	uva->va_atime = lva->va_atime;
465 	uva->va_mtime = lva->va_mtime;
466 	uva->va_ctime = lva->va_ctime;
467 
468 	switch (ump->um_copymode) {
469 	case UNIONFS_TRANSPARENT:
470 		uva->va_mode = lva->va_mode;
471 		uva->va_uid = lva->va_uid;
472 		uva->va_gid = lva->va_gid;
473 		break;
474 	case UNIONFS_MASQUERADE:
475 		if (ump->um_uid == lva->va_uid) {
476 			uva->va_mode = lva->va_mode & 077077;
477 			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
478 			uva->va_uid = lva->va_uid;
479 			uva->va_gid = lva->va_gid;
480 		} else {
481 			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
482 			uva->va_uid = ump->um_uid;
483 			uva->va_gid = ump->um_gid;
484 		}
485 		break;
486 	default:		/* UNIONFS_TRADITIONAL */
487 		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
488 		uva->va_uid = ump->um_uid;
489 		uva->va_gid = ump->um_gid;
490 		break;
491 	}
492 }
493 
494 /*
495  * Create upper node attr.
496  */
497 int
498 unionfs_create_uppervattr(struct unionfs_mount *ump,
499 			  struct vnode *lvp,
500 			  struct vattr *uva,
501 			  struct ucred *cred,
502 			  struct thread *td)
503 {
504 	int		error;
505 	struct vattr	lva;
506 
507 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
508 		return (error);
509 
510 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
511 
512 	return (error);
513 }
514 
515 /*
516  * relookup
517  *
518  * dvp should be locked on entry and will be locked on return.
519  *
520  * If an error is returned, *vpp will be invalid, otherwise it will hold a
521  * locked, referenced vnode. If *vpp == dvp then remember that only one
522  * LK_EXCLUSIVE lock is held.
523  */
524 int
525 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
526 		 struct componentname *cnp, struct componentname *cn,
527 		 struct thread *td, char *path, int pathlen, u_long nameiop)
528 {
529 	int	error;
530 
531 	cn->cn_namelen = pathlen;
532 	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
533 	bcopy(path, cn->cn_pnbuf, pathlen);
534 	cn->cn_pnbuf[pathlen] = '\0';
535 
536 	cn->cn_nameiop = nameiop;
537 	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
538 	cn->cn_lkflags = LK_EXCLUSIVE;
539 	cn->cn_thread = td;
540 	cn->cn_cred = cnp->cn_cred;
541 
542 	cn->cn_nameptr = cn->cn_pnbuf;
543 
544 	if (nameiop == DELETE)
545 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
546 	else if (RENAME == nameiop)
547 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
548 	else if (nameiop == CREATE)
549 		cn->cn_flags |= NOCACHE;
550 
551 	vref(dvp);
552 	VOP_UNLOCK(dvp, LK_RELEASE);
553 
554 	if ((error = relookup(dvp, vpp, cn))) {
555 		uma_zfree(namei_zone, cn->cn_pnbuf);
556 		cn->cn_flags &= ~HASBUF;
557 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
558 	} else
559 		vrele(dvp);
560 
561 	return (error);
562 }
563 
564 /*
565  * relookup for CREATE namei operation.
566  *
567  * dvp is unionfs vnode. dvp should be locked.
568  *
569  * If it called 'unionfs_copyfile' function by unionfs_link etc,
570  * VOP_LOOKUP information is broken.
571  * So it need relookup in order to create link etc.
572  */
573 int
574 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
575 			    struct thread *td)
576 {
577 	int	error;
578 	struct vnode *udvp;
579 	struct vnode *vp;
580 	struct componentname cn;
581 
582 	udvp = UNIONFSVPTOUPPERVP(dvp);
583 	vp = NULLVP;
584 
585 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
586 	    strlen(cnp->cn_nameptr), CREATE);
587 	if (error)
588 		return (error);
589 
590 	if (vp != NULLVP) {
591 		if (udvp == vp)
592 			vrele(vp);
593 		else
594 			vput(vp);
595 
596 		error = EEXIST;
597 	}
598 
599 	if (cn.cn_flags & HASBUF) {
600 		uma_zfree(namei_zone, cn.cn_pnbuf);
601 		cn.cn_flags &= ~HASBUF;
602 	}
603 
604 	if (!error) {
605 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
606 		cnp->cn_flags = cn.cn_flags;
607 	}
608 
609 	return (error);
610 }
611 
612 /*
613  * relookup for DELETE namei operation.
614  *
615  * dvp is unionfs vnode. dvp should be locked.
616  */
617 int
618 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
619 			    struct thread *td)
620 {
621 	int	error;
622 	struct vnode *udvp;
623 	struct vnode *vp;
624 	struct componentname cn;
625 
626 	udvp = UNIONFSVPTOUPPERVP(dvp);
627 	vp = NULLVP;
628 
629 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
630 	    strlen(cnp->cn_nameptr), DELETE);
631 	if (error)
632 		return (error);
633 
634 	if (vp == NULLVP)
635 		error = ENOENT;
636 	else {
637 		if (udvp == vp)
638 			vrele(vp);
639 		else
640 			vput(vp);
641 	}
642 
643 	if (cn.cn_flags & HASBUF) {
644 		uma_zfree(namei_zone, cn.cn_pnbuf);
645 		cn.cn_flags &= ~HASBUF;
646 	}
647 
648 	if (!error) {
649 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
650 		cnp->cn_flags = cn.cn_flags;
651 	}
652 
653 	return (error);
654 }
655 
656 /*
657  * relookup for RENAME namei operation.
658  *
659  * dvp is unionfs vnode. dvp should be locked.
660  */
661 int
662 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
663 			    struct thread *td)
664 {
665 	int error;
666 	struct vnode *udvp;
667 	struct vnode *vp;
668 	struct componentname cn;
669 
670 	udvp = UNIONFSVPTOUPPERVP(dvp);
671 	vp = NULLVP;
672 
673 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
674 	    strlen(cnp->cn_nameptr), RENAME);
675 	if (error)
676 		return (error);
677 
678 	if (vp != NULLVP) {
679 		if (udvp == vp)
680 			vrele(vp);
681 		else
682 			vput(vp);
683 	}
684 
685 	if (cn.cn_flags & HASBUF) {
686 		uma_zfree(namei_zone, cn.cn_pnbuf);
687 		cn.cn_flags &= ~HASBUF;
688 	}
689 
690 	if (!error) {
691 		cn.cn_flags |= (cnp->cn_flags & HASBUF);
692 		cnp->cn_flags = cn.cn_flags;
693 	}
694 
695 	return (error);
696 
697 }
698 
699 /*
700  * Update the unionfs_node.
701  *
702  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
703  * uvp's lock and lower's lock will be unlocked.
704  */
705 static void
706 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
707 		    struct thread *td)
708 {
709 	unsigned	count, lockrec;
710 	struct vnode   *vp;
711 	struct vnode   *lvp;
712 	struct vnode   *dvp;
713 
714 	vp = UNIONFSTOV(unp);
715 	lvp = unp->un_lowervp;
716 	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
717 	dvp = unp->un_dvp;
718 
719 	/*
720 	 * lock update
721 	 */
722 	VI_LOCK(vp);
723 	unp->un_uppervp = uvp;
724 	vp->v_vnlock = uvp->v_vnlock;
725 	VI_UNLOCK(vp);
726 	lockrec = lvp->v_vnlock->lk_recurse;
727 	for (count = 0; count < lockrec; count++)
728 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
729 
730 	/*
731 	 * cache update
732 	 */
733 	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
734 		static struct unionfs_node_hashhead *hd;
735 
736 		VI_LOCK(dvp);
737 		hd = unionfs_get_hashhead(dvp, unp->un_path);
738 		LIST_REMOVE(unp, un_hash);
739 		LIST_INSERT_HEAD(hd, unp, un_hash);
740 		VI_UNLOCK(dvp);
741 	}
742 }
743 
744 /*
745  * Create a new shadow dir.
746  *
747  * udvp should be locked on entry and will be locked on return.
748  *
749  * If no error returned, unp will be updated.
750  */
751 int
752 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
753 		    struct unionfs_node *unp, struct componentname *cnp,
754 		    struct thread *td)
755 {
756 	int		error;
757 	struct vnode   *lvp;
758 	struct vnode   *uvp;
759 	struct vattr	va;
760 	struct vattr	lva;
761 	struct componentname cn;
762 	struct mount   *mp;
763 	struct ucred   *cred;
764 	struct ucred   *credbk;
765 	struct uidinfo *rootinfo;
766 
767 	if (unp->un_uppervp != NULLVP)
768 		return (EEXIST);
769 
770 	lvp = unp->un_lowervp;
771 	uvp = NULLVP;
772 	credbk = cnp->cn_cred;
773 
774 	/* Authority change to root */
775 	rootinfo = uifind((uid_t)0);
776 	cred = crdup(cnp->cn_cred);
777 	/*
778 	 * The calls to chgproccnt() are needed to compensate for change_ruid()
779 	 * calling chgproccnt().
780 	 */
781 	chgproccnt(cred->cr_ruidinfo, 1, 0);
782 	change_euid(cred, rootinfo);
783 	change_ruid(cred, rootinfo);
784 	change_svuid(cred, (uid_t)0);
785 	uifree(rootinfo);
786 	cnp->cn_cred = cred;
787 
788 	memset(&cn, 0, sizeof(cn));
789 
790 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
791 		goto unionfs_mkshadowdir_abort;
792 
793 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
794 		goto unionfs_mkshadowdir_abort;
795 	if (uvp != NULLVP) {
796 		if (udvp == uvp)
797 			vrele(uvp);
798 		else
799 			vput(uvp);
800 
801 		error = EEXIST;
802 		goto unionfs_mkshadowdir_free_out;
803 	}
804 
805 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
806 		goto unionfs_mkshadowdir_free_out;
807 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
808 
809 	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
810 
811 	if (!error) {
812 		unionfs_node_update(unp, uvp, td);
813 
814 		/*
815 		 * XXX The bug which cannot set uid/gid was corrected.
816 		 * Ignore errors.
817 		 */
818 		va.va_type = VNON;
819 		VOP_SETATTR(uvp, &va, cn.cn_cred);
820 	}
821 	vn_finished_write(mp);
822 
823 unionfs_mkshadowdir_free_out:
824 	if (cn.cn_flags & HASBUF) {
825 		uma_zfree(namei_zone, cn.cn_pnbuf);
826 		cn.cn_flags &= ~HASBUF;
827 	}
828 
829 unionfs_mkshadowdir_abort:
830 	cnp->cn_cred = credbk;
831 	chgproccnt(cred->cr_ruidinfo, -1, 0);
832 	crfree(cred);
833 
834 	return (error);
835 }
836 
837 /*
838  * Create a new whiteout.
839  *
840  * dvp should be locked on entry and will be locked on return.
841  */
842 int
843 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
844 		   struct thread *td, char *path)
845 {
846 	int		error;
847 	struct vnode   *wvp;
848 	struct componentname cn;
849 	struct mount   *mp;
850 
851 	if (path == NULL)
852 		path = cnp->cn_nameptr;
853 
854 	wvp = NULLVP;
855 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
856 		return (error);
857 	if (wvp != NULLVP) {
858 		if (cn.cn_flags & HASBUF) {
859 			uma_zfree(namei_zone, cn.cn_pnbuf);
860 			cn.cn_flags &= ~HASBUF;
861 		}
862 		if (dvp == wvp)
863 			vrele(wvp);
864 		else
865 			vput(wvp);
866 
867 		return (EEXIST);
868 	}
869 
870 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
871 		goto unionfs_mkwhiteout_free_out;
872 	error = VOP_WHITEOUT(dvp, &cn, CREATE);
873 
874 	vn_finished_write(mp);
875 
876 unionfs_mkwhiteout_free_out:
877 	if (cn.cn_flags & HASBUF) {
878 		uma_zfree(namei_zone, cn.cn_pnbuf);
879 		cn.cn_flags &= ~HASBUF;
880 	}
881 
882 	return (error);
883 }
884 
885 /*
886  * Create a new vnode for create a new shadow file.
887  *
888  * If an error is returned, *vpp will be invalid, otherwise it will hold a
889  * locked, referenced and opened vnode.
890  *
891  * unp is never updated.
892  */
893 static int
894 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
895 			   struct unionfs_node *unp, struct vattr *uvap,
896 			   struct thread *td)
897 {
898 	struct unionfs_mount *ump;
899 	struct vnode   *vp;
900 	struct vnode   *lvp;
901 	struct ucred   *cred;
902 	struct vattr	lva;
903 	int		fmode;
904 	int		error;
905 	struct componentname cn;
906 
907 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
908 	vp = NULLVP;
909 	lvp = unp->un_lowervp;
910 	cred = td->td_ucred;
911 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
912 	error = 0;
913 
914 	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
915 		return (error);
916 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
917 
918 	if (unp->un_path == NULL)
919 		panic("unionfs: un_path is null");
920 
921 	cn.cn_namelen = strlen(unp->un_path);
922 	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
923 	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
924 	cn.cn_nameiop = CREATE;
925 	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
926 	cn.cn_lkflags = LK_EXCLUSIVE;
927 	cn.cn_thread = td;
928 	cn.cn_cred = cred;
929 	cn.cn_nameptr = cn.cn_pnbuf;
930 
931 	vref(udvp);
932 	if ((error = relookup(udvp, &vp, &cn)) != 0)
933 		goto unionfs_vn_create_on_upper_free_out2;
934 	vrele(udvp);
935 
936 	if (vp != NULLVP) {
937 		if (vp == udvp)
938 			vrele(vp);
939 		else
940 			vput(vp);
941 		error = EEXIST;
942 		goto unionfs_vn_create_on_upper_free_out1;
943 	}
944 
945 	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
946 		goto unionfs_vn_create_on_upper_free_out1;
947 
948 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
949 		vput(vp);
950 		goto unionfs_vn_create_on_upper_free_out1;
951 	}
952 	error = VOP_ADD_WRITECOUNT(vp, 1);
953 	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
954 	    vp->v_writecount);
955 	if (error == 0) {
956 		*vpp = vp;
957 	} else {
958 		VOP_CLOSE(vp, fmode, cred, td);
959 	}
960 
961 unionfs_vn_create_on_upper_free_out1:
962 	VOP_UNLOCK(udvp, LK_RELEASE);
963 
964 unionfs_vn_create_on_upper_free_out2:
965 	if (cn.cn_flags & HASBUF) {
966 		uma_zfree(namei_zone, cn.cn_pnbuf);
967 		cn.cn_flags &= ~HASBUF;
968 	}
969 
970 	return (error);
971 }
972 
973 /*
974  * Copy from lvp to uvp.
975  *
976  * lvp and uvp should be locked and opened on entry and will be locked and
977  * opened on return.
978  */
979 static int
980 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
981 		      struct ucred *cred, struct thread *td)
982 {
983 	int		error;
984 	off_t		offset;
985 	int		count;
986 	int		bufoffset;
987 	char           *buf;
988 	struct uio	uio;
989 	struct iovec	iov;
990 
991 	error = 0;
992 	memset(&uio, 0, sizeof(uio));
993 
994 	uio.uio_td = td;
995 	uio.uio_segflg = UIO_SYSSPACE;
996 	uio.uio_offset = 0;
997 
998 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
999 
1000 	while (error == 0) {
1001 		offset = uio.uio_offset;
1002 
1003 		uio.uio_iov = &iov;
1004 		uio.uio_iovcnt = 1;
1005 		iov.iov_base = buf;
1006 		iov.iov_len = MAXBSIZE;
1007 		uio.uio_resid = iov.iov_len;
1008 		uio.uio_rw = UIO_READ;
1009 
1010 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1011 			break;
1012 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1013 			break;
1014 
1015 		bufoffset = 0;
1016 		while (bufoffset < count) {
1017 			uio.uio_iov = &iov;
1018 			uio.uio_iovcnt = 1;
1019 			iov.iov_base = buf + bufoffset;
1020 			iov.iov_len = count - bufoffset;
1021 			uio.uio_offset = offset + bufoffset;
1022 			uio.uio_resid = iov.iov_len;
1023 			uio.uio_rw = UIO_WRITE;
1024 
1025 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1026 				break;
1027 
1028 			bufoffset += (count - bufoffset) - uio.uio_resid;
1029 		}
1030 
1031 		uio.uio_offset = offset + bufoffset;
1032 	}
1033 
1034 	free(buf, M_TEMP);
1035 
1036 	return (error);
1037 }
1038 
1039 /*
1040  * Copy file from lower to upper.
1041  *
1042  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1043  * docopy.
1044  *
1045  * If no error returned, unp will be updated.
1046  */
1047 int
1048 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1049 		 struct thread *td)
1050 {
1051 	int		error;
1052 	struct mount   *mp;
1053 	struct vnode   *udvp;
1054 	struct vnode   *lvp;
1055 	struct vnode   *uvp;
1056 	struct vattr	uva;
1057 
1058 	lvp = unp->un_lowervp;
1059 	uvp = NULLVP;
1060 
1061 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1062 		return (EROFS);
1063 	if (unp->un_dvp == NULLVP)
1064 		return (EINVAL);
1065 	if (unp->un_uppervp != NULLVP)
1066 		return (EEXIST);
1067 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1068 	if (udvp == NULLVP)
1069 		return (EROFS);
1070 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1071 		return (EROFS);
1072 
1073 	error = VOP_ACCESS(lvp, VREAD, cred, td);
1074 	if (error != 0)
1075 		return (error);
1076 
1077 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1078 		return (error);
1079 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1080 	if (error != 0) {
1081 		vn_finished_write(mp);
1082 		return (error);
1083 	}
1084 
1085 	if (docopy != 0) {
1086 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1087 		if (error == 0) {
1088 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1089 			VOP_CLOSE(lvp, FREAD, cred, td);
1090 		}
1091 	}
1092 	VOP_CLOSE(uvp, FWRITE, cred, td);
1093 	VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
1094 	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
1095 	    uvp->v_writecount);
1096 
1097 	vn_finished_write(mp);
1098 
1099 	if (error == 0) {
1100 		/* Reset the attributes. Ignore errors. */
1101 		uva.va_type = VNON;
1102 		VOP_SETATTR(uvp, &uva, cred);
1103 	}
1104 
1105 	unionfs_node_update(unp, uvp, td);
1106 
1107 	return (error);
1108 }
1109 
1110 /*
1111  * It checks whether vp can rmdir. (check empty)
1112  *
1113  * vp is unionfs vnode.
1114  * vp should be locked.
1115  */
1116 int
1117 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1118 {
1119 	int		error;
1120 	int		eofflag;
1121 	int		lookuperr;
1122 	struct vnode   *uvp;
1123 	struct vnode   *lvp;
1124 	struct vnode   *tvp;
1125 	struct vattr	va;
1126 	struct componentname cn;
1127 	/*
1128 	 * The size of buf needs to be larger than DIRBLKSIZ.
1129 	 */
1130 	char		buf[256 * 6];
1131 	struct dirent  *dp;
1132 	struct dirent  *edp;
1133 	struct uio	uio;
1134 	struct iovec	iov;
1135 
1136 	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1137 
1138 	eofflag = 0;
1139 	uvp = UNIONFSVPTOUPPERVP(vp);
1140 	lvp = UNIONFSVPTOLOWERVP(vp);
1141 
1142 	/* check opaque */
1143 	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
1144 		return (error);
1145 	if (va.va_flags & OPAQUE)
1146 		return (0);
1147 
1148 	/* open vnode */
1149 #ifdef MAC
1150 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
1151 		return (error);
1152 #endif
1153 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
1154 		return (error);
1155 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
1156 		return (error);
1157 
1158 	uio.uio_rw = UIO_READ;
1159 	uio.uio_segflg = UIO_SYSSPACE;
1160 	uio.uio_td = td;
1161 	uio.uio_offset = 0;
1162 
1163 #ifdef MAC
1164 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
1165 #endif
1166 	while (!error && !eofflag) {
1167 		iov.iov_base = buf;
1168 		iov.iov_len = sizeof(buf);
1169 		uio.uio_iov = &iov;
1170 		uio.uio_iovcnt = 1;
1171 		uio.uio_resid = iov.iov_len;
1172 
1173 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1174 		if (error != 0)
1175 			break;
1176 		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
1177 #ifdef DIAGNOSTIC
1178 			panic("bad readdir response from lower FS.");
1179 #endif
1180 			break;
1181 		}
1182 
1183 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1184 		for (dp = (struct dirent*)buf; !error && dp < edp;
1185 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1186 			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
1187 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1188 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1189 				continue;
1190 
1191 			cn.cn_namelen = dp->d_namlen;
1192 			cn.cn_pnbuf = NULL;
1193 			cn.cn_nameptr = dp->d_name;
1194 			cn.cn_nameiop = LOOKUP;
1195 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1196 			cn.cn_lkflags = LK_EXCLUSIVE;
1197 			cn.cn_thread = td;
1198 			cn.cn_cred = cred;
1199 
1200 			/*
1201 			 * check entry in lower.
1202 			 * Sometimes, readdir function returns
1203 			 * wrong entry.
1204 			 */
1205 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1206 
1207 			if (!lookuperr)
1208 				vput(tvp);
1209 			else
1210 				continue; /* skip entry */
1211 
1212 			/*
1213 			 * check entry
1214 			 * If it has no exist/whiteout entry in upper,
1215 			 * directory is not empty.
1216 			 */
1217 			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1218 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1219 
1220 			if (!lookuperr)
1221 				vput(tvp);
1222 
1223 			/* ignore exist or whiteout entry */
1224 			if (!lookuperr ||
1225 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1226 				continue;
1227 
1228 			error = ENOTEMPTY;
1229 		}
1230 	}
1231 
1232 	/* close vnode */
1233 	VOP_CLOSE(vp, FREAD, cred, td);
1234 
1235 	return (error);
1236 }
1237 
1238 #ifdef DIAGNOSTIC
1239 
1240 struct vnode   *
1241 unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1242 {
1243 	struct unionfs_node *unp;
1244 
1245 	unp = VTOUNIONFS(vp);
1246 
1247 #ifdef notyet
1248 	if (vp->v_op != unionfs_vnodeop_p) {
1249 		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1250 #ifdef KDB
1251 		kdb_enter(KDB_WHY_UNIONFS,
1252 		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1253 #endif
1254 		panic("unionfs_checkuppervp");
1255 	}
1256 #endif
1257 	return (unp->un_uppervp);
1258 }
1259 
1260 struct vnode   *
1261 unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1262 {
1263 	struct unionfs_node *unp;
1264 
1265 	unp = VTOUNIONFS(vp);
1266 
1267 #ifdef notyet
1268 	if (vp->v_op != unionfs_vnodeop_p) {
1269 		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1270 #ifdef KDB
1271 		kdb_enter(KDB_WHY_UNIONFS,
1272 		    "unionfs_checklowervp: on non-unionfs-node.\n");
1273 #endif
1274 		panic("unionfs_checklowervp");
1275 	}
1276 #endif
1277 	return (unp->un_lowervp);
1278 }
1279 #endif
1280