xref: /original-bsd/sys/miscfs/union/union_subr.c (revision 6d0f5438)
1 /*
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Jan-Simon Pendry.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)union_subr.c	1.9 (Berkeley) 02/08/94
12  */
13 
14 #include <sys/param.h>
15 #include <sys/systm.h>
16 #include <sys/time.h>
17 #include <sys/kernel.h>
18 #include <sys/vnode.h>
19 #include <sys/namei.h>
20 #include <sys/malloc.h>
21 #include <sys/file.h>
22 #include <sys/filedesc.h>
23 #include "union.h" /*<miscfs/union/union.h>*/
24 
25 #ifdef DIAGNOSTIC
26 #include <sys/proc.h>
27 #endif
28 
/* head of the singly-linked list (chained via un_next) of all union_nodes */
static struct union_node *unhead;
/* UN_LOCKED/UN_WANT flag word held around getnewvnode() in union_allocvp */
static int unvplock;
31 
32 int
33 union_init()
34 {
35 
36 	unhead = 0;
37 	unvplock = 0;
38 }
39 
40 /*
41  * allocate a union_node/vnode pair.  the vnode is
42  * referenced and locked.  the new vnode is returned
43  * via (vpp).  (mp) is the mountpoint of the union filesystem,
44  * (dvp) is the parent directory where the upper layer object
45  * should exist (but doesn't) and (cnp) is the componentname
46  * information which is partially copied to allow the upper
47  * layer object to be created at a later time.  (uppervp)
48  * and (lowervp) reference the upper and lower layer objects
49  * being mapped.  either, but not both, can be nil.
50  * the reference is either maintained in the new union_node
51  * object which is allocated, or they are vrele'd.
52  *
53  * all union_nodes are maintained on a singly-linked
54  * list.  new nodes are only allocated when they cannot
55  * be found on this list.  entries on the list are
56  * removed when the vfs reclaim entry is called.
57  *
58  * a single lock is kept for the entire list.  this is
59  * needed because the getnewvnode() function can block
60  * waiting for a vnode to become free, in which case there
61  * may be more than one process trying to get the same
62  * vnode.  this lock is only taken if we are going to
63  * call getnewvnode, since the kernel itself is single-threaded.
64  *
65  * if an entry is found on the list, then call vget() to
66  * take a reference.  this is done because there may be
67  * zero references to it and so it needs to removed from
68  * the vnode free list.
69  */
70 int
71 union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
72 	struct vnode **vpp;
73 	struct mount *mp;
74 	struct vnode *undvp;
75 	struct vnode *dvp;		/* may be null */
76 	struct componentname *cnp;	/* may be null */
77 	struct vnode *uppervp;		/* may be null */
78 	struct vnode *lowervp;		/* may be null */
79 {
80 	int error;
81 	struct union_node *un;
82 	struct union_node **pp;
83 	struct vnode *xlowervp = 0;
84 
85 	if (uppervp == 0 && lowervp == 0)
86 		panic("union: unidentifiable allocation");
87 
88 	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
89 		xlowervp = lowervp;
90 		lowervp = 0;
91 	}
92 
93 loop:
94 	for (un = unhead; un != 0; un = un->un_next) {
95 		if ((un->un_lowervp == lowervp ||
96 		     un->un_lowervp == 0) &&
97 		    (un->un_uppervp == uppervp ||
98 		     un->un_uppervp == 0) &&
99 		    (UNIONTOV(un)->v_mount == mp)) {
100 			if (vget(UNIONTOV(un), 0))
101 				goto loop;
102 			if (UNIONTOV(un) != undvp)
103 				VOP_LOCK(UNIONTOV(un));
104 
105 			/*
106 			 * Save information about the upper layer.
107 			 */
108 			if (uppervp != un->un_uppervp) {
109 				if (un->un_uppervp)
110 					vrele(un->un_uppervp);
111 				un->un_uppervp = uppervp;
112 			} else if (uppervp) {
113 				vrele(uppervp);
114 			}
115 
116 			/*
117 			 * Save information about the lower layer.
118 			 * This needs to keep track of pathname
119 			 * and directory information which union_vn_create
120 			 * might need.
121 			 */
122 			if (lowervp != un->un_lowervp) {
123 				if (un->un_lowervp) {
124 					vrele(un->un_lowervp);
125 					free(un->un_path, M_TEMP);
126 					vrele(un->un_dirvp);
127 				}
128 				un->un_lowervp = lowervp;
129 				if (cnp && (lowervp != NULLVP) &&
130 				    (lowervp->v_type == VREG)) {
131 					un->un_hash = cnp->cn_hash;
132 					un->un_path = malloc(cnp->cn_namelen+1,
133 							M_TEMP, M_WAITOK);
134 					bcopy(cnp->cn_nameptr, un->un_path,
135 							cnp->cn_namelen);
136 					un->un_path[cnp->cn_namelen] = '\0';
137 					VREF(dvp);
138 					un->un_dirvp = dvp;
139 				}
140 			} else if (lowervp) {
141 				vrele(lowervp);
142 			}
143 			*vpp = UNIONTOV(un);
144 			return (0);
145 		}
146 	}
147 
148 	/*
149 	 * otherwise lock the vp list while we call getnewvnode
150 	 * since that can block.
151 	 */
152 	if (unvplock & UN_LOCKED) {
153 		unvplock |= UN_WANT;
154 		sleep((caddr_t) &unvplock, PINOD);
155 		goto loop;
156 	}
157 	unvplock |= UN_LOCKED;
158 
159 	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
160 	if (error)
161 		goto out;
162 
163 	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
164 		M_TEMP, M_WAITOK);
165 
166 	if (uppervp)
167 		(*vpp)->v_type = uppervp->v_type;
168 	else
169 		(*vpp)->v_type = lowervp->v_type;
170 	un = VTOUNION(*vpp);
171 	un->un_vnode = *vpp;
172 	un->un_next = 0;
173 	un->un_uppervp = uppervp;
174 	un->un_lowervp = lowervp;
175 	un->un_openl = 0;
176 	un->un_flags = 0;
177 	if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
178 		un->un_hash = cnp->cn_hash;
179 		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
180 		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
181 		un->un_path[cnp->cn_namelen] = '\0';
182 		VREF(dvp);
183 		un->un_dirvp = dvp;
184 	} else {
185 		un->un_hash = 0;
186 		un->un_path = 0;
187 		un->un_dirvp = 0;
188 	}
189 
190 	/* add to union vnode list */
191 	for (pp = &unhead; *pp; pp = &(*pp)->un_next)
192 		continue;
193 	*pp = un;
194 
195 	un->un_flags |= UN_LOCKED;
196 
197 #ifdef DIAGNOSTIC
198 	un->un_pid = curproc->p_pid;
199 #endif
200 
201 	if (xlowervp)
202 		vrele(xlowervp);
203 
204 out:
205 	unvplock &= ~UN_LOCKED;
206 
207 	if (unvplock & UN_WANT) {
208 		unvplock &= ~UN_WANT;
209 		wakeup((caddr_t) &unvplock);
210 	}
211 
212 	return (error);
213 }
214 
215 int
216 union_freevp(vp)
217 	struct vnode *vp;
218 {
219 	struct union_node **unpp;
220 	struct union_node *un = VTOUNION(vp);
221 
222 	for (unpp = &unhead; *unpp != 0; unpp = &(*unpp)->un_next) {
223 		if (*unpp == un) {
224 			*unpp = un->un_next;
225 			break;
226 		}
227 	}
228 
229 	FREE(vp->v_data, M_TEMP);
230 	vp->v_data = 0;
231 	return (0);
232 }
233 
234 /*
235  * copyfile.  copy the vnode (fvp) to the vnode (tvp)
236  * using a sequence of reads and writes.  both (fvp)
237  * and (tvp) are locked on entry and exit.
238  */
239 int
240 union_copyfile(p, cred, fvp, tvp)
241 	struct proc *p;
242 	struct ucred *cred;
243 	struct vnode *fvp;
244 	struct vnode *tvp;
245 {
246 	char *buf;
247 	struct uio uio;
248 	struct iovec iov;
249 	int error = 0;
250 
251 	/*
252 	 * strategy:
253 	 * allocate a buffer of size MAXBSIZE.
254 	 * loop doing reads and writes, keeping track
255 	 * of the current uio offset.
256 	 * give up at the first sign of trouble.
257 	 */
258 
259 	uio.uio_procp = p;
260 	uio.uio_segflg = UIO_SYSSPACE;
261 	uio.uio_offset = 0;
262 
263 	VOP_UNLOCK(fvp);				/* XXX */
264 	LEASE_CHECK(fvp, p, cred, LEASE_READ);
265 	VOP_LOCK(fvp);					/* XXX */
266 	VOP_UNLOCK(tvp);				/* XXX */
267 	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
268 	VOP_LOCK(tvp);					/* XXX */
269 
270 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
271 
272 	/* ugly loop follows... */
273 	do {
274 		off_t offset = uio.uio_offset;
275 
276 		uio.uio_iov = &iov;
277 		uio.uio_iovcnt = 1;
278 		iov.iov_base = buf;
279 		iov.iov_len = MAXBSIZE;
280 		uio.uio_resid = iov.iov_len;
281 		uio.uio_rw = UIO_READ;
282 		error = VOP_READ(fvp, &uio, 0, cred);
283 
284 		if (error == 0) {
285 			uio.uio_iov = &iov;
286 			uio.uio_iovcnt = 1;
287 			iov.iov_base = buf;
288 			iov.iov_len = MAXBSIZE - uio.uio_resid;
289 			uio.uio_offset = offset;
290 			uio.uio_rw = UIO_WRITE;
291 			uio.uio_resid = iov.iov_len;
292 
293 			if (uio.uio_resid == 0)
294 				break;
295 
296 			do {
297 				error = VOP_WRITE(tvp, &uio, 0, cred);
298 			} while ((uio.uio_resid > 0) && (error == 0));
299 		}
300 
301 	} while (error == 0);
302 
303 	free(buf, M_TEMP);
304 	return (error);
305 }
306 
307 /*
308  * Create a shadow directory in the upper layer.
309  * The new vnode is returned locked.
310  *
311  * (um) points to the union mount structure for access to the
312  * the mounting process's credentials.
313  * (dvp) is the directory in which to create the shadow directory.
314  * it is unlocked on entry and exit.
315  * (cnp) is the componentname to be created.
316  * (vpp) is the returned newly created shadow directory, which
317  * is returned locked.
318  */
319 int
320 union_mkshadow(um, dvp, cnp, vpp)
321 	struct union_mount *um;
322 	struct vnode *dvp;
323 	struct componentname *cnp;
324 	struct vnode **vpp;
325 {
326 	int error;
327 	struct vattr va;
328 	struct proc *p = cnp->cn_proc;
329 	struct componentname cn;
330 
331 	/*
332 	 * policy: when creating the shadow directory in the
333 	 * upper layer, create it owned by the user who did
334 	 * the mount, group from parent directory, and mode
335 	 * 777 modified by umask (ie mostly identical to the
336 	 * mkdir syscall).  (jsp, kb)
337 	 */
338 
339 	/*
340 	 * A new componentname structure must be faked up because
341 	 * there is no way to know where the upper level cnp came
342 	 * from or what it is being used for.  This must duplicate
343 	 * some of the work done by NDINIT, some of the work done
344 	 * by namei, some of the work done by lookup and some of
345 	 * the work done by VOP_LOOKUP when given a CREATE flag.
346 	 * Conclusion: Horrible.
347 	 *
348 	 * The pathname buffer will be FREEed by VOP_MKDIR.
349 	 */
350 	cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
351 	bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
352 	cn.cn_pnbuf[cnp->cn_namelen] = '\0';
353 
354 	cn.cn_nameiop = CREATE;
355 	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
356 	cn.cn_proc = cnp->cn_proc;
357 	cn.cn_cred = um->um_cred;
358 	cn.cn_nameptr = cn.cn_pnbuf;
359 	cn.cn_namelen = cnp->cn_namelen;
360 	cn.cn_hash = cnp->cn_hash;
361 	cn.cn_consume = cnp->cn_consume;
362 
363 	VREF(dvp);
364 	if (error = relookup(dvp, vpp, &cn))
365 		return (error);
366 	vrele(dvp);
367 
368 	if (*vpp) {
369 		VOP_ABORTOP(dvp, &cn);
370 		VOP_UNLOCK(dvp);
371 		vrele(*vpp);
372 		*vpp = NULLVP;
373 		return (EEXIST);
374 	}
375 
376 	VATTR_NULL(&va);
377 	va.va_type = VDIR;
378 	va.va_mode = um->um_cmode;
379 
380 	/* LEASE_CHECK: dvp is locked */
381 	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
382 
383 	VREF(dvp);
384 	error = VOP_MKDIR(dvp, vpp, &cn, &va);
385 	return (error);
386 }
387 
388 /*
389  * union_vn_create: creates and opens a new shadow file
390  * on the upper union layer.  this function is similar
391  * in spirit to calling vn_open but it avoids calling namei().
392  * the problem with calling namei is that a) it locks too many
393  * things, and b) it doesn't start at the "right" directory,
394  * whereas relookup is told where to start.
395  */
396 int
397 union_vn_create(vpp, un, p)
398 	struct vnode **vpp;
399 	struct union_node *un;
400 	struct proc *p;
401 {
402 	struct vnode *vp;
403 	struct ucred *cred = p->p_ucred;
404 	struct vattr vat;
405 	struct vattr *vap = &vat;
406 	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
407 	int error;
408 	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
409 	char *cp;
410 	struct componentname cn;
411 
412 	*vpp = NULLVP;
413 
414 	/*
415 	 * Build a new componentname structure (for the same
416 	 * reasons outlines in union_mkshadow).
417 	 * The difference here is that the file is owned by
418 	 * the current user, rather than by the person who
419 	 * did the mount, since the current user needs to be
420 	 * able to write the file (that's why it is being
421 	 * copied in the first place).
422 	 */
423 	cn.cn_namelen = strlen(un->un_path);
424 	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK);
425 	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
426 	cn.cn_nameiop = CREATE;
427 	cn.cn_flags = (LOCKLEAF|LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
428 	cn.cn_proc = p;
429 	cn.cn_cred = p->p_ucred;
430 	cn.cn_nameptr = cn.cn_pnbuf;
431 	cn.cn_hash = un->un_hash;
432 	cn.cn_consume = 0;
433 
434 	VREF(un->un_dirvp);
435 	if (error = relookup(un->un_dirvp, &vp, &cn))
436 		return (error);
437 	vrele(un->un_dirvp);
438 
439 	if (vp == NULLVP) {
440 		/*
441 		 * Good - there was no race to create the file
442 		 * so go ahead and create it.  The permissions
443 		 * on the file will be 0666 modified by the
444 		 * current user's umask.  Access to the file, while
445 		 * it is unioned, will require access to the top *and*
446 		 * bottom files.  Access when not unioned will simply
447 		 * require access to the top-level file.
448 		 * TODO: confirm choice of access permissions.
449 		 */
450 		VATTR_NULL(vap);
451 		vap->va_type = VREG;
452 		vap->va_mode = cmode;
453 		LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
454 		if (error = VOP_CREATE(un->un_dirvp, &vp,
455 		    &cn, vap))
456 			return (error);
457 	} else {
458 		VOP_ABORTOP(un->un_dirvp, &cn);
459 		if (un->un_dirvp == vp)
460 			vrele(un->un_dirvp);
461 		else
462 			vput(vp);
463 		error = EEXIST;
464 		goto bad;
465 	}
466 
467 	if (vp->v_type != VREG) {
468 		error = EOPNOTSUPP;
469 		goto bad;
470 	}
471 
472 	VOP_UNLOCK(vp);				/* XXX */
473 	LEASE_CHECK(vp, p, cred, LEASE_WRITE);
474 	VOP_LOCK(vp);				/* XXX */
475 	VATTR_NULL(vap);
476 	vap->va_size = 0;
477 	if (error = VOP_SETATTR(vp, vap, cred, p))
478 		goto bad;
479 
480 	if (error = VOP_OPEN(vp, fmode, cred, p))
481 		goto bad;
482 
483 	vp->v_writecount++;
484 	*vpp = vp;
485 	return (0);
486 bad:
487 	vput(vp);
488 	return (error);
489 }
490 
491 int
492 union_vn_close(vp, fmode, cred, p)
493 	struct vnode *vp;
494 	int fmode;
495 	struct ucred *cred;
496 	struct proc *p;
497 {
498 	if (fmode & FWRITE)
499 		--vp->v_writecount;
500 	return (VOP_CLOSE(vp, fmode));
501 }
502 
503 void
504 union_removed_upper(un)
505 	struct union_node *un;
506 {
507 	vrele(un->un_uppervp);
508 	un->un_uppervp = NULLVP;
509 }
510 
511 struct vnode *
512 union_lowervp(vp)
513 	struct vnode *vp;
514 {
515 	struct union_node *un = VTOUNION(vp);
516 
517 	if (un->un_lowervp && (vp->v_type == un->un_lowervp->v_type)) {
518 		if (vget(un->un_lowervp, 0))
519 			return (NULLVP);
520 	}
521 
522 	return (un->un_lowervp);
523 }
524