xref: /original-bsd/sys/miscfs/union/union_subr.c (revision 4f59f7de)
1 /*
2  * Copyright (c) 1994 Jan-Simon Pendry
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Jan-Simon Pendry.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)union_subr.c	1.7 (Berkeley) 02/07/94
12  */
13 
14 #include <sys/param.h>
15 #include <sys/systm.h>
16 #include <sys/time.h>
17 #include <sys/kernel.h>
18 #include <sys/vnode.h>
19 #include <sys/namei.h>
20 #include <sys/malloc.h>
21 #include <sys/file.h>
22 #include <sys/filedesc.h>
23 #include "union.h" /*<miscfs/union/union.h>*/
24 
25 #ifdef DIAGNOSTIC
26 #include <sys/proc.h>
27 #endif
28 
/* head of the singly-linked list holding every union_node */
static struct union_node *unhead;
/* UN_LOCKED / UN_WANT flag bits guarding the list around getnewvnode() */
static int unvplock;

/*
 * Reset the union node list and its lock word.
 * Always returns 0.
 */
int
union_init()
{

	unhead = 0;
	unvplock = 0;
	return (0);
}
39 
40 /*
41  * allocate a union_node/vnode pair.  the vnode is
42  * referenced and locked.  the new vnode is returned
43  * via (vpp).  (mp) is the mountpoint of the union filesystem,
44  * (dvp) is the parent directory where the upper layer object
45  * should exist (but doesn't) and (cnp) is the componentname
46  * information which is partially copied to allow the upper
47  * layer object to be created at a later time.  (uppervp)
48  * and (lowervp) reference the upper and lower layer objects
49  * being mapped.  either, but not both, can be nil.
50  * the reference is either maintained in the new union_node
51  * object which is allocated, or they are vrele'd.
52  *
53  * all union_nodes are maintained on a singly-linked
54  * list.  new nodes are only allocated when they cannot
55  * be found on this list.  entries on the list are
56  * removed when the vfs reclaim entry is called.
57  *
58  * a single lock is kept for the entire list.  this is
59  * needed because the getnewvnode() function can block
60  * waiting for a vnode to become free, in which case there
61  * may be more than one process trying to get the same
62  * vnode.  this lock is only taken if we are going to
63  * call getnewvnode, since the kernel itself is single-threaded.
64  *
65  * if an entry is found on the list, then call vget() to
66  * take a reference.  this is done because there may be
67  * zero references to it and so it needs to removed from
68  * the vnode free list.
69  */
70 int
71 union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
72 	struct vnode **vpp;
73 	struct mount *mp;
74 	struct vnode *undvp;
75 	struct vnode *dvp;		/* may be null */
76 	struct componentname *cnp;	/* may be null */
77 	struct vnode *uppervp;		/* may be null */
78 	struct vnode *lowervp;		/* may be null */
79 {
80 	int error;
81 	struct union_node *un;
82 	struct union_node **pp;
83 	struct vnode *xlowervp = 0;
84 
85 	if (uppervp == 0 && lowervp == 0)
86 		panic("union: unidentifiable allocation");
87 
88 	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
89 		xlowervp = lowervp;
90 		lowervp = 0;
91 	}
92 
93 loop:
94 	for (un = unhead; un != 0; un = un->un_next) {
95 		if ((un->un_lowervp == lowervp ||
96 		     un->un_lowervp == 0) &&
97 		    (un->un_uppervp == uppervp ||
98 		     un->un_uppervp == 0) &&
99 		    (UNIONTOV(un)->v_mount == mp)) {
100 			if (vget(UNIONTOV(un), 0))
101 				goto loop;
102 			if (UNIONTOV(un) != undvp)
103 				VOP_LOCK(UNIONTOV(un));
104 
105 			/*
106 			 * Save information about the upper layer.
107 			 */
108 			if (uppervp != un->un_uppervp) {
109 				if (un->un_uppervp)
110 					vrele(un->un_uppervp);
111 				un->un_uppervp = uppervp;
112 			} else if (uppervp) {
113 				vrele(uppervp);
114 			}
115 
116 			/*
117 			 * Save information about the lower layer.
118 			 * This needs to keep track of pathname
119 			 * and directory information which union_vn_create
120 			 * might need.
121 			 */
122 			if (lowervp != un->un_lowervp) {
123 				if (un->un_lowervp) {
124 					vrele(un->un_lowervp);
125 					free(un->un_path, M_TEMP);
126 					vrele(un->un_dirvp);
127 				}
128 				un->un_lowervp = lowervp;
129 				if (cnp && (lowervp != NULLVP) &&
130 				    (lowervp->v_type == VREG)) {
131 					un->un_hash = cnp->cn_hash;
132 					un->un_path = malloc(cnp->cn_namelen+1,
133 							M_TEMP, M_WAITOK);
134 					bcopy(cnp->cn_nameptr, un->un_path,
135 							cnp->cn_namelen);
136 					un->un_path[cnp->cn_namelen] = '\0';
137 					VREF(dvp);
138 					un->un_dirvp = dvp;
139 				}
140 			} else if (lowervp) {
141 				vrele(lowervp);
142 			}
143 			*vpp = UNIONTOV(un);
144 			return (0);
145 		}
146 	}
147 
148 	/*
149 	 * otherwise lock the vp list while we call getnewvnode
150 	 * since that can block.
151 	 */
152 	if (unvplock & UN_LOCKED) {
153 		unvplock |= UN_WANT;
154 		sleep((caddr_t) &unvplock, PINOD);
155 		goto loop;
156 	}
157 	unvplock |= UN_LOCKED;
158 
159 	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
160 	if (error)
161 		goto out;
162 
163 	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
164 		M_TEMP, M_WAITOK);
165 
166 	if (uppervp)
167 		(*vpp)->v_type = uppervp->v_type;
168 	else
169 		(*vpp)->v_type = lowervp->v_type;
170 	un = VTOUNION(*vpp);
171 	un->un_vnode = *vpp;
172 	un->un_next = 0;
173 	un->un_uppervp = uppervp;
174 	un->un_lowervp = lowervp;
175 	un->un_open = 0;
176 	un->un_flags = 0;
177 	if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
178 		un->un_hash = cnp->cn_hash;
179 		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
180 		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
181 		un->un_path[cnp->cn_namelen] = '\0';
182 		VREF(dvp);
183 		un->un_dirvp = dvp;
184 	} else {
185 		un->un_hash = 0;
186 		un->un_path = 0;
187 		un->un_dirvp = 0;
188 	}
189 
190 	/* add to union vnode list */
191 	for (pp = &unhead; *pp; pp = &(*pp)->un_next)
192 		continue;
193 	*pp = un;
194 
195 	un->un_flags |= UN_LOCKED;
196 
197 #ifdef DIAGNOSTIC
198 	un->un_pid = curproc->p_pid;
199 #endif
200 
201 	if (xlowervp)
202 		vrele(xlowervp);
203 
204 out:
205 	unvplock &= ~UN_LOCKED;
206 
207 	if (unvplock & UN_WANT) {
208 		unvplock &= ~UN_WANT;
209 		wakeup((caddr_t) &unvplock);
210 	}
211 
212 	return (error);
213 }
214 
215 int
216 union_freevp(vp)
217 	struct vnode *vp;
218 {
219 	struct union_node **unpp;
220 	struct union_node *un = VTOUNION(vp);
221 
222 	for (unpp = &unhead; *unpp != 0; unpp = &(*unpp)->un_next) {
223 		if (*unpp == un) {
224 			*unpp = un->un_next;
225 			break;
226 		}
227 	}
228 
229 	FREE(vp->v_data, M_TEMP);
230 	vp->v_data = 0;
231 	return (0);
232 }
233 
234 /*
235  * copyfile.  copy the vnode (fvp) to the vnode (tvp)
236  * using a sequence of reads and writes.  both (fvp)
237  * and (tvp) are locked on entry and exit.
238  */
239 int
240 union_copyfile(p, cred, fvp, tvp)
241 	struct proc *p;
242 	struct ucred *cred;
243 	struct vnode *fvp;
244 	struct vnode *tvp;
245 {
246 	char *buf;
247 	struct uio uio;
248 	struct iovec iov;
249 	int error = 0;
250 
251 	/*
252 	 * strategy:
253 	 * allocate a buffer of size MAXBSIZE.
254 	 * loop doing reads and writes, keeping track
255 	 * of the current uio offset.
256 	 * give up at the first sign of trouble.
257 	 */
258 
259 	uio.uio_procp = p;
260 	uio.uio_segflg = UIO_SYSSPACE;
261 	uio.uio_offset = 0;
262 
263 	VOP_UNLOCK(fvp);				/* XXX */
264 	LEASE_CHECK(fvp, p, cred, LEASE_READ);
265 	VOP_LOCK(fvp);					/* XXX */
266 	VOP_UNLOCK(tvp);				/* XXX */
267 	LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
268 	VOP_LOCK(tvp);					/* XXX */
269 
270 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
271 
272 	/* ugly loop follows... */
273 	do {
274 		off_t offset = uio.uio_offset;
275 
276 		uio.uio_iov = &iov;
277 		uio.uio_iovcnt = 1;
278 		iov.iov_base = buf;
279 		iov.iov_len = MAXBSIZE;
280 		uio.uio_resid = iov.iov_len;
281 		uio.uio_rw = UIO_READ;
282 		error = VOP_READ(fvp, &uio, 0, cred);
283 
284 		if (error == 0) {
285 			uio.uio_iov = &iov;
286 			uio.uio_iovcnt = 1;
287 			iov.iov_base = buf;
288 			iov.iov_len = MAXBSIZE - uio.uio_resid;
289 			uio.uio_offset = offset;
290 			uio.uio_rw = UIO_WRITE;
291 			uio.uio_resid = iov.iov_len;
292 
293 			if (uio.uio_resid == 0)
294 				break;
295 
296 			do {
297 				error = VOP_WRITE(tvp, &uio, 0, cred);
298 			} while ((uio.uio_resid > 0) && (error == 0));
299 		}
300 
301 	} while (error == 0);
302 
303 	free(buf, M_TEMP);
304 	return (error);
305 }
306 
307 /*
308  * Create a shadow directory in the upper layer.
309  * The new vnode is returned locked.
310  *
311  * (um) points to the union mount structure for access to the
312  * the mounting process's credentials.
313  * (dvp) is the directory in which to create the shadow directory.
314  * it is unlocked on entry and exit.
315  * (cnp) is the componentname to be created.
316  * (vpp) is the returned newly created shadow directory, which
317  * is returned locked.
318  */
319 int
320 union_mkshadow(um, dvp, cnp, vpp)
321 	struct union_mount *um;
322 	struct vnode *dvp;
323 	struct componentname *cnp;
324 	struct vnode **vpp;
325 {
326 	int error;
327 	struct vattr va;
328 	struct proc *p = cnp->cn_proc;
329 	struct componentname cn;
330 
331 	/*
332 	 * policy: when creating the shadow directory in the
333 	 * upper layer, create it owned by the current user,
334 	 * group from parent directory, and mode 777 modified
335 	 * by umask (ie mostly identical to the mkdir syscall).
336 	 * (jsp, kb)
337 	 * TODO: create the directory owned by the user who
338 	 * did the mount (um->um_cred).
339 	 */
340 
341 	/*
342 	 * A new componentname structure must be faked up because
343 	 * there is no way to know where the upper level cnp came
344 	 * from or what it is being used for.  This must duplicate
345 	 * some of the work done by NDINIT, some of the work done
346 	 * by namei, some of the work done by lookup and some of
347 	 * the work done by VOP_LOOKUP when given a CREATE flag.
348 	 * Conclusion: Horrible.
349 	 *
350 	 * The pathname buffer will be FREEed by VOP_MKDIR.
351 	 */
352 	cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
353 	bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
354 	cn.cn_pnbuf[cnp->cn_namelen] = '\0';
355 
356 	cn.cn_nameiop = CREATE;
357 	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
358 	cn.cn_proc = cnp->cn_proc;
359 	cn.cn_cred = cnp->cn_cred;
360 	cn.cn_nameptr = cn.cn_pnbuf;
361 	cn.cn_namelen = cnp->cn_namelen;
362 	cn.cn_hash = cnp->cn_hash;
363 	cn.cn_consume = cnp->cn_consume;
364 
365 	VREF(dvp);
366 	if (error = relookup(dvp, vpp, &cn))
367 		return (error);
368 	vrele(dvp);
369 
370 	if (*vpp) {
371 		VOP_ABORTOP(dvp, &cn);
372 		VOP_UNLOCK(dvp);
373 		vrele(*vpp);
374 		*vpp = NULLVP;
375 		return (EEXIST);
376 	}
377 
378 	VATTR_NULL(&va);
379 	va.va_type = VDIR;
380 	va.va_mode = UN_DIRMODE & ~p->p_fd->fd_cmask;
381 
382 	/* LEASE_CHECK: dvp is locked */
383 	LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
384 
385 	VREF(dvp);
386 	error = VOP_MKDIR(dvp, vpp, &cn, &va);
387 	return (error);
388 }
389 
390 /*
391  * union_vn_create: creates and opens a new shadow file
392  * on the upper union layer.  this function is similar
393  * in spirit to calling vn_open but it avoids calling namei().
394  * the problem with calling namei is that a) it locks too many
395  * things, and b) it doesn't start at the "right" directory,
396  * whereas relookup is told where to start.
397  */
398 int
399 union_vn_create(vpp, un, p)
400 	struct vnode **vpp;
401 	struct union_node *un;
402 	struct proc *p;
403 {
404 	struct vnode *vp;
405 	struct ucred *cred = p->p_ucred;
406 	struct vattr vat;
407 	struct vattr *vap = &vat;
408 	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
409 	int error;
410 	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
411 	char *cp;
412 	struct componentname cn;
413 
414 	*vpp = NULLVP;
415 
416 	cn.cn_namelen = strlen(un->un_path);
417 	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK);
418 	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
419 	cn.cn_nameiop = CREATE;
420 	cn.cn_flags = (LOCKLEAF|LOCKPARENT|HASBUF|SAVENAME|ISLASTCN);
421 	cn.cn_proc = p;
422 	cn.cn_cred = p->p_ucred;
423 	cn.cn_nameptr = cn.cn_pnbuf;
424 	cn.cn_hash = un->un_hash;
425 	cn.cn_consume = 0;
426 
427 	VREF(un->un_dirvp);
428 	if (error = relookup(un->un_dirvp, &vp, &cn))
429 		return (error);
430 	vrele(un->un_dirvp);
431 
432 	if (vp == NULLVP) {
433 		VATTR_NULL(vap);
434 		vap->va_type = VREG;
435 		vap->va_mode = cmode;
436 		LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
437 		if (error = VOP_CREATE(un->un_dirvp, &vp,
438 		    &cn, vap))
439 			return (error);
440 	} else {
441 		VOP_ABORTOP(un->un_dirvp, &cn);
442 		if (un->un_dirvp == vp)
443 			vrele(un->un_dirvp);
444 		else
445 			vput(vp);
446 		error = EEXIST;
447 		goto bad;
448 	}
449 
450 	if (vp->v_type != VREG) {
451 		error = EOPNOTSUPP;
452 		goto bad;
453 	}
454 
455 	VOP_UNLOCK(vp);				/* XXX */
456 	LEASE_CHECK(vp, p, cred, LEASE_WRITE);
457 	VOP_LOCK(vp);				/* XXX */
458 	VATTR_NULL(vap);
459 	vap->va_size = 0;
460 	if (error = VOP_SETATTR(vp, vap, cred, p))
461 		goto bad;
462 
463 	if (error = VOP_OPEN(vp, fmode, cred, p))
464 		goto bad;
465 
466 	vp->v_writecount++;
467 	*vpp = vp;
468 	return (0);
469 bad:
470 	vput(vp);
471 	return (error);
472 }
473 
474 int
475 union_vn_close(vp, fmode)
476 	struct vnode *vp;
477 	int fmode;
478 {
479 	if (fmode & FWRITE)
480 		--vp->v_writecount;
481 	return (VOP_CLOSE(vp, fmode));
482 }
483 
484 void
485 union_removed_upper(un)
486 	struct union_node *un;
487 {
488 	vrele(un->un_uppervp);
489 	un->un_uppervp = NULLVP;
490 }
491