xref: /original-bsd/sys/miscfs/nullfs/null_vnops.c (revision 0ac4996f)
1 /*
2  * Copyright (c) 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * John Heidemann of the UCLA Ficus project.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)null_vnops.c	8.4 (Berkeley) 05/14/95
11  *
12  * Ancestors:
13  *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
14  *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
15  *	...and...
16  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
17  */
18 
19 /*
20  * Null Layer
21  *
22  * (See mount_null(8) for more information.)
23  *
24  * The null layer duplicates a portion of the file system
25  * name space under a new name.  In this respect, it is
26  * similar to the loopback file system.  It differs from
27  * the loopback fs in two respects:  it is implemented using
28  * stackable layers techniques, and its "null-node"s stack above
29  * all lower-layer vnodes, not just over directory vnodes.
30  *
31  * The null layer has two purposes.  First, it serves as a demonstration
32  * of layering by providing a layer which does nothing.  (It actually
33  * does everything the loopback file system does, which is slightly
34  * more than nothing.)  Second, the null layer can serve as a prototype
35  * layer.  Since it provides all necessary layer framework,
36  * new file system layers can be created very easily by starting
37  * with a null layer.
38  *
39  * The remainder of this man page examines the null layer as a basis
40  * for constructing new layers.
41  *
42  *
43  * INSTANTIATING NEW NULL LAYERS
44  *
45  * New null layers are created with mount_null(8).
46  * Mount_null(8) takes two arguments, the pathname
47  * of the lower vfs (target-pn) and the pathname where the null
48  * layer will appear in the namespace (alias-pn).  After
49  * the null layer is put into place, the contents
50  * of target-pn subtree will be aliased under alias-pn.
51  *
52  *
53  * OPERATION OF A NULL LAYER
54  *
55  * The null layer is the minimum file system layer,
56  * simply bypassing all possible operations to the lower layer
57  * for processing there.  The majority of its activity centers
58  * on the bypass routine, through which nearly all vnode operations
59  * pass.
60  *
61  * The bypass routine accepts arbitrary vnode operations for
62  * handling by the lower layer.  It begins by examining vnode
63  * operation arguments and replacing any null-nodes by their
64  * lower-layer equivalents.  It then invokes the operation
65  * on the lower layer.  Finally, it replaces the null-nodes
66  * in the arguments and, if a vnode is returned by the operation,
67  * stacks a null-node on top of the returned vnode.
68  *
69  * Although bypass handles most operations,
70  * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
71  * Vop_getattr must change the fsid being returned.
72  * Vop_inactive and vop_reclaim are not bypassed so that
73  * they can handle freeing null-layer specific data.
74  * Vop_print is not bypassed to avoid excessive debugging
75  * information.
76  *
77  *
78  * INSTANTIATING VNODE STACKS
79  *
80  * Mounting associates the null layer with a lower layer,
81  * effectively stacking two VFSes.  Vnode stacks are instead
82  * created on demand as files are accessed.
83  *
84  * The initial mount creates a single vnode stack for the
85  * root of the new null layer.  All other vnode stacks
86  * are created as a result of vnode operations on
87  * this or other null vnode stacks.
88  *
89  * New vnode stacks come into existence as a result of
90  * an operation which returns a vnode.
91  * The bypass routine stacks a null-node above the new
92  * vnode before returning it to the caller.
93  *
94  * For example, imagine mounting a null layer with
95  * "mount_null /usr/include /dev/layer/null".
96  * Changing directory to /dev/layer/null will assign
97  * the root null-node (which was created when the null layer was mounted).
98  * Now consider opening "sys".  A vop_lookup would be
99  * done on the root null-node.  This operation would bypass through
100  * to the lower layer which would return a vnode representing
101  * the UFS "sys".  Null_bypass then builds a null-node
102  * aliasing the UFS "sys" and returns this to the caller.
103  * Later operations on the null-node "sys" will repeat this
104  * process when constructing other vnode stacks.
105  *
106  *
107  * CREATING OTHER FILE SYSTEM LAYERS
108  *
109  * One of the easiest ways to construct new file system layers is to make
110  * a copy of the null layer, rename all files and variables, and
111  * then begin modifying the copy.  Sed can be used to easily rename
112  * all variables.
113  *
114  * The umap layer is an example of a layer descended from the
115  * null layer.
116  *
117  *
118  * INVOKING OPERATIONS ON LOWER LAYERS
119  *
120  * There are two techniques to invoke operations on a lower layer
121  * when the operation cannot be completely bypassed.  Each method
122  * is appropriate in different situations.  In both cases,
123  * it is the responsibility of the aliasing layer to make
124  * the operation arguments "correct" for the lower layer
125  * by mapping any vnode arguments to the lower layer.
126  *
127  * The first approach is to call the aliasing layer's bypass routine.
128  * This method is most suitable when you wish to invoke the operation
129  * currently being handled on the lower layer.  It has the advantage
130  * that the bypass routine already must do argument mapping.
131  * An example of this is null_getattr in the null layer.
132  *
133  * A second approach is to directly invoke vnode operations on
134  * the lower layer with the VOP_OPERATIONNAME interface.
135  * The advantage of this method is that it is easy to invoke
136  * arbitrary operations on the lower layer.  The disadvantage
137  * is that vnode arguments must be manually mapped.
138  *
139  */
140 
141 #include <sys/param.h>
142 #include <sys/systm.h>
143 #include <sys/proc.h>
144 #include <sys/time.h>
145 #include <sys/types.h>
146 #include <sys/vnode.h>
147 #include <sys/mount.h>
148 #include <sys/namei.h>
149 #include <sys/malloc.h>
150 #include <sys/buf.h>
151 #include <miscfs/nullfs/null.h>
152 
153 
/*
 * Debugging switch: when non-zero, null_bypass() printf's the name of
 * every vnode operation it is asked to forward.  Off by default.
 */
int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
155 
156 /*
157  * This is the 10-Apr-92 bypass routine.
158  *    This version has been optimized for speed, throwing away some
159  * safety checks.  It should still always work, but it's not as
160  * robust to programmer errors.
161  *    Define SAFETY to include some error checking code.
162  *
163  * In general, we map all vnodes going down and unmap them on the way back.
164  * As an exception to this, vnodes can be marked "unmapped" by setting
165  * the Nth bit in operation's vdesc_flags.
166  *
167  * Also, some BSD vnode operations have the side effect of vrele'ing
168  * their arguments.  With stacking, the reference counts are held
169  * by the upper node, not the lower one, so we must handle these
170  * side-effects here.  This is not of concern in Sun-derived systems
171  * since there are no such side-effects.
172  *
173  * This makes the following assumptions:
174  * - only one returned vpp
175  * - no INOUT vpp's (Sun's vop_open has one of these)
176  * - the vnode operation vector of the first vnode should be used
177  *   to determine what implementation of the op should be invoked
178  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
179  *   problems on rmdir'ing mount points and renaming?)
180  */
181 int
182 null_bypass(ap)
183 	struct vop_generic_args /* {
184 		struct vnodeop_desc *a_desc;
185 		<other random data follows, presumably>
186 	} */ *ap;
187 {
188 	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
189 	register struct vnode **this_vp_p;
190 	int error;
191 	struct vnode *old_vps[VDESC_MAX_VPS];
192 	struct vnode **vps_p[VDESC_MAX_VPS];
193 	struct vnode ***vppp;
194 	struct vnodeop_desc *descp = ap->a_desc;
195 	int reles, i;
196 
197 	if (null_bug_bypass)
198 		printf ("null_bypass: %s\n", descp->vdesc_name);
199 
200 #ifdef SAFETY
201 	/*
202 	 * We require at least one vp.
203 	 */
204 	if (descp->vdesc_vp_offsets == NULL ||
205 	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
206 		panic ("null_bypass: no vp's in map.\n");
207 #endif
208 
209 	/*
210 	 * Map the vnodes going in.
211 	 * Later, we'll invoke the operation based on
212 	 * the first mapped vnode's operation vector.
213 	 */
214 	reles = descp->vdesc_flags;
215 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
216 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
217 			break;   /* bail out at end of list */
218 		vps_p[i] = this_vp_p =
219 			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
220 		/*
221 		 * We're not guaranteed that any but the first vnode
222 		 * are of our type.  Check for and don't map any
223 		 * that aren't.  (We must always map first vp or vclean fails.)
224 		 */
225 		if (i && (*this_vp_p == NULL ||
226 		    (*this_vp_p)->v_op != null_vnodeop_p)) {
227 			old_vps[i] = NULL;
228 		} else {
229 			old_vps[i] = *this_vp_p;
230 			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
231 			/*
232 			 * XXX - Several operations have the side effect
233 			 * of vrele'ing their vp's.  We must account for
234 			 * that.  (This should go away in the future.)
235 			 */
236 			if (reles & 1)
237 				VREF(*this_vp_p);
238 		}
239 
240 	}
241 
242 	/*
243 	 * Call the operation on the lower layer
244 	 * with the modified argument structure.
245 	 */
246 	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
247 
248 	/*
249 	 * Maintain the illusion of call-by-value
250 	 * by restoring vnodes in the argument structure
251 	 * to their original value.
252 	 */
253 	reles = descp->vdesc_flags;
254 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
255 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
256 			break;   /* bail out at end of list */
257 		if (old_vps[i]) {
258 			*(vps_p[i]) = old_vps[i];
259 			if (reles & 1)
260 				vrele(*(vps_p[i]));
261 		}
262 	}
263 
264 	/*
265 	 * Map the possible out-going vpp
266 	 * (Assumes that the lower layer always returns
267 	 * a VREF'ed vpp unless it gets an error.)
268 	 */
269 	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
270 	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
271 	    !error) {
272 		/*
273 		 * XXX - even though some ops have vpp returned vp's,
274 		 * several ops actually vrele this before returning.
275 		 * We must avoid these ops.
276 		 * (This should go away when these ops are regularized.)
277 		 */
278 		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
279 			goto out;
280 		vppp = VOPARG_OFFSETTO(struct vnode***,
281 				 descp->vdesc_vpp_offset,ap);
282 		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
283 	}
284 
285  out:
286 	return (error);
287 }
288 
289 /*
290  *  We handle getattr only to change the fsid.
291  */
292 int
293 null_getattr(ap)
294 	struct vop_getattr_args /* {
295 		struct vnode *a_vp;
296 		struct vattr *a_vap;
297 		struct ucred *a_cred;
298 		struct proc *a_p;
299 	} */ *ap;
300 {
301 	int error;
302 	if (error = null_bypass(ap))
303 		return (error);
304 	/* Requires that arguments be restored. */
305 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
306 	return (0);
307 }
308 
309 /*
310  * We need to verify that we are not being vgoned and then clear
311  * the interlock flag as it applies only to our vnode, not the
312  * vnodes below us on the stack.
313  */
314 int
315 null_lock(ap)
316 	struct vop_lock_args *ap;
317 {
318 	struct vnode *vp = ap->a_vp;
319 	int error;
320 
321 	if ((ap->a_flags & LK_INTERLOCK) == 0)
322 		simple_lock(&vp->v_interlock);
323 	if (vp->v_flag & VXLOCK) {
324 		vp->v_flag |= VXWANT;
325 		simple_unlock(&vp->v_interlock);
326 		tsleep((caddr_t)vp, PINOD, "unionlk1", 0);
327 		return (ENOENT);
328 	}
329 	simple_unlock(&vp->v_interlock);
330 	ap->a_flags &= ~LK_INTERLOCK;
331 	if (error = null_bypass(ap))
332 		return (error);
333 	return (0);
334 }
335 
/*
 * null_inactive -- deliberately a no-op, and deliberately NOT bypassed.
 *
 * The lower vnode stays referenced until null_reclaim() runs, so that
 * in the meantime our null_node remains in the cache and can be reused.
 *
 * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
 * trying to reactivate it with capabilities (v_id), like the name
 * lookup cache code does.  That's too much work for now.
 *
 * Always returns 0.
 */
int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}
356 
357 int
358 null_reclaim(ap)
359 	struct vop_reclaim_args /* {
360 		struct vnode *a_vp;
361 		struct proc *a_p;
362 	} */ *ap;
363 {
364 	struct vnode *vp = ap->a_vp;
365 	struct null_node *xp = VTONULL(vp);
366 	struct vnode *lowervp = xp->null_lowervp;
367 
368 	/*
369 	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
370 	 * so we can't call VOPs on ourself.
371 	 */
372 	/* After this assignment, this node will not be re-used. */
373 	xp->null_lowervp = NULL;
374 	LIST_REMOVE(xp, null_hash);
375 	FREE(vp->v_data, M_TEMP);
376 	vp->v_data = NULL;
377 	vrele (lowervp);
378 	return (0);
379 }
380 
381 int
382 null_print(ap)
383 	struct vop_print_args /* {
384 		struct vnode *a_vp;
385 	} */ *ap;
386 {
387 	register struct vnode *vp = ap->a_vp;
388 	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
389 	return (0);
390 }
391 
392 /*
393  * XXX - vop_strategy must be hand coded because it has no
394  * vnode in its arguments.
395  * This goes away with a merged VM/buffer cache.
396  */
397 int
398 null_strategy(ap)
399 	struct vop_strategy_args /* {
400 		struct buf *a_bp;
401 	} */ *ap;
402 {
403 	struct buf *bp = ap->a_bp;
404 	int error;
405 	struct vnode *savedvp;
406 
407 	savedvp = bp->b_vp;
408 	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
409 
410 	error = VOP_STRATEGY(bp);
411 
412 	bp->b_vp = savedvp;
413 
414 	return (error);
415 }
416 
417 /*
418  * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
419  * vnode in its arguments.
420  * This goes away with a merged VM/buffer cache.
421  */
422 int
423 null_bwrite(ap)
424 	struct vop_bwrite_args /* {
425 		struct buf *a_bp;
426 	} */ *ap;
427 {
428 	struct buf *bp = ap->a_bp;
429 	int error;
430 	struct vnode *savedvp;
431 
432 	savedvp = bp->b_vp;
433 	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
434 
435 	error = VOP_BWRITE(bp);
436 
437 	bp->b_vp = savedvp;
438 
439 	return (error);
440 }
441 
442 /*
443  * Global vfs data structures
444  */
445 int (**null_vnodeop_p)();
446 struct vnodeopv_entry_desc null_vnodeop_entries[] = {
447 	{ &vop_default_desc, null_bypass },
448 
449 	{ &vop_getattr_desc, null_getattr },
450 	{ &vop_lock_desc, null_lock },
451 	{ &vop_inactive_desc, null_inactive },
452 	{ &vop_reclaim_desc, null_reclaim },
453 	{ &vop_print_desc, null_print },
454 
455 	{ &vop_strategy_desc, null_strategy },
456 	{ &vop_bwrite_desc, null_bwrite },
457 
458 	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
459 };
460 struct vnodeopv_desc null_vnodeop_opv_desc =
461 	{ &null_vnodeop_p, null_vnodeop_entries };
462