xref: /original-bsd/sys/miscfs/nullfs/null_vnops.c (revision 52265cc0)
11347b1ecSjohnh /*
2a1c1e78dSbostic  * Copyright (c) 1992, 1993
3a1c1e78dSbostic  *	The Regents of the University of California.  All rights reserved.
41347b1ecSjohnh  *
5d5d79ae3Sheideman  * This code is derived from software contributed to Berkeley by
6d5d79ae3Sheideman  * John Heidemann of the UCLA Ficus project.
71347b1ecSjohnh  *
81347b1ecSjohnh  * %sccs.include.redist.c%
91347b1ecSjohnh  *
10*52265cc0Smckusick  *	@(#)null_vnops.c	8.6 (Berkeley) 05/27/95
111347b1ecSjohnh  *
12a49f5afcSjohnh  * Ancestors:
13a49f5afcSjohnh  *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
141347b1ecSjohnh  *	$Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
15a49f5afcSjohnh  *	...and...
16a49f5afcSjohnh  *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
171347b1ecSjohnh  */
181347b1ecSjohnh 
191347b1ecSjohnh /*
20a49f5afcSjohnh  * Null Layer
21a49f5afcSjohnh  *
22d5d79ae3Sheideman  * (See mount_null(8) for more information.)
23d5d79ae3Sheideman  *
24a49f5afcSjohnh  * The null layer duplicates a portion of the file system
25a49f5afcSjohnh  * name space under a new name.  In this respect, it is
26a49f5afcSjohnh  * similar to the loopback file system.  It differs from
27a49f5afcSjohnh  * the loopback fs in two respects:  it is implemented using
28d5d79ae3Sheideman  * a stackable layers technique, and its "null-node"s stack above
29a49f5afcSjohnh  * all lower-layer vnodes, not just over directory vnodes.
30a49f5afcSjohnh  *
31d5d79ae3Sheideman  * The null layer has two purposes.  First, it serves as a demonstration
32d5d79ae3Sheideman  * of layering by providing a layer which does nothing.  (It actually
33d5d79ae3Sheideman  * does everything the loopback file system does, which is slightly
34d5d79ae3Sheideman  * more than nothing.)  Second, the null layer can serve as a prototype
35d5d79ae3Sheideman  * layer.  Since it provides all necessary layer framework,
36d5d79ae3Sheideman  * new file system layers can be created very easily by starting
37d5d79ae3Sheideman  * with a null layer.
38d5d79ae3Sheideman  *
39d5d79ae3Sheideman  * The remainder of this man page examines the null layer as a basis
40d5d79ae3Sheideman  * for constructing new layers.
41d5d79ae3Sheideman  *
42d5d79ae3Sheideman  *
43d5d79ae3Sheideman  * INSTANTIATING NEW NULL LAYERS
44d5d79ae3Sheideman  *
45d5d79ae3Sheideman  * New null layers are created with mount_null(8).
46d5d79ae3Sheideman  * Mount_null(8) takes two arguments, the pathname
47d5d79ae3Sheideman  * of the lower vfs (target-pn) and the pathname where the null
48d5d79ae3Sheideman  * layer will appear in the namespace (alias-pn).  After
49d5d79ae3Sheideman  * the null layer is put into place, the contents
50d5d79ae3Sheideman  * of target-pn subtree will be aliased under alias-pn.
51d5d79ae3Sheideman  *
52d5d79ae3Sheideman  *
53d5d79ae3Sheideman  * OPERATION OF A NULL LAYER
54d5d79ae3Sheideman  *
55a49f5afcSjohnh  * The null layer is the minimum file system layer,
56a49f5afcSjohnh  * simply bypassing all possible operations to the lower layer
57d5d79ae3Sheideman  * for processing there.  The majority of its activity centers
58d5d79ae3Sheideman  * on the bypass routine, through which nearly all vnode operations
59d5d79ae3Sheideman  * pass.
60a49f5afcSjohnh  *
61d5d79ae3Sheideman  * The bypass routine accepts arbitrary vnode operations for
62d5d79ae3Sheideman  * handling by the lower layer.  It begins by examining vnode
63d5d79ae3Sheideman  * operation arguments and replacing any null-nodes by their
64d5d79ae3Sheideman  * lower-layer equivalents.  It then invokes the operation
65d5d79ae3Sheideman  * on the lower layer.  Finally, it replaces the null-nodes
66d5d79ae3Sheideman  * in the arguments and, if a vnode is returned by the operation,
67d5d79ae3Sheideman  * stacks a null-node on top of the returned vnode.
68d5d79ae3Sheideman  *
698cb45344Smckusick  * Although bypass handles most operations, vop_getattr, vop_lock,
708cb45344Smckusick  * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
718cb45344Smckusick  * bypassed. Vop_getattr must change the fsid being returned.
728cb45344Smckusick  * Vop_lock and vop_unlock must handle any locking for the
738cb45344Smckusick  * current vnode as well as pass the lock request down.
74d5d79ae3Sheideman  * Vop_inactive and vop_reclaim are not bypassed so that
758cb45344Smckusick  * they can handle freeing null-layer specific data. Vop_print
768cb45344Smckusick  * is not bypassed to avoid excessive debugging information.
778cb45344Smckusick  * Also, certain vnode operations change the locking state within
788cb45344Smckusick  * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
798cb45344Smckusick  * and symlink). Ideally these operations should not change the
808cb45344Smckusick  * lock state, but should be changed to let the caller of the
818cb45344Smckusick  * function unlock them. Otherwise all intermediate vnode layers
828cb45344Smckusick  * (such as union, umapfs, etc) must catch these functions to do
838cb45344Smckusick  * the necessary locking at their layer.
84a49f5afcSjohnh  *
85a69b0080Sheideman  *
86d5d79ae3Sheideman  * INSTANTIATING VNODE STACKS
87d5d79ae3Sheideman  *
88d5d79ae3Sheideman  * Mounting associates the null layer with a lower layer,
89d5d79ae3Sheideman  * effectively stacking two VFSes.  Vnode stacks are instead
90d5d79ae3Sheideman  * created on demand as files are accessed.
91d5d79ae3Sheideman  *
92d5d79ae3Sheideman  * The initial mount creates a single vnode stack for the
93d5d79ae3Sheideman  * root of the new null layer.  All other vnode stacks
94d5d79ae3Sheideman  * are created as a result of vnode operations on
95d5d79ae3Sheideman  * this or other null vnode stacks.
96d5d79ae3Sheideman  *
97d5d79ae3Sheideman  * New vnode stacks come into existence as a result of
98d5d79ae3Sheideman  * an operation which returns a vnode.
99d5d79ae3Sheideman  * The bypass routine stacks a null-node above the new
100d5d79ae3Sheideman  * vnode before returning it to the caller.
101d5d79ae3Sheideman  *
102d5d79ae3Sheideman  * For example, imagine mounting a null layer with
103d5d79ae3Sheideman  * "mount_null /usr/include /dev/layer/null".
104341d137dSmckusick  * Changing directory to /dev/layer/null will assign
105d5d79ae3Sheideman  * the root null-node (which was created when the null layer was mounted).
106d5d79ae3Sheideman  * Now consider opening "sys".  A vop_lookup would be
107d5d79ae3Sheideman  * done on the root null-node.  This operation would bypass through
108d5d79ae3Sheideman  * to the lower layer which would return a vnode representing
109d5d79ae3Sheideman  * the UFS "sys".  Null_bypass then builds a null-node
110d5d79ae3Sheideman  * aliasing the UFS "sys" and returns this to the caller.
111d5d79ae3Sheideman  * Later operations on the null-node "sys" will repeat this
112d5d79ae3Sheideman  * process when constructing other vnode stacks.
113a69b0080Sheideman  *
114ec6e17bbSheideman  *
115d5d79ae3Sheideman  * CREATING OTHER FILE SYSTEM LAYERS
116a69b0080Sheideman  *
117a69b0080Sheideman  * One of the easiest ways to construct new file system layers is to make
118a69b0080Sheideman  * a copy of the null layer, rename all files and variables, and
119a69b0080Sheideman  * then begin modifying the copy.  Sed can be used to easily rename
120a69b0080Sheideman  * all variables.
121a69b0080Sheideman  *
122d5d79ae3Sheideman  * The umap layer is an example of a layer descended from the
123d5d79ae3Sheideman  * null layer.
124d5d79ae3Sheideman  *
125d5d79ae3Sheideman  *
126d5d79ae3Sheideman  * INVOKING OPERATIONS ON LOWER LAYERS
127d5d79ae3Sheideman  *
128d5d79ae3Sheideman  * There are two techniques to invoke operations on a lower layer
129d5d79ae3Sheideman  * when the operation cannot be completely bypassed.  Each method
130d5d79ae3Sheideman  * is appropriate in different situations.  In both cases,
131d5d79ae3Sheideman  * it is the responsibility of the aliasing layer to make
132d5d79ae3Sheideman  * the operation arguments "correct" for the lower layer
133d5d79ae3Sheideman  * by mapping any vnode arguments to the lower layer.
134d5d79ae3Sheideman  *
135d5d79ae3Sheideman  * The first approach is to call the aliasing layer's bypass routine.
136d5d79ae3Sheideman  * This method is most suitable when you wish to invoke the operation
137d5d79ae3Sheideman  * currently being handled on the lower layer.  It has the advantage
138341d137dSmckusick  * that the bypass routine already must do argument mapping.
139d5d79ae3Sheideman  * An example of this is null_getattr in the null layer.
140d5d79ae3Sheideman  *
141d5d79ae3Sheideman  * A second approach is to directly invoke vnode operations on
142d5d79ae3Sheideman  * the lower layer with the VOP_OPERATIONNAME interface.
143d5d79ae3Sheideman  * The advantage of this method is that it is easy to invoke
144d5d79ae3Sheideman  * arbitrary operations on the lower layer.  The disadvantage
145d5d79ae3Sheideman  * is that vnode arguments must be manually mapped.
146d5d79ae3Sheideman  *
1471347b1ecSjohnh  */
1481347b1ecSjohnh 
1491347b1ecSjohnh #include <sys/param.h>
1501347b1ecSjohnh #include <sys/systm.h>
1511347b1ecSjohnh #include <sys/proc.h>
1521347b1ecSjohnh #include <sys/time.h>
1531347b1ecSjohnh #include <sys/types.h>
1541347b1ecSjohnh #include <sys/vnode.h>
1551347b1ecSjohnh #include <sys/mount.h>
1561347b1ecSjohnh #include <sys/namei.h>
1571347b1ecSjohnh #include <sys/malloc.h>
1581347b1ecSjohnh #include <sys/buf.h>
159341d137dSmckusick #include <miscfs/nullfs/null.h>
1601347b1ecSjohnh 
161a49f5afcSjohnh 
/*
 * Debugging switch: when nonzero, null_bypass() printf's the name of
 * every vnode operation it forwards to the lower layer.
 */
int null_bug_bypass = 0;
1631347b1ecSjohnh 
1641347b1ecSjohnh /*
165a49f5afcSjohnh  * This is the 10-Apr-92 bypass routine.
166a49f5afcSjohnh  *    This version has been optimized for speed, throwing away some
167a49f5afcSjohnh  * safety checks.  It should still always work, but it's not as
168a49f5afcSjohnh  * robust to programmer errors.
169a49f5afcSjohnh  *    Define SAFETY to include some error checking code.
170a49f5afcSjohnh  *
171a49f5afcSjohnh  * In general, we map all vnodes going down and unmap them on the way back.
172a49f5afcSjohnh  * As an exception to this, vnodes can be marked "unmapped" by setting
173a49f5afcSjohnh  * the Nth bit in operation's vdesc_flags.
174a49f5afcSjohnh  *
175a49f5afcSjohnh  * Also, some BSD vnode operations have the side effect of vrele'ing
176a49f5afcSjohnh  * their arguments.  With stacking, the reference counts are held
177a49f5afcSjohnh  * by the upper node, not the lower one, so we must handle these
178a49f5afcSjohnh  * side-effects here.  This is not of concern in Sun-derived systems
179a49f5afcSjohnh  * since there are no such side-effects.
180a49f5afcSjohnh  *
181a49f5afcSjohnh  * This makes the following assumptions:
182a49f5afcSjohnh  * - only one returned vpp
183a49f5afcSjohnh  * - no INOUT vpp's (Sun's vop_open has one of these)
184a49f5afcSjohnh  * - the vnode operation vector of the first vnode should be used
185a49f5afcSjohnh  *   to determine what implementation of the op should be invoked
186a49f5afcSjohnh  * - all mapped vnodes are of our vnode-type (NEEDSWORK:
187a49f5afcSjohnh  *   problems on rmdir'ing mount points and renaming?)
1881347b1ecSjohnh  */
189a49f5afcSjohnh int
null_bypass(ap)190a49f5afcSjohnh null_bypass(ap)
191341d137dSmckusick 	struct vop_generic_args /* {
192341d137dSmckusick 		struct vnodeop_desc *a_desc;
193341d137dSmckusick 		<other random data follows, presumably>
194341d137dSmckusick 	} */ *ap;
1951347b1ecSjohnh {
196a69b0080Sheideman 	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
197a69b0080Sheideman 	register struct vnode **this_vp_p;
1981347b1ecSjohnh 	int error;
199a49f5afcSjohnh 	struct vnode *old_vps[VDESC_MAX_VPS];
200a49f5afcSjohnh 	struct vnode **vps_p[VDESC_MAX_VPS];
201a49f5afcSjohnh 	struct vnode ***vppp;
202a49f5afcSjohnh 	struct vnodeop_desc *descp = ap->a_desc;
203a69b0080Sheideman 	int reles, i;
2041347b1ecSjohnh 
205a49f5afcSjohnh 	if (null_bug_bypass)
206a49f5afcSjohnh 		printf ("null_bypass: %s\n", descp->vdesc_name);
207a49f5afcSjohnh 
208a49f5afcSjohnh #ifdef SAFETY
209a49f5afcSjohnh 	/*
210a49f5afcSjohnh 	 * We require at least one vp.
211a49f5afcSjohnh 	 */
212a49f5afcSjohnh 	if (descp->vdesc_vp_offsets == NULL ||
213a49f5afcSjohnh 	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
214a49f5afcSjohnh 		panic ("null_bypass: no vp's in map.\n");
2151347b1ecSjohnh #endif
2161347b1ecSjohnh 
2171347b1ecSjohnh 	/*
218a49f5afcSjohnh 	 * Map the vnodes going in.
219a49f5afcSjohnh 	 * Later, we'll invoke the operation based on
220a49f5afcSjohnh 	 * the first mapped vnode's operation vector.
2211347b1ecSjohnh 	 */
222a69b0080Sheideman 	reles = descp->vdesc_flags;
223a69b0080Sheideman 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
224a49f5afcSjohnh 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
225a49f5afcSjohnh 			break;   /* bail out at end of list */
226a49f5afcSjohnh 		vps_p[i] = this_vp_p =
227a49f5afcSjohnh 			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
228a69b0080Sheideman 		/*
229a69b0080Sheideman 		 * We're not guaranteed that any but the first vnode
230a69b0080Sheideman 		 * are of our type.  Check for and don't map any
231d5d79ae3Sheideman 		 * that aren't.  (We must always map first vp or vclean fails.)
232a69b0080Sheideman 		 */
2335194fdb1Smckusick 		if (i && (*this_vp_p == NULL ||
2345194fdb1Smckusick 		    (*this_vp_p)->v_op != null_vnodeop_p)) {
235a69b0080Sheideman 			old_vps[i] = NULL;
236a69b0080Sheideman 		} else {
237a49f5afcSjohnh 			old_vps[i] = *this_vp_p;
238a69b0080Sheideman 			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
239ec6e17bbSheideman 			/*
240ec6e17bbSheideman 			 * XXX - Several operations have the side effect
241ec6e17bbSheideman 			 * of vrele'ing their vp's.  We must account for
242ec6e17bbSheideman 			 * that.  (This should go away in the future.)
243ec6e17bbSheideman 			 */
244a49f5afcSjohnh 			if (reles & 1)
245a49f5afcSjohnh 				VREF(*this_vp_p);
246ec6e17bbSheideman 		}
247a49f5afcSjohnh 
248ec6e17bbSheideman 	}
2491347b1ecSjohnh 
2501347b1ecSjohnh 	/*
251a49f5afcSjohnh 	 * Call the operation on the lower layer
252a49f5afcSjohnh 	 * with the modified argument structure.
2531347b1ecSjohnh 	 */
254a49f5afcSjohnh 	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
2551347b1ecSjohnh 
2561347b1ecSjohnh 	/*
257a49f5afcSjohnh 	 * Maintain the illusion of call-by-value
258a49f5afcSjohnh 	 * by restoring vnodes in the argument structure
259a49f5afcSjohnh 	 * to their original value.
2601347b1ecSjohnh 	 */
261a69b0080Sheideman 	reles = descp->vdesc_flags;
262a69b0080Sheideman 	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
263a49f5afcSjohnh 		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
264a49f5afcSjohnh 			break;   /* bail out at end of list */
265a69b0080Sheideman 		if (old_vps[i]) {
266a49f5afcSjohnh 			*(vps_p[i]) = old_vps[i];
267a49f5afcSjohnh 			if (reles & 1)
268a49f5afcSjohnh 				vrele(*(vps_p[i]));
269ec6e17bbSheideman 		}
270ec6e17bbSheideman 	}
2711347b1ecSjohnh 
2721347b1ecSjohnh 	/*
273ec6e17bbSheideman 	 * Map the possible out-going vpp
274ec6e17bbSheideman 	 * (Assumes that the lower layer always returns
275ec6e17bbSheideman 	 * a VREF'ed vpp unless it gets an error.)
2761347b1ecSjohnh 	 */
277a49f5afcSjohnh 	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
278a49f5afcSjohnh 	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
279a49f5afcSjohnh 	    !error) {
280ec6e17bbSheideman 		/*
281ec6e17bbSheideman 		 * XXX - even though some ops have vpp returned vp's,
282ec6e17bbSheideman 		 * several ops actually vrele this before returning.
283ec6e17bbSheideman 		 * We must avoid these ops.
284d5d79ae3Sheideman 		 * (This should go away when these ops are regularized.)
285ec6e17bbSheideman 		 */
286d5d79ae3Sheideman 		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
287d5d79ae3Sheideman 			goto out;
288a49f5afcSjohnh 		vppp = VOPARG_OFFSETTO(struct vnode***,
289a49f5afcSjohnh 				 descp->vdesc_vpp_offset,ap);
290a69b0080Sheideman 		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
291ec6e17bbSheideman 	}
2921347b1ecSjohnh 
293d5d79ae3Sheideman  out:
2941347b1ecSjohnh 	return (error);
2951347b1ecSjohnh }
2961347b1ecSjohnh 
2971347b1ecSjohnh /*
2988cb45344Smckusick  * We have to carry on the locking protocol on the null layer vnodes
299*52265cc0Smckusick  * as we progress through the tree. We also have to enforce read-only
300*52265cc0Smckusick  * if this layer is mounted read-only.
3018cb45344Smckusick  */
3028cb45344Smckusick null_lookup(ap)
3038cb45344Smckusick 	struct vop_lookup_args /* {
3048cb45344Smckusick 		struct vnode * a_dvp;
3058cb45344Smckusick 		struct vnode ** a_vpp;
3068cb45344Smckusick 		struct componentname * a_cnp;
3078cb45344Smckusick 	} */ *ap;
3088cb45344Smckusick {
309*52265cc0Smckusick 	struct componentname *cnp = ap->a_cnp;
310*52265cc0Smckusick 	struct proc *p = cnp->cn_proc;
311*52265cc0Smckusick 	int flags = cnp->cn_flags;
3128cb45344Smckusick 	struct vop_lock_args lockargs;
3138cb45344Smckusick 	struct vop_unlock_args unlockargs;
3148cb45344Smckusick 	struct vnode *dvp, *vp;
3158cb45344Smckusick 	int error;
3168cb45344Smckusick 
317*52265cc0Smckusick 	if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
318*52265cc0Smckusick 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
319*52265cc0Smckusick 		return (EROFS);
3208cb45344Smckusick 	error = null_bypass(ap);
321*52265cc0Smckusick 	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
322*52265cc0Smckusick 	    (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
323*52265cc0Smckusick 	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
324*52265cc0Smckusick 		error = EROFS;
3258cb45344Smckusick 	/*
3268cb45344Smckusick 	 * We must do the same locking and unlocking at this layer as
3278cb45344Smckusick 	 * is done in the layers below us. We could figure this out
3288cb45344Smckusick 	 * based on the error return and the LASTCN, LOCKPARENT, and
3298cb45344Smckusick 	 * LOCKLEAF flags. However, it is more expidient to just find
3308cb45344Smckusick 	 * out the state of the lower level vnodes and set ours to the
3318cb45344Smckusick 	 * same state.
3328cb45344Smckusick 	 */
3338cb45344Smckusick 	dvp = ap->a_dvp;
3348cb45344Smckusick 	vp = *ap->a_vpp;
3358cb45344Smckusick 	if (dvp == vp)
3368cb45344Smckusick 		return (error);
3378cb45344Smckusick 	if (!VOP_ISLOCKED(dvp)) {
3388cb45344Smckusick 		unlockargs.a_vp = dvp;
3398cb45344Smckusick 		unlockargs.a_flags = 0;
3408cb45344Smckusick 		unlockargs.a_p = p;
3418cb45344Smckusick 		vop_nounlock(&unlockargs);
3428cb45344Smckusick 	}
3438cb45344Smckusick 	if (vp != NULL && VOP_ISLOCKED(vp)) {
3448cb45344Smckusick 		lockargs.a_vp = vp;
3458cb45344Smckusick 		lockargs.a_flags = LK_SHARED;
3468cb45344Smckusick 		lockargs.a_p = p;
3478cb45344Smckusick 		vop_nolock(&lockargs);
3488cb45344Smckusick 	}
3498cb45344Smckusick 	return (error);
3508cb45344Smckusick }
3518cb45344Smckusick 
3528cb45344Smckusick /*
353*52265cc0Smckusick  * Setattr call. Disallow write attempts if the layer is mounted read-only.
354*52265cc0Smckusick  */
355*52265cc0Smckusick int
null_setattr(ap)356*52265cc0Smckusick null_setattr(ap)
357*52265cc0Smckusick 	struct vop_setattr_args /* {
358*52265cc0Smckusick 		struct vnodeop_desc *a_desc;
359*52265cc0Smckusick 		struct vnode *a_vp;
360*52265cc0Smckusick 		struct vattr *a_vap;
361*52265cc0Smckusick 		struct ucred *a_cred;
362*52265cc0Smckusick 		struct proc *a_p;
363*52265cc0Smckusick 	} */ *ap;
364*52265cc0Smckusick {
365*52265cc0Smckusick 	struct vnode *vp = ap->a_vp;
366*52265cc0Smckusick 	struct vattr *vap = ap->a_vap;
367*52265cc0Smckusick 
368*52265cc0Smckusick   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
369*52265cc0Smckusick 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.ts_sec != VNOVAL ||
370*52265cc0Smckusick 	    vap->va_mtime.ts_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
371*52265cc0Smckusick 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
372*52265cc0Smckusick 		return (EROFS);
373*52265cc0Smckusick 	if (vap->va_size != VNOVAL) {
374*52265cc0Smckusick  		switch (vp->v_type) {
375*52265cc0Smckusick  		case VDIR:
376*52265cc0Smckusick  			return (EISDIR);
377*52265cc0Smckusick  		case VCHR:
378*52265cc0Smckusick  		case VBLK:
379*52265cc0Smckusick  		case VSOCK:
380*52265cc0Smckusick  		case VFIFO:
381*52265cc0Smckusick 			return (0);
382*52265cc0Smckusick 		case VREG:
383*52265cc0Smckusick 		case VLNK:
384*52265cc0Smckusick  		default:
385*52265cc0Smckusick 			/*
386*52265cc0Smckusick 			 * Disallow write attempts if the filesystem is
387*52265cc0Smckusick 			 * mounted read-only.
388*52265cc0Smckusick 			 */
389*52265cc0Smckusick 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
390*52265cc0Smckusick 				return (EROFS);
391*52265cc0Smckusick 		}
392*52265cc0Smckusick 	}
393*52265cc0Smckusick 	return (null_bypass(ap));
394*52265cc0Smckusick }
395*52265cc0Smckusick 
396*52265cc0Smckusick /*
397d5d79ae3Sheideman  *  We handle getattr only to change the fsid.
3981347b1ecSjohnh  */
399a49f5afcSjohnh int
null_getattr(ap)400c0ef1447Sjohnh null_getattr(ap)
401341d137dSmckusick 	struct vop_getattr_args /* {
402341d137dSmckusick 		struct vnode *a_vp;
403341d137dSmckusick 		struct vattr *a_vap;
404341d137dSmckusick 		struct ucred *a_cred;
405341d137dSmckusick 		struct proc *a_p;
406341d137dSmckusick 	} */ *ap;
4071347b1ecSjohnh {
4081347b1ecSjohnh 	int error;
4098cb45344Smckusick 
410a49f5afcSjohnh 	if (error = null_bypass(ap))
411341d137dSmckusick 		return (error);
412a49f5afcSjohnh 	/* Requires that arguments be restored. */
4131347b1ecSjohnh 	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
414341d137dSmckusick 	return (0);
4151347b1ecSjohnh }
4161347b1ecSjohnh 
417*52265cc0Smckusick int
null_access(ap)418*52265cc0Smckusick null_access(ap)
419*52265cc0Smckusick 	struct vop_access_args /* {
420*52265cc0Smckusick 		struct vnode *a_vp;
421*52265cc0Smckusick 		int  a_mode;
422*52265cc0Smckusick 		struct ucred *a_cred;
423*52265cc0Smckusick 		struct proc *a_p;
424*52265cc0Smckusick 	} */ *ap;
425*52265cc0Smckusick {
426*52265cc0Smckusick 	struct vnode *vp = ap->a_vp;
427*52265cc0Smckusick 	mode_t mode = ap->a_mode;
428*52265cc0Smckusick 
429*52265cc0Smckusick 	/*
430*52265cc0Smckusick 	 * Disallow write attempts on read-only layers;
431*52265cc0Smckusick 	 * unless the file is a socket, fifo, or a block or
432*52265cc0Smckusick 	 * character device resident on the file system.
433*52265cc0Smckusick 	 */
434*52265cc0Smckusick 	if (mode & VWRITE) {
435*52265cc0Smckusick 		switch (vp->v_type) {
436*52265cc0Smckusick 		case VDIR:
437*52265cc0Smckusick 		case VLNK:
438*52265cc0Smckusick 		case VREG:
439*52265cc0Smckusick 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
440*52265cc0Smckusick 				return (EROFS);
441*52265cc0Smckusick 			break;
442*52265cc0Smckusick 		}
443*52265cc0Smckusick 	}
444*52265cc0Smckusick 	return (null_bypass(ap));
445*52265cc0Smckusick }
446*52265cc0Smckusick 
4479cd9dca4Smckusick /*
4488cb45344Smckusick  * We need to process our own vnode lock and then clear the
4498cb45344Smckusick  * interlock flag as it applies only to our vnode, not the
4509cd9dca4Smckusick  * vnodes below us on the stack.
4519cd9dca4Smckusick  */
4529cd9dca4Smckusick int
null_lock(ap)4539cd9dca4Smckusick null_lock(ap)
4548cb45344Smckusick 	struct vop_lock_args /* {
4558cb45344Smckusick 		struct vnode *a_vp;
4568cb45344Smckusick 		int a_flags;
4578cb45344Smckusick 		struct proc *a_p;
4588cb45344Smckusick 	} */ *ap;
4598cb45344Smckusick {
4608cb45344Smckusick 
4618cb45344Smckusick 	vop_nolock(ap);
4628cb45344Smckusick 	if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
4638cb45344Smckusick 		return (0);
4648cb45344Smckusick 	ap->a_flags &= ~LK_INTERLOCK;
4658cb45344Smckusick 	return (null_bypass(ap));
4668cb45344Smckusick }
4678cb45344Smckusick 
4688cb45344Smckusick /*
4698cb45344Smckusick  * We need to process our own vnode unlock and then clear the
4708cb45344Smckusick  * interlock flag as it applies only to our vnode, not the
4718cb45344Smckusick  * vnodes below us on the stack.
4728cb45344Smckusick  */
4738cb45344Smckusick int
null_unlock(ap)4748cb45344Smckusick null_unlock(ap)
4758cb45344Smckusick 	struct vop_unlock_args /* {
4768cb45344Smckusick 		struct vnode *a_vp;
4778cb45344Smckusick 		int a_flags;
4788cb45344Smckusick 		struct proc *a_p;
4798cb45344Smckusick 	} */ *ap;
4809cd9dca4Smckusick {
4819cd9dca4Smckusick 	struct vnode *vp = ap->a_vp;
4829cd9dca4Smckusick 
4838cb45344Smckusick 	vop_nounlock(ap);
4849cd9dca4Smckusick 	ap->a_flags &= ~LK_INTERLOCK;
4858cb45344Smckusick 	return (null_bypass(ap));
4869cd9dca4Smckusick }
4871347b1ecSjohnh 
488a49f5afcSjohnh int
null_inactive(ap)489c0ef1447Sjohnh null_inactive(ap)
490341d137dSmckusick 	struct vop_inactive_args /* {
491341d137dSmckusick 		struct vnode *a_vp;
4928cb45344Smckusick 		struct proc *a_p;
493341d137dSmckusick 	} */ *ap;
4941347b1ecSjohnh {
495600c1214Sheideman 	/*
496a49f5afcSjohnh 	 * Do nothing (and _don't_ bypass).
497a49f5afcSjohnh 	 * Wait to vrele lowervp until reclaim,
498a49f5afcSjohnh 	 * so that until then our null_node is in the
499a49f5afcSjohnh 	 * cache and reusable.
500a49f5afcSjohnh 	 *
501a49f5afcSjohnh 	 * NEEDSWORK: Someday, consider inactive'ing
502a49f5afcSjohnh 	 * the lowervp and then trying to reactivate it
503d5d79ae3Sheideman 	 * with capabilities (v_id)
504a49f5afcSjohnh 	 * like they do in the name lookup cache code.
505a49f5afcSjohnh 	 * That's too much work for now.
506a49f5afcSjohnh 	 */
5078cb45344Smckusick 	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
508341d137dSmckusick 	return (0);
5091347b1ecSjohnh }
5101347b1ecSjohnh 
511ec6e17bbSheideman int
null_reclaim(ap)512c0ef1447Sjohnh null_reclaim(ap)
513341d137dSmckusick 	struct vop_reclaim_args /* {
514341d137dSmckusick 		struct vnode *a_vp;
5159cd9dca4Smckusick 		struct proc *a_p;
516341d137dSmckusick 	} */ *ap;
5171347b1ecSjohnh {
518ec6e17bbSheideman 	struct vnode *vp = ap->a_vp;
519ec6e17bbSheideman 	struct null_node *xp = VTONULL(vp);
520ec6e17bbSheideman 	struct vnode *lowervp = xp->null_lowervp;
521ec6e17bbSheideman 
522ec6e17bbSheideman 	/*
523d5d79ae3Sheideman 	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
524ec6e17bbSheideman 	 * so we can't call VOPs on ourself.
525ec6e17bbSheideman 	 */
526ec6e17bbSheideman 	/* After this assignment, this node will not be re-used. */
527ec6e17bbSheideman 	xp->null_lowervp = NULL;
528937a8356Smckusick 	LIST_REMOVE(xp, null_hash);
529ec6e17bbSheideman 	FREE(vp->v_data, M_TEMP);
530ec6e17bbSheideman 	vp->v_data = NULL;
531ec6e17bbSheideman 	vrele (lowervp);
532341d137dSmckusick 	return (0);
5331347b1ecSjohnh }
5341347b1ecSjohnh 
535a49f5afcSjohnh int
null_print(ap)536c0ef1447Sjohnh null_print(ap)
537341d137dSmckusick 	struct vop_print_args /* {
538341d137dSmckusick 		struct vnode *a_vp;
539341d137dSmckusick 	} */ *ap;
5401347b1ecSjohnh {
541a49f5afcSjohnh 	register struct vnode *vp = ap->a_vp;
542ec6e17bbSheideman 	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp));
543341d137dSmckusick 	return (0);
5441347b1ecSjohnh }
5451347b1ecSjohnh 
546d5d79ae3Sheideman /*
547d5d79ae3Sheideman  * XXX - vop_strategy must be hand coded because it has no
548d5d79ae3Sheideman  * vnode in its arguments.
549d5d79ae3Sheideman  * This goes away with a merged VM/buffer cache.
550d5d79ae3Sheideman  */
551600c1214Sheideman int
null_strategy(ap)552d5d79ae3Sheideman null_strategy(ap)
553341d137dSmckusick 	struct vop_strategy_args /* {
554341d137dSmckusick 		struct buf *a_bp;
555341d137dSmckusick 	} */ *ap;
556600c1214Sheideman {
557d5d79ae3Sheideman 	struct buf *bp = ap->a_bp;
558d5d79ae3Sheideman 	int error;
559d5d79ae3Sheideman 	struct vnode *savedvp;
560d5d79ae3Sheideman 
561d5d79ae3Sheideman 	savedvp = bp->b_vp;
562d5d79ae3Sheideman 	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
563d5d79ae3Sheideman 
564d5d79ae3Sheideman 	error = VOP_STRATEGY(bp);
565d5d79ae3Sheideman 
566d5d79ae3Sheideman 	bp->b_vp = savedvp;
567d5d79ae3Sheideman 
568341d137dSmckusick 	return (error);
569600c1214Sheideman }
570600c1214Sheideman 
571d5d79ae3Sheideman /*
572d5d79ae3Sheideman  * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
573d5d79ae3Sheideman  * vnode in its arguments.
574d5d79ae3Sheideman  * This goes away with a merged VM/buffer cache.
575d5d79ae3Sheideman  */
576600c1214Sheideman int
null_bwrite(ap)577d5d79ae3Sheideman null_bwrite(ap)
578341d137dSmckusick 	struct vop_bwrite_args /* {
579341d137dSmckusick 		struct buf *a_bp;
580341d137dSmckusick 	} */ *ap;
581600c1214Sheideman {
582d5d79ae3Sheideman 	struct buf *bp = ap->a_bp;
583d5d79ae3Sheideman 	int error;
584d5d79ae3Sheideman 	struct vnode *savedvp;
585d5d79ae3Sheideman 
586d5d79ae3Sheideman 	savedvp = bp->b_vp;
587d5d79ae3Sheideman 	bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
588d5d79ae3Sheideman 
589d5d79ae3Sheideman 	error = VOP_BWRITE(bp);
590d5d79ae3Sheideman 
591d5d79ae3Sheideman 	bp->b_vp = savedvp;
592d5d79ae3Sheideman 
593341d137dSmckusick 	return (error);
594600c1214Sheideman }
595d5d79ae3Sheideman 
5961347b1ecSjohnh /*
597a49f5afcSjohnh  * Global vfs data structures
5981347b1ecSjohnh  */
599c0ef1447Sjohnh int (**null_vnodeop_p)();
600a69b0080Sheideman struct vnodeopv_entry_desc null_vnodeop_entries[] = {
601a49f5afcSjohnh 	{ &vop_default_desc, null_bypass },
602a49f5afcSjohnh 
6038cb45344Smckusick 	{ &vop_lookup_desc, null_lookup },
604*52265cc0Smckusick 	{ &vop_setattr_desc, null_setattr },
605a49f5afcSjohnh 	{ &vop_getattr_desc, null_getattr },
606*52265cc0Smckusick 	{ &vop_access_desc, null_access },
6079cd9dca4Smckusick 	{ &vop_lock_desc, null_lock },
6088cb45344Smckusick 	{ &vop_unlock_desc, null_unlock },
609a49f5afcSjohnh 	{ &vop_inactive_desc, null_inactive },
610a49f5afcSjohnh 	{ &vop_reclaim_desc, null_reclaim },
611a49f5afcSjohnh 	{ &vop_print_desc, null_print },
612a49f5afcSjohnh 
613a49f5afcSjohnh 	{ &vop_strategy_desc, null_strategy },
614d5d79ae3Sheideman 	{ &vop_bwrite_desc, null_bwrite },
615a49f5afcSjohnh 
6161347b1ecSjohnh 	{ (struct vnodeop_desc*)NULL, (int(*)())NULL }
6171347b1ecSjohnh };
618a69b0080Sheideman struct vnodeopv_desc null_vnodeop_opv_desc =
619a69b0080Sheideman 	{ &null_vnodeop_p, null_vnodeop_entries };
620