1df8bae1dSRodney W. Grimes /* 2df8bae1dSRodney W. Grimes * Copyright (c) 1992, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * This code is derived from software contributed to Berkeley by 6df8bae1dSRodney W. Grimes * John Heidemann of the UCLA Ficus project. 7df8bae1dSRodney W. Grimes * 8df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 9df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 10df8bae1dSRodney W. Grimes * are met: 11df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 12df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 13df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 14df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 15df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 16df8bae1dSRodney W. Grimes * 3. All advertising materials mentioning features or use of this software 17df8bae1dSRodney W. Grimes * must display the following acknowledgement: 18df8bae1dSRodney W. Grimes * This product includes software developed by the University of 19df8bae1dSRodney W. Grimes * California, Berkeley and its contributors. 20df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 21df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 22df8bae1dSRodney W. Grimes * without specific prior written permission. 23df8bae1dSRodney W. Grimes * 24df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26df8bae1dSRodney W. 
Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34df8bae1dSRodney W. Grimes * SUCH DAMAGE. 35df8bae1dSRodney W. Grimes * 36996c772fSJohn Dyson * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 37996c772fSJohn Dyson * 38996c772fSJohn Dyson * Ancestors: 39996c772fSJohn Dyson * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 4009c8ff4aSAlexander Langer * $Id: null_vnops.c,v 1.18 1997/05/25 04:50:02 peter Exp $ 41996c772fSJohn Dyson * ...and... 42996c772fSJohn Dyson * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project 43df8bae1dSRodney W. Grimes * 4409c8ff4aSAlexander Langer * $Id: null_vnops.c,v 1.18 1997/05/25 04:50:02 peter Exp $ 45df8bae1dSRodney W. Grimes */ 46df8bae1dSRodney W. Grimes 47df8bae1dSRodney W. Grimes /* 48df8bae1dSRodney W. Grimes * Null Layer 49df8bae1dSRodney W. Grimes * 50df8bae1dSRodney W. Grimes * (See mount_null(8) for more information.) 51df8bae1dSRodney W. Grimes * 52df8bae1dSRodney W. Grimes * The null layer duplicates a portion of the file system 53df8bae1dSRodney W. Grimes * name space under a new name. In this respect, it is 54df8bae1dSRodney W. Grimes * similar to the loopback file system. It differs from 55df8bae1dSRodney W. Grimes * the loopback fs in two respects: it is implemented using 56df8bae1dSRodney W. 
Grimes * a stackable layers technique, and its "null-node"s stack above 57df8bae1dSRodney W. Grimes * all lower-layer vnodes, not just over directory vnodes. 58df8bae1dSRodney W. Grimes * 59df8bae1dSRodney W. Grimes * The null layer has two purposes. First, it serves as a demonstration 60df8bae1dSRodney W. Grimes * of layering by providing a layer which does nothing. (It actually 61df8bae1dSRodney W. Grimes * does everything the loopback file system does, which is slightly 62df8bae1dSRodney W. Grimes * more than nothing.) Second, the null layer can serve as a prototype 63df8bae1dSRodney W. Grimes * layer. Since it provides all necessary layer framework, 64df8bae1dSRodney W. Grimes * new file system layers can be created very easily by starting 65df8bae1dSRodney W. Grimes * with a null layer. 66df8bae1dSRodney W. Grimes * 67df8bae1dSRodney W. Grimes * The remainder of this man page examines the null layer as a basis 68df8bae1dSRodney W. Grimes * for constructing new layers. 69df8bae1dSRodney W. Grimes * 70df8bae1dSRodney W. Grimes * 71df8bae1dSRodney W. Grimes * INSTANTIATING NEW NULL LAYERS 72df8bae1dSRodney W. Grimes * 73df8bae1dSRodney W. Grimes * New null layers are created with mount_null(8). 74df8bae1dSRodney W. Grimes * Mount_null(8) takes two arguments, the pathname 75df8bae1dSRodney W. Grimes * of the lower vfs (target-pn) and the pathname where the null 76df8bae1dSRodney W. Grimes * layer will appear in the namespace (alias-pn). After 77df8bae1dSRodney W. Grimes * the null layer is put into place, the contents 78df8bae1dSRodney W. Grimes * of target-pn subtree will be aliased under alias-pn. 79df8bae1dSRodney W. Grimes * 80df8bae1dSRodney W. Grimes * 81df8bae1dSRodney W. Grimes * OPERATION OF A NULL LAYER 82df8bae1dSRodney W. Grimes * 83df8bae1dSRodney W. Grimes * The null layer is the minimum file system layer, 84df8bae1dSRodney W. Grimes * simply bypassing all possible operations to the lower layer 85df8bae1dSRodney W. Grimes * for processing there. 
The majority of its activity centers 8609c8ff4aSAlexander Langer * on the bypass routine, through which nearly all vnode operations 87df8bae1dSRodney W. Grimes * pass. 88df8bae1dSRodney W. Grimes * 89df8bae1dSRodney W. Grimes * The bypass routine accepts arbitrary vnode operations for 90df8bae1dSRodney W. Grimes * handling by the lower layer. It begins by examining vnode 91df8bae1dSRodney W. Grimes * operation arguments and replacing any null-nodes by their 92df8bae1dSRodney W. Grimes * lower-layer equivalents. It then invokes the operation 93df8bae1dSRodney W. Grimes * on the lower layer. Finally, it replaces the null-nodes 94df8bae1dSRodney W. Grimes * in the arguments and, if a vnode is returned by the operation, 95df8bae1dSRodney W. Grimes * stacks a null-node on top of the returned vnode. 96df8bae1dSRodney W. Grimes * 97996c772fSJohn Dyson * Although bypass handles most operations, vop_getattr, vop_lock, 98996c772fSJohn Dyson * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not 99996c772fSJohn Dyson * bypassed. Vop_getattr must change the fsid being returned. 100996c772fSJohn Dyson * Vop_lock and vop_unlock must handle any locking for the 101996c772fSJohn Dyson * current vnode as well as pass the lock request down. 102df8bae1dSRodney W. Grimes * Vop_inactive and vop_reclaim are not bypassed so that 103996c772fSJohn Dyson * they can handle freeing null-layer specific data. Vop_print 104996c772fSJohn Dyson * is not bypassed to avoid excessive debugging information. 105996c772fSJohn Dyson * Also, certain vnode operations change the locking state within 106996c772fSJohn Dyson * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 107996c772fSJohn Dyson * and symlink). Ideally these operations should not change the 108996c772fSJohn Dyson * lock state, but should be changed to let the caller of the 109996c772fSJohn Dyson * function unlock them. 
Otherwise all intermediate vnode layers 110996c772fSJohn Dyson * (such as union, umapfs, etc) must catch these functions to do 111996c772fSJohn Dyson * the necessary locking at their layer. 112df8bae1dSRodney W. Grimes * 113df8bae1dSRodney W. Grimes * 114df8bae1dSRodney W. Grimes * INSTANTIATING VNODE STACKS 115df8bae1dSRodney W. Grimes * 116df8bae1dSRodney W. Grimes * Mounting associates the null layer with a lower layer, 117df8bae1dSRodney W. Grimes * effectively stacking two VFSes. Vnode stacks are instead 118df8bae1dSRodney W. Grimes * created on demand as files are accessed. 119df8bae1dSRodney W. Grimes * 120df8bae1dSRodney W. Grimes * The initial mount creates a single vnode stack for the 121df8bae1dSRodney W. Grimes * root of the new null layer. All other vnode stacks 122df8bae1dSRodney W. Grimes * are created as a result of vnode operations on 123df8bae1dSRodney W. Grimes * this or other null vnode stacks. 124df8bae1dSRodney W. Grimes * 125df8bae1dSRodney W. Grimes * New vnode stacks come into existence as a result of 126df8bae1dSRodney W. Grimes * an operation which returns a vnode. 127df8bae1dSRodney W. Grimes * The bypass routine stacks a null-node above the new 128df8bae1dSRodney W. Grimes * vnode before returning it to the caller. 129df8bae1dSRodney W. Grimes * 130df8bae1dSRodney W. Grimes * For example, imagine mounting a null layer with 131df8bae1dSRodney W. Grimes * "mount_null /usr/include /dev/layer/null". 132df8bae1dSRodney W. Grimes * Changing directory to /dev/layer/null will assign 133df8bae1dSRodney W. Grimes * the root null-node (which was created when the null layer was mounted). 134df8bae1dSRodney W. Grimes * Now consider opening "sys". A vop_lookup would be 135df8bae1dSRodney W. Grimes * done on the root null-node. This operation would bypass through 136df8bae1dSRodney W. Grimes * to the lower layer which would return a vnode representing 137df8bae1dSRodney W. Grimes * the UFS "sys". Null_bypass then builds a null-node 138df8bae1dSRodney W. 
Grimes * aliasing the UFS "sys" and returns this to the caller. 139df8bae1dSRodney W. Grimes * Later operations on the null-node "sys" will repeat this 140df8bae1dSRodney W. Grimes * process when constructing other vnode stacks. 141df8bae1dSRodney W. Grimes * 142df8bae1dSRodney W. Grimes * 143df8bae1dSRodney W. Grimes * CREATING OTHER FILE SYSTEM LAYERS 144df8bae1dSRodney W. Grimes * 145df8bae1dSRodney W. Grimes * One of the easiest ways to construct new file system layers is to make 146df8bae1dSRodney W. Grimes * a copy of the null layer, rename all files and variables, and 147df8bae1dSRodney W. Grimes * then begin modifying the copy. Sed can be used to easily rename 148df8bae1dSRodney W. Grimes * all variables. 149df8bae1dSRodney W. Grimes * 150df8bae1dSRodney W. Grimes * The umap layer is an example of a layer descended from the 151df8bae1dSRodney W. Grimes * null layer. 152df8bae1dSRodney W. Grimes * 153df8bae1dSRodney W. Grimes * 154df8bae1dSRodney W. Grimes * INVOKING OPERATIONS ON LOWER LAYERS 155df8bae1dSRodney W. Grimes * 156df8bae1dSRodney W. Grimes * There are two techniques to invoke operations on a lower layer 157df8bae1dSRodney W. Grimes * when the operation cannot be completely bypassed. Each method 158df8bae1dSRodney W. Grimes * is appropriate in different situations. In both cases, 159df8bae1dSRodney W. Grimes * it is the responsibility of the aliasing layer to make 160df8bae1dSRodney W. Grimes * the operation arguments "correct" for the lower layer 161df8bae1dSRodney W. Grimes * by mapping any vnode arguments to the lower layer. 162df8bae1dSRodney W. Grimes * 163df8bae1dSRodney W. Grimes * The first approach is to call the aliasing layer's bypass routine. 164df8bae1dSRodney W. Grimes * This method is most suitable when you wish to invoke the operation 165df8bae1dSRodney W. Grimes * currently being handled on the lower layer. It has the advantage 166df8bae1dSRodney W. Grimes * that the bypass routine already must do argument mapping. 
167df8bae1dSRodney W. Grimes * An example of this is null_getattr in the null layer. 168df8bae1dSRodney W. Grimes * 169df8bae1dSRodney W. Grimes * A second approach is to directly invoke vnode operations on 170df8bae1dSRodney W. Grimes * the lower layer with the VOP_OPERATIONNAME interface. 171df8bae1dSRodney W. Grimes * The advantage of this method is that it is easy to invoke 172df8bae1dSRodney W. Grimes * arbitrary operations on the lower layer. The disadvantage 173df8bae1dSRodney W. Grimes * is that vnode arguments must be manually mapped. 174df8bae1dSRodney W. Grimes * 175df8bae1dSRodney W. Grimes */ 176df8bae1dSRodney W. Grimes 177df8bae1dSRodney W. Grimes #include <sys/param.h> 178df8bae1dSRodney W. Grimes #include <sys/systm.h> 17967bfdf83SGarrett Wollman #include <sys/kernel.h> 180d4b7a369SPoul-Henning Kamp #include <sys/sysctl.h> 181df8bae1dSRodney W. Grimes #include <sys/proc.h> 182df8bae1dSRodney W. Grimes #include <sys/time.h> 183df8bae1dSRodney W. Grimes #include <sys/types.h> 184df8bae1dSRodney W. Grimes #include <sys/vnode.h> 185df8bae1dSRodney W. Grimes #include <sys/mount.h> 186df8bae1dSRodney W. Grimes #include <sys/namei.h> 187df8bae1dSRodney W. Grimes #include <sys/malloc.h> 188df8bae1dSRodney W. Grimes #include <sys/buf.h> 189df8bae1dSRodney W. Grimes #include <miscfs/nullfs/null.h> 190df8bae1dSRodney W. Grimes 191d4b7a369SPoul-Henning Kamp static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ 192d4b7a369SPoul-Henning Kamp SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, 193d4b7a369SPoul-Henning Kamp &null_bug_bypass, 0, ""); 194df8bae1dSRodney W. 
Grimes 19563f50488SMike Pritchard static int null_access __P((struct vop_access_args *ap)); 196d4b7a369SPoul-Henning Kamp static int null_bwrite __P((struct vop_bwrite_args *ap)); 197d4b7a369SPoul-Henning Kamp static int null_getattr __P((struct vop_getattr_args *ap)); 198d4b7a369SPoul-Henning Kamp static int null_inactive __P((struct vop_inactive_args *ap)); 19963f50488SMike Pritchard static int null_lock __P((struct vop_lock_args *ap)); 20063f50488SMike Pritchard static int null_lookup __P((struct vop_lookup_args *ap)); 201d4b7a369SPoul-Henning Kamp static int null_print __P((struct vop_print_args *ap)); 202d4b7a369SPoul-Henning Kamp static int null_reclaim __P((struct vop_reclaim_args *ap)); 20363f50488SMike Pritchard static int null_setattr __P((struct vop_setattr_args *ap)); 204d4b7a369SPoul-Henning Kamp static int null_strategy __P((struct vop_strategy_args *ap)); 20563f50488SMike Pritchard static int null_unlock __P((struct vop_unlock_args *ap)); 2069b5e8b3aSBruce Evans 207df8bae1dSRodney W. Grimes /* 208df8bae1dSRodney W. Grimes * This is the 10-Apr-92 bypass routine. 209df8bae1dSRodney W. Grimes * This version has been optimized for speed, throwing away some 210df8bae1dSRodney W. Grimes * safety checks. It should still always work, but it's not as 211df8bae1dSRodney W. Grimes * robust to programmer errors. 212df8bae1dSRodney W. Grimes * Define SAFETY to include some error checking code. 213df8bae1dSRodney W. Grimes * 214df8bae1dSRodney W. Grimes * In general, we map all vnodes going down and unmap them on the way back. 215df8bae1dSRodney W. Grimes * As an exception to this, vnodes can be marked "unmapped" by setting 216df8bae1dSRodney W. Grimes * the Nth bit in operation's vdesc_flags. 217df8bae1dSRodney W. Grimes * 218df8bae1dSRodney W. Grimes * Also, some BSD vnode operations have the side effect of vrele'ing 219df8bae1dSRodney W. Grimes * their arguments. With stacking, the reference counts are held 220df8bae1dSRodney W. 
Grimes * by the upper node, not the lower one, so we must handle these 221df8bae1dSRodney W. Grimes * side-effects here. This is not of concern in Sun-derived systems 222df8bae1dSRodney W. Grimes * since there are no such side-effects. 223df8bae1dSRodney W. Grimes * 224df8bae1dSRodney W. Grimes * This makes the following assumptions: 225df8bae1dSRodney W. Grimes * - only one returned vpp 226df8bae1dSRodney W. Grimes * - no INOUT vpp's (Sun's vop_open has one of these) 227df8bae1dSRodney W. Grimes * - the vnode operation vector of the first vnode should be used 228df8bae1dSRodney W. Grimes * to determine what implementation of the op should be invoked 229df8bae1dSRodney W. Grimes * - all mapped vnodes are of our vnode-type (NEEDSWORK: 230df8bae1dSRodney W. Grimes * problems on rmdir'ing mount points and renaming?) 231df8bae1dSRodney W. Grimes */ 232996c772fSJohn Dyson int 233df8bae1dSRodney W. Grimes null_bypass(ap) 234df8bae1dSRodney W. Grimes struct vop_generic_args /* { 235df8bae1dSRodney W. Grimes struct vnodeop_desc *a_desc; 236df8bae1dSRodney W. Grimes <other random data follows, presumably> 237df8bae1dSRodney W. Grimes } */ *ap; 238df8bae1dSRodney W. Grimes { 239df8bae1dSRodney W. Grimes register struct vnode **this_vp_p; 240df8bae1dSRodney W. Grimes int error; 241df8bae1dSRodney W. Grimes struct vnode *old_vps[VDESC_MAX_VPS]; 242df8bae1dSRodney W. Grimes struct vnode **vps_p[VDESC_MAX_VPS]; 243df8bae1dSRodney W. Grimes struct vnode ***vppp; 244df8bae1dSRodney W. Grimes struct vnodeop_desc *descp = ap->a_desc; 245df8bae1dSRodney W. Grimes int reles, i; 246df8bae1dSRodney W. Grimes 247df8bae1dSRodney W. Grimes if (null_bug_bypass) 248df8bae1dSRodney W. Grimes printf ("null_bypass: %s\n", descp->vdesc_name); 249df8bae1dSRodney W. Grimes 250df8bae1dSRodney W. Grimes #ifdef SAFETY 251df8bae1dSRodney W. Grimes /* 252df8bae1dSRodney W. Grimes * We require at least one vp. 253df8bae1dSRodney W. Grimes */ 254df8bae1dSRodney W. 
Grimes if (descp->vdesc_vp_offsets == NULL || 255df8bae1dSRodney W. Grimes descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) 256edf8a815SDavid Greenman panic ("null_bypass: no vp's in map."); 257df8bae1dSRodney W. Grimes #endif 258df8bae1dSRodney W. Grimes 259df8bae1dSRodney W. Grimes /* 260df8bae1dSRodney W. Grimes * Map the vnodes going in. 261df8bae1dSRodney W. Grimes * Later, we'll invoke the operation based on 262df8bae1dSRodney W. Grimes * the first mapped vnode's operation vector. 263df8bae1dSRodney W. Grimes */ 264df8bae1dSRodney W. Grimes reles = descp->vdesc_flags; 265df8bae1dSRodney W. Grimes for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 266df8bae1dSRodney W. Grimes if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 267df8bae1dSRodney W. Grimes break; /* bail out at end of list */ 268df8bae1dSRodney W. Grimes vps_p[i] = this_vp_p = 269df8bae1dSRodney W. Grimes VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); 270df8bae1dSRodney W. Grimes /* 271df8bae1dSRodney W. Grimes * We're not guaranteed that any but the first vnode 272df8bae1dSRodney W. Grimes * are of our type. Check for and don't map any 273df8bae1dSRodney W. Grimes * that aren't. (We must always map first vp or vclean fails.) 274df8bae1dSRodney W. Grimes */ 275c5e17d9eSKATO Takenori if (i && (*this_vp_p == NULLVP || 276996c772fSJohn Dyson (*this_vp_p)->v_op != null_vnodeop_p)) { 277c5e17d9eSKATO Takenori old_vps[i] = NULLVP; 278df8bae1dSRodney W. Grimes } else { 279df8bae1dSRodney W. Grimes old_vps[i] = *this_vp_p; 280df8bae1dSRodney W. Grimes *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); 281df8bae1dSRodney W. Grimes /* 282df8bae1dSRodney W. Grimes * XXX - Several operations have the side effect 283df8bae1dSRodney W. Grimes * of vrele'ing their vp's. We must account for 284df8bae1dSRodney W. Grimes * that. (This should go away in the future.) 285df8bae1dSRodney W. Grimes */ 286df8bae1dSRodney W. Grimes if (reles & 1) 287df8bae1dSRodney W. Grimes VREF(*this_vp_p); 288df8bae1dSRodney W. 
Grimes } 289df8bae1dSRodney W. Grimes 290df8bae1dSRodney W. Grimes } 291df8bae1dSRodney W. Grimes 292df8bae1dSRodney W. Grimes /* 293df8bae1dSRodney W. Grimes * Call the operation on the lower layer 294df8bae1dSRodney W. Grimes * with the modified argument structure. 295df8bae1dSRodney W. Grimes */ 296df8bae1dSRodney W. Grimes error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); 297df8bae1dSRodney W. Grimes 298df8bae1dSRodney W. Grimes /* 299df8bae1dSRodney W. Grimes * Maintain the illusion of call-by-value 300df8bae1dSRodney W. Grimes * by restoring vnodes in the argument structure 301df8bae1dSRodney W. Grimes * to their original value. 302df8bae1dSRodney W. Grimes */ 303df8bae1dSRodney W. Grimes reles = descp->vdesc_flags; 304df8bae1dSRodney W. Grimes for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 305df8bae1dSRodney W. Grimes if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 306df8bae1dSRodney W. Grimes break; /* bail out at end of list */ 307df8bae1dSRodney W. Grimes if (old_vps[i]) { 308df8bae1dSRodney W. Grimes *(vps_p[i]) = old_vps[i]; 309df8bae1dSRodney W. Grimes if (reles & 1) 310df8bae1dSRodney W. Grimes vrele(*(vps_p[i])); 311df8bae1dSRodney W. Grimes } 312df8bae1dSRodney W. Grimes } 313df8bae1dSRodney W. Grimes 314df8bae1dSRodney W. Grimes /* 315df8bae1dSRodney W. Grimes * Map the possible out-going vpp 316df8bae1dSRodney W. Grimes * (Assumes that the lower layer always returns 317df8bae1dSRodney W. Grimes * a VREF'ed vpp unless it gets an error.) 318df8bae1dSRodney W. Grimes */ 319df8bae1dSRodney W. Grimes if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && 320df8bae1dSRodney W. Grimes !(descp->vdesc_flags & VDESC_NOMAP_VPP) && 321df8bae1dSRodney W. Grimes !error) { 322df8bae1dSRodney W. Grimes /* 323df8bae1dSRodney W. Grimes * XXX - even though some ops have vpp returned vp's, 324df8bae1dSRodney W. Grimes * several ops actually vrele this before returning. 325df8bae1dSRodney W. Grimes * We must avoid these ops. 326df8bae1dSRodney W. 
Grimes * (This should go away when these ops are regularized.) 327df8bae1dSRodney W. Grimes */ 328df8bae1dSRodney W. Grimes if (descp->vdesc_flags & VDESC_VPP_WILLRELE) 329df8bae1dSRodney W. Grimes goto out; 330df8bae1dSRodney W. Grimes vppp = VOPARG_OFFSETTO(struct vnode***, 331df8bae1dSRodney W. Grimes descp->vdesc_vpp_offset,ap); 332df8bae1dSRodney W. Grimes error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); 333df8bae1dSRodney W. Grimes } 334df8bae1dSRodney W. Grimes 335df8bae1dSRodney W. Grimes out: 336df8bae1dSRodney W. Grimes return (error); 337df8bae1dSRodney W. Grimes } 338df8bae1dSRodney W. Grimes 339996c772fSJohn Dyson /* 340996c772fSJohn Dyson * We have to carry on the locking protocol on the null layer vnodes 341996c772fSJohn Dyson * as we progress through the tree. We also have to enforce read-only 342996c772fSJohn Dyson * if this layer is mounted read-only. 343996c772fSJohn Dyson */ 344996c772fSJohn Dyson static int 345996c772fSJohn Dyson null_lookup(ap) 346996c772fSJohn Dyson struct vop_lookup_args /* { 347996c772fSJohn Dyson struct vnode * a_dvp; 348996c772fSJohn Dyson struct vnode ** a_vpp; 349996c772fSJohn Dyson struct componentname * a_cnp; 350996c772fSJohn Dyson } */ *ap; 351996c772fSJohn Dyson { 352996c772fSJohn Dyson struct componentname *cnp = ap->a_cnp; 353996c772fSJohn Dyson struct proc *p = cnp->cn_proc; 354996c772fSJohn Dyson int flags = cnp->cn_flags; 355996c772fSJohn Dyson struct vop_lock_args lockargs; 356996c772fSJohn Dyson struct vop_unlock_args unlockargs; 357996c772fSJohn Dyson struct vnode *dvp, *vp; 358996c772fSJohn Dyson int error; 359996c772fSJohn Dyson 360996c772fSJohn Dyson if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && 361996c772fSJohn Dyson (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 362996c772fSJohn Dyson return (EROFS); 363f3a778f2SMike Pritchard error = null_bypass((struct vop_generic_args *)ap); 364996c772fSJohn Dyson if (error == EJUSTRETURN && (flags & ISLASTCN) && 
365996c772fSJohn Dyson (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && 366996c772fSJohn Dyson (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) 367996c772fSJohn Dyson error = EROFS; 368996c772fSJohn Dyson /* 369996c772fSJohn Dyson * We must do the same locking and unlocking at this layer as 370996c772fSJohn Dyson * is done in the layers below us. We could figure this out 371996c772fSJohn Dyson * based on the error return and the LASTCN, LOCKPARENT, and 372996c772fSJohn Dyson * LOCKLEAF flags. However, it is more expidient to just find 373996c772fSJohn Dyson * out the state of the lower level vnodes and set ours to the 374996c772fSJohn Dyson * same state. 375996c772fSJohn Dyson */ 376996c772fSJohn Dyson dvp = ap->a_dvp; 377996c772fSJohn Dyson vp = *ap->a_vpp; 378996c772fSJohn Dyson if (dvp == vp) 379996c772fSJohn Dyson return (error); 380996c772fSJohn Dyson if (!VOP_ISLOCKED(dvp)) { 381996c772fSJohn Dyson unlockargs.a_vp = dvp; 382996c772fSJohn Dyson unlockargs.a_flags = 0; 383996c772fSJohn Dyson unlockargs.a_p = p; 384996c772fSJohn Dyson vop_nounlock(&unlockargs); 385996c772fSJohn Dyson } 386c5e17d9eSKATO Takenori if (vp != NULLVP && VOP_ISLOCKED(vp)) { 387996c772fSJohn Dyson lockargs.a_vp = vp; 388996c772fSJohn Dyson lockargs.a_flags = LK_SHARED; 389996c772fSJohn Dyson lockargs.a_p = p; 390996c772fSJohn Dyson vop_nolock(&lockargs); 391996c772fSJohn Dyson } 392996c772fSJohn Dyson return (error); 393996c772fSJohn Dyson } 394996c772fSJohn Dyson 395996c772fSJohn Dyson /* 396996c772fSJohn Dyson * Setattr call. Disallow write attempts if the layer is mounted read-only. 
397996c772fSJohn Dyson */ 398996c772fSJohn Dyson int 399996c772fSJohn Dyson null_setattr(ap) 400996c772fSJohn Dyson struct vop_setattr_args /* { 401996c772fSJohn Dyson struct vnodeop_desc *a_desc; 402996c772fSJohn Dyson struct vnode *a_vp; 403996c772fSJohn Dyson struct vattr *a_vap; 404996c772fSJohn Dyson struct ucred *a_cred; 405996c772fSJohn Dyson struct proc *a_p; 406996c772fSJohn Dyson } */ *ap; 407996c772fSJohn Dyson { 408996c772fSJohn Dyson struct vnode *vp = ap->a_vp; 409996c772fSJohn Dyson struct vattr *vap = ap->a_vap; 410996c772fSJohn Dyson 411996c772fSJohn Dyson if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 41263f50488SMike Pritchard vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 41363f50488SMike Pritchard vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 414996c772fSJohn Dyson (vp->v_mount->mnt_flag & MNT_RDONLY)) 415996c772fSJohn Dyson return (EROFS); 416996c772fSJohn Dyson if (vap->va_size != VNOVAL) { 417996c772fSJohn Dyson switch (vp->v_type) { 418996c772fSJohn Dyson case VDIR: 419996c772fSJohn Dyson return (EISDIR); 420996c772fSJohn Dyson case VCHR: 421996c772fSJohn Dyson case VBLK: 422996c772fSJohn Dyson case VSOCK: 423996c772fSJohn Dyson case VFIFO: 424996c772fSJohn Dyson return (0); 425996c772fSJohn Dyson case VREG: 426996c772fSJohn Dyson case VLNK: 427996c772fSJohn Dyson default: 428996c772fSJohn Dyson /* 429996c772fSJohn Dyson * Disallow write attempts if the filesystem is 430996c772fSJohn Dyson * mounted read-only. 431996c772fSJohn Dyson */ 432996c772fSJohn Dyson if (vp->v_mount->mnt_flag & MNT_RDONLY) 433996c772fSJohn Dyson return (EROFS); 434996c772fSJohn Dyson } 435996c772fSJohn Dyson } 436f3a778f2SMike Pritchard return (null_bypass((struct vop_generic_args *)ap)); 437996c772fSJohn Dyson } 438df8bae1dSRodney W. Grimes 439df8bae1dSRodney W. Grimes /* 440df8bae1dSRodney W. Grimes * We handle getattr only to change the fsid. 441df8bae1dSRodney W. 
Grimes */ 442d4b7a369SPoul-Henning Kamp static int 443df8bae1dSRodney W. Grimes null_getattr(ap) 444df8bae1dSRodney W. Grimes struct vop_getattr_args /* { 445df8bae1dSRodney W. Grimes struct vnode *a_vp; 446df8bae1dSRodney W. Grimes struct vattr *a_vap; 447df8bae1dSRodney W. Grimes struct ucred *a_cred; 448df8bae1dSRodney W. Grimes struct proc *a_p; 449df8bae1dSRodney W. Grimes } */ *ap; 450df8bae1dSRodney W. Grimes { 451df8bae1dSRodney W. Grimes int error; 452996c772fSJohn Dyson 453f3a778f2SMike Pritchard if (error = null_bypass((struct vop_generic_args *)ap)) 454df8bae1dSRodney W. Grimes return (error); 455df8bae1dSRodney W. Grimes /* Requires that arguments be restored. */ 456df8bae1dSRodney W. Grimes ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 457df8bae1dSRodney W. Grimes return (0); 458df8bae1dSRodney W. Grimes } 459df8bae1dSRodney W. Grimes 460d4b7a369SPoul-Henning Kamp static int 461996c772fSJohn Dyson null_access(ap) 462996c772fSJohn Dyson struct vop_access_args /* { 463996c772fSJohn Dyson struct vnode *a_vp; 464996c772fSJohn Dyson int a_mode; 465996c772fSJohn Dyson struct ucred *a_cred; 466996c772fSJohn Dyson struct proc *a_p; 467996c772fSJohn Dyson } */ *ap; 468996c772fSJohn Dyson { 469996c772fSJohn Dyson struct vnode *vp = ap->a_vp; 470996c772fSJohn Dyson mode_t mode = ap->a_mode; 471996c772fSJohn Dyson 472996c772fSJohn Dyson /* 473996c772fSJohn Dyson * Disallow write attempts on read-only layers; 474996c772fSJohn Dyson * unless the file is a socket, fifo, or a block or 475996c772fSJohn Dyson * character device resident on the file system. 
476996c772fSJohn Dyson */ 477996c772fSJohn Dyson if (mode & VWRITE) { 478996c772fSJohn Dyson switch (vp->v_type) { 479996c772fSJohn Dyson case VDIR: 480996c772fSJohn Dyson case VLNK: 481996c772fSJohn Dyson case VREG: 482996c772fSJohn Dyson if (vp->v_mount->mnt_flag & MNT_RDONLY) 483996c772fSJohn Dyson return (EROFS); 484996c772fSJohn Dyson break; 485996c772fSJohn Dyson } 486996c772fSJohn Dyson } 487f3a778f2SMike Pritchard return (null_bypass((struct vop_generic_args *)ap)); 488996c772fSJohn Dyson } 489996c772fSJohn Dyson 490996c772fSJohn Dyson /* 491996c772fSJohn Dyson * We need to process our own vnode lock and then clear the 492996c772fSJohn Dyson * interlock flag as it applies only to our vnode, not the 493996c772fSJohn Dyson * vnodes below us on the stack. 494996c772fSJohn Dyson */ 49563f50488SMike Pritchard static int 496996c772fSJohn Dyson null_lock(ap) 497996c772fSJohn Dyson struct vop_lock_args /* { 498996c772fSJohn Dyson struct vnode *a_vp; 499996c772fSJohn Dyson int a_flags; 500996c772fSJohn Dyson struct proc *a_p; 501996c772fSJohn Dyson } */ *ap; 502996c772fSJohn Dyson { 503996c772fSJohn Dyson 504996c772fSJohn Dyson vop_nolock(ap); 505996c772fSJohn Dyson if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) 506996c772fSJohn Dyson return (0); 507996c772fSJohn Dyson ap->a_flags &= ~LK_INTERLOCK; 508f3a778f2SMike Pritchard return (null_bypass((struct vop_generic_args *)ap)); 509996c772fSJohn Dyson } 510996c772fSJohn Dyson 511996c772fSJohn Dyson /* 512996c772fSJohn Dyson * We need to process our own vnode unlock and then clear the 513996c772fSJohn Dyson * interlock flag as it applies only to our vnode, not the 514996c772fSJohn Dyson * vnodes below us on the stack. 
515996c772fSJohn Dyson */ 51663f50488SMike Pritchard static int 517996c772fSJohn Dyson null_unlock(ap) 518996c772fSJohn Dyson struct vop_unlock_args /* { 519996c772fSJohn Dyson struct vnode *a_vp; 520996c772fSJohn Dyson int a_flags; 521996c772fSJohn Dyson struct proc *a_p; 522996c772fSJohn Dyson } */ *ap; 523996c772fSJohn Dyson { 524996c772fSJohn Dyson struct vnode *vp = ap->a_vp; 525996c772fSJohn Dyson 526996c772fSJohn Dyson vop_nounlock(ap); 527996c772fSJohn Dyson ap->a_flags &= ~LK_INTERLOCK; 528f3a778f2SMike Pritchard return (null_bypass((struct vop_generic_args *)ap)); 529996c772fSJohn Dyson } 530996c772fSJohn Dyson 53163f50488SMike Pritchard static int 532df8bae1dSRodney W. Grimes null_inactive(ap) 533df8bae1dSRodney W. Grimes struct vop_inactive_args /* { 534df8bae1dSRodney W. Grimes struct vnode *a_vp; 535996c772fSJohn Dyson struct proc *a_p; 536df8bae1dSRodney W. Grimes } */ *ap; 537df8bae1dSRodney W. Grimes { 538df8bae1dSRodney W. Grimes /* 539df8bae1dSRodney W. Grimes * Do nothing (and _don't_ bypass). 540df8bae1dSRodney W. Grimes * Wait to vrele lowervp until reclaim, 541df8bae1dSRodney W. Grimes * so that until then our null_node is in the 542df8bae1dSRodney W. Grimes * cache and reusable. 543df8bae1dSRodney W. Grimes * 544df8bae1dSRodney W. Grimes * NEEDSWORK: Someday, consider inactive'ing 545df8bae1dSRodney W. Grimes * the lowervp and then trying to reactivate it 546df8bae1dSRodney W. Grimes * with capabilities (v_id) 547df8bae1dSRodney W. Grimes * like they do in the name lookup cache code. 548df8bae1dSRodney W. Grimes * That's too much work for now. 549df8bae1dSRodney W. Grimes */ 550996c772fSJohn Dyson VOP_UNLOCK(ap->a_vp, 0, ap->a_p); 551df8bae1dSRodney W. Grimes return (0); 552df8bae1dSRodney W. Grimes } 553df8bae1dSRodney W. Grimes 554d4b7a369SPoul-Henning Kamp static int 555df8bae1dSRodney W. Grimes null_reclaim(ap) 556df8bae1dSRodney W. Grimes struct vop_reclaim_args /* { 557df8bae1dSRodney W. 
Grimes struct vnode *a_vp; 558996c772fSJohn Dyson struct proc *a_p; 559df8bae1dSRodney W. Grimes } */ *ap; 560df8bae1dSRodney W. Grimes { 561df8bae1dSRodney W. Grimes struct vnode *vp = ap->a_vp; 562df8bae1dSRodney W. Grimes struct null_node *xp = VTONULL(vp); 563df8bae1dSRodney W. Grimes struct vnode *lowervp = xp->null_lowervp; 564df8bae1dSRodney W. Grimes 565df8bae1dSRodney W. Grimes /* 566df8bae1dSRodney W. Grimes * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, 567df8bae1dSRodney W. Grimes * so we can't call VOPs on ourself. 568df8bae1dSRodney W. Grimes */ 569df8bae1dSRodney W. Grimes /* After this assignment, this node will not be re-used. */ 570c5e17d9eSKATO Takenori xp->null_lowervp = NULLVP; 571996c772fSJohn Dyson LIST_REMOVE(xp, null_hash); 572df8bae1dSRodney W. Grimes FREE(vp->v_data, M_TEMP); 573df8bae1dSRodney W. Grimes vp->v_data = NULL; 574df8bae1dSRodney W. Grimes vrele (lowervp); 575df8bae1dSRodney W. Grimes return (0); 576df8bae1dSRodney W. Grimes } 577df8bae1dSRodney W. Grimes 578d4b7a369SPoul-Henning Kamp static int 579df8bae1dSRodney W. Grimes null_print(ap) 580df8bae1dSRodney W. Grimes struct vop_print_args /* { 581df8bae1dSRodney W. Grimes struct vnode *a_vp; 582df8bae1dSRodney W. Grimes } */ *ap; 583df8bae1dSRodney W. Grimes { 584df8bae1dSRodney W. Grimes register struct vnode *vp = ap->a_vp; 5853a773ad0SPoul-Henning Kamp printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp)); 586df8bae1dSRodney W. Grimes return (0); 587df8bae1dSRodney W. Grimes } 588df8bae1dSRodney W. Grimes 589df8bae1dSRodney W. Grimes /* 590df8bae1dSRodney W. Grimes * XXX - vop_strategy must be hand coded because it has no 591df8bae1dSRodney W. Grimes * vnode in its arguments. 592df8bae1dSRodney W. Grimes * This goes away with a merged VM/buffer cache. 593df8bae1dSRodney W. Grimes */ 594d4b7a369SPoul-Henning Kamp static int 595df8bae1dSRodney W. Grimes null_strategy(ap) 596df8bae1dSRodney W. Grimes struct vop_strategy_args /* { 597df8bae1dSRodney W. 
Grimes struct buf *a_bp; 598df8bae1dSRodney W. Grimes } */ *ap; 599df8bae1dSRodney W. Grimes { 600df8bae1dSRodney W. Grimes struct buf *bp = ap->a_bp; 601df8bae1dSRodney W. Grimes int error; 602df8bae1dSRodney W. Grimes struct vnode *savedvp; 603df8bae1dSRodney W. Grimes 604df8bae1dSRodney W. Grimes savedvp = bp->b_vp; 605df8bae1dSRodney W. Grimes bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 606df8bae1dSRodney W. Grimes 607df8bae1dSRodney W. Grimes error = VOP_STRATEGY(bp); 608df8bae1dSRodney W. Grimes 609df8bae1dSRodney W. Grimes bp->b_vp = savedvp; 610df8bae1dSRodney W. Grimes 611df8bae1dSRodney W. Grimes return (error); 612df8bae1dSRodney W. Grimes } 613df8bae1dSRodney W. Grimes 614df8bae1dSRodney W. Grimes /* 615df8bae1dSRodney W. Grimes * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no 616df8bae1dSRodney W. Grimes * vnode in its arguments. 617df8bae1dSRodney W. Grimes * This goes away with a merged VM/buffer cache. 618df8bae1dSRodney W. Grimes */ 619d4b7a369SPoul-Henning Kamp static int 620df8bae1dSRodney W. Grimes null_bwrite(ap) 621df8bae1dSRodney W. Grimes struct vop_bwrite_args /* { 622df8bae1dSRodney W. Grimes struct buf *a_bp; 623df8bae1dSRodney W. Grimes } */ *ap; 624df8bae1dSRodney W. Grimes { 625df8bae1dSRodney W. Grimes struct buf *bp = ap->a_bp; 626df8bae1dSRodney W. Grimes int error; 627df8bae1dSRodney W. Grimes struct vnode *savedvp; 628df8bae1dSRodney W. Grimes 629df8bae1dSRodney W. Grimes savedvp = bp->b_vp; 630df8bae1dSRodney W. Grimes bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); 631df8bae1dSRodney W. Grimes 632df8bae1dSRodney W. Grimes error = VOP_BWRITE(bp); 633df8bae1dSRodney W. Grimes 634df8bae1dSRodney W. Grimes bp->b_vp = savedvp; 635df8bae1dSRodney W. Grimes 636df8bae1dSRodney W. Grimes return (error); 637df8bae1dSRodney W. Grimes } 638df8bae1dSRodney W. Grimes 639df8bae1dSRodney W. Grimes /* 640df8bae1dSRodney W. Grimes * Global vfs data structures 641df8bae1dSRodney W. 
Grimes */ 642f57e6547SBruce Evans vop_t **null_vnodeop_p; 643d4b7a369SPoul-Henning Kamp static struct vnodeopv_entry_desc null_vnodeop_entries[] = { 644f57e6547SBruce Evans { &vop_default_desc, (vop_t *)null_bypass }, 645df8bae1dSRodney W. Grimes 646996c772fSJohn Dyson { &vop_lookup_desc, (vop_t *)null_lookup }, 647996c772fSJohn Dyson { &vop_setattr_desc, (vop_t *)null_setattr }, 648f57e6547SBruce Evans { &vop_getattr_desc, (vop_t *)null_getattr }, 649996c772fSJohn Dyson { &vop_access_desc, (vop_t *)null_access }, 650996c772fSJohn Dyson { &vop_lock_desc, (vop_t *)null_lock }, 651996c772fSJohn Dyson { &vop_unlock_desc, (vop_t *)null_unlock }, 652f57e6547SBruce Evans { &vop_inactive_desc, (vop_t *)null_inactive }, 653f57e6547SBruce Evans { &vop_reclaim_desc, (vop_t *)null_reclaim }, 654f57e6547SBruce Evans { &vop_print_desc, (vop_t *)null_print }, 655df8bae1dSRodney W. Grimes 656f57e6547SBruce Evans { &vop_strategy_desc, (vop_t *)null_strategy }, 657f57e6547SBruce Evans { &vop_bwrite_desc, (vop_t *)null_bwrite }, 658df8bae1dSRodney W. Grimes 659f57e6547SBruce Evans { NULL, NULL } 660df8bae1dSRodney W. Grimes }; 661d4b7a369SPoul-Henning Kamp static struct vnodeopv_desc null_vnodeop_opv_desc = 662df8bae1dSRodney W. Grimes { &null_vnodeop_p, null_vnodeop_entries }; 663c901836cSGarrett Wollman 664c901836cSGarrett Wollman VNODEOP_SET(null_vnodeop_opv_desc); 665