1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/kern/vfs_lock.c,v 1.19 2006/05/27 20:17:16 dillon Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <sys/buf2.h>
#include <sys/thread2.h>


static MALLOC_DEFINE(M_VNODE, "vnodes", "vnode structures");

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */

/* Number of vnodes currently on the free list (read-only via sysctl). */
int freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD,
	&freevnodes, 0, "");
/* Target count of free vnodes to keep available for reuse. */
static int wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW,
	&wantfreevnodes, 0, "");
/* Floor on total vnodes before allocvnode() prefers reuse over malloc. */
static int minvnodes;
SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
	&minvnodes, 0, "Minimum number of vnodes");

/*
 * One-time initialization of the vnode free list and the minvnodes
 * floor (1/4 of desiredvnodes).  Called from vfsinit().
 */
void
vfs_lock_init(void)
{
	minvnodes = desiredvnodes / 4;

	TAILQ_INIT(&vnode_free_list);
}

/*
 * Inline helper functions.  vbusy() and vfree() must be called while in a
 * critical section.
 *
 * Warning: must be callable if the caller holds a read spinlock to something
 * else, meaning we can't use read spinlocks here.
 */

/*
 * Take a vnode off the free list.  The vnode must currently be marked
 * VFREE; both VFREE and VAGE are cleared since the vnode is being
 * reactivated.  Caller must be in a critical section.
 */
static __inline
void
__vbusy(struct vnode *vp)
{
	KKASSERT(vp->v_flag & VFREE);
	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	freevnodes--;
	vp->v_flag &= ~(VFREE|VAGE);
}

/*
 * Place a vnode on the free list and mark it VFREE.  Aged or reclaimed
 * vnodes are inserted at the head so they are reused first; everything
 * else goes to the tail for LRU behavior.  Caller must be in a critical
 * section.
 */
static __inline
void
__vfree(struct vnode *vp)
{
	KKASSERT((vp->v_flag & VFREE) == 0);
	if (vp->v_flag & (VAGE|VRECLAIMED))
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	freevnodes++;
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
}

/*
 * Return 1 if we can immediately place the vnode on the freelist.
 *
 * usecount is the use count the caller expects the vnode to still carry:
 * vrele() passes 1 (its reference is still counted when it tests), and
 * vdrop() passes 0.
 */
static __inline int
vshouldfree(struct vnode *vp, int usecount)
{
	if (vp->v_holdcnt != 0 || vp->v_usecount != usecount)
		return (0);		/* other holders */
	if (vp->v_object &&
	    (vp->v_object->ref_count || vp->v_object->resident_page_count)) {
		return (0);		/* backing VM object still in use */
	}
	return (1);
}

/*
 * Reference a vnode or release the reference on a vnode.  The vnode will
 * be taken off the freelist if it is on it and cannot be recycled or
 * deactivated while refd.  The last release of a vnode will deactivate the
 * vnode via VOP_INACTIVE().
 *
 * Special cases: refing a vnode does not clear VINACTIVE, you have to vget()
 * the vnode shared or exclusive to do that.
 *
 * Warning: must be callable if the caller holds a read spinlock to something
 * else, meaning we can't use read spinlocks here.
 */
static __inline
void
__vref(struct vnode *vp)
{
	++vp->v_usecount;
	if (vp->v_flag & VFREE)
		__vbusy(vp);
}

/*
 * This is a rare case where callers are allowed to hold spinlocks, so
 * we can't ourselves.  In such cases the vnode must already have at least
 * one reference because we cannot get the spinlock required to move
 * the vnode off the free list.
 *
 * If the usecount & holdcnt are 0 the caller must be holding the
 * free list spinlock since we will be removing the vnode from the
 * freelist in that case.
 */
void
vref(struct vnode *vp)
{
	crit_enter();
	__vref(vp);
	crit_exit();
}

/*
 * Release a reference.  On the 1->0 transition the vnode is deactivated
 * via VOP_INACTIVE() (under an exclusive vx lock) and, if nothing else
 * holds it, placed back on the free list.
 */
void
vrele(struct vnode *vp)
{
	crit_enter();
	if (vp->v_usecount == 1) {
		/* The caller may not still hold the vnode lock here. */
		KASSERT(lockcountnb(&vp->v_lock) == 0, ("last vrele vp %p still locked", vp));

		/*
		 * Deactivation requires an exclusive v_lock (vx_lock()), and
		 * only occurs if the usecount is still 1 after locking.
		 */
		if ((vp->v_flag & VINACTIVE) == 0) {
			if (vx_lock(vp) == 0) {
				/*
				 * Re-test under the lock: another thread may
				 * have deactivated or re-referenced the vnode
				 * while we were acquiring vx_lock().
				 */
				if ((vp->v_flag & VINACTIVE) == 0 &&
				    vp->v_usecount == 1) {
					vp->v_flag |= VINACTIVE;
					VOP_INACTIVE(vp);
				}
				vx_unlock(vp);
			}
		}
		/*
		 * vshouldfree() is passed 1 because our reference is still
		 * counted in v_usecount at this point; it is dropped below.
		 */
		if (vshouldfree(vp, 1))
			__vfree(vp);
	} else {
		KKASSERT(vp->v_usecount > 0);
	}
	--vp->v_usecount;
	crit_exit();
}

/*
 * Hold a vnode or drop the hold on a vnode.  The vnode will be taken off
 * the freelist if it is on it and cannot be recycled.  However, the
 * vnode can be deactivated and reactivated while held.
 *
 * Special cases: The last drop of a vnode does nothing special, allowing it
 * to be called from an interrupt.  vrele() on the otherhand cannot be called
 * from an interrupt.
209 */ 210 void 211 vhold(struct vnode *vp) 212 { 213 crit_enter(); 214 ++vp->v_holdcnt; 215 if (vp->v_flag & VFREE) 216 __vbusy(vp); 217 crit_exit(); 218 } 219 220 void 221 vdrop(struct vnode *vp) 222 { 223 crit_enter(); 224 if (vp->v_holdcnt == 1) { 225 --vp->v_holdcnt; 226 if (vshouldfree(vp, 0)) 227 __vfree(vp); 228 } else { 229 --vp->v_holdcnt; 230 KKASSERT(vp->v_holdcnt > 0); 231 } 232 crit_exit(); 233 } 234 235 /**************************************************************** 236 * VX LOCKING FUNCTIONS * 237 **************************************************************** 238 * 239 * These functions lock vnodes for reclamation and deactivation ops. 240 * Only vp->v_lock, the top layer of the VFS, is locked. You must be 241 * holding a normal reference in order to be able to safely call vx_lock() 242 * and vx_unlock(). vx_get() and vx_put() are combination functions which 243 * vref+vx_lock and vrele+vx_unlock. 244 */ 245 246 #define VXLOCKFLAGS (LK_EXCLUSIVE|LK_RETRY) 247 #define VXLOCKFLAGS_NB (LK_EXCLUSIVE|LK_NOWAIT) 248 249 static int 250 __vxlock(struct vnode *vp, int flags) 251 { 252 return(lockmgr(&vp->v_lock, flags)); 253 } 254 255 static void 256 __vxunlock(struct vnode *vp) 257 { 258 lockmgr(&vp->v_lock, LK_RELEASE); 259 } 260 261 int 262 vx_lock(struct vnode *vp) 263 { 264 return(__vxlock(vp, VXLOCKFLAGS)); 265 } 266 267 void 268 vx_unlock(struct vnode *vp) 269 { 270 __vxunlock(vp); 271 } 272 273 int 274 vx_get(struct vnode *vp) 275 { 276 int error; 277 278 vref(vp); 279 if ((error = __vxlock(vp, VXLOCKFLAGS)) != 0) 280 vrele(vp); 281 return(error); 282 } 283 284 int 285 vx_get_nonblock(struct vnode *vp) 286 { 287 int error; 288 289 vref(vp); 290 if ((error = __vxlock(vp, VXLOCKFLAGS_NB)) != 0) 291 vrele(vp); 292 return(error); 293 } 294 295 void 296 vx_put(struct vnode *vp) 297 { 298 __vxunlock(vp); 299 vrele(vp); 300 } 301 302 /**************************************************************** 303 * VNODE ACQUISITION FUNCTIONS * 304 
**************************************************************** 305 * 306 * vget() and vput() access a vnode for the intent of executing an 307 * operation other then a reclamation or deactivation. vget() will ref 308 * and lock the vnode, vput() will unlock and deref the vnode. 309 * The VOP_*() locking functions are used. 310 * 311 * CALLING VGET IS MANDATORY PRIOR TO ANY MODIFYING OPERATION ON A VNODE. 312 * This is because vget handles the VINACTIVE interlock and is responsible 313 * for clearing the bit. If the bit is not cleared inode updates may not 314 * make it to disk. 315 * 316 * Special cases: If vget()'s locking operation fails the vrele() call may 317 * cause the vnode to be deactivated (VOP_INACTIVE called). However, this 318 * never occurs if the vnode is in a reclaimed state. Vnodes in reclaimed 319 * states always return an error code of ENOENT. 320 * 321 * Special cases: vput() will unlock and, if it is the last reference, 322 * deactivate the vnode. The deactivation uses a separate non-layered 323 * VX lock after the normal unlock. XXX make it more efficient. 
 */
int
vget(struct vnode *vp, int flags)
{
	int error;

	crit_enter();
	__vref(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags)) != 0) {
			vrele(vp);
		} else if (vp->v_flag & VRECLAIMED) {
			/* Reclaimed vnodes are never handed out. */
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			error = ENOENT;
		} else {
			/* Successful get reactivates the vnode. */
			vp->v_flag &= ~VINACTIVE;
			error = 0;
		}
	} else {
		panic("vget() called with no lock specified!");
		error = ENOENT;	/* not reached, compiler opt */
	}
	crit_exit();
	return(error);
}

/*
 * Unlock and release a vnode acquired with vget().
 */
void
vput(struct vnode *vp)
{
	VOP_UNLOCK(vp, 0);
	vrele(vp);
}

/*
 * Set bits in v_flag while in a critical section.
 */
void
vsetflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag |= flags;
	crit_exit();
}

/*
 * Clear bits in v_flag while in a critical section.
 */
void
vclrflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag &= ~flags;
	crit_exit();
}

/*
 * Obtain a new vnode from the freelist, allocating more if necessary.
 * The returned vnode is VX locked & refd.
 */
struct vnode *
allocvnode(int lktimeout, int lkflags)
{
	struct thread *td;
	struct vnode *vp;

	/*
	 * Try to reuse vnodes if we hit the max.  This situation only
	 * occurs in certain large-memory (2G+) situations.  We cannot
	 * attempt to directly reclaim vnodes due to nasty recursion
	 * problems.
	 */
	while (numvnodes - freevnodes > desiredvnodes)
		vnlru_proc_wait();

	/* NOTE(review): td appears unused below -- possibly vestigial. */
	td = curthread;
	vp = NULL;

	/*
	 * Attempt to reuse a vnode already on the free list, allocating
	 * a new vnode if we can't find one or if we have not reached a
	 * good minimum for good LRU performance.
	 */
	if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
		int count;

		for (count = 0; count < freevnodes; count++) {
			/*
			 * __VNODESCAN__
			 *
			 * Pull the next vnode off the free list and do some
			 * sanity checks.  Note that regardless of how we
			 * block, if freevnodes is non-zero there had better
			 * be something on the list.
			 */
			vp = TAILQ_FIRST(&vnode_free_list);
			if (vp == NULL)
				panic("getnewvnode: free vnode isn't");

			/*
			 * Note the lack of a critical section.  We vx_get()
			 * the vnode before we check it for validity, reducing
			 * the number of checks we have to make.  The vx_get()
			 * will pull it off the freelist.
			 */
			if (vx_get(vp)) {
				vp = NULL;
				continue;
			}

			/*
			 * Can this vnode be recycled?  It must be in a
			 * VINACTIVE state with only our reference to it.
			 * (vx_get(), unlike vget(), does not reactivate
			 * the vnode).  vx_put() will recycle it onto the
			 * end of the freelist.
			 */
			if ((vp->v_flag & VINACTIVE) == 0 ||
			    vp->v_holdcnt || vp->v_usecount != 1) {
				vx_put(vp);
				vp = NULL;
				continue;
			}

			/*
			 * Ok, we can reclaim the vnode if it isn't already
			 * in a reclaimed state.  If the reclamation fails,
			 * or if someone else is referencing the vnode after
			 * we have vgone()'d it, we recycle the vnode on the
			 * freelist or hold it (by calling vx_put()).
			 */
			if ((vp->v_flag & VRECLAIMED) == 0) {
				vgone(vp);
				if ((vp->v_flag & VRECLAIMED) == 0 ||
				    vp->v_holdcnt || vp->v_usecount != 1) {
					vx_put(vp);
					vp = NULL;
					continue;
				}
			}
			KKASSERT(vp->v_flag & VINACTIVE);

			/*
			 * We have a vnode!
			 */
			break;
		}
	}

	/*
	 * If we have a vp it will be refd and VX locked.
	 */
	if (vp) {
#ifdef INVARIANTS
		if (vp->v_data)
			panic("cleaned vnode isn't");
		if (vp->v_track_read.bk_active + vp->v_track_write.bk_active)
			panic("Clean vnode has pending I/O's");
		KKASSERT(vp->v_mount == NULL);
#endif
		/* Reset per-use fields of the reclaimed vnode. */
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_opencount = 0;
		vp->v_writecount = 0;	/* XXX */
		lockreinit(&vp->v_lock, "vnode", lktimeout, lkflags);
		KKASSERT(TAILQ_FIRST(&vp->v_namecache) == NULL);
	} else {
		/*
		 * No reusable vnode was found; allocate a brand-new,
		 * zero-filled vnode.
		 */
		vp = malloc(sizeof(struct vnode), M_VNODE, M_WAITOK|M_ZERO);
		lwkt_token_init(&vp->v_pollinfo.vpi_token);
		lockinit(&vp->v_lock, "vnode", lktimeout, lkflags);
		TAILQ_INIT(&vp->v_namecache);

		/*
		 * short cut around vfreeing it and looping, just set it up
		 * as if we had pulled a reclaimed vnode off the freelist
		 * and reinitialized it.
		 */
		vp->v_usecount = 1;
		if (__vxlock(vp, VXLOCKFLAGS))
			panic("getnewvnode: __vxlock failed");
		numvnodes++;
	}

	/*
	 * Common (re)initialization for both reused and newly allocated
	 * vnodes: empty buffer trees, no size, no type, no fs attachment.
	 */
	RB_INIT(&vp->v_rbclean_tree);
	RB_INIT(&vp->v_rbdirty_tree);
	RB_INIT(&vp->v_rbhash_tree);
	vp->v_filesize = NOOFFSET;
	vp->v_type = VNON;
	vp->v_tag = 0;
	vp->v_ops = NULL;
	vp->v_data = NULL;
	KKASSERT(vp->v_mount == NULL);
	return (vp);
}
