1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/kern/vfs_lock.c,v 1.7 2005/04/20 17:01:50 dillon Exp $
 */

/*
 * External virtual filesystem routines: vnode reference/hold counting,
 * the vnode free list, and the VX (reclaim/deactivate) locking layer.
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <sys/buf2.h>
#include <sys/thread2.h>


static MALLOC_DEFINE(M_VNODE, "vnodes", "vnode structures");

/* List of free (unreferenced, reusable) vnodes; allocvnode() scans it. */
static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */

/* Current number of vnodes on the free list (exported read-only). */
int freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD,
	&freevnodes, 0, "");
/* Free-list population required before allocvnode() will reuse entries. */
static int wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW,
	&wantfreevnodes, 0, "");
/* Below this total vnode count allocvnode() always allocates fresh vnodes. */
static int minvnodes;
SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
	&minvnodes, 0, "Minimum number of vnodes");

/*
 * Called from vfsinit().  Initialize the free list and derive minvnodes
 * from the system-wide desiredvnodes setting.
 */
void
vfs_lock_init(void)
{
	minvnodes = desiredvnodes / 4;

	TAILQ_INIT(&vnode_free_list);
}

/*
 * Inline helper functions.  vbusy() and vfree() must be called while in a
 * critical section.
 *
 * __vbusy() takes a vnode off the free list (it must be on it, see the
 * KKASSERT) and clears its free-list related flags.
 */
static __inline
void
__vbusy(struct vnode *vp)
{
	KKASSERT(vp->v_flag & VFREE);
	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	freevnodes--;
	vp->v_flag &= ~(VFREE|VAGE);
}

/*
 * __vfree() places a vnode on the free list (it must not already be on
 * it).  Aged or reclaimed vnodes are inserted at the head so they are
 * recycled first, since allocvnode() scans from the head of the list.
 */
static __inline
void
__vfree(struct vnode *vp)
{
	KKASSERT((vp->v_flag & VFREE) == 0);
	if (vp->v_flag & (VAGE|VRECLAIMED))
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	freevnodes++;
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
}

/*
 * Return 1 if we can immediately place the vnode on the freelist: no
 * holds, the expected use count, and no VM object keeping it busy.
 */
static __inline int
vshouldfree(struct vnode *vp, int usecount)
{
	/* Refuse while held or while references beyond 'usecount' exist. */
	if (vp->v_holdcnt != 0 || vp->v_usecount != usecount)
		return (0);		/* other holders */
	/*
	 * A VM object with references or resident pages keeps the vnode
	 * busy as well.
	 */
	if (vp->v_object &&
	    (vp->v_object->ref_count || vp->v_object->resident_page_count)) {
		return (0);
	}
	return (1);
}

/*
 * Reference a vnode or release the reference on a vnode.  The vnode will
 * be taken off the freelist if it is on it and cannot be recycled or
 * deactivated while refd.  The last release of a vnode will deactivate the
 * vnode via VOP_INACTIVE().
 *
 * Special cases: refing a vnode does not clear VINACTIVE, you have to vget()
 * the vnode shared or exclusive to do that.
 */
static __inline
void
__vref(struct vnode *vp)
{
	/* Caller must be in a critical section. */
	++vp->v_usecount;
	if (vp->v_flag & VFREE)
		__vbusy(vp);
}

void
vref(struct vnode *vp)
{
	crit_enter();
	__vref(vp);
	crit_exit();
}

void
vrele(struct vnode *vp)
{
	thread_t td = curthread;

	crit_enter();
	if (vp->v_usecount == 1) {
		/* The last reference may not be released while locked. */
		KASSERT(lockcountnb(&vp->v_lock) == 0, ("last vrele vp %p still locked", vp));

		/*
		 * Deactivation requires an exclusive v_lock (vx_lock()), and
		 * only occurs if the usecount is still 1 after locking.
		 */
		if ((vp->v_flag & VINACTIVE) == 0) {
			if (vx_lock(vp) == 0) {
				if ((vp->v_flag & VINACTIVE) == 0 &&
				    vp->v_usecount == 1) {
					vp->v_flag |= VINACTIVE;
					VOP_INACTIVE(vp, td);
				}
				vx_unlock(vp);
			}
		}
		/*
		 * Place the vnode on the free list if nothing else holds
		 * it; the decrement below takes v_usecount to 0.
		 */
		if (vshouldfree(vp, 1))
			__vfree(vp);
	} else {
		KKASSERT(vp->v_usecount > 0);
	}
	--vp->v_usecount;
	crit_exit();
}

/*
 * Hold a vnode or drop the hold on a vnode.  The vnode will be taken off
 * the freelist if it is on it and cannot be recycled.  However, the
 * vnode can be deactivated and reactivated while held.
 *
 * Special cases: The last drop of a vnode does nothing special, allowing it
 * to be called from an interrupt.  vrele() on the other hand cannot be
 * called from an interrupt.
 */
void
vhold(struct vnode *vp)
{
	crit_enter();
	++vp->v_holdcnt;
	/* A held vnode may not sit on the free list. */
	if (vp->v_flag & VFREE)
		__vbusy(vp);
	crit_exit();
}

void
vdrop(struct vnode *vp)
{
	crit_enter();
	if (vp->v_holdcnt == 1) {
		--vp->v_holdcnt;
		/*
		 * Dropping the last hold: free the vnode if nothing else
		 * references it (expected usecount 0).
		 */
		if (vshouldfree(vp, 0))
			__vfree(vp);
	} else {
		--vp->v_holdcnt;
		KKASSERT(vp->v_holdcnt > 0);
	}
	crit_exit();
}

/****************************************************************
 *			VX LOCKING FUNCTIONS			*
 ****************************************************************
 *
 * These functions lock vnodes for reclamation and deactivation ops.
 * Only vp->v_lock, the top layer of the VFS, is locked.  You must be
 * holding a normal reference in order to be able to safely call vx_lock()
 * and vx_unlock().  vx_get() and vx_put() are combination functions which
 * vref+vx_lock and vrele+vx_unlock.
230 */ 231 232 #define VXLOCKFLAGS (LK_EXCLUSIVE|LK_RETRY) 233 #define VXLOCKFLAGS_NB (LK_EXCLUSIVE|LK_NOWAIT) 234 235 static int 236 __vxlock(struct vnode *vp, int flags) 237 { 238 return(lockmgr(&vp->v_lock, flags, NULL, curthread)); 239 } 240 241 static void 242 __vxunlock(struct vnode *vp) 243 { 244 lockmgr(&vp->v_lock, LK_RELEASE, NULL, curthread); 245 } 246 247 int 248 vx_lock(struct vnode *vp) 249 { 250 return(__vxlock(vp, VXLOCKFLAGS)); 251 } 252 253 void 254 vx_unlock(struct vnode *vp) 255 { 256 __vxunlock(vp); 257 } 258 259 int 260 vx_get(struct vnode *vp) 261 { 262 int error; 263 264 vref(vp); 265 if ((error = __vxlock(vp, VXLOCKFLAGS)) != 0) 266 vrele(vp); 267 return(error); 268 } 269 270 int 271 vx_get_nonblock(struct vnode *vp) 272 { 273 int error; 274 275 vref(vp); 276 if ((error = __vxlock(vp, VXLOCKFLAGS_NB)) != 0) 277 vrele(vp); 278 return(error); 279 } 280 281 void 282 vx_put(struct vnode *vp) 283 { 284 __vxunlock(vp); 285 vrele(vp); 286 } 287 288 /**************************************************************** 289 * VNODE ACQUISITION FUNCTIONS * 290 **************************************************************** 291 * 292 * vget() and vput() access a vnode for the intent of executing an 293 * operation other then a reclamation or deactivation. vget() will ref 294 * and lock the vnode, vput() will unlock and deref the vnode. 295 * The VOP_*() locking functions are used. 296 * 297 * CALLING VGET IS MANDATORY PRIOR TO ANY MODIFYING OPERATION ON A VNODE. 298 * This is because vget handles the VINACTIVE interlock and is responsible 299 * for clearing the bit. If the bit is not cleared inode updates may not 300 * make it to disk. 301 * 302 * Special cases: If vget()'s locking operation fails the vrele() call may 303 * cause the vnode to be deactivated (VOP_INACTIVE called). However, this 304 * never occurs if the vnode is in a reclaimed state. Vnodes in reclaimed 305 * states always return an error code of ENOENT. 
 *
 * Special cases: vput() will unlock and, if it is the last reference,
 * deactivate the vnode.  The deactivation uses a separate non-layered
 * VX lock after the normal unlock.  XXX make it more efficient.
 */
int
vget(struct vnode *vp, int flags, thread_t td)
{
	int error;

	crit_enter();
	__vref(vp);
	if (flags & LK_TYPE_MASK) {
		/*
		 * A lock type is mandatory (see the panic below).  If the
		 * lock attempt fails the new reference is dropped again.
		 */
		if ((error = vn_lock(vp, flags, td)) != 0) {
			vrele(vp);
		} else if (vp->v_flag & VRECLAIMED) {
			/* Reclaimed vnodes always return ENOENT. */
			VOP_UNLOCK(vp, 0, td);
			vrele(vp);
			error = ENOENT;
		} else {
			/* Success: reactivate the vnode. */
			vp->v_flag &= ~VINACTIVE;
			error = 0;
		}
	} else {
		panic("vget() called with no lock specified!");
		error = ENOENT;	/* not reached, compiler opt */
	}
	crit_exit();
	return(error);
}

/*
 * Unlock and release a vnode acquired with vget().
 */
void
vput(struct vnode *vp)
{
	VOP_UNLOCK(vp, 0, curthread);
	vrele(vp);
}

/*
 * Set bits in v_flag within a critical section.
 */
void
vsetflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag |= flags;
	crit_exit();
}

/*
 * Clear bits in v_flag within a critical section.
 */
void
vclrflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag &= ~flags;
	crit_exit();
}

/*
 * Obtain a new vnode from the freelist, allocating more if necessary.
 * The returned vnode is VX locked & refd.
 */
struct vnode *
allocvnode(int lktimeout, int lkflags)
{
	struct thread *td;
	struct vnode *vp;

	/*
	 * Try to reuse vnodes if we hit the max.  This situation only
	 * occurs in certain large-memory (2G+) situations.  We cannot
	 * attempt to directly reclaim vnodes due to nasty recursion
	 * problems.
	 */
	while (numvnodes - freevnodes > desiredvnodes)
		vnlru_proc_wait();

	td = curthread;
	vp = NULL;

	/*
	 * Attempt to reuse a vnode already on the free list, allocating
	 * a new vnode if we can't find one or if we have not reached a
	 * good minimum for good LRU performance.
	 */
	if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
		int count;

		for (count = 0; count < freevnodes; count++) {
			/*
			 * __VNODESCAN__
			 *
			 * Pull the next vnode off the free list and do some
			 * sanity checks.  Note that regardless of how we
			 * block, if freevnodes is non-zero there had better
			 * be something on the list.
			 */
			vp = TAILQ_FIRST(&vnode_free_list);
			if (vp == NULL)
				panic("getnewvnode: free vnode isn't");

			/*
			 * Note the lack of a critical section.  We vx_get()
			 * the vnode before we check it for validity, reducing
			 * the number of checks we have to make.  The vx_get()
			 * will pull it off the freelist.
			 */
			if (vx_get(vp)) {
				vp = NULL;
				continue;
			}

			/*
			 * Can this vnode be recycled?  It must be in a
			 * VINACTIVE state with only our reference to it.
			 * (vx_get(), unlike vget(), does not reactivate
			 * the vnode).  vx_put() will recycle it onto the
			 * end of the freelist.
			 */
			if ((vp->v_flag & VINACTIVE) == 0 ||
			    vp->v_holdcnt || vp->v_usecount != 1) {
				vx_put(vp);
				vp = NULL;
				continue;
			}

			/*
			 * Ok, we can reclaim the vnode if it isn't already
			 * in a reclaimed state.  If the reclamation fails,
			 * or if someone else is referencing the vnode after
			 * we have vgone()'d it, we recycle the vnode on the
			 * freelist or hold it (by calling vx_put()).
			 */
			if ((vp->v_flag & VRECLAIMED) == 0) {
				vgone(vp);
				if ((vp->v_flag & VRECLAIMED) == 0 ||
				    vp->v_holdcnt || vp->v_usecount != 1) {
					vx_put(vp);
					vp = NULL;
					continue;
				}
			}
			KKASSERT(vp->v_flag & VINACTIVE);

			/*
			 * We have a vnode!
			 */
			break;
		}
	}

	/*
	 * If we have a vp it will be refd and VX locked.  Reset its state
	 * for reuse; otherwise allocate a brand-new vnode.
	 */
	if (vp) {
		vp->v_lease = NULL;

#ifdef INVARIANTS
		if (vp->v_data)
			panic("cleaned vnode isn't");
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		KKASSERT(vp->v_mount == NULL);
#endif
		/* Clear stale per-vnode state left by the previous owner. */
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		lockreinit(&vp->v_lock, 0, "vnode", lktimeout, lkflags);
		KKASSERT(TAILQ_FIRST(&vp->v_namecache) == NULL);
	} else {
		/*
		 * A brand-new vnode: zeroed allocation plus lock and
		 * token initialization.
		 */
		vp = malloc(sizeof(struct vnode), M_VNODE, M_WAITOK|M_ZERO);
		lwkt_token_init(&vp->v_pollinfo.vpi_token);
		lockinit(&vp->v_lock, 0, "vnode", lktimeout, lkflags);
		TAILQ_INIT(&vp->v_namecache);

		/*
		 * short cut around vfreeing it and looping, just set it up
		 * as if we had pulled a reclaimed vnode off the freelist
		 * and reinitialized it.
		 */
		vp->v_usecount = 1;
		if (__vxlock(vp, VXLOCKFLAGS))
			panic("getnewvnode: __vxlock failed");
		numvnodes++;
	}

	/* Common (re)initialization for both reused and fresh vnodes. */
	RB_INIT(&vp->v_rbclean_tree);
	RB_INIT(&vp->v_rbdirty_tree);
	vp->v_type = VNON;
	vp->v_tag = 0;
	vp->v_ops = NULL;
	vp->v_data = NULL;
	KKASSERT(vp->v_mount == NULL);
	return (vp);
}