/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/vfs_lock.c,v 1.2 2004/10/22 18:00:26 dillon Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>

#include <sys/buf2.h>
#include <sys/thread2.h>


static MALLOC_DEFINE(M_VNODE, "vnodes", "vnode structures");

static TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */

int freevnodes = 0;
SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD,
		&freevnodes, 0, "");
static int wantfreevnodes = 25;
SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW,
		&wantfreevnodes, 0, "");
static int minvnodes;
SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
		&minvnodes, 0, "Minimum number of vnodes");

/*
 * Called from vfsinit()
 */
void
vfs_lock_init(void)
{
	minvnodes = desiredvnodes / 4;

	TAILQ_INIT(&vnode_free_list);
}
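/*
 * Illustration only (not part of this file): the counters above are
 * exported via sysctl and can be read from userland, e.g. with
 * sysctlbyname(3):
 *
 *	int n;
 *	size_t len = sizeof(n);
 *
 *	if (sysctlbyname("debug.freevnodes", &n, &len, NULL, 0) == 0)
 *		printf("%d free vnodes\n", n);
 */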
/*
 * Inline helper functions.  vbusy() and vfree() must be called while in a
 * critical section.
 */
static __inline
void
__vbusy(struct vnode *vp)
{
	KKASSERT(vp->v_flag & VFREE);
	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	freevnodes--;
	vp->v_flag &= ~(VFREE|VAGE);
}

static __inline
void
__vfree(struct vnode *vp)
{
	KKASSERT((vp->v_flag & VFREE) == 0);
	if (vp->v_flag & (VAGE|VRECLAIMED))
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	freevnodes++;
	vp->v_flag &= ~VAGE;
	vp->v_flag |= VFREE;
}

/*
 * Return 1 if we can immediately place the vnode on the freelist.
 */
static __inline int
vshouldfree(struct vnode *vp, int usecount)
{
	if (vp->v_holdcnt != 0 || vp->v_usecount != usecount)
		return (0);		/* other holders */
	if (vp->v_object &&
	    (vp->v_object->ref_count || vp->v_object->resident_page_count)) {
		return (0);
	}
	return (1);
}

/*
 * Reference a vnode or release the reference on a vnode.  The vnode will
 * be taken off the freelist if it is on it and cannot be recycled or
 * deactivated while refd.  The last release of a vnode will deactivate the
 * vnode via VOP_INACTIVE().
 *
 * Special cases: refing a vnode does not clear VINACTIVE; you have to
 * vget() the vnode shared or exclusive to do that.
 */
static __inline
void
__vref(struct vnode *vp)
{
	++vp->v_usecount;
	if (vp->v_flag & VFREE)
		__vbusy(vp);
}

void
vref(struct vnode *vp)
{
	crit_enter();
	__vref(vp);
	crit_exit();
}

void
vrele(struct vnode *vp)
{
	thread_t td = curthread;

	crit_enter();
	if (vp->v_usecount == 1) {
		KASSERT(lockcount(&vp->v_lock) == 0,
			("last vrele vp %p still locked", vp));

		/*
		 * Deactivation requires an exclusive v_lock (vx_lock()), and
		 * only occurs if the usecount is still 1 after locking.
		 */
		if ((vp->v_flag & VINACTIVE) == 0) {
			if (vx_lock(vp) == 0) {
				if ((vp->v_flag & VINACTIVE) == 0 &&
				    vp->v_usecount == 1) {
					vp->v_flag |= VINACTIVE;
					VOP_INACTIVE(vp, td);
				}
				vx_unlock(vp);
			}
		}
		if (vshouldfree(vp, 1))
			__vfree(vp);
	} else {
		KKASSERT(vp->v_usecount > 0);
	}
	--vp->v_usecount;
	crit_exit();
}

/*
 * Hold a vnode or drop the hold on a vnode.  The vnode will be taken off
 * the freelist if it is on it and cannot be recycled.  However, the
 * vnode can be deactivated and reactivated while held.
 *
 * Special cases: The last drop of a vnode does nothing special, allowing it
 * to be called from an interrupt.  vrele() on the other hand cannot be
 * called from an interrupt.
 */
void
vhold(struct vnode *vp)
{
	crit_enter();
	++vp->v_holdcnt;
	if (vp->v_flag & VFREE)
		__vbusy(vp);
	crit_exit();
}

void
vdrop(struct vnode *vp)
{
	crit_enter();
	if (vp->v_holdcnt == 1) {
		--vp->v_holdcnt;
		if (vshouldfree(vp, 0))
			__vfree(vp);
	} else {
		--vp->v_holdcnt;
		KKASSERT(vp->v_holdcnt > 0);
	}
	crit_exit();
}
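/*
 * Illustration only (not part of this file): vhold()/vdrop() bracket a
 * span where the vnode must stay off the freelist but may still be
 * deactivated, e.g. across an async I/O.  The helper names and the I/O
 * hooks are hypothetical.
 */
#ifdef notdef
static void
example_pin_vnode(struct vnode *vp)
{
	vhold(vp);		/* vp can no longer be recycled */
	/* ... start async I/O that references vp ... */
}

static void
example_pin_vnode_done(struct vnode *vp)
{
	vdrop(vp);		/* safe even from an interrupt */
}
#endif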
/****************************************************************
 *			VX LOCKING FUNCTIONS			*
 ****************************************************************
 *
 * These functions lock vnodes for reclamation and deactivation ops.
 * Only vp->v_lock, the top layer of the VFS, is locked.  You must be
 * holding a normal reference in order to be able to safely call vx_lock()
 * and vx_unlock().  vx_get() and vx_put() are combination functions which
 * vref+vx_lock and vrele+vx_unlock.
 */

#define VXLOCKFLAGS	(LK_EXCLUSIVE|LK_RETRY)
#define VXLOCKFLAGS_NB	(LK_EXCLUSIVE|LK_NOWAIT)

static int
__vxlock(struct vnode *vp, int flags)
{
	return(lockmgr(&vp->v_lock, flags, NULL, curthread));
}

static void
__vxunlock(struct vnode *vp)
{
	lockmgr(&vp->v_lock, LK_RELEASE, NULL, curthread);
}

int
vx_lock(struct vnode *vp)
{
	return(__vxlock(vp, VXLOCKFLAGS));
}

void
vx_unlock(struct vnode *vp)
{
	__vxunlock(vp);
}

int
vx_get(struct vnode *vp)
{
	int error;

	vref(vp);
	if ((error = __vxlock(vp, VXLOCKFLAGS)) != 0)
		vrele(vp);
	return(error);
}

int
vx_get_nonblock(struct vnode *vp)
{
	int error;

	vref(vp);
	if ((error = __vxlock(vp, VXLOCKFLAGS_NB)) != 0)
		vrele(vp);
	return(error);
}

void
vx_put(struct vnode *vp)
{
	__vxunlock(vp);
	vrele(vp);
}

/****************************************************************
 *		   VNODE ACQUISITION FUNCTIONS			*
 ****************************************************************
 *
 * vget() and vput() access a vnode with the intent of executing an
 * operation other than a reclamation or deactivation.  vget() will ref
 * and lock the vnode, vput() will unlock and deref the vnode.
 * The VOP_*() locking functions are used.
 *
 * Special cases: If vget()'s locking operation fails the vrele() call may
 * cause the vnode to be deactivated (VOP_INACTIVE called).  However, this
 * never occurs if the vnode is in a reclaimed state.  vget() against a
 * vnode in a reclaimed state always returns ENOENT.
 *
 * Special cases: vput() will unlock and, if it is the last reference,
 * deactivate the vnode.  The deactivation uses a separate non-layered
 * VX lock after the normal unlock.  XXX make it more efficient.
 */
int
vget(struct vnode *vp, int flags, thread_t td)
{
	int error;

	crit_enter();
	__vref(vp);
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags, td)) != 0) {
			vrele(vp);
		} else if (vp->v_flag & VRECLAIMED) {
			VOP_UNLOCK(vp, 0, td);
			vrele(vp);
			error = ENOENT;
		} else {
			vp->v_flag &= ~VINACTIVE;
			error = 0;
		}
	} else {
		panic("vget() called with no lock specified!");
		error = ENOENT;	/* not reached, compiler opt */
	}
	crit_exit();
	return(error);
}

void
vput(struct vnode *vp)
{
	VOP_UNLOCK(vp, 0, curthread);
	vrele(vp);
}

void
vsetflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag |= flags;
	crit_exit();
}

void
vclrflags(struct vnode *vp, int flags)
{
	crit_enter();
	vp->v_flag &= ~flags;
	crit_exit();
}
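/*
 * Illustration only (not part of this file): the canonical pairing for
 * operating on a vnode outside of reclamation/deactivation.  A minimal
 * sketch; "example_with_vnode" is a hypothetical name.
 */
#ifdef notdef
static int
example_with_vnode(struct vnode *vp, thread_t td)
{
	int error;

	if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0)
		return (error);	/* ENOENT if vp was reclaimed */
	/* ... issue VOP_*() calls on the refd, locked vnode ... */
	vput(vp);		/* unlock + deref */
	return (0);
}
#endif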
/*
 * Obtain a new vnode from the freelist, allocating more if necessary.
 * The returned vnode is VX locked & refd.
 */
struct vnode *
allocvnode(int lktimeout, int lkflags)
{
	struct thread *td;
	struct vnode *vp;

	/*
	 * Try to reuse vnodes if we hit the max.  This situation only
	 * occurs in certain large-memory (2G+) situations.  We cannot
	 * attempt to directly reclaim vnodes due to nasty recursion
	 * problems.
	 */
	while (numvnodes - freevnodes > desiredvnodes)
		vnlru_proc_wait();

	td = curthread;
	vp = NULL;

	/*
	 * Attempt to reuse a vnode already on the free list, allocating
	 * a new vnode if we can't find one or if we have not reached a
	 * good minimum for LRU performance.
	 */
	if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
		int count;

		for (count = 0; count < freevnodes; count++) {
			/*
			 * __VNODESCAN__
			 *
			 * Pull the next vnode off the free list and do some
			 * sanity checks.  Note that regardless of how we
			 * block, if freevnodes is non-zero there had better
			 * be something on the list.
			 */
			vp = TAILQ_FIRST(&vnode_free_list);
			if (vp == NULL)
				panic("getnewvnode: free vnode isn't");

			/*
			 * Note the lack of a critical section.  We vx_get()
			 * the vnode before we check it for validity, reducing
			 * the number of checks we have to make.  The vx_get()
			 * will pull it off the freelist.
			 */
			if (vx_get(vp)) {
				vp = NULL;
				continue;
			}

			/*
			 * Can this vnode be recycled?  It must be in a
			 * VINACTIVE state with only our reference to it.
			 * (vx_get(), unlike vget(), does not reactivate
			 * the vnode).  vx_put() will recycle it onto the
			 * end of the freelist.
			 */
			if ((vp->v_flag & VINACTIVE) == 0 ||
			    vp->v_holdcnt || vp->v_usecount != 1) {
				vx_put(vp);
				vp = NULL;
				continue;
			}

			/*
			 * Ok, we can reclaim the vnode if it isn't already
			 * in a reclaimed state.  If the reclamation fails,
			 * or if someone else is referencing the vnode after
			 * we have vgone()'d it, we recycle the vnode on the
			 * freelist or hold it (by calling vx_put()).
			 */
			if ((vp->v_flag & VRECLAIMED) == 0) {
				vgone(vp);
				if ((vp->v_flag & VRECLAIMED) == 0 ||
				    vp->v_holdcnt || vp->v_usecount != 1) {
					vx_put(vp);
					vp = NULL;
					continue;
				}
			}
			KKASSERT(vp->v_flag & VINACTIVE);

			/*
			 * We have a vnode!
			 */
			break;
		}
	}

	/*
	 * If we have a vp it will be refd and VX locked.
	 */
	if (vp) {
		vp->v_lease = NULL;

#ifdef INVARIANTS
		if (vp->v_data)
			panic("cleaned vnode isn't");
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		KKASSERT(vp->v_mount == NULL);
#endif
		vp->v_flag = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
		vp->v_writecount = 0;	/* XXX */
		lockreinit(&vp->v_lock, 0, "vnode", lktimeout, lkflags);
		KKASSERT(TAILQ_FIRST(&vp->v_namecache) == NULL);
	} else {
		/*
		 * A brand-new vnode (we could use malloc() here I think) XXX
		 */
		vp = malloc(sizeof(struct vnode), M_VNODE, M_WAITOK|M_ZERO);
		lwkt_token_init(&vp->v_pollinfo.vpi_token);
		lockinit(&vp->v_lock, 0, "vnode", lktimeout, lkflags);
		TAILQ_INIT(&vp->v_namecache);

		/*
		 * Short cut around vfreeing it and looping, just set it up
		 * as if we had pulled a reclaimed vnode off the freelist
		 * and reinitialized it.
		 */
		vp->v_usecount = 1;
		if (__vxlock(vp, VXLOCKFLAGS))
			panic("getnewvnode: __vxlock failed");
		numvnodes++;
	}

	TAILQ_INIT(&vp->v_cleanblkhd);
	TAILQ_INIT(&vp->v_dirtyblkhd);
	vp->v_type = VNON;
	vp->v_tag = 0;
	vp->v_ops = NULL;
	vp->v_data = NULL;
	KKASSERT(vp->v_mount == NULL);
	return (vp);
}
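/*
 * Illustration only (not part of this file): a filesystem's get-node
 * path would typically wrap allocvnode() along these lines.  The lock
 * arguments, helper name, and v_data payload are hypothetical.
 */
#ifdef notdef
static struct vnode *
example_fs_allocvnode(void *fsnode)
{
	struct vnode *vp;

	vp = allocvnode(0, 0);	/* returned refd + VX locked */
	vp->v_type = VREG;
	vp->v_data = fsnode;
	/* ... install vp->v_ops, associate with the mount, etc ... */
	vx_unlock(vp);		/* keep the ref, drop the VX lock */
	return (vp);
}
#endif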
struct vnode *
allocvnode_placemarker(void)
{
	struct vnode *pvp;

	pvp = malloc(sizeof(struct vnode),
			M_VNODE, M_WAITOK|M_USE_RESERVE|M_ZERO);
	pvp->v_flag |= VPLACEMARKER;
	return(pvp);
}

void
freevnode_placemarker(struct vnode *pvp)
{
	KKASSERT(pvp->v_flag & VPLACEMARKER);
	free(pvp, M_VNODE);
}
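/*
 * Illustration only (not part of this file): a placemarker holds a scan
 * position in the free list across blocking operations.  A hypothetical
 * traversal might look like this (a real scanner must also revalidate
 * its target after blocking).
 */
#ifdef notdef
static void
example_scan_freelist(void)
{
	struct vnode *pvp = allocvnode_placemarker();
	struct vnode *vp;

	crit_enter();
	TAILQ_INSERT_HEAD(&vnode_free_list, pvp, v_freelist);
	while ((vp = TAILQ_NEXT(pvp, v_freelist)) != NULL) {
		/* advance the marker past vp, then inspect vp */
		TAILQ_REMOVE(&vnode_free_list, pvp, v_freelist);
		TAILQ_INSERT_AFTER(&vnode_free_list, vp, pvp, v_freelist);
		if (vp->v_flag & VPLACEMARKER)	/* skip other markers */
			continue;
		/* ... examine vp, possibly blocking ... */
	}
	TAILQ_REMOVE(&vnode_free_list, pvp, v_freelist);
	crit_exit();
	freevnode_placemarker(pvp);
}
#endif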