1 /*
2  * Copyright (c) 2004,2013-2022 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * External lock/ref-related vnode functions
37  *
38  * vs_state transition locking requirements:
39  *
40  *	INACTIVE -> CACHED|DYING	vx_lock(excl) + vi->spin
41  *	DYING    -> CACHED		vx_lock(excl)
42  *	ACTIVE   -> INACTIVE		(none)       + v_spin + vi->spin
43  *	INACTIVE -> ACTIVE		vn_lock(any) + v_spin + vi->spin
44  *	CACHED   -> ACTIVE		vn_lock(any) + v_spin + vi->spin
45  *
46  * NOTE: Switching to/from ACTIVE/INACTIVE requires v_spin and vi->spin.
47  *
48  *	 Switching into ACTIVE also requires a vref and vnode lock; however,
49  *	 the vnode lock is allowed to be SHARED.
50  *
51  *	 Switching into a CACHED or DYING state requires an exclusive vnode
52  *	 lock or vx_lock (which is almost the same thing but not quite).
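 *
 *	 As a worked example, vnode_terminate() below performs the
 *	 ACTIVE -> INACTIVE transition while holding the VX lock: it
 *	 acquires v_spin and lets _vinactive() take the per-cpu vi->spin
 *	 on top of it.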
53  */
54 
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/kernel.h>
58 #include <sys/malloc.h>
59 #include <sys/mount.h>
60 #include <sys/proc.h>
61 #include <sys/vnode.h>
62 #include <sys/spinlock2.h>
63 #include <sys/sysctl.h>
64 
65 #include <machine/limits.h>
66 
67 #include <vm/vm.h>
68 #include <vm/vm_object.h>
69 
70 #define VACT_MAX	10
71 #define VACT_INC	2
72 
73 static void vnode_terminate(struct vnode *vp);
74 
75 static MALLOC_DEFINE_OBJ(M_VNODE, sizeof(struct vnode), "vnodes", "vnodes");
76 static MALLOC_DEFINE(M_VNODE_HASH, "vnodelsthash", "vnode list hash");
77 
78 /*
79  * The vnode free list holds inactive vnodes.  Aged inactive vnodes
80  * are inserted prior to the mid point, and otherwise inserted
81  * at the tail.
82  *
83  * The vnode code goes to great lengths to avoid moving vnodes between
84  * lists, but sometimes it is unavoidable.  For this situation we try to
85  * avoid lock contention but we do not try very hard to avoid cache line
86  * congestion.  A modestly sized hash table is used.
87  */
88 #define VLIST_PRIME2	123462047LU
89 #define VLIST_XOR	(uintptr_t)0xab4582fa8322fb71LLU
90 
91 #define VLIST_HASH(vp)	(((uintptr_t)vp ^ VLIST_XOR) % \
92 			 VLIST_PRIME2 % (unsigned)ncpus)
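
/*
 * VLIST_HASH() maps a vnode pointer to one of the ncpus per-cpu list
 * structures.  The XOR and the reduction modulo a large prime scramble
 * the pointer (whose low bits are zero due to alignment) before the
 * final modulo ncpus.
 */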
93 
94 static struct vnode_index *vnode_list_hash;
95 
96 int  activevnodes = 0;
97 SYSCTL_INT(_debug, OID_AUTO, activevnodes, CTLFLAG_RD,
98 	&activevnodes, 0, "Number of active vnodes");
99 int  cachedvnodes = 0;
100 SYSCTL_INT(_debug, OID_AUTO, cachedvnodes, CTLFLAG_RD,
101 	&cachedvnodes, 0, "Number of total cached vnodes");
102 int  inactivevnodes = 0;
103 SYSCTL_INT(_debug, OID_AUTO, inactivevnodes, CTLFLAG_RD,
104 	&inactivevnodes, 0, "Number of inactive vnodes");
105 static int batchfreevnodes = 5;
106 SYSCTL_INT(_debug, OID_AUTO, batchfreevnodes, CTLFLAG_RW,
107 	&batchfreevnodes, 0, "Number of vnodes to free at once");
108 
109 static long auxrecovervnodes1;
110 SYSCTL_LONG(_debug, OID_AUTO, auxrecovervnodes1, CTLFLAG_RW,
111 	&auxrecovervnodes1, 0, "vnlru auxiliary vnodes recovered (active scan)");
112 static long auxrecovervnodes2;
113 SYSCTL_LONG(_debug, OID_AUTO, auxrecovervnodes2, CTLFLAG_RW,
114 	&auxrecovervnodes2, 0, "vnlru auxiliary vnodes recovered (inactive scan)");
115 
116 #ifdef TRACKVNODE
117 static u_long trackvnode;
118 SYSCTL_ULONG(_debug, OID_AUTO, trackvnode, CTLFLAG_RW,
119 		&trackvnode, 0, "");
120 #endif
121 
122 /*
123  * Called from vfsinit()
124  */
125 void
126 vfs_lock_init(void)
127 {
128 	int i;
129 
130 	kmalloc_obj_raise_limit(M_VNODE, 0);	/* unlimited */
131 	vnode_list_hash = kmalloc(sizeof(*vnode_list_hash) * ncpus,
132 				  M_VNODE_HASH, M_ZERO | M_WAITOK);
133 	for (i = 0; i < ncpus; ++i) {
134 		struct vnode_index *vi = &vnode_list_hash[i];
135 
136 		TAILQ_INIT(&vi->inactive_list);
137 		TAILQ_INIT(&vi->active_list);
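		/*
		 * The active_rover is a dummy placeholder entry which
		 * cleanfreevnode() advances through the active list when
		 * choosing vnodes to deactivate.
		 */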
138 		TAILQ_INSERT_TAIL(&vi->active_list, &vi->active_rover, v_list);
139 		spin_init(&vi->spin, "vfslock");
140 	}
141 }
142 
143 /*
144  * Misc functions
145  */
146 static __inline
147 void
148 _vsetflags(struct vnode *vp, int flags)
149 {
150 	atomic_set_int(&vp->v_flag, flags);
151 }
152 
153 static __inline
154 void
155 _vclrflags(struct vnode *vp, int flags)
156 {
157 	atomic_clear_int(&vp->v_flag, flags);
158 }
159 
160 void
161 vsetflags(struct vnode *vp, int flags)
162 {
163 	_vsetflags(vp, flags);
164 }
165 
166 void
167 vclrflags(struct vnode *vp, int flags)
168 {
169 	_vclrflags(vp, flags);
170 }
171 
172 /*
173  * Place the vnode on the active list.
174  *
175  * Caller must hold vp->v_spin
176  */
177 static __inline
178 void
179 _vactivate(struct vnode *vp)
180 {
181 	struct vnode_index *vi = &vnode_list_hash[VLIST_HASH(vp)];
182 
183 #ifdef TRACKVNODE
184 	if ((u_long)vp == trackvnode)
185 		kprintf("_vactivate %p %08x\n", vp, vp->v_flag);
186 #endif
187 	spin_lock(&vi->spin);
188 
189 	switch(vp->v_state) {
190 	case VS_ACTIVE:
191 		spin_unlock(&vi->spin);
192 		panic("_vactivate: already active");
193 		/* NOT REACHED */
194 		return;
195 	case VS_INACTIVE:
196 		TAILQ_REMOVE(&vi->inactive_list, vp, v_list);
197 		atomic_add_int(&mycpu->gd_inactivevnodes, -1);
198 		break;
199 	case VS_CACHED:
200 	case VS_DYING:
201 		break;
202 	}
203 	TAILQ_INSERT_TAIL(&vi->active_list, vp, v_list);
204 	vp->v_state = VS_ACTIVE;
205 	spin_unlock(&vi->spin);
206 	atomic_add_int(&mycpu->gd_activevnodes, 1);
207 }
208 
209 /*
210  * Put a vnode on the inactive list.
211  *
212  * Caller must hold v_spin
213  */
214 static __inline
215 void
216 _vinactive(struct vnode *vp)
217 {
218 	struct vnode_index *vi = &vnode_list_hash[VLIST_HASH(vp)];
219 
220 #ifdef TRACKVNODE
221 	if ((u_long)vp == trackvnode) {
222 		kprintf("_vinactive %p %08x\n", vp, vp->v_flag);
223 		print_backtrace(-1);
224 	}
225 #endif
226 	spin_lock(&vi->spin);
227 
228 	/*
229 	 * Remove from active list if it is sitting on it
230 	 */
231 	switch(vp->v_state) {
232 	case VS_ACTIVE:
233 		TAILQ_REMOVE(&vi->active_list, vp, v_list);
234 		atomic_add_int(&mycpu->gd_activevnodes, -1);
235 		break;
236 	case VS_INACTIVE:
237 		spin_unlock(&vi->spin);
238 		panic("_vinactive: already inactive");
239 		/* NOT REACHED */
240 		return;
241 	case VS_CACHED:
242 	case VS_DYING:
243 		break;
244 	}
245 
246 	/*
247 	 * Distinguish between basically dead vnodes, vnodes with cached
248 	 * data, and vnodes without cached data.  A rover will shift the
249 	 * vnodes around as their cache status is lost.
250 	 */
251 	if (vp->v_flag & VRECLAIMED) {
252 		TAILQ_INSERT_HEAD(&vi->inactive_list, vp, v_list);
253 	} else {
254 		TAILQ_INSERT_TAIL(&vi->inactive_list, vp, v_list);
255 	}
256 	vp->v_state = VS_INACTIVE;
257 	spin_unlock(&vi->spin);
258 	atomic_add_int(&mycpu->gd_inactivevnodes, 1);
259 }
260 
261 /*
262  * Add a ref to an active vnode.  This function should never be called
263  * with an inactive vnode (use vget() instead), but might be called
264  * with other states.
265  */
266 void
267 vref(struct vnode *vp)
268 {
269 	KASSERT((VREFCNT(vp) > 0 && vp->v_state != VS_INACTIVE),
270 		("vref: bad refcnt %08x %d", vp->v_refcnt, vp->v_state));
271 	atomic_add_int(&vp->v_refcnt, 1);
272 }
273 
274 void
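/*
 * Add a ref to a vnode which may currently have zero refs (i.e. one
 * sitting unreferenced in the cache), adjusting the cached-vnode
 * accounting on the 0->1 transition.  Unlike vref(), no assertions are
 * made about the vnode's current state.
 */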
275 vref_special(struct vnode *vp)
276 {
277 	if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
278 		atomic_add_int(&mycpu->gd_cachedvnodes, -1);
279 }
280 
281 void
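/*
 * Fold the per-cpu vnode counters into the global, sysctl-visible
 * totals (activevnodes, cachedvnodes, inactivevnodes).
 */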
282 synchronizevnodecount(void)
283 {
284 	int nca = 0;
285 	int act = 0;
286 	int ina = 0;
287 	int i;
288 
289 	for (i = 0; i < ncpus; ++i) {
290 		globaldata_t gd = globaldata_find(i);
291 		nca += gd->gd_cachedvnodes;
292 		act += gd->gd_activevnodes;
293 		ina += gd->gd_inactivevnodes;
294 	}
295 	cachedvnodes = nca;
296 	activevnodes = act;
297 	inactivevnodes = ina;
298 }
299 
300 /*
301  * Count the number of cached vnodes.  This is moderately expensive, so be
302  * careful not to make this call in the critical path.  Each cpu tracks
303  * its own accumulator.  The individual accumulators must be summed
304  * together to get an accurate value.
305  */
306 int
307 countcachedvnodes(void)
308 {
309 	int i;
310 	int n = 0;
311 
312 	for (i = 0; i < ncpus; ++i) {
313 		globaldata_t gd = globaldata_find(i);
314 		n += gd->gd_cachedvnodes;
315 	}
316 	return n;
317 }
318 
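/*
 * Count vnodes which are either cached (zero refs) or sitting on the
 * inactive lists, with the same per-cpu summing caveats as
 * countcachedvnodes() above.  Used by allocvnode_gc() to decide whether
 * recycling is worthwhile.
 */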
319 int
320 countcachedandinactivevnodes(void)
321 {
322 	int i;
323 	int n = 0;
324 
325 	for (i = 0; i < ncpus; ++i) {
326 		globaldata_t gd = globaldata_find(i);
327 		n += gd->gd_cachedvnodes + gd->gd_inactivevnodes;
328 	}
329 	return n;
330 }
331 
332 /*
333  * Release a ref on an active or inactive vnode.
334  *
335  * Caller has no other requirements.
336  *
337  * If VREF_FINALIZE is set this will deactivate the vnode on the 1->0
338  * transition, otherwise we leave the vnode in the active list and
339  * do a lockless transition to 0, which is very important for the
340  * critical path.
341  *
342  * (vrele() is not called when a vnode is being destroyed w/kfree)
343  */
344 void
345 vrele(struct vnode *vp)
346 {
347 	int count;
348 
349 #if 1
350 	count = vp->v_refcnt;
351 	cpu_ccfence();
352 
353 	for (;;) {
354 		KKASSERT((count & VREF_MASK) > 0);
355 		KKASSERT(vp->v_state == VS_ACTIVE ||
356 			 vp->v_state == VS_INACTIVE);
357 
358 		/*
359 		 * 2+ case
360 		 */
361 		if ((count & VREF_MASK) > 1) {
362 			if (atomic_fcmpset_int(&vp->v_refcnt,
363 					       &count, count - 1)) {
364 				break;
365 			}
366 			continue;
367 		}
368 
369 		/*
370 		 * 1->0 transition case must handle possible finalization.
371 		 * When finalizing we transition 1->0x40000000.  Note that
372 		 * cachedvnodes is only adjusted on transitions to ->0.
373 		 *
374 		 * WARNING! VREF_TERMINATE can be cleared at any point
375 		 *	    when the refcnt is non-zero (by vget()) and
376 		 *	    the vnode has not been reclaimed.  Thus
377 		 *	    transitions out of VREF_TERMINATE do not have
378 		 *	    to mess with cachedvnodes.
379 		 */
380 		if (count & VREF_FINALIZE) {
381 			vx_lock(vp);
382 			if (atomic_fcmpset_int(&vp->v_refcnt,
383 					      &count, VREF_TERMINATE)) {
384 				vnode_terminate(vp);
385 				break;
386 			}
387 			vx_unlock(vp);
388 		} else {
389 			if (atomic_fcmpset_int(&vp->v_refcnt, &count, 0)) {
390 				atomic_add_int(&mycpu->gd_cachedvnodes, 1);
391 				break;
392 			}
393 		}
394 		cpu_pause();
395 		/* retry */
396 	}
397 #else
398 	/*
399 	 * XXX NOT YET WORKING!  Multiple threads can reference the vnode
400 	 * after dropping their count, racing destruction, because this
401 	 * code is not directly transitioning from 1->VREF_FINALIZE.
402 	 */
403         /*
404          * Drop the ref-count.  On the 1->0 transition we check VREF_FINALIZE
405          * and attempt to acquire VREF_TERMINATE if set.  It is possible for
406          * concurrent vref/vrele to race and bounce 0->1, 1->0, etc, but
407          * only one will be able to transition the vnode into the
408          * VREF_TERMINATE state.
409          *
410          * NOTE: VREF_TERMINATE is *in* VREF_MASK, so the vnode may only enter
411          *       this state once.
412          */
413         count = atomic_fetchadd_int(&vp->v_refcnt, -1);
414         if ((count & VREF_MASK) == 1) {
415                 atomic_add_int(&mycpu->gd_cachedvnodes, 1);
416                 --count;
417                 while ((count & (VREF_MASK | VREF_FINALIZE)) == VREF_FINALIZE) {
418                         vx_lock(vp);
419                         if (atomic_fcmpset_int(&vp->v_refcnt,
420                                                &count, VREF_TERMINATE)) {
421                                 atomic_add_int(&mycpu->gd_cachedvnodes, -1);
422                                 vnode_terminate(vp);
423                                 break;
424                         }
425                         vx_unlock(vp);
426                 }
427         }
428 #endif
429 }
430 
431 /*
432  * Add an auxiliary data structure reference to the vnode.  Auxiliary
433  * references do not change the state of the vnode or prevent deactivation
434  * or reclamation of the vnode, but will prevent the vnode from being
435  * destroyed (kfree()'d).
436  *
437  * WARNING!  vhold() must not acquire v_spin.  The spinlock may or may not
438  *	     already be held by the caller.  vdrop() will clean up the
439  *	     free list state.
440  */
441 void
442 vhold(struct vnode *vp)
443 {
444 	atomic_add_int(&vp->v_auxrefs, 1);
445 }
446 
447 /*
448  * Remove an auxiliary reference from the vnode.
449  */
450 void
451 vdrop(struct vnode *vp)
452 {
453 	atomic_add_int(&vp->v_auxrefs, -1);
454 }
455 
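/*
 * Illustrative sketch (not compiled): an auxiliary reference keeps the
 * vnode structure itself from being kfree()'d across a blocking
 * operation, without preventing deactivation or reclamation.
 */
#if 0
	vhold(vp);
	/* ... may block; vp can be deactivated/reclaimed but not destroyed ... */
	vdrop(vp);
#endif
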
456 /*
457  * Set VREF_FINALIZE to request that the vnode be inactivated
458  * as soon as possible (on the 1->0 transition of its refs).
459  *
460  * Caller must have a ref on the vnode.
461  *
462  * This function has no effect if the vnode is already in termination
463  * processing.
464  */
465 void
466 vfinalize(struct vnode *vp)
467 {
468 	if ((vp->v_refcnt & VREF_MASK) > 0)
469 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
470 }
471 
472 /*
473  * This function is called on the 1->0 transition (which is actually
474  * 1->VREF_TERMINATE) when VREF_FINALIZE is set, forcing deactivation
475  * of the vnode.
476  *
477  * Additional vrefs are allowed to race but will not result in a reentrant
478  * call to vnode_terminate() due to refcnt being VREF_TERMINATE.  This
479  * prevents additional 1->0 transitions.
480  *
481  * ONLY A VGET() CAN REACTIVATE THE VNODE.
482  *
483  * Caller must hold the VX lock.
484  *
485  * NOTE: v_mount may be NULL due to assignment to dead_vnode_vops
486  *
487  * NOTE: The vnode may be marked inactive with dirty buffers
488  *	 or dirty pages in its cached VM object still present.
489  *
490  * NOTE: The vnode is expected to be VS_ACTIVE on entry and must not
491  *	 already be on the inactive (free) list.  We lose control of the
492  *	 vnode the instant it is placed on the inactive list.
493  *
494  *	 The VX lock is required when transitioning to VS_CACHED but is
495  *	 not sufficient for the vshouldfree() interlocked test or when
496  *	 transitioning away from VS_CACHED.  v_spin is also required for
497  *	 those cases.
498  */
499 static
500 void
501 vnode_terminate(struct vnode *vp)
502 {
503 	KKASSERT(vp->v_state == VS_ACTIVE);
504 
505 	if ((vp->v_flag & VINACTIVE) == 0) {
506 		_vsetflags(vp, VINACTIVE);
507 		if (vp->v_mount)
508 			VOP_INACTIVE(vp);
509 	}
510 	spin_lock(&vp->v_spin);
511 	_vinactive(vp);
512 	spin_unlock(&vp->v_spin);
513 
514 	vx_unlock(vp);
515 }
516 
517 /****************************************************************
518  *			VX LOCKING FUNCTIONS			*
519  ****************************************************************
520  *
521  * These functions lock vnodes for reclamation and deactivation related
522  * activities.  The caller must already be holding some sort of reference
523  * on the vnode.
524  */
525 void
526 vx_lock(struct vnode *vp)
527 {
528 	lockmgr(&vp->v_lock, LK_EXCLUSIVE);
529 	spin_lock_update_only(&vp->v_spin);
530 }
531 
532 void
533 vx_unlock(struct vnode *vp)
534 {
535 	spin_unlock_update_only(&vp->v_spin);
536 	lockmgr(&vp->v_lock, LK_RELEASE);
537 }
538 
539 /*
540  * Downgrades a VX lock to a normal VN lock.  The lock remains EXCLUSIVE.
541  *
542  * Generally required after calling getnewvnode() if the intention is
543  * to return a normal locked vnode to the caller.
544  */
545 void
546 vx_downgrade(struct vnode *vp)
547 {
548 	spin_unlock_update_only(&vp->v_spin);
549 }
550 
551 /****************************************************************
552  *			VNODE ACQUISITION FUNCTIONS		*
553  ****************************************************************
554  *
555  * These functions must be used when accessing a vnode that has no
556  * chance of being destroyed in an SMP race.  That means the caller will
557  * usually either hold an auxiliary reference (such as the namecache)
558  * or hold some other lock that ensures that the vnode cannot be destroyed.
559  *
560  * These functions are MANDATORY for any code chain accessing a vnode
561  * whose activation state is not known.
562  *
563  * vget() can be called with LK_NOWAIT and will return EBUSY if the
564  * lock cannot be immediately acquired.
565  *
566  * vget()/vput() are used when reactivation is desired.
567  *
568  * vx_get() and vx_put() are used when reactivation is not desired.
569  */
570 int
571 vget(struct vnode *vp, int flags)
572 {
573 	int error;
574 
575 	/*
576 	 * A lock type must be passed
577 	 */
578 	if ((flags & LK_TYPE_MASK) == 0) {
579 		panic("vget() called with no lock specified!");
580 		/* NOT REACHED */
581 	}
582 
583 	/*
584 	 * Reference the structure and then acquire the lock.
585 	 *
586 	 * NOTE: The requested lock might be a shared lock and does
587 	 *	 not protect our access to the refcnt or other fields.
588 	 */
589 	if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
590 		atomic_add_int(&mycpu->gd_cachedvnodes, -1);
591 
592 	if ((error = vn_lock(vp, flags | LK_FAILRECLAIM)) != 0) {
593 		/*
594 		 * The lock failed, undo and return an error.  This will not
595 		 * normally trigger a termination.
596 		 */
597 		vrele(vp);
598 	} else if (vp->v_flag & VRECLAIMED) {
599 		/*
600 		 * The vnode is being reclaimed and cannot be reactivated
601 		 * any more; undo and return ENOENT.
602 		 */
603 		vn_unlock(vp);
604 		vrele(vp);
605 		error = ENOENT;
606 	} else if (vp->v_state == VS_ACTIVE) {
607 		/*
608 		 * A VS_ACTIVE vnode coupled with the fact that we have
609 		 * a vnode lock (even if shared) prevents v_state from
610 		 * changing.  Since the vnode is not in a VRECLAIMED state,
611 		 * we can safely clear VINACTIVE.
612 		 *
613 		 * It is possible for a shared lock to cause a race with
614 		 * another thread that is also in the process of clearing
615 		 * VREF_TERMINATE, meaning that we might return with it still
616 		 * set and then assert in a later vref().  The solution is to
617 		 * unconditionally clear VREF_TERMINATE here as well.
618 		 *
619 		 * NOTE! Multiple threads may clear VINACTIVE if this is
620 		 *	 a shared lock.  This race is allowed.
621 		 */
622 		if (vp->v_flag & VINACTIVE)
623 			_vclrflags(vp, VINACTIVE);	/* SMP race ok */
624 		if (vp->v_act < VACT_MAX) {
625 			vp->v_act += VACT_INC;
626 			if (vp->v_act > VACT_MAX)	/* SMP race ok */
627 				vp->v_act = VACT_MAX;
628 		}
629 		error = 0;
630 		if (vp->v_refcnt & VREF_TERMINATE)	/* SMP race ok */
631 			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE);
632 	} else {
633 		/*
634 		 * If the vnode is not VS_ACTIVE it must be reactivated
635 		 * in addition to clearing VINACTIVE.  An exclusive spin_lock
636 		 * is needed to manipulate the vnode's list.
637 		 *
638 		 * Because the lockmgr lock might be shared, we might race
639 		 * another reactivation, which we handle.  In this situation,
640 		 * however, the refcnt prevents other v_state races.
641 		 *
642 		 * As with above, clearing VINACTIVE is allowed to race other
643 		 * clearings of VINACTIVE.
644 		 *
645 		 * VREF_TERMINATE and VREF_FINALIZE can only be cleared when
646 		 * the refcnt is non-zero and the vnode has not been
647 		 * reclaimed.  This also means that the transitions do
648 		 * not affect cachedvnodes.
649 		 *
650 		 * It is possible for a shared lock to cause a race with
651 		 * another thread that is also in the process of clearing
652 		 * VREF_TERMINATE, meaning that we might return with it still
653 		 * set and then assert in a later vref().  The solution is to
654 		 * unconditionally clear VREF_TERMINATE here as well.
655 		 */
656 		_vclrflags(vp, VINACTIVE);
657 		vp->v_act += VACT_INC;
658 		if (vp->v_act > VACT_MAX)	/* SMP race ok */
659 			vp->v_act = VACT_MAX;
660 		spin_lock(&vp->v_spin);
661 
662 		switch(vp->v_state) {
663 		case VS_INACTIVE:
664 			_vactivate(vp);
665 			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
666 							VREF_FINALIZE);
667 			spin_unlock(&vp->v_spin);
668 			break;
669 		case VS_CACHED:
670 			_vactivate(vp);
671 			atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
672 							VREF_FINALIZE);
673 			spin_unlock(&vp->v_spin);
674 			break;
675 		case VS_ACTIVE:
676 			atomic_clear_int(&vp->v_refcnt, VREF_FINALIZE |
677 							VREF_TERMINATE);
678 			spin_unlock(&vp->v_spin);
679 			break;
680 		case VS_DYING:
681 			spin_unlock(&vp->v_spin);
682 			panic("Impossible VS_DYING state");
683 			break;
684 		}
685 		error = 0;
686 	}
687 	return(error);
688 }
689 
690 #ifdef DEBUG_VPUT
691 
692 void
693 debug_vput(struct vnode *vp, const char *filename, int line)
694 {
695 	kprintf("vput(%p) %s:%d\n", vp, filename, line);
696 	vn_unlock(vp);
697 	vrele(vp);
698 }
699 
700 #else
701 
702 void
703 vput(struct vnode *vp)
704 {
705 	vn_unlock(vp);
706 	vrele(vp);
707 }
708 
709 #endif
710 
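/*
 * Illustrative usage sketch (not compiled): the common vget()/vput()
 * pairing for a caller which already holds something (e.g. a namecache
 * reference) preventing the vnode from being destroyed.  The lock flag
 * chosen here is only an example.
 */
#if 0
	if (vget(vp, LK_SHARED) == 0) {
		/* vp is referenced, locked, and (re)activated */
		/* ... operate on vp ... */
		vput(vp);		/* vn_unlock() + vrele() */
	}
#endif
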
711 /*
712  * Acquire the vnode lock unguarded.
713  *
714  * The non-blocking version also uses a slightly different mechanic.
715  * This function will explicitly fail not only if it cannot acquire
716  * the lock normally, but also if the caller already holds the lock.
717  *
718  * The adjusted mechanic is used to close a loophole where complex
719  * VOP_RECLAIM code can circle around recursively and allocate the
720  * same vnode it is trying to destroy from the freelist.
721  *
722  * Any filesystem (e.g. UFS) which puts LK_CANRECURSE in lk_flags can
723  * cause the incorrect behavior to occur.  If not for that, lockmgr()
724  * would do the right thing.
725  *
726  * XXX The vx_*() locks should use auxrefs, not the main reference counter.
727  */
728 void
729 vx_get(struct vnode *vp)
730 {
731 	if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
732 		atomic_add_int(&mycpu->gd_cachedvnodes, -1);
733 	lockmgr(&vp->v_lock, LK_EXCLUSIVE);
734 	spin_lock_update_only(&vp->v_spin);
735 }
736 
737 int
738 vx_get_nonblock(struct vnode *vp)
739 {
740 	int error;
741 
742 	if (lockinuse(&vp->v_lock))
743 		return(EBUSY);
744 	error = lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT);
745 	if (error == 0) {
746 		spin_lock_update_only(&vp->v_spin);
747 		if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
748 			atomic_add_int(&mycpu->gd_cachedvnodes, -1);
749 	}
750 	return(error);
751 }
752 
753 /*
754  * Release a VX lock that also held a ref on the vnode.  vrele() will handle
755  * any needed state transitions.
756  *
757  * However, filesystems use this function to get rid of unwanted new vnodes
758  * so try to get the vnode on the correct queue in that case.
759  */
760 void
761 vx_put(struct vnode *vp)
762 {
763 	if (vp->v_type == VNON || vp->v_type == VBAD)
764 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
765 	spin_unlock_update_only(&vp->v_spin);
766 	lockmgr(&vp->v_lock, LK_RELEASE);
767 	vrele(vp);
768 }
769 
770 /*
771  * Try to reuse a vnode from the free list.  This function is somewhat
772  * advisory in that NULL can be returned as a normal case, even if free
773  * vnodes are present.
774  *
775  * The scan is limited because it can result in excessive CPU use during
776  * periods of extreme vnode use.
777  *
778  * NOTE: The returned vnode is not completely initialized.
779  *	 The returned vnode will be VX locked.
780  */
781 static
782 struct vnode *
783 cleanfreevnode(int maxcount)
784 {
785 	struct vnode_index *vi;
786 	struct vnode *vp;
787 	int count;
788 	int trigger = (long)vmstats.v_page_count / (activevnodes * 2 + 1);
789 	int ri;
790 	int cpu_count;
791 	int cachedvnodes;
792 
793 	/*
794 	 * Try to deactivate some vnodes cached on the active list.  We
795 	 * generally want a 50-50 balance active vs inactive.
796 	 */
797 	cachedvnodes = countcachedvnodes();
798 	if (cachedvnodes < inactivevnodes)
799 		goto skip;
800 
801 	ri = vnode_list_hash[mycpu->gd_cpuid].deac_rover + 1;
802 
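	/*
	 * The ri >> 4 in the index below keeps the scan on the same
	 * per-cpu list for 16 consecutive iterations before rotating to
	 * the next cpu's list, reducing spinlock ping-pong.
	 */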
803 	for (count = 0; count < maxcount * 2; ++count, ++ri) {
804 		vi = &vnode_list_hash[((unsigned)ri >> 4) % ncpus];
805 
806 		spin_lock(&vi->spin);
807 
808 		vp = TAILQ_NEXT(&vi->active_rover, v_list);
809 		TAILQ_REMOVE(&vi->active_list, &vi->active_rover, v_list);
810 		if (vp == NULL) {
811 			TAILQ_INSERT_HEAD(&vi->active_list,
812 					  &vi->active_rover, v_list);
813 		} else {
814 			TAILQ_INSERT_AFTER(&vi->active_list, vp,
815 					   &vi->active_rover, v_list);
816 		}
817 		if (vp == NULL) {
818 			spin_unlock(&vi->spin);
819 			continue;
820 		}
821 
822 		/*
823 		 * Don't try to deactivate if someone has the vp referenced.
824 		 */
825 		if ((vp->v_refcnt & VREF_MASK) != 0) {
826 			spin_unlock(&vi->spin);
827 			vp->v_act += VACT_INC;
828 			if (vp->v_act > VACT_MAX)	/* SMP race ok */
829 				vp->v_act = VACT_MAX;
830 			continue;
831 		}
832 
833 		/*
834 		 * Calculate the deactivation weight.  Reduce v_act less
835 		 * if the vnode's object has a lot of VM pages.
836 		 *
837 		 * XXX obj race
838 		 */
839 		if (vp->v_act > 0) {
840 			vm_object_t obj;
841 
842 			if ((obj = vp->v_object) != NULL &&
843 			    obj->resident_page_count >= trigger)
844 			{
845 				vp->v_act -= 1;
846 			} else {
847 				vp->v_act -= VACT_INC;
848 			}
849 			if (vp->v_act < 0)
850 				vp->v_act = 0;
851 			spin_unlock(&vi->spin);
852 			continue;
853 		}
854 
855 		/*
856 		 * If v_auxrefs is not the expected value the vnode might
857 		 * reside in the namecache topology on an internal node and
858 		 * not at a leaf.  v_auxrefs can be wrong for other reasons,
859 		 * but this is the most likely.
860 		 *
861 		 * Such vnodes will not be recycled by vnlru later on in
862 		 * its inactive scan, so try to make the vnode presentable
863 		 * and only move it to the inactive queue if we can.
864 		 *
865 		 * On success, the vnode is disconnected from the namecache
866 		 * topology entirely, making vnodes above it in the topology
867 		 * recyclable.  This will allow the active scan to continue
868 		 * to make progress in balancing the active and inactive
869 		 * lists.
870 		 */
871 		if (vp->v_auxrefs != vp->v_namecache_count) {
872 			if (vx_get_nonblock(vp) == 0) {
873 				spin_unlock(&vi->spin);
874 				if ((vp->v_refcnt & VREF_MASK) == 1)
875 					cache_inval_vp_quick(vp);
876 				if (vp->v_auxrefs == vp->v_namecache_count)
877 					++auxrecovervnodes1;
878 				vx_put(vp);
879 			} else {
880 				spin_unlock(&vi->spin);
881 			}
882 			continue;
883 		}
884 
885 		/*
886 		 * Try to deactivate the vnode.  It is ok if v_auxrefs
887 		 * races every once in a while; we just don't want an
888 		 * excess of unreclaimable vnodes on the inactive list.
889 		 */
890 		if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
891 			atomic_add_int(&mycpu->gd_cachedvnodes, -1);
892 		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
893 
894 		spin_unlock(&vi->spin);
895 		vrele(vp);
896 	}
897 
898 	vnode_list_hash[mycpu->gd_cpuid].deac_rover = ri;
899 
900 skip:
901 	/*
902 	 * Loop trying to lock the first vnode on the free list.
903 	 * Cycle if we can't.
904 	 */
905 	cpu_count = ncpus;
906 	ri = vnode_list_hash[mycpu->gd_cpuid].free_rover + 1;
907 
908 	for (count = 0; count < maxcount; ++count, ++ri) {
909 		vi = &vnode_list_hash[((unsigned)ri >> 4) % ncpus];
910 
911 		spin_lock(&vi->spin);
912 
913 		vp = TAILQ_FIRST(&vi->inactive_list);
914 		if (vp == NULL) {
915 			spin_unlock(&vi->spin);
916 			if (--cpu_count == 0)
917 				break;
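			/*
			 * Skip ri ahead to the next 16-aligned slot so the
			 * next iteration (after the loop's ++ri) starts on
			 * the following cpu's list.
			 */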
918 			ri = (ri + 16) & ~15;
919 			--ri;
920 			continue;
921 		}
922 
923 		/*
924 		 * non-blocking vx_get will also ref the vnode on success.
925 		 */
926 		if (vx_get_nonblock(vp)) {
927 			KKASSERT(vp->v_state == VS_INACTIVE);
928 			TAILQ_REMOVE(&vi->inactive_list, vp, v_list);
929 			TAILQ_INSERT_TAIL(&vi->inactive_list, vp, v_list);
930 			spin_unlock(&vi->spin);
931 			continue;
932 		}
933 
934 		/*
935 		 * Because we are holding vi->spin the vnode should currently
936 		 * be inactive and VREF_TERMINATE should still be set.
937 		 *
938 		 * Once vi->spin is released the vnode's state should remain
939 		 * unmodified due to both the lock and ref on it.
940 		 */
941 		KKASSERT(vp->v_state == VS_INACTIVE);
942 		spin_unlock(&vi->spin);
943 #ifdef TRACKVNODE
944 		if ((u_long)vp == trackvnode)
945 			kprintf("cleanfreevnode %p %08x\n", vp, vp->v_flag);
946 #endif
947 
948 		/*
949 		 * The active scan already did this, but some leakage can
950 		 * happen.  Don't let an easily recyclable vnode go to
951 		 * waste!
952 		 */
953 		if (vp->v_auxrefs != vp->v_namecache_count &&
954 		    (vp->v_refcnt & ~VREF_FINALIZE) == VREF_TERMINATE + 1)
955 		{
956 			cache_inval_vp_quick(vp);
957 			if (vp->v_auxrefs == vp->v_namecache_count)
958 				++auxrecovervnodes2;
959 		}
960 
961 		/*
962 		 * Do not reclaim/reuse a vnode while auxiliary refs exist.
963 		 * This includes namecache refs due to a related ncp being
964 		 * locked or having children, a VM object association, or
965 		 * other hold users.
966 		 *
967 		 * Do not reclaim/reuse a vnode if someone else has a real
968 		 * ref on it.  This can occur if a filesystem temporarily
969 		 * releases the vnode lock during VOP_RECLAIM.
970 		 */
971 		if (vp->v_auxrefs != vp->v_namecache_count ||
972 		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
973 failed:
974 			if (vp->v_state == VS_INACTIVE) {
975 				spin_lock(&vi->spin);
976 				if (vp->v_state == VS_INACTIVE) {
977 					TAILQ_REMOVE(&vi->inactive_list,
978 						     vp, v_list);
979 					TAILQ_INSERT_TAIL(&vi->inactive_list,
980 							  vp, v_list);
981 				}
982 				spin_unlock(&vi->spin);
983 			}
984 			vx_put(vp);
985 			continue;
986 		}
987 
988 		/*
989 		 * VINACTIVE and VREF_TERMINATE are expected to both be set
990 		 * for vnodes pulled from the inactive list, and cannot be
991 		 * changed while we hold the vx lock.
992 		 *
993 		 * Try to reclaim the vnode.
994 		 *
995 		 * The cache_inval_vp() can fail if any of the namecache
996 		 * elements are actively locked, preventing the vnode from
997 		 * being reclaimed.  This is the desired operation as it gives
998 		 * the namecache code certain guarantees just by holding
999 		 * a ncp.
1000 		 */
1001 		KKASSERT(vp->v_flag & VINACTIVE);
1002 		KKASSERT(vp->v_refcnt & VREF_TERMINATE);
1003 
1004 		if ((vp->v_flag & VRECLAIMED) == 0) {
1005 			if (cache_inval_vp_nonblock(vp))
1006 				goto failed;
1007 			vgone_vxlocked(vp);
1008 			/* vnode is still VX locked */
1009 		}
1010 
1011 		/*
1012 		 * At this point if there are no other refs or auxrefs on
1013 		 * the vnode with the inactive list locked, and we remove
1014 		 * the vnode from the inactive list, it should not be
1015 		 * possible for anyone else to access the vnode any more.
1016 		 *
1017 		 * Since the vnode is in a VRECLAIMED state, no new
1018 		 * namecache associations could have been made and the
1019 		 * vnode should have already been removed from its mountlist.
1020 		 *
1021 		 * Since we hold a VX lock on the vnode it cannot have been
1022 		 * reactivated (moved out of the inactive list).
1023 		 */
1024 		KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
1025 		spin_lock(&vi->spin);
1026 		if (vp->v_auxrefs ||
1027 		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
1028 			spin_unlock(&vi->spin);
1029 			goto failed;
1030 		}
1031 		KKASSERT(vp->v_state == VS_INACTIVE);
1032 		TAILQ_REMOVE(&vi->inactive_list, vp, v_list);
1033 		atomic_add_int(&mycpu->gd_inactivevnodes, -1);
1034 		vp->v_state = VS_DYING;
1035 		spin_unlock(&vi->spin);
1036 
1037 		/*
1038 		 * Nothing should have been able to access this vp.  Only
1039 		 * our ref should remain now.
1040 		 */
1041 		atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE);
1042 		KASSERT(vp->v_refcnt == 1,
1043 			("vp %p badrefs %08x", vp, vp->v_refcnt));
1044 
1045 		/*
1046 		 * Return a VX locked vnode suitable for reuse.
1047 		 */
1048 		vnode_list_hash[mycpu->gd_cpuid].free_rover = ri;
1049 		return(vp);
1050 	}
1051 	vnode_list_hash[mycpu->gd_cpuid].free_rover = ri;
1052 	return(NULL);
1053 }
1054 
1055 /*
1056  * Obtain a new vnode.  The returned vnode is VX locked & vrefd.
1057  *
1058  * All new vnodes set the VAGE flags.  An open() of the vnode will
1059  * decrement the (2-bit) flags.  Vnodes which are opened several times
1060  * are thus retained in the cache over vnodes which are merely stat()d.
1061  *
1062  * We attempt to reuse an already-recycled vnode from our pcpu inactive
1063  * queue first, and allocate otherwise.  Attempting to recycle inactive
1064  * vnodes here can lead to numerous deadlocks, particularly with
1065  * softupdates.
1066  */
1067 struct vnode *
1068 allocvnode(int lktimeout, int lkflags)
1069 {
1070 	struct vnode *vp;
1071 	struct vnode_index *vi;
1072 
1073 	/*
1074 	 * lktimeout only applies when LK_TIMELOCK is used, and only
1075 	 * the pageout daemon uses it.  The timeout may not be zero
1076 	 * or the pageout daemon can deadlock in low-VM situations.
1077 	 */
1078 	if (lktimeout == 0)
1079 		lktimeout = hz / 10;
1080 
1081 	/*
1082 	 * Do not flag for synchronous recycling unless there are enough
1083 	 * freeable vnodes to recycle and the number of vnodes has
1084 	 * significantly exceeded our target.  We want the normal vnlru
1085 	 * process to handle the cleaning (at 9/10's) before we are forced
1086 	 * to flag it here at 11/10's for userexit path processing.
1087 	 */
1088 	if (numvnodes >= maxvnodes * 11 / 10 &&
1089 	    cachedvnodes + inactivevnodes >= maxvnodes * 5 / 10) {
1090 		struct thread *td = curthread;
1091 		if (td->td_lwp)
1092 			atomic_set_int(&td->td_lwp->lwp_mpflags, LWP_MP_VNLRU);
1093 	}
1094 
1095 	/*
1096 	 * Try to trivially reuse a reclaimed vnode from the head of the
1097 	 * inactive list for this cpu.  Any vnode cycling which
1098 	 * terminates the vnode will cause it to be returned to the
1099 	 * same pcpu structure (e.g. unlink calls).
1100 	 */
1101 	vi = &vnode_list_hash[mycpuid];
1102 	spin_lock(&vi->spin);
1103 
1104 	vp = TAILQ_FIRST(&vi->inactive_list);
1105 	if (vp && (vp->v_flag & VRECLAIMED)) {
1106 		/*
1107 		 * non-blocking vx_get will also ref the vnode on success.
1108 		 */
1109 		if (vx_get_nonblock(vp)) {
1110 			KKASSERT(vp->v_state == VS_INACTIVE);
1111 			TAILQ_REMOVE(&vi->inactive_list, vp, v_list);
1112 			TAILQ_INSERT_TAIL(&vi->inactive_list, vp, v_list);
1113 			spin_unlock(&vi->spin);
1114 			goto slower;
1115 		}
1116 
1117 		/*
1118 		 * Because we are holding vi->spin the vnode should currently
1119 		 * be inactive and VREF_TERMINATE should still be set.
1120 		 *
1121 		 * Once vi->spin is released the vnode's state should remain
1122 		 * unmodified due to both the lock and ref on it.
1123 		 */
1124 		KKASSERT(vp->v_state == VS_INACTIVE);
1125 #ifdef TRACKVNODE
1126 		if ((u_long)vp == trackvnode)
1127 			kprintf("allocvnode %p %08x\n", vp, vp->v_flag);
1128 #endif
1129 
1130 		/*
1131 		 * Do not reclaim/reuse a vnode while auxiliary refs exist.
1132 		 * This includes namecache refs due to a related ncp being
1133 		 * locked or having children, a VM object association, or
1134 		 * other hold users.
1135 		 *
1136 		 * Do not reclaim/reuse a vnode if someone else has a real
1137 		 * ref on it.  This can occur if a filesystem temporarily
1138 		 * releases the vnode lock during VOP_RECLAIM.
1139 		 */
1140 		if (vp->v_auxrefs ||
1141 		    (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
1142 			if (vp->v_state == VS_INACTIVE) {
1143 				TAILQ_REMOVE(&vi->inactive_list,
1144 					     vp, v_list);
1145 				TAILQ_INSERT_TAIL(&vi->inactive_list,
1146 						  vp, v_list);
1147 			}
1148 			spin_unlock(&vi->spin);
1149 			vx_put(vp);
1150 			goto slower;
1151 		}
1152 
1153 		/*
1154 		 * VINACTIVE and VREF_TERMINATE are expected to both be set
1155 		 * for vnodes pulled from the inactive list, and cannot be
1156 		 * changed while we hold the vx lock.
1157 		 *
1158 		 * Try to reclaim the vnode.
1159 		 */
1160 		KKASSERT(vp->v_flag & VINACTIVE);
1161 		KKASSERT(vp->v_refcnt & VREF_TERMINATE);
1162 
1163 		if ((vp->v_flag & VRECLAIMED) == 0) {
1164 			spin_unlock(&vi->spin);
1165 			vx_put(vp);
1166 			goto slower;
1167 		}
1168 
1169 		/*
1170 		 * At this point if there are no other refs or auxrefs on
1171 		 * the vnode with the inactive list locked, and we remove
1172 		 * the vnode from the inactive list, it should not be
1173 		 * possible for anyone else to access the vnode any more.
1174 		 *
1175 		 * Since the vnode is in a VRECLAIMED state, no new
1176 		 * namecache associations could have been made and the
1177 		 * vnode should have already been removed from its mountlist.
1178 		 *
1179 		 * Since we hold a VX lock on the vnode it cannot have been
1180 		 * reactivated (moved out of the inactive list).
1181 		 */
1182 		KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
1183 		KKASSERT(vp->v_state == VS_INACTIVE);
1184 		TAILQ_REMOVE(&vi->inactive_list, vp, v_list);
1185 		atomic_add_int(&mycpu->gd_inactivevnodes, -1);
1186 		vp->v_state = VS_DYING;
1187 		spin_unlock(&vi->spin);
1188 
1189 		/*
1190 		 * Nothing should have been able to access this vp.  Only
1191 		 * our ref should remain now.
1192 		 *
1193 		 * At this point we can kfree() the vnode if we want to.
1194 		 * Instead, we reuse it for the allocation.
1195 		 */
1196 		atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE);
1197 		KASSERT(vp->v_refcnt == 1,
1198 			("vp %p badrefs %08x", vp, vp->v_refcnt));
1199 		vx_unlock(vp);		/* safety: keep the API clean */
1200 		bzero(vp, sizeof(*vp));
1201 	} else {
1202 		spin_unlock(&vi->spin);
1203 slower:
1204 		vp = kmalloc_obj(sizeof(*vp), M_VNODE, M_ZERO | M_WAITOK);
1205 		atomic_add_int(&numvnodes, 1);
1206 	}
1207 
1208 	lwkt_token_init(&vp->v_token, "vnode");
1209 	lockinit(&vp->v_lock, "vnode", lktimeout, lkflags);
1210 	TAILQ_INIT(&vp->v_namecache);
1211 	RB_INIT(&vp->v_rbclean_tree);
1212 	RB_INIT(&vp->v_rbdirty_tree);
1213 	RB_INIT(&vp->v_rbhash_tree);
1214 	spin_init(&vp->v_spin, "allocvnode");
1215 
1216 	vx_lock(vp);
1217 	vp->v_refcnt = 1;
1218 	vp->v_flag = VAGE0 | VAGE1;
1219 	vp->v_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
1220 
1221 	KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
1222 	/* exclusive lock still held */
1223 
1224 	vp->v_filesize = NOOFFSET;
1225 	vp->v_type = VNON;
1226 	vp->v_tag = 0;
1227 	vp->v_state = VS_CACHED;
1228 	_vactivate(vp);
1229 
1230 	return (vp);
1231 }
1232 
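/*
 * Illustrative caller sketch (not compiled; names and flags are
 * hypothetical): a filesystem's vnode-creation path typically takes the
 * VX locked, referenced vnode returned above, fills in its type and
 * private data, and then downgrades to a normal exclusive vnode lock.
 */
#if 0
	vp = allocvnode(0, LK_CANRECURSE);
	vp->v_type = VREG;
	vp->v_data = ip;		/* hypothetical fs-private inode */
	vx_downgrade(vp);		/* remains exclusively locked */
#endif
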
1233 /*
1234  * Called after a process has allocated a vnode via allocvnode()
1235  * and we detected that too many vnodes were present.
1236  *
1237  * This function is called just prior to a return to userland if the
1238  * process at some point had to allocate a new vnode during the last
1239  * system call and the vnode count was found to be excessive.
1240  *
1241  * This is a synchronous path that we do not normally want to execute.
1242  *
1243  * Flagged at >= 11/10's, runs if >= 10/10, vnlru runs at 9/10.
1244  *
1245  * WARNING: Sometimes numvnodes can blow out due to children being
1246  *	    present under directory vnodes in the namecache.  For the
1247  *	    moment use an if() instead of a while() and note that if
1248  *	    we were to use a while() we would still have to break out
1249  *	    if freesomevnodes() returned 0.  vnlru will also be trying
1250  *	    hard to free vnodes at the same time (with a lower trigger
1251  *	    pointer).
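 *
 *	    Worked example of the thresholds (numbers are illustrative):
 *	    with maxvnodes = 100000, vnlru begins cleaning at ~90000
 *	    vnodes, allocvnode() flags the process for this path at
 *	    >= 110000, and the check below fires while numvnodes is still
 *	    >= 100000 and at least 50000 vnodes are cached or inactive.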
1252  */
1253 void
1254 allocvnode_gc(void)
1255 {
1256 	if (numvnodes >= maxvnodes &&
1257 	    countcachedandinactivevnodes() >= maxvnodes * 5 / 10)
1258 	{
1259 		freesomevnodes(batchfreevnodes);
1260 	}
1261 }
1262 
1263 int
1264 freesomevnodes(int n)
1265 {
1266 	struct vnode *vp;
1267 	int count = 0;
1268 
1269 	while (n) {
1270 		if ((vp = cleanfreevnode(n)) == NULL)
1271 			break;
1272 		vx_unlock(vp);
1273 		--n;
1274 		++count;
1275 		kfree_obj(vp, M_VNODE);
1276 		atomic_add_int(&numvnodes, -1);
1277 	}
1278 	return(count);
1279 }
1280