xref: /dragonfly/sys/vm/vm_swapcache.c (revision fdc53cc7)
/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device when the data is present in the swapcache.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */
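
/*
 * Illustrative usage: the swapcache is controlled through the
 * vm.swapcache.* sysctls declared below, e.g. from a root shell:
 *
 *	sysctl vm.swapcache.meta_enable=1	# cache filesystem meta-data
 *	sysctl vm.swapcache.data_enable=1	# cache clean file data
 *	sysctl vm.swapcache.read_enable=1	# divert reads to the swapcache
 *
 * With use_chflags left at its default of 1, file data is only cached for
 * vnodes marked VSWAPCACHE, typically set with chflags(1) (the "cache"
 * flag); see vm_swapcache_writing() below.
 */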

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static int vm_swapcached_flush (vm_page_t m);
static int vm_swapcache_test(vm_page_t m);
static void vm_swapcache_writing(vm_page_t marker);
static void vm_swapcache_cleaning(vm_object_t marker);
struct thread *swapcached_thread;

static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
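
/*
 * Register the daemon: kproc_start() runs via SYSINIT during the
 * SI_SUB_KTHREAD_PAGE stage and creates the "swapcached" kernel thread,
 * which enters vm_swapcached() and runs for the lifetime of the system.
 */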
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

int vm_swapcache_read_enable;
int vm_swapcache_inactive_heuristic;
static int vm_swapcache_sleep;
static int vm_swapcache_maxlaunder = 256;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int vm_swapcache_maxswappct = 75;
static int vm_swapcache_hysteresis;
static int vm_swapcache_use_chflags = 1;	/* require chflags cache */
static int64_t vm_swapcache_minburst = 10000000LL;	/* 10MB */
static int64_t vm_swapcache_curburst = 4000000000LL;	/* 4G after boot */
static int64_t vm_swapcache_maxburst = 2000000000LL;	/* 2G nominal max */
static int64_t vm_swapcache_accrate = 100000LL;		/* 100K/s */
static int64_t vm_swapcache_write_count;
static int64_t vm_swapcache_maxfilesize;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");

SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, maxswappct,
	CTLFLAG_RW, &vm_swapcache_maxswappct, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, hysteresis,
	CTLFLAG_RW, &vm_swapcache_hysteresis, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, use_chflags,
	CTLFLAG_RW, &vm_swapcache_use_chflags, 0, "");

SYSCTL_QUAD(_vm_swapcache, OID_AUTO, minburst,
	CTLFLAG_RW, &vm_swapcache_minburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxfilesize,
	CTLFLAG_RW, &vm_swapcache_maxfilesize, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");

#define SWAPMAX(adj)	\
	((int64_t)vm_swap_max * (vm_swapcache_maxswappct + (adj)) / 100)
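
/*
 * For example, with the default vm_swapcache_maxswappct of 75,
 * SWAPMAX(0) evaluates to 75% of vm_swap_max and SWAPMAX(-5) to 70%,
 * which produces the 75%/70% write/clean hysteresis described in
 * vm_swapcached() below.
 */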

/*
 * vm_swapcached is the high level swapcache daemon.
 */
static void
vm_swapcached(void)
{
	enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING;
	enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING;
	struct vm_page page_marker;
	struct vm_object object_marker;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;
	crit_enter();

	/*
	 * Initialize our marker for the inactive scan (SWAPC_WRITING)
	 */
	bzero(&page_marker, sizeof(page_marker));
	page_marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	page_marker.queue = PQ_INACTIVE;
	page_marker.wire_count = 1;
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &page_marker, pageq);
	vm_swapcache_hysteresis = vmstats.v_inactive_target / 2;
	vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;

	/*
	 * Initialize our marker for the vm_object scan (SWAPC_CLEANING)
	 */
	bzero(&object_marker, sizeof(object_marker));
	object_marker.type = OBJT_MARKER;
	TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list);

	for (;;) {
		/*
		 * Check every 5 seconds when not enabled or if no swap
		 * is present.
		 */
		if ((vm_swapcache_data_enable == 0 &&
		     vm_swapcache_meta_enable == 0) ||
		    vm_swap_max == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}

		/*
		 * Polling rate when enabled is approximately 10 Hz.
		 */
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);

		/*
		 * State hysteresis.  Generate write activity up to 75% of
		 * swap, then clean out swap assignments down to 70%, then
		 * repeat.
		 */
		if (state == SWAPC_WRITING) {
			if (vm_swap_cache_use > SWAPMAX(0))
				state = SWAPC_CLEANING;
		} else {
			if (vm_swap_cache_use < SWAPMAX(-5))
				state = SWAPC_WRITING;
		}

		/*
		 * We are allowed to continue accumulating burst value
		 * in either state.  Allow the user to set curburst > maxburst
		 * for the initial load-in.
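		 *
		 * At the ~10 Hz poll rate above each pass adds accrate / 10
		 * bytes of budget, so curburst recovers at roughly
		 * vm_swapcache_accrate bytes/sec (100K/sec by default)
		 * until it reaches maxburst.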
		 */
		if (vm_swapcache_curburst < vm_swapcache_maxburst) {
			vm_swapcache_curburst += vm_swapcache_accrate / 10;
			if (vm_swapcache_curburst > vm_swapcache_maxburst)
				vm_swapcache_curburst = vm_swapcache_maxburst;
		}

		/*
		 * We don't want to nickel-and-dime the scan as that will
		 * create unnecessary fragmentation.  The minimum burst
		 * is one second's worth of accumulation.
		 */
		if (state == SWAPC_WRITING) {
			if (vm_swapcache_curburst >= vm_swapcache_accrate) {
				if (burst == SWAPB_BURSTING) {
					vm_swapcache_writing(&page_marker);
					if (vm_swapcache_curburst <= 0)
						burst = SWAPB_RECOVERING;
				} else if (vm_swapcache_curburst >
					   vm_swapcache_minburst) {
					vm_swapcache_writing(&page_marker);
					burst = SWAPB_BURSTING;
				}
			}
		} else {
			vm_swapcache_cleaning(&object_marker);
		}
	}
	TAILQ_REMOVE(INACTIVE_LIST, &page_marker, pageq);
	TAILQ_REMOVE(&vm_object_list, &object_marker, object_list);
	crit_exit();
}

static void
vm_swapcache_writing(vm_page_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	vm_page_t m;
	int count;

	/*
	 * Deal with an overflow of the heuristic counter or if the user
	 * manually changes the hysteresis.
	 *
	 * Try to avoid small incremental pageouts by waiting for enough
	 * pages to build up in the inactive queue to hopefully get a good
	 * burst in.  This heuristic is bumped by the VM system and reset
	 * when our scan hits the end of the queue.
	 */
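	/*
	 * Concretely: the heuristic starts out at -hysteresis (half the
	 * inactive target by default, see vm_swapcached()), climbs as the
	 * VM system bumps it, and is knocked back down to -hysteresis when
	 * the scan below reaches the end of the queue, so we only write
	 * once a reasonable backlog of inactive pages has accumulated.
	 */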
	if (vm_swapcache_inactive_heuristic < -vm_swapcache_hysteresis)
		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
	if (vm_swapcache_inactive_heuristic < 0)
		return;

	/*
	 * Scan the inactive queue from our marker to locate
	 * suitable pages to push to the swap cache.
	 *
	 * We are looking for clean vnode-backed pages.
	 *
	 * NOTE: PG_SWAPPED pages in particular are not part of
	 *	 our count because once the cache stabilizes we
	 *	 can end up with a very high data rate of VM pages
	 *	 cycling from it.
	 */
	m = marker;
	count = vm_swapcache_maxlaunder;

	while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
		if (m->flags & (PG_MARKER | PG_SWAPPED)) {
			++count;
			continue;
		}
		if (vm_swapcache_curburst < 0)
			break;
		if (vm_swapcache_test(m))
			continue;
		object = m->object;
		vp = object->handle;
		if (vp == NULL)
			continue;

		switch(vp->v_type) {
		case VREG:
			/*
			 * If data_enable is 0 do not try to swapcache data.
			 * If use_chflags is set then only swapcache data for
			 * VSWAPCACHE marked vnodes, otherwise any vnode.
			 */
			if (vm_swapcache_data_enable == 0 ||
			    ((vp->v_flag & VSWAPCACHE) == 0 &&
			     vm_swapcache_use_chflags)) {
				continue;
			}
			if (vm_swapcache_maxfilesize &&
			    object->size >
			    (vm_swapcache_maxfilesize >> PAGE_SHIFT)) {
				continue;
			}
			break;
		case VCHR:
			if (vm_swapcache_meta_enable == 0)
				continue;
			break;
		default:
			continue;
		}

		/*
		 * Ok, move the marker and soft-busy the page.
		 */
		TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
		TAILQ_INSERT_AFTER(INACTIVE_LIST, m, marker, pageq);

		/*
		 * Assign swap and initiate I/O.
		 *
		 * (adjust for the --count which also occurs in the loop)
		 */
		count -= vm_swapcached_flush(m) - 1;

		/*
		 * Setup for next loop using marker.
		 */
		m = marker;
	}

	/*
	 * Cleanup marker position.  If we hit the end of the
	 * list the marker is placed at the tail.  Newly deactivated
	 * pages will be placed after it.
	 *
	 * Earlier inactive pages that were dirty and become clean
	 * are typically moved to the end of PQ_INACTIVE by virtue
	 * of vfs_vmio_release() when they become unwired from the
	 * buffer cache.
	 */
	TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
	if (m) {
		TAILQ_INSERT_BEFORE(m, marker, pageq);
	} else {
		TAILQ_INSERT_TAIL(INACTIVE_LIST, marker, pageq);
		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
	}
}

/*
 * Flush the specified page using the swap_pager.
 *
 * Try to collect surrounding pages, including pages which may
 * have already been assigned swap.  Try to cluster within a
 * contiguous aligned SWAP_META_PAGES (typ 16 x PAGE_SIZE) block
 * to match what swap_pager_putpages() can do.
 *
 * We also want to try to match against the buffer cache blocksize
 * but we don't really know what it is here.  Since the buffer cache
 * wires and unwires pages in groups the fact that we skip wired pages
 * should be sufficient.
 *
 * Returns a count of pages we might have flushed (minimum 1)
 */
static
int
vm_swapcached_flush(vm_page_t m)
{
	vm_object_t object;
	vm_page_t marray[SWAP_META_PAGES];
	vm_pindex_t basei;
	int rtvals[SWAP_META_PAGES];
	int x;
	int i;
	int j;
	int count;

	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);
	object = m->object;

	/*
	 * Try to cluster around (m), keeping in mind that the swap pager
	 * can only do SWAP_META_PAGES worth of contiguous write.
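	 *
	 * For example, assuming SWAP_META_PAGES is 16 (and SWAP_META_MASK
	 * is SWAP_META_PAGES - 1), a page at pindex 37 gives x = 37 & 15 = 5
	 * and basei = 37, so the loops below try to gather pages covering
	 * the aligned window of pindices 32..47 (basei - x + [0..15]).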
	 */
	x = (int)m->pindex & SWAP_META_MASK;
	marray[x] = m;
	basei = m->pindex;

	for (i = x - 1; i >= 0; --i) {
		m = vm_page_lookup(object, basei - x + i);
		if (m == NULL)
			break;
		if (vm_swapcache_test(m))
			break;
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[i] = m;
	}
	++i;

	for (j = x + 1; j < SWAP_META_PAGES; ++j) {
		m = vm_page_lookup(object, basei - x + j);
		if (m == NULL)
			break;
		if (vm_swapcache_test(m))
			break;
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[j] = m;
	}

	count = j - i;
	vm_object_pip_add(object, count);
	swap_pager_putpages(object, marray + i, count, FALSE, rtvals + i);
	vm_swapcache_write_count += count * PAGE_SIZE;
	vm_swapcache_curburst -= count * PAGE_SIZE;

	while (i < j) {
		if (rtvals[i] != VM_PAGER_PEND) {
			vm_page_io_finish(marray[i]);
			vm_object_pip_wakeup(object);
		}
		++i;
	}
	return(count);
}

/*
 * Test whether a VM page is suitable for writing to the swapcache.
 * Does not test m->queue, PG_MARKER, or PG_SWAPPED.
 *
 * Returns 0 on success, 1 on failure
 */
static int
vm_swapcache_test(vm_page_t m)
{
	vm_object_t object;

	if (m->flags & (PG_BUSY | PG_UNMANAGED | PG_NOTMETA))
		return(1);
	if (m->busy || m->hold_count || m->wire_count)
		return(1);
	if (m->valid != VM_PAGE_BITS_ALL)
		return(1);
	if (m->dirty & m->valid)
		return(1);
	if ((object = m->object) == NULL)
		return(1);
	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_DEAD)) {
		return(1);
	}
	vm_page_test_dirty(m);
	if (m->dirty & m->valid)
		return(1);
	return(0);
}

/*
 * Cleaning pass
 */
static
void
vm_swapcache_cleaning(vm_object_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	int count;
	int n;

	object = marker;
	count = vm_swapcache_maxlaunder;

	/*
	 * Look for vnode objects
	 */
	while ((object = TAILQ_NEXT(object, object_list)) != NULL && count--) {
		if (object->type != OBJT_VNODE)
			continue;
		if ((object->flags & OBJ_DEAD) || object->swblock_count == 0)
			continue;
		if ((vp = object->handle) == NULL)
			continue;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			continue;

		/*
		 * Adjust iterator.
		 */
		if (marker->backing_object != object)
			marker->size = 0;

		/*
		 * Move the marker so we can work on the VM object
		 */
		TAILQ_REMOVE(&vm_object_list, marker, object_list);
		TAILQ_INSERT_AFTER(&vm_object_list, object,
				   marker, object_list);

		/*
		 * Look for swblocks starting at our iterator.
		 *
		 * The swap_pager_condfree() function attempts to free
		 * swap space starting at the specified index.  The index
		 * will be updated on return.  The function will return
		 * a scan factor (NOT the number of blocks freed).
		 *
		 * If it must cut its scan of the object short due to an
		 * excessive number of swblocks, or is able to free the
		 * requested number of blocks, it will return n >= count
		 * and we break and pick it back up on a future attempt.
		 */
		n = swap_pager_condfree(object, &marker->size, count);
		count -= n;
		if (count < 0)
			break;

		/*
		 * Setup for loop.
		 */
		marker->size = 0;
		object = marker;
	}

	/*
	 * Adjust marker so we continue the scan from where we left off.
	 * When we reach the end we start back at the beginning.
	 */
	TAILQ_REMOVE(&vm_object_list, marker, object_list);
	if (object)
		TAILQ_INSERT_BEFORE(object, marker, object_list);
	else
		TAILQ_INSERT_HEAD(&vm_object_list, marker, object_list);
	marker->backing_object = object;
}