xref: /dragonfly/sys/vm/vm_swapcache.c (revision e527fb6b)
/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device when the data is present in the swapcache.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */
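
/*
 * Illustrative usage sketch (added commentary, not part of the original
 * source): an administrator might enable the swapcache through the
 * sysctls defined below.  The particular values shown are assumptions,
 * not tuning recommendations.
 *
 *	sysctl vm.swapcache.data_enable=1	# cache clean file data
 *	sysctl vm.swapcache.meta_enable=1	# cache filesystem meta-data
 *	sysctl vm.swapcache.read_enable=1	# divert reads to the swapcache
 *	sysctl vm.swapcache.maxswappct=75	# cap swapcache use at 75% of swap
 *
 * With use_chflags left at its default of 1, only vnodes flagged
 * VSWAPCACHE (typically set with chflags) have their file data cached.
 */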

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static int vm_swapcached_flush (vm_page_t m);
static int vm_swapcache_test(vm_page_t m);
static void vm_swapcache_writing(vm_page_t marker);
static void vm_swapcache_cleaning(vm_object_t marker);
struct thread *swapcached_thread;

static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)
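
/*
 * Note (added commentary): the SYSINIT hook above hands swpc_kp to
 * kproc_start during the SI_SUB_KTHREAD_PAGE stage of boot, creating
 * the "swapcached" kernel thread that runs vm_swapcached() below.
 */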

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

int vm_swapcache_read_enable;
int vm_swapcache_inactive_heuristic;
static int vm_swapcache_sleep;
static int vm_swapcache_maxlaunder = 256;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int vm_swapcache_maxswappct = 75;
static int vm_swapcache_hysteresis;
static int vm_swapcache_use_chflags = 1;	/* require chflags cache */
static int64_t vm_swapcache_minburst = 10000000LL;	/* 10MB */
static int64_t vm_swapcache_curburst = 4000000000LL;	/* 4G after boot */
static int64_t vm_swapcache_maxburst = 2000000000LL;	/* 2G nominal max */
static int64_t vm_swapcache_accrate = 100000LL;		/* 100K/s */
static int64_t vm_swapcache_write_count;
static int64_t vm_swapcache_maxfilesize;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");

SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, maxswappct,
	CTLFLAG_RW, &vm_swapcache_maxswappct, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, hysteresis,
	CTLFLAG_RW, &vm_swapcache_hysteresis, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, use_chflags,
	CTLFLAG_RW, &vm_swapcache_use_chflags, 0, "");

SYSCTL_QUAD(_vm_swapcache, OID_AUTO, minburst,
	CTLFLAG_RW, &vm_swapcache_minburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxfilesize,
	CTLFLAG_RW, &vm_swapcache_maxfilesize, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");

#define SWAPMAX(adj)	\
	((int64_t)vm_swap_max * (vm_swapcache_maxswappct + (adj)) / 100)
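
/*
 * Worked example (added for clarity, not in the original source): with
 * the default vm_swapcache_maxswappct of 75, SWAPMAX(0) evaluates to
 * 75% of vm_swap_max and SWAPMAX(-5) to 70%.  The main loop below uses
 * these two thresholds as its write/clean hysteresis band: writing is
 * allowed until swapcache use exceeds 75% of swap, then existing swap
 * assignments are cleaned out until use drops back under 70%.
 */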

/*
 * vm_swapcached is the high level swapcache daemon.
 */
static void
vm_swapcached(void)
{
	enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING;
	enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING;
	struct vm_page page_marker;
	struct vm_object object_marker;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;
	crit_enter();

	/*
	 * Initialize our marker for the inactive scan (SWAPC_WRITING)
	 */
	bzero(&page_marker, sizeof(page_marker));
	page_marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	page_marker.queue = PQ_INACTIVE;
	page_marker.wire_count = 1;
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &page_marker, pageq);
	vm_swapcache_hysteresis = vmstats.v_inactive_target / 2;
	vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;

	/*
	 * Initialize our marker for the vm_object scan (SWAPC_CLEANING)
	 */
	bzero(&object_marker, sizeof(object_marker));
	object_marker.type = OBJT_MARKER;
	TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list);

	for (;;) {
		/*
		 * Check every 5 seconds when not enabled or if no swap
		 * is present.
		 */
		if ((vm_swapcache_data_enable == 0 &&
		     vm_swapcache_meta_enable == 0) ||
		    vm_swap_max == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}

		/*
		 * Polling rate when enabled is approximately 10 hz.
		 */
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);

		/*
		 * State hysteresis.  Generate write activity up to 75% of
		 * swap, then clean out swap assignments down to 70%, then
		 * repeat.
		 */
		if (state == SWAPC_WRITING) {
			if (vm_swap_cache_use > SWAPMAX(0))
				state = SWAPC_CLEANING;
		} else {
			if (vm_swap_cache_use < SWAPMAX(-5))
				state = SWAPC_WRITING;
		}

		/*
		 * We are allowed to continue accumulating burst value
		 * in either state.  Allow the user to set curburst > maxburst
		 * for the initial load-in.
		 */
		if (vm_swapcache_curburst < vm_swapcache_maxburst) {
			vm_swapcache_curburst += vm_swapcache_accrate / 10;
			if (vm_swapcache_curburst > vm_swapcache_maxburst)
				vm_swapcache_curburst = vm_swapcache_maxburst;
		}

		/*
		 * We don't want to nickel-and-dime the scan as that will
		 * create unnecessary fragmentation.  The minimum burst
		 * is one second's worth of accumulation.
		 */
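		/*
		 * Worked example (illustrative, not in the original code):
		 * with the defaults above, accrate is 100KB/s, so each
		 * 1/10 second poll adds roughly 10KB back to curburst.
		 * Once curburst is exhausted the daemon stops writing and
		 * does not resume until curburst climbs back above
		 * minburst (10MB), which takes on the order of 100 seconds
		 * at the default accrate.
		 */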
		if (state == SWAPC_WRITING) {
			if (vm_swapcache_curburst >= vm_swapcache_accrate) {
				if (burst == SWAPB_BURSTING) {
					vm_swapcache_writing(&page_marker);
					if (vm_swapcache_curburst <= 0)
						burst = SWAPB_RECOVERING;
				} else if (vm_swapcache_curburst >
					   vm_swapcache_minburst) {
					vm_swapcache_writing(&page_marker);
					burst = SWAPB_BURSTING;
				}
			}
		} else {
			vm_swapcache_cleaning(&object_marker);
		}
	}
	TAILQ_REMOVE(INACTIVE_LIST, &page_marker, pageq);
	TAILQ_REMOVE(&vm_object_list, &object_marker, object_list);
	crit_exit();
}

static void
vm_swapcache_writing(vm_page_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	vm_page_t m;
	int count;

	/*
	 * Try to avoid small incremental pageouts by waiting for enough
	 * pages to build up in the inactive queue, in the hope of getting
	 * a good burst in.  This heuristic is bumped by the VM system and
	 * reset when our scan hits the end of the queue.
	 */
	if (vm_swapcache_inactive_heuristic < 0)
		return;

	/*
	 * Scan the inactive queue from our marker to locate
	 * suitable pages to push to the swap cache.
	 *
	 * We are looking for clean vnode-backed pages.
	 *
	 * NOTE: PG_SWAPPED pages in particular are not part of
	 *	 our count because once the cache stabilizes we
	 *	 can end up with a very high data rate of VM pages
	 *	 cycling from it.
	 */
	m = marker;
	count = vm_swapcache_maxlaunder;

	while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
		if (m->flags & (PG_MARKER | PG_SWAPPED)) {
			++count;
			continue;
		}
		if (vm_swapcache_curburst < 0)
			break;
		if (vm_swapcache_test(m))
			continue;
		object = m->object;
		vp = object->handle;
		if (vp == NULL)
			continue;

		switch(vp->v_type) {
		case VREG:
			/*
			 * If data_enable is 0 do not try to swapcache data.
			 * If use_chflags is set then only swapcache data for
			 * VSWAPCACHE marked vnodes, otherwise any vnode.
			 */
			if (vm_swapcache_data_enable == 0 ||
			    ((vp->v_flag & VSWAPCACHE) == 0 &&
			     vm_swapcache_use_chflags)) {
				continue;
			}
			if (vm_swapcache_maxfilesize &&
			    object->size >
			    (vm_swapcache_maxfilesize >> PAGE_SHIFT)) {
				continue;
			}
			break;
		case VCHR:
			if (vm_swapcache_meta_enable == 0)
				continue;
			break;
		default:
			continue;
		}

		/*
		 * Ok, move the marker and soft-busy the page.
		 */
		TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
		TAILQ_INSERT_AFTER(INACTIVE_LIST, m, marker, pageq);

		/*
		 * Assign swap and initiate I/O.
		 *
		 * (adjust for the --count which also occurs in the loop)
		 */
		count -= vm_swapcached_flush(m) - 1;

		/*
		 * Setup for next loop using marker.
		 */
		m = marker;
	}

	/*
	 * Cleanup marker position.  If we hit the end of the
	 * list the marker is placed at the tail.  Newly deactivated
	 * pages will be placed after it.
	 *
	 * Earlier inactive pages that were dirty and become clean
	 * are typically moved to the end of PQ_INACTIVE by virtue
	 * of vfs_vmio_release() when they become unwired from the
	 * buffer cache.
	 */
	TAILQ_REMOVE(INACTIVE_LIST, marker, pageq);
	if (m) {
		TAILQ_INSERT_BEFORE(m, marker, pageq);
	} else {
		TAILQ_INSERT_TAIL(INACTIVE_LIST, marker, pageq);
		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
	}
}

/*
 * Flush the specified page using the swap_pager.
 *
 * Try to collect surrounding pages, including pages which may
 * have already been assigned swap.  Try to cluster within a
 * contiguous aligned SWAP_META_PAGES (typically 16 x PAGE_SIZE) block
 * to match what swap_pager_putpages() can do.
 *
 * We also want to try to match against the buffer cache blocksize
 * but we don't really know what it is here.  Since the buffer cache
 * wires and unwires pages in groups the fact that we skip wired pages
 * should be sufficient.
 *
 * Returns a count of pages we might have flushed (minimum 1).
 */
static
int
vm_swapcached_flush(vm_page_t m)
{
	vm_object_t object;
	vm_page_t marray[SWAP_META_PAGES];
	vm_pindex_t basei;
	int rtvals[SWAP_META_PAGES];
	int x;
	int i;
	int j;
	int count;

	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);
	object = m->object;

	/*
	 * Try to cluster around (m), keeping in mind that the swap pager
	 * can only do SWAP_META_PAGES worth of contiguous write.
	 */
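	/*
	 * Worked example (illustrative, not in the original source),
	 * assuming SWAP_META_PAGES is 16 so SWAP_META_MASK is 15: for a
	 * page at pindex 37, x = 37 & 15 = 5 and the candidate cluster
	 * spans the aligned block of pindices 32..47, i.e. marray[0..15]
	 * maps to (pindex - x) + [0..15].  The backward and forward scans
	 * below stop early at the first missing or unsuitable page.
	 */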
	x = (int)m->pindex & SWAP_META_MASK;
	marray[x] = m;
	basei = m->pindex;

	for (i = x - 1; i >= 0; --i) {
		m = vm_page_lookup(object, basei - x + i);
		if (m == NULL)
			break;
		if (vm_swapcache_test(m))
			break;
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[i] = m;
	}
	++i;

	for (j = x + 1; j < SWAP_META_PAGES; ++j) {
		m = vm_page_lookup(object, basei - x + j);
		if (m == NULL)
			break;
		if (vm_swapcache_test(m))
			break;
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[j] = m;
	}

	count = j - i;
	vm_object_pip_add(object, count);
	swap_pager_putpages(object, marray + i, count, FALSE, rtvals + i);
	vm_swapcache_write_count += count * PAGE_SIZE;
	vm_swapcache_curburst -= count * PAGE_SIZE;

	while (i < j) {
		if (rtvals[i] != VM_PAGER_PEND) {
			vm_page_io_finish(marray[i]);
			vm_object_pip_wakeup(object);
		}
		++i;
	}
	return(count);
}

/*
 * Test whether a VM page is suitable for writing to the swapcache.
 * Does not test m->queue, PG_MARKER, or PG_SWAPPED.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vm_swapcache_test(vm_page_t m)
{
	vm_object_t object;

	if (m->flags & (PG_BUSY | PG_UNMANAGED | PG_NOTMETA))
		return(1);
	if (m->busy || m->hold_count || m->wire_count)
		return(1);
	if (m->valid != VM_PAGE_BITS_ALL)
		return(1);
	if (m->dirty & m->valid)
		return(1);
	if ((object = m->object) == NULL)
		return(1);
	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_DEAD)) {
		return(1);
	}
	vm_page_test_dirty(m);
	if (m->dirty & m->valid)
		return(1);
	return(0);
}

/*
 * Cleaning pass
 */
static
void
vm_swapcache_cleaning(vm_object_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	int count;
	int n;

	object = marker;
	count = vm_swapcache_maxlaunder;

	/*
	 * Look for vnode objects
	 */
	while ((object = TAILQ_NEXT(object, object_list)) != NULL && count--) {
		if (object->type != OBJT_VNODE)
			continue;
		if ((object->flags & OBJ_DEAD) || object->swblock_count == 0)
			continue;
		if ((vp = object->handle) == NULL)
			continue;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			continue;

		/*
		 * Adjust iterator.
		 */
		if (marker->backing_object != object)
			marker->size = 0;

		/*
		 * Move the marker so we can work on the VM object
		 */
		TAILQ_REMOVE(&vm_object_list, marker, object_list);
		TAILQ_INSERT_AFTER(&vm_object_list, object,
				   marker, object_list);

		/*
		 * Look for swblocks starting at our iterator.
		 *
		 * The swap_pager_condfree() function attempts to free
		 * swap space starting at the specified index.  The index
		 * will be updated on return.  The function will return
		 * a scan factor (NOT the number of blocks freed).
		 *
		 * If it must cut its scan of the object short due to an
		 * excessive number of swblocks, or is able to free the
		 * requested number of blocks, it will return n >= count
		 * and we break and pick it back up on a future attempt.
		 */
		n = swap_pager_condfree(object, &marker->size, count);
		count -= n;
		if (count < 0)
			break;

		/*
		 * Setup for loop.
		 */
		marker->size = 0;
		object = marker;
	}

	/*
	 * Adjust marker so we continue the scan from where we left off.
	 * When we reach the end we start back at the beginning.
	 */
	TAILQ_REMOVE(&vm_object_list, marker, object_list);
	if (object)
		TAILQ_INSERT_BEFORE(object, marker, object_list);
	else
		TAILQ_INSERT_HEAD(&vm_object_list, marker, object_list);
	marker->backing_object = object;
}