/*
 * (MPSAFE)
 *
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device when the data is present in the swapcache.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */

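/*
 * Illustrative usage sketch (example values, not necessarily defaults):
 * the daemon is normally activated at runtime through the vm.swapcache
 * sysctl tree defined below, e.g.
 *
 *	sysctl vm.swapcache.data_enable=1
 *	sysctl vm.swapcache.meta_enable=1
 *	sysctl vm.swapcache.read_enable=1
 *
 * data_enable and meta_enable gate what the daemon writes to the
 * swapcache, while read_enable allows the vnode strategy code to satisfy
 * reads from it.
 */
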
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#include <sys/eventhandler.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

/* the kernel process "swapcached" */
static int vm_swapcached_flush (vm_page_t m, int isblkdev);
static int vm_swapcache_test(vm_page_t m);
static void vm_swapcache_writing(vm_page_t marker);
static void vm_swapcache_cleaning(vm_object_t marker);
static void vm_swapcache_movemarker(vm_object_t marker, vm_object_t object);
struct thread *swapcached_thread;

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

int vm_swapcache_read_enable;
int vm_swapcache_inactive_heuristic;
static int vm_swapcache_sleep;
static int vm_swapcache_maxscan = 256 * 4;
static int vm_swapcache_maxlaunder = 256;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int vm_swapcache_maxswappct = 75;
static int vm_swapcache_hysteresis;
int vm_swapcache_use_chflags = 1;	/* require chflags cache */
static int64_t vm_swapcache_minburst = 10000000LL;	/* 10MB */
static int64_t vm_swapcache_curburst = 4000000000LL;	/* 4G after boot */
static int64_t vm_swapcache_maxburst = 2000000000LL;	/* 2G nominal max */
static int64_t vm_swapcache_accrate = 100000LL;		/* 100K/s */
static int64_t vm_swapcache_write_count;
static int64_t vm_swapcache_maxfilesize;
static int64_t vm_swapcache_cleanperobj = 16*1024*1024;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, maxscan,
	CTLFLAG_RW, &vm_swapcache_maxscan, 0, "");

SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, maxswappct,
	CTLFLAG_RW, &vm_swapcache_maxswappct, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, hysteresis,
	CTLFLAG_RW, &vm_swapcache_hysteresis, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, use_chflags,
	CTLFLAG_RW, &vm_swapcache_use_chflags, 0, "");

SYSCTL_QUAD(_vm_swapcache, OID_AUTO, minburst,
	CTLFLAG_RW, &vm_swapcache_minburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxfilesize,
	CTLFLAG_RW, &vm_swapcache_maxfilesize, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, cleanperobj,
	CTLFLAG_RW, &vm_swapcache_cleanperobj, 0, "");

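/*
 * SWAPMAX(adj) computes a byte threshold relative to total configured
 * swap: (vm_swapcache_maxswappct + adj) percent of vm_swap_max.  With
 * the default maxswappct of 75, SWAPMAX(0) is 75% of swap and
 * SWAPMAX(-10) is 65%, giving the writing/cleaning state machine its
 * hysteresis band.
 */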
#define SWAPMAX(adj)	\
	((int64_t)vm_swap_max * (vm_swapcache_maxswappct + (adj)) / 100)

/*
 * When shutting down the machine we want to stop swapcache operation
 * immediately so swap is not accessed after devices have been shuttered.
 */
static void
shutdown_swapcache(void *arg __unused)
{
	vm_swapcache_read_enable = 0;
	vm_swapcache_data_enable = 0;
	vm_swapcache_meta_enable = 0;
	wakeup(&vm_swapcache_sleep);	/* shortcut 5-second wait */
}

/*
 * vm_swapcached is the high level swapcache daemon.
 *
 * No requirements.
 */
static void
vm_swapcached_thread(void)
{
	enum { SWAPC_WRITING, SWAPC_CLEANING } state = SWAPC_WRITING;
	enum { SWAPB_BURSTING, SWAPB_RECOVERING } burst = SWAPB_BURSTING;
	static struct vm_page page_marker[PQ_L2_SIZE];
	static struct vm_object object_marker;
	int q;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;
	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc,
			      swapcached_thread, SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_swapcache,
			      NULL, SHUTDOWN_PRI_SECOND);

	/*
	 * Initialize our marker for the inactive scan (SWAPC_WRITING)
	 */
	bzero(&page_marker, sizeof(page_marker));
	for (q = 0; q < PQ_L2_SIZE; ++q) {
		page_marker[q].flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
		page_marker[q].queue = PQ_INACTIVE + q;
		page_marker[q].pc = q;
		page_marker[q].wire_count = 1;
		vm_page_queues_spin_lock(PQ_INACTIVE + q);
		TAILQ_INSERT_HEAD(
			&vm_page_queues[PQ_INACTIVE + q].pl,
			&page_marker[q], pageq);
		vm_page_queues_spin_unlock(PQ_INACTIVE + q);
	}

	vm_swapcache_hysteresis = vmstats.v_inactive_target / 2;
	vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;

	/*
	 * Initialize our marker for the vm_object scan (SWAPC_CLEANING)
	 */
	bzero(&object_marker, sizeof(object_marker));
	object_marker.type = OBJT_MARKER;
	lwkt_gettoken(&vmobj_token);
	TAILQ_INSERT_HEAD(&vm_object_list, &object_marker, object_list);
	lwkt_reltoken(&vmobj_token);

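	/*
	 * Main loop.  Alternate between pushing clean inactive pages to
	 * the swapcache (SWAPC_WRITING) and freeing stale swapcache
	 * assignments (SWAPC_CLEANING), pacing writes with the burst
	 * accumulator below.
	 */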
	for (;;) {
		/*
		 * Handle shutdown
		 */
		kproc_suspend_loop();

		/*
		 * Check every 5 seconds when not enabled or if no swap
		 * is present.
		 */
		if ((vm_swapcache_data_enable == 0 &&
		     vm_swapcache_meta_enable == 0) ||
		    vm_swap_max == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}

		/*
		 * Polling rate when enabled is approximately 10 hz.
		 */
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);

		/*
		 * State hysteresis.  Generate write activity up to 75% of
		 * swap, then clean out swap assignments down to 65%, then
		 * repeat.
		 */
		if (state == SWAPC_WRITING) {
			if (vm_swap_cache_use > SWAPMAX(0))
				state = SWAPC_CLEANING;
		} else {
			if (vm_swap_cache_use < SWAPMAX(-10))
				state = SWAPC_WRITING;
		}

		/*
		 * We are allowed to continue accumulating burst value
		 * in either state.  Allow the user to set curburst > maxburst
		 * for the initial load-in.
		 */
		if (vm_swapcache_curburst < vm_swapcache_maxburst) {
			vm_swapcache_curburst += vm_swapcache_accrate / 10;
			if (vm_swapcache_curburst > vm_swapcache_maxburst)
				vm_swapcache_curburst = vm_swapcache_maxburst;
		}
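		/*
		 * Note: with the ~10hz poll above, curburst thus grows by
		 * roughly vm_swapcache_accrate bytes per second (100K/sec
		 * by default) until it reaches maxburst.
		 */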

		/*
		 * We don't want to nickel-and-dime the scan as that will
		 * create unnecessary fragmentation.  The minimum burst
		 * is one second's worth of accumulation.
		 */
		if (state == SWAPC_WRITING) {
			if (vm_swapcache_curburst >= vm_swapcache_accrate) {
				if (burst == SWAPB_BURSTING) {
					for (q = 0; q < PQ_L2_SIZE; ++q) {
						vm_swapcache_writing(
							&page_marker[q]);
					}
					if (vm_swapcache_curburst <= 0)
						burst = SWAPB_RECOVERING;
				} else if (vm_swapcache_curburst >
					   vm_swapcache_minburst) {
					for (q = 0; q < PQ_L2_SIZE; ++q) {
						vm_swapcache_writing(
							&page_marker[q]);
					}
					burst = SWAPB_BURSTING;
				}
			}
		} else {
			vm_swapcache_cleaning(&object_marker);
		}
	}

	/*
	 * Cleanup (NOT REACHED)
	 */
	for (q = 0; q < PQ_L2_SIZE; ++q) {
		vm_page_queues_spin_lock(PQ_INACTIVE + q);
		TAILQ_REMOVE(
			&vm_page_queues[PQ_INACTIVE + q].pl,
			&page_marker[q], pageq);
		vm_page_queues_spin_unlock(PQ_INACTIVE + q);
	}

	lwkt_gettoken(&vmobj_token);
	TAILQ_REMOVE(&vm_object_list, &object_marker, object_list);
	lwkt_reltoken(&vmobj_token);
}

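/*
 * Descriptor for the "swapcached" kernel thread.  The SYSINIT below runs
 * kproc_start() at the SI_SUB_KTHREAD_PAGE stage to create the thread,
 * which then executes vm_swapcached_thread().
 */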
static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached_thread,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)

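/*
 * SWAPC_WRITING pass: scan the inactive queue from the supplied per-queue
 * marker and push suitable clean vnode-backed pages to the swapcache.
 * Each call is bounded by vm_swapcache_maxlaunder pages flushed and
 * vm_swapcache_maxscan queue positions scanned.
 */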
static void
vm_swapcache_writing(vm_page_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	vm_page_t m;
	int count;
	int scount;
	int isblkdev;

	/*
	 * Deal with an overflow of the heuristic counter or if the user
	 * manually changes the hysteresis.
	 *
	 * Try to avoid small incremental pageouts by waiting for enough
	 * pages to build up in the inactive queue to hopefully get a good
	 * burst in.  This heuristic is bumped by the VM system and reset
	 * when our scan hits the end of the queue.
	 */
	if (vm_swapcache_inactive_heuristic < -vm_swapcache_hysteresis)
		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
	if (vm_swapcache_inactive_heuristic < 0)
		return;

	/*
	 * Scan the inactive queue from our marker to locate
	 * suitable pages to push to the swap cache.
	 *
	 * We are looking for clean vnode-backed pages.
	 */
	count = vm_swapcache_maxlaunder;
	scount = vm_swapcache_maxscan;

	vm_page_queues_spin_lock(marker->queue);
	while ((m = TAILQ_NEXT(marker, pageq)) != NULL &&
	       count > 0 && scount-- > 0) {
		KKASSERT(m->queue == marker->queue);

		if (vm_swapcache_curburst < 0)
			break;
		TAILQ_REMOVE(
			&vm_page_queues[marker->queue].pl, marker, pageq);
		TAILQ_INSERT_AFTER(
			&vm_page_queues[marker->queue].pl, m, marker, pageq);

		/*
		 * Ignore markers and ignore pages that already have a swap
		 * assignment.
		 */
		if (m->flags & (PG_MARKER | PG_SWAPPED))
			continue;
		if (vm_page_busy_try(m, TRUE))
			continue;
		vm_page_queues_spin_unlock(marker->queue);

		if ((object = m->object) == NULL) {
			vm_page_wakeup(m);
			vm_page_queues_spin_lock(marker->queue);
			continue;
		}
		vm_object_hold(object);
		if (m->object != object) {
			vm_object_drop(object);
			vm_page_wakeup(m);
			vm_page_queues_spin_lock(marker->queue);
			continue;
		}
		if (vm_swapcache_test(m)) {
			vm_object_drop(object);
			vm_page_wakeup(m);
			vm_page_queues_spin_lock(marker->queue);
			continue;
		}

		vp = object->handle;
		if (vp == NULL) {
			vm_object_drop(object);
			vm_page_wakeup(m);
			vm_page_queues_spin_lock(marker->queue);
			continue;
		}

		switch(vp->v_type) {
		case VREG:
			/*
			 * PG_NOTMETA generically means 'don't swapcache this',
			 * and HAMMER will set this for regular data buffers
			 * (and leave it unset for meta-data buffers) as
			 * appropriate when double buffering is enabled.
			 */
			if (m->flags & PG_NOTMETA) {
				vm_object_drop(object);
				vm_page_wakeup(m);
				vm_page_queues_spin_lock(marker->queue);
				continue;
			}

			/*
			 * If data_enable is 0 do not try to swapcache data.
			 * If use_chflags is set then only swapcache data for
			 * VSWAPCACHE marked vnodes, otherwise any vnode.
			 */
			if (vm_swapcache_data_enable == 0 ||
			    ((vp->v_flag & VSWAPCACHE) == 0 &&
			     vm_swapcache_use_chflags)) {
				vm_object_drop(object);
				vm_page_wakeup(m);
				vm_page_queues_spin_lock(marker->queue);
				continue;
			}
			if (vm_swapcache_maxfilesize &&
			    object->size >
			    (vm_swapcache_maxfilesize >> PAGE_SHIFT)) {
				vm_object_drop(object);
				vm_page_wakeup(m);
				vm_page_queues_spin_lock(marker->queue);
				continue;
			}
			isblkdev = 0;
			break;
		case VCHR:
			/*
			 * PG_NOTMETA generically means 'don't swapcache this',
			 * and HAMMER will set this for regular data buffers
			 * (and leave it unset for meta-data buffers) as
			 * appropriate when double buffering is enabled.
			 */
			if (m->flags & PG_NOTMETA) {
				vm_object_drop(object);
				vm_page_wakeup(m);
				vm_page_queues_spin_lock(marker->queue);
				continue;
			}
			if (vm_swapcache_meta_enable == 0) {
				vm_object_drop(object);
				vm_page_wakeup(m);
				vm_page_queues_spin_lock(marker->queue);
				continue;
			}
			isblkdev = 1;
			break;
		default:
			vm_object_drop(object);
			vm_page_wakeup(m);
			vm_page_queues_spin_lock(marker->queue);
			continue;
		}

456096e95c0SMatthew Dillon 		/*
4573ffc7051SMatthew Dillon 		 * Assign swap and initiate I/O.
4583ffc7051SMatthew Dillon 		 *
4593ffc7051SMatthew Dillon 		 * (adjust for the --count which also occurs in the loop)
460096e95c0SMatthew Dillon 		 */
		count -= vm_swapcached_flush(m, isblkdev);

		/*
		 * Setup for next loop using marker.
		 */
		vm_object_drop(object);
		vm_page_queues_spin_lock(marker->queue);
	}

	/*
	 * The marker could wind up at the end, which is ok.  If we hit the
	 * end of the list, adjust the heuristic.
	 *
	 * Earlier inactive pages that were dirty and became clean
	 * are typically moved to the end of PQ_INACTIVE by virtue
	 * of vfs_vmio_release() when they become unwired from the
	 * buffer cache.
	 */
	if (m == NULL)
		vm_swapcache_inactive_heuristic = -vm_swapcache_hysteresis;
	vm_page_queues_spin_unlock(marker->queue);
}

/*
 * Flush the specified page using the swap_pager.  The page
 * must be busied by the caller and its disposition will become
 * the responsibility of this function.
 *
 * Try to collect surrounding pages, including pages which may
 * have already been assigned swap.  Try to cluster within a
 * contiguous aligned SWAP_META_PAGES (typ 16 x PAGE_SIZE) block
 * to match what swap_pager_putpages() can do.
 *
 * We also want to try to match against the buffer cache blocksize
 * but we don't really know what it is here.  Since the buffer cache
 * wires and unwires pages in groups the fact that we skip wired pages
 * should be sufficient.
 *
 * Returns a count of pages we might have flushed (minimum 1)
 */
static
int
vm_swapcached_flush(vm_page_t m, int isblkdev)
{
	vm_object_t object;
	vm_page_t marray[SWAP_META_PAGES];
	vm_pindex_t basei;
	int rtvals[SWAP_META_PAGES];
	int x;
	int i;
	int j;
	int count;
	int error;

	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);
	object = m->object;
	vm_object_hold(object);

	/*
	 * Try to cluster around (m), keeping in mind that the swap pager
	 * can only do SWAP_META_PAGES worth of contiguous write.
	 */
	x = (int)m->pindex & SWAP_META_MASK;
	marray[x] = m;
	basei = m->pindex;
	vm_page_wakeup(m);

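	/*
	 * Cluster backwards from (m) toward the start of the aligned
	 * SWAP_META_PAGES block, stopping at the first page that is
	 * missing, busy, or unsuitable for the swapcache.
	 */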
	for (i = x - 1; i >= 0; --i) {
		m = vm_page_lookup_busy_try(object, basei - x + i,
					    TRUE, &error);
		if (error || m == NULL)
			break;
		if (vm_swapcache_test(m)) {
			vm_page_wakeup(m);
			break;
		}
		if (isblkdev && (m->flags & PG_NOTMETA)) {
			vm_page_wakeup(m);
			break;
		}
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[i] = m;
		vm_page_wakeup(m);
	}
	++i;

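	/*
	 * Cluster forwards to the end of the aligned block in the same
	 * manner.
	 */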
	for (j = x + 1; j < SWAP_META_PAGES; ++j) {
		m = vm_page_lookup_busy_try(object, basei - x + j,
					    TRUE, &error);
		if (error || m == NULL)
			break;
		if (vm_swapcache_test(m)) {
			vm_page_wakeup(m);
			break;
		}
		if (isblkdev && (m->flags & PG_NOTMETA)) {
			vm_page_wakeup(m);
			break;
		}
		vm_page_io_start(m);
		vm_page_protect(m, VM_PROT_READ);
		if (m->queue - m->pc == PQ_CACHE) {
			vm_page_unqueue_nowakeup(m);
			vm_page_deactivate(m);
		}
		marray[j] = m;
		vm_page_wakeup(m);
	}

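	/*
	 * Issue a single clustered write for pages [i, j) via the swap
	 * pager and charge the I/O against the current burst budget.
	 */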
	count = j - i;
	vm_object_pip_add(object, count);
	swap_pager_putpages(object, marray + i, count, FALSE, rtvals + i);
	vm_swapcache_write_count += count * PAGE_SIZE;
	vm_swapcache_curburst -= count * PAGE_SIZE;

	while (i < j) {
		if (rtvals[i] != VM_PAGER_PEND) {
			vm_page_busy_wait(marray[i], FALSE, "swppgfd");
			vm_page_io_finish(marray[i]);
			vm_page_wakeup(marray[i]);
			vm_object_pip_wakeup(object);
		}
		++i;
	}
	vm_object_drop(object);
	return(count);
}

/*
 * Test whether a VM page is suitable for writing to the swapcache.
 * Does not test m->queue, PG_MARKER, or PG_SWAPPED.
 *
 * Returns 0 on success, 1 on failure
 */
static int
vm_swapcache_test(vm_page_t m)
{
	vm_object_t object;

	if (m->flags & PG_UNMANAGED)
		return(1);
	if (m->hold_count || m->wire_count)
		return(1);
	if (m->valid != VM_PAGE_BITS_ALL)
		return(1);
	if (m->dirty & m->valid)
		return(1);
	if ((object = m->object) == NULL)
		return(1);
	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_DEAD)) {
		return(1);
	}
	vm_page_test_dirty(m);
	if (m->dirty & m->valid)
		return(1);
	return(0);
}

/*
 * Cleaning pass.
 *
 * We clean whole objects up to 16MB
 */
static
void
vm_swapcache_cleaning(vm_object_t marker)
{
	vm_object_t object;
	struct vnode *vp;
	int count;
	int scount;
	int n;

	count = vm_swapcache_maxlaunder;
	scount = vm_swapcache_maxscan;

	/*
	 * Look for vnode objects
	 */
	lwkt_gettoken(&vmobj_token);

	while ((object = TAILQ_NEXT(marker, object_list)) != NULL) {
		/*
		 * We have to skip markers.  We cannot hold/drop marker
		 * objects!
		 */
		if (object->type == OBJT_MARKER) {
			vm_swapcache_movemarker(marker, object);
			continue;
		}

		/*
		 * Safety, or in case there are millions of VM objects
		 * without swapcache backing.
		 */
		if (--scount <= 0)
			break;

		/*
		 * We must hold the object before potentially yielding.
		 */
		vm_object_hold(object);
		lwkt_yield();

		/*
		 * Only operate on live VNODE objects that are either
		 * VREG or VCHR (VCHR for meta-data).
		 */
		if ((object->type != OBJT_VNODE) ||
		    ((object->flags & OBJ_DEAD) ||
		     object->swblock_count == 0) ||
		    ((vp = object->handle) == NULL) ||
		    (vp->v_type != VREG && vp->v_type != VCHR)) {
			vm_object_drop(object);
			/* object may be invalid now */
			vm_swapcache_movemarker(marker, object);
			continue;
		}

		/*
		 * Reset the object pindex stored in the marker if the
		 * working object has changed.
		 */
		if (marker->backing_object != object) {
			marker->size = 0;
			marker->backing_object_offset = 0;
			marker->backing_object = object;
		}

		/*
		 * Look for swblocks starting at our iterator.
		 *
		 * The swap_pager_condfree() function attempts to free
		 * swap space starting at the specified index.  The index
		 * will be updated on return.  The function will return
		 * a scan factor (NOT the number of blocks freed).
		 *
		 * If it must cut its scan of the object short due to an
		 * excessive number of swblocks, or is able to free the
		 * requested number of blocks, it will return n >= count
		 * and we break and pick it back up on a future attempt.
		 *
		 * Scan the object linearly and try to batch large sets of
		 * blocks that are likely to clean out entire swap radix
		 * tree leafs.
		 */
		lwkt_token_swap();
		lwkt_reltoken(&vmobj_token);

		n = swap_pager_condfree(object, &marker->size,
				    (count + SWAP_META_MASK) & ~SWAP_META_MASK);

		vm_object_drop(object);		/* object may be invalid now */
		lwkt_gettoken(&vmobj_token);

		/*
		 * If we have exhausted the object or hit the per-object
		 * cleaning limit then move us to the next object.  Note that
		 * the current object may no longer be on the vm_object_list.
		 */
		if (n <= 0 ||
		    marker->backing_object_offset > vm_swapcache_cleanperobj) {
			vm_swapcache_movemarker(marker, object);
		}

		/*
		 * If we have exhausted our max-launder count, stop for now.
		 */
		count -= n;
		marker->backing_object_offset += n * PAGE_SIZE;
		if (count < 0)
			break;
	}

	/*
	 * If we wound up at the end of the list this will move the
	 * marker back to the beginning.
	 */
	if (object == NULL)
		vm_swapcache_movemarker(marker, NULL);

	lwkt_reltoken(&vmobj_token);
}

/*
 * Move the marker past the current object.  Object can be stale, but we
 * still need it to determine if the marker has to be moved.  If the object
 * is still the 'current object' (object after the marker), we hop-scotch
 * the marker past it.
 */
static void
vm_swapcache_movemarker(vm_object_t marker, vm_object_t object)
{
	if (TAILQ_NEXT(marker, object_list) == object) {
		TAILQ_REMOVE(&vm_object_list, marker, object_list);
		if (object) {
			TAILQ_INSERT_AFTER(&vm_object_list, object,
					   marker, object_list);
		} else {
			TAILQ_INSERT_HEAD(&vm_object_list,
					  marker, object_list);
		}
	}
}