xref: /dragonfly/sys/vm/vm_swapcache.c (revision c504e38e)
/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */
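
/*
 * Illustrative usage sketch (not part of the original source): with
 * swap configured on an SSD, the cache would typically be enabled and
 * tuned through the sysctl knobs declared below, e.g.:
 *
 *	sysctl vm.swapcache.data_enable=1	(cache file data pages)
 *	sysctl vm.swapcache.meta_enable=1	(cache filesystem meta-data)
 *	sysctl vm.swapcache.read_enable=1	(divert reads to the cache)
 *	sysctl vm.swapcache.accrate=1000000	(average write rate, bytes/sec)
 *	sysctl vm.swapcache.maxburst=1000000000	(burst budget cap, bytes)
 *
 * The values shown are simply the defaults assigned below.
 */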

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static void vm_swapcached_flush (vm_page_t m);
struct thread *swapcached_thread;

static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

int vm_swapcache_read_enable;
static int vm_swapcache_sleep;
static int vm_swapcache_maxlaunder = 128;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int64_t vm_swapcache_curburst = 1000000000LL;
static int64_t vm_swapcache_maxburst = 1000000000LL;
static int64_t vm_swapcache_accrate = 1000000LL;
static int64_t vm_swapcache_write_count;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");

SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");

SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");
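
/*
 * Worked example of the write-bandwidth accounting (descriptive note,
 * not in the original source): at the 10hz poll rate the daemon adds
 * vm_swapcache_accrate / 10 to vm_swapcache_curburst on each tick,
 * i.e. an average of accrate bytes/sec (1MB/sec with the default
 * above).  Each page flushed debits curburst by PAGE_SIZE, so with 4KB
 * pages the sustained write rate works out to roughly 244 pages/sec,
 * while curburst may accumulate up to maxburst (1GB by default) to
 * absorb bursts of cacheable pages.
 */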

/*
 * vm_swapcached is the high level loop of the swapcache daemon.
 */
static void
vm_swapcached(void)
{
	struct vm_page marker;
	vm_object_t object;
	struct vnode *vp;
	vm_page_t m;
	int count;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;

	/*
	 * Initialize our marker
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = PQ_INACTIVE;
	marker.wire_count = 1;
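
	/*
	 * (Descriptive note: PG_MARKER lets other queue scanners
	 * recognize and skip the marker, PG_BUSY|PG_FICTITIOUS keep it
	 * from being treated as a normal page, and the non-zero
	 * wire_count guards against it ever being freed, so the marker
	 * can live on the inactive queue and preserve our scan position
	 * between passes.)
	 */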

	crit_enter();
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	for (;;) {
		/*
		 * Check for work roughly every 5 seconds while disabled.
		 */
		if (vm_swapcache_data_enable == 0 &&
		    vm_swapcache_meta_enable == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}

		/*
		 * Polling rate when enabled is 10 hz.  Deal with write
		 * bandwidth limits by topping up the burst budget, a
		 * token bucket (see the sketch after this function).
		 *
		 * We don't want to nickel-and-dime the scan as that will
		 * create unnecessary fragmentation.
		 */
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);
		vm_swapcache_curburst += vm_swapcache_accrate / 10;
		if (vm_swapcache_curburst > vm_swapcache_maxburst)
			vm_swapcache_curburst = vm_swapcache_maxburst;
		if (vm_swapcache_curburst < vm_swapcache_accrate)
			continue;

		/*
		 * Don't load any more into the cache once we have exceeded
		 * 2/3 of available swap space.  XXX need to start cleaning
		 * it out, though vnode recycling will accomplish that to
		 * some degree.
		 */
		if (vm_swap_cache_use > vm_swap_size * 2 / 3)
			continue;

		/*
		 * Calculate the number of pages to test.  We don't want
		 * to get into a cpu-bound loop.
		 */
		count = vmstats.v_inactive_count;
		if (count > vm_swapcache_maxlaunder)
			count = vm_swapcache_maxlaunder;

		/*
		 * Scan the inactive queue from our marker to locate
		 * suitable pages to push to the swap cache.
		 *
		 * We are looking for clean vnode-backed pages.
		 */
		m = &marker;
		while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
			if (m->flags & PG_MARKER) {
				++count;
				continue;
			}
			if (vm_swapcache_curburst < 0)
				break;
			if (m->flags & (PG_SWAPPED | PG_BUSY | PG_UNMANAGED))
				continue;
			if (m->busy || m->hold_count || m->wire_count)
				continue;
			if (m->valid != VM_PAGE_BITS_ALL)
				continue;
			if (m->dirty & m->valid)
				continue;
			if ((object = m->object) == NULL)
				continue;
			if (object->type != OBJT_VNODE ||
			    (object->flags & OBJ_DEAD)) {
				continue;
			}
			vm_page_test_dirty(m);
			if (m->dirty & m->valid)
				continue;
			vp = object->handle;
			if (vp == NULL)
				continue;
			switch(vp->v_type) {
			case VREG:
				if (vm_swapcache_data_enable == 0)
					continue;
				break;
			case VCHR:
				if (vm_swapcache_meta_enable == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * Ok, move the marker and soft-busy the page.
			 */
			TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
			TAILQ_INSERT_AFTER(INACTIVE_LIST, m, &marker, pageq);

			/*
			 * Assign swap and initiate I/O
			 */
			vm_swapcached_flush(m);

			/*
			 * Setup for next loop using marker.
			 */
			m = &marker;
		}
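
		/*
		 * (Descriptive note: the marker is re-inserted just before
		 * the page we stopped at, so the next pass resumes there;
		 * if we ran off the end of the queue it goes back to the
		 * head.)
		 */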
		TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
		if (m)
			TAILQ_INSERT_BEFORE(m, &marker, pageq);
		else
			TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	}
	TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
	crit_exit();
}
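
/*
 * Illustrative sketch (not part of the original source): the burst
 * logic in vm_swapcached() is a token bucket.  Stripped of the VM
 * details and using hypothetical names, the same pattern looks like:
 *
 *	static int64_t curburst;		// accumulated byte budget
 *	static int64_t accrate = 1000000LL;	// average bytes/sec earned
 *	static int64_t maxburst = 1000000000LL;	// cap on the budget
 *
 *	void
 *	tick(void)			// runs at 10hz, like the loop above
 *	{
 *		curburst += accrate / 10;
 *		if (curburst > maxburst)
 *			curburst = maxburst;
 *	}
 *
 *	void
 *	write_page(void)		// debit one page per write
 *	{
 *		curburst -= PAGE_SIZE;
 *	}
 *
 * A scan only starts once at least accrate bytes have accumulated and
 * stops as soon as curburst goes negative, so writes are issued in
 * efficient clustered bursts rather than a trickle.
 */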

/*
 * Flush the specified page using the swap_pager.
 */
static
void
vm_swapcached_flush(vm_page_t m)
{
	vm_object_t object;
	int rtvals;

	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);

	object = m->object;
	vm_object_pip_add(object, 1);
	swap_pager_putpages(object, &m, 1, FALSE, &rtvals);
	vm_swapcache_write_count += PAGE_SIZE;
	vm_swapcache_curburst -= PAGE_SIZE;

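	/*
	 * (Descriptive note: VM_PAGER_PEND means the write was queued
	 * asynchronously and the pager's I/O completion path will finish
	 * the busy/pip accounting; any other status means the operation
	 * is already done and we must release them here.)
	 */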
	if (rtvals != VM_PAGER_PEND) {
		vm_object_pip_wakeup(object);
		vm_page_io_finish(m);
	}
}