xref: /dragonfly/sys/vm/vm_swapcache.c (revision 096e95c0)
/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static void vm_swapcached_flush (vm_page_t m);
struct thread *swapcached_thread;

static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

static int vm_swapcache_sleep;
static int vm_swapcache_maxlaunder = 64;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int64_t vm_swapcache_write_count;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0,
	"Maximum number of inactive pages to examine per pass");
SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0,
	"Enable swapcache of clean vnode-backed data pages");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0,
	"Enable swapcache of filesystem meta-data");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0,
	"Accumulated swapcache write counter");

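/*
 * The daemon idles until one of the enable knobs above is set from
 * userland, e.g.:
 *
 *	sysctl vm.swapcache.data_enable=1
 *	sysctl vm.swapcache.meta_enable=1
 */
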
/*
 * vm_swapcached is the high-level swapcache daemon.
 */
static void
vm_swapcached(void)
{
	struct vm_page marker;
	vm_object_t object;
	vm_page_t m;
	int count;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;

	/*
	 * Initialize our marker.  The marker is a dummy page inserted
	 * into the inactive queue to record our scan position; PG_MARKER
	 * keeps other queue scanners from treating it as a real page.
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = PQ_INACTIVE;
	marker.wire_count = 1;

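	/*
	 * Insert the marker at the head of the inactive queue.  Each pass
	 * of the main loop scans forward from the marker, leaving it in
	 * place between passes so the scan resumes where it left off.
	 */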
	crit_enter();
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	for (;;) {
		/*
		 * Loop once a second or so looking for work when enabled.
		 */
		if (vm_swapcache_data_enable == 0 &&
		    vm_swapcache_meta_enable == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz);

		/*
		 * Calculate the number of pages to test.  We don't want
		 * to get into a cpu-bound loop.
		 */
		count = vmstats.v_inactive_count;
		if (count > vm_swapcache_maxlaunder)
			count = vm_swapcache_maxlaunder;

		/*
		 * Scan the inactive queue from our marker to locate
		 * suitable pages to push to the swap cache.
		 *
		 * We are looking for clean vnode-backed pages.
		 */
		m = &marker;
		while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
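			/*
			 * Skip unsuitable pages.  Other markers do not
			 * consume our scan budget.  Pages that are already
			 * swapcached, busy, held, wired, unmanaged, not
			 * fully valid, or dirty are passed over; only
			 * clean, fully-valid vnode-backed pages qualify.
			 */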
			if (m->flags & PG_MARKER) {
				++count;
				continue;
			}
			if (m->flags & (PG_SWAPPED | PG_BUSY | PG_UNMANAGED))
				continue;
			if (m->busy || m->hold_count || m->wire_count)
				continue;
			if (m->valid != VM_PAGE_BITS_ALL)
				continue;
			if (m->dirty & m->valid)
				continue;
			if ((object = m->object) == NULL)
				continue;
			if (object->type != OBJT_VNODE)
				continue;
			vm_page_test_dirty(m);
			if (m->dirty & m->valid)
				continue;

			/*
			 * Ok, move the marker and flush the page.  The
			 * flush routine soft-busies the page for the
			 * duration of the I/O.
			 */
			TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
			TAILQ_INSERT_AFTER(INACTIVE_LIST, m, &marker, pageq);

			/*
			 * Assign swap and initiate I/O
			 */
			vm_swapcached_flush(m);

			/*
			 * Setup for next loop using marker.
			 */
			m = &marker;
		}
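		/*
		 * Reposition the marker for the next pass: just before
		 * the page we stopped at, or back at the head of the
		 * queue if we ran off the tail.
		 */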
		TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
		if (m)
			TAILQ_INSERT_BEFORE(m, &marker, pageq);
		else
			TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);
	}
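	/* NOTREACHED */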
	TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
	crit_exit();
}

/*
 * Flush the specified page using the swap_pager.
 */
static
void
vm_swapcached_flush(vm_page_t m)
{
	vm_object_t object;
	int rtvals;

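	/*
	 * Soft-busy the page for I/O and write-protect its mappings so
	 * it cannot be re-dirtied while the write is in flight.
	 */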
	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);

	object = m->object;
	vm_object_pip_add(object, 1);
	swap_pager_putpages(object, &m, 1, FALSE, &rtvals);

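	/*
	 * If the pageout did not remain pending (it completed
	 * synchronously or failed outright), finish the soft-busy and
	 * paging-in-progress accounting here; for VM_PAGER_PEND the
	 * I/O completion code is responsible for it.
	 */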
	if (rtvals != VM_PAGER_PEND) {
		vm_object_pip_wakeup(object);
		vm_page_io_finish(m);
	}
}