/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled swap is assumed to be
 * configured on a fast storage device such as a SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device.
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

/* Shorthand for the inactive page queue's list head */
#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static void vm_swapcached_flush (vm_page_t m);
struct thread *swapcached_thread;

/* Kernel-thread descriptor; the thread is started via SYSINIT below. */
static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp)

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

static int vm_swapcache_sleep;			/* tsleep() wait channel */
static int vm_swapcache_maxlaunder = 64;	/* max pages examined per pass */
static int vm_swapcache_data_enable = 0;	/* enable caching (file data) */
static int vm_swapcache_meta_enable = 0;	/* enable caching (meta-data) */
/*
 * NOTE(review): in this revision data_enable/meta_enable are only tested
 * jointly to gate the scan loop, and write_count is exported via sysctl
 * but never updated anywhere in this file -- verify against later
 * revisions before relying on these counters.
 */
static int64_t vm_swapcache_write_count;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");

/*
 * vm_swapcached is the high level daemon: it scans the inactive queue
 * and pushes clean vnode-backed pages into the swap cache.
 */
static void
vm_swapcached(void)
{
	struct vm_page marker;		/* fictitious page marking scan position */
	vm_object_t object;
	vm_page_t m;
	int count;			/* pages left to examine this pass */

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;

	/*
	 * Initialize our marker.  PG_MARKER lets this loop (and other
	 * queue scanners) recognize and skip it; it is never a real page.
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = PQ_INACTIVE;
	marker.wire_count = 1;

	/*
	 * All inactive-queue manipulation below occurs inside a critical
	 * section held for the life of the daemon.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	for (;;) {
		/*
		 * Loop once a second or so looking for work when enabled.
		 * Sleep longer (5 seconds) while both forms of caching are
		 * disabled.
		 */
		if (vm_swapcache_data_enable == 0 &&
		    vm_swapcache_meta_enable == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz);

		/*
		 * Calculate the number of pages to test.  We don't want
		 * to get into a cpu-bound loop, so the per-pass budget is
		 * capped by the vm.swapcache.maxlaunder sysctl.
		 */
		count = vmstats.v_inactive_count;
		if (count > vm_swapcache_maxlaunder)
			count = vm_swapcache_maxlaunder;

		/*
		 * Scan the inactive queue from our marker to locate
		 * suitable pages to push to the swap cache.
		 *
		 * We are looking for clean vnode-backed pages.
		 */
		m = &marker;
		while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
			/* other scanners' markers do not consume our budget */
			if (m->flags & PG_MARKER) {
				++count;
				continue;
			}
			/* skip pages already swap-assigned, busy, or unmanaged */
			if (m->flags & (PG_SWAPPED | PG_BUSY | PG_UNMANAGED))
				continue;
			if (m->busy || m->hold_count || m->wire_count)
				continue;
			/* only fully-valid, fully-clean pages qualify */
			if (m->valid != VM_PAGE_BITS_ALL)
				continue;
			if (m->dirty & m->valid)
				continue;
			if ((object = m->object) == NULL)
				continue;
			if (object->type != OBJT_VNODE)
				continue;
			/* re-check dirty state after vm_page_test_dirty() */
			vm_page_test_dirty(m);
			if (m->dirty & m->valid)
				continue;

			/*
			 * Ok, move the marker and soft-busy the page.
			 */
			TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
			TAILQ_INSERT_AFTER(INACTIVE_LIST, m, &marker, pageq);

			/*
			 * Assign swap and initiate I/O
			 */
			vm_swapcached_flush(m);

			/*
			 * Setup for next loop using marker.
			 */
			m = &marker;
		}

		/*
		 * Park the marker for the next pass: just before the page
		 * we stopped at, or back at the head if we ran off the end
		 * of the queue.
		 */
		TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
		if (m)
			TAILQ_INSERT_BEFORE(m, &marker, pageq);
		else
			TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	}
	/* NOTREACHED -- the for(;;) above never exits */
	TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
	crit_exit();
}

/*
 * Flush the specified page using the swap_pager.
212*096e95c0SMatthew Dillon */ 213*096e95c0SMatthew Dillon static 214*096e95c0SMatthew Dillon void 215*096e95c0SMatthew Dillon vm_swapcached_flush(vm_page_t m) 216*096e95c0SMatthew Dillon { 217*096e95c0SMatthew Dillon vm_object_t object; 218*096e95c0SMatthew Dillon int rtvals; 219*096e95c0SMatthew Dillon 220*096e95c0SMatthew Dillon vm_page_io_start(m); 221*096e95c0SMatthew Dillon vm_page_protect(m, VM_PROT_READ); 222*096e95c0SMatthew Dillon 223*096e95c0SMatthew Dillon object = m->object; 224*096e95c0SMatthew Dillon vm_object_pip_add(object, 1); 225*096e95c0SMatthew Dillon swap_pager_putpages(object, &m, 1, FALSE, &rtvals); 226*096e95c0SMatthew Dillon 227*096e95c0SMatthew Dillon if (rtvals != VM_PAGER_PEND) { 228*096e95c0SMatthew Dillon vm_object_pip_wakeup(object); 229*096e95c0SMatthew Dillon vm_page_io_finish(m); 230*096e95c0SMatthew Dillon } 231*096e95c0SMatthew Dillon } 232