/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implement the swapcache daemon.  When enabled, swap is assumed to be
 * configured on a fast storage device such as an SSD.  Swap is assigned
 * to clean vnode-backed pages in the inactive queue, clustered by object
 * if possible, and written out.  The swap assignment sticks around even
 * after the underlying pages have been recycled.
 *
 * The daemon manages write bandwidth based on sysctl settings to control
 * wear on the SSD.
 *
 * The vnode strategy code will check for the swap assignments and divert
 * reads to the swap device.
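 *
 * Operation is controlled by sysctls under the vm.swapcache node; data
 * and meta-data caching can be enabled independently and writes can be
 * rate-limited (see the tunables below).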
 *
 * This operates on both regular files and the block device vnodes used by
 * filesystems to manage meta-data.
 */

#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

#define INACTIVE_LIST	(&vm_page_queues[PQ_INACTIVE].pl)

/* the kernel process "swapcached" */
static void vm_swapcached (void);
static void vm_swapcached_flush (vm_page_t m);
struct thread *swapcached_thread;

static struct kproc_desc swpc_kp = {
	"swapcached",
	vm_swapcached,
	&swapcached_thread
};
SYSINIT(swapcached, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &swpc_kp);

SYSCTL_NODE(_vm, OID_AUTO, swapcache, CTLFLAG_RW, NULL, NULL);

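/*
 * Tunables (vm.swapcache.*):
 *
 *	maxlaunder	max inactive pages tested per 10hz tick
 *	data_enable	cache clean regular-file (VREG) data
 *	meta_enable	cache filesystem meta-data (VCHR device vnodes)
 *	read_enable	checked by the vnode strategy code to divert
 *			reads to the swap cache
 *	curburst	currently accumulated write budget, in bytes
 *	maxburst	cap on the accumulated write budget, in bytes
 *	accrate		write budget accumulation rate, in bytes/sec
 *	write_count	total bytes written to the swap cache
 */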
int vm_swapcache_read_enable;
static int vm_swapcache_sleep;
static int vm_swapcache_maxlaunder = 128;
static int vm_swapcache_data_enable = 0;
static int vm_swapcache_meta_enable = 0;
static int64_t vm_swapcache_curburst = 1000000000LL;
static int64_t vm_swapcache_maxburst = 1000000000LL;
static int64_t vm_swapcache_accrate = 1000000LL;
static int64_t vm_swapcache_write_count;

SYSCTL_INT(_vm_swapcache, OID_AUTO, maxlaunder,
	CTLFLAG_RW, &vm_swapcache_maxlaunder, 0, "");

SYSCTL_INT(_vm_swapcache, OID_AUTO, data_enable,
	CTLFLAG_RW, &vm_swapcache_data_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, meta_enable,
	CTLFLAG_RW, &vm_swapcache_meta_enable, 0, "");
SYSCTL_INT(_vm_swapcache, OID_AUTO, read_enable,
	CTLFLAG_RW, &vm_swapcache_read_enable, 0, "");

SYSCTL_QUAD(_vm_swapcache, OID_AUTO, curburst,
	CTLFLAG_RW, &vm_swapcache_curburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, maxburst,
	CTLFLAG_RW, &vm_swapcache_maxburst, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, accrate,
	CTLFLAG_RW, &vm_swapcache_accrate, 0, "");
SYSCTL_QUAD(_vm_swapcache, OID_AUTO, write_count,
	CTLFLAG_RW, &vm_swapcache_write_count, 0, "");

/*
 * vm_swapcached is the high level swapcache daemon.
 */
static void
vm_swapcached(void)
{
	struct vm_page marker;
	vm_object_t object;
	struct vnode *vp;
	vm_page_t m;
	int count;

	/*
	 * Thread setup
	 */
	curthread->td_flags |= TDF_SYSTHREAD;

	/*
	 * Initialize our marker
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
	marker.queue = PQ_INACTIVE;
	marker.wire_count = 1;

	crit_enter();
	TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);

	for (;;) {
		/*
		 * Loop once a second or so looking for work when enabled.
		 */
		if (vm_swapcache_data_enable == 0 &&
		    vm_swapcache_meta_enable == 0) {
			tsleep(&vm_swapcache_sleep, 0, "csleep", hz * 5);
			continue;
		}

		/*
		 * Polling rate when enabled is 10 hz.  Deal with write
		 * bandwidth limits.
		 *
		 * We don't want to nickel-and-dime the scan as that will
		 * create unnecessary fragmentation.
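		 *
		 * The budget works like a token bucket: accrate/10 bytes
		 * are credited on each tick and capped at maxburst, while
		 * every page flushed debits PAGE_SIZE.  Once the budget
		 * dips below accrate the scan is skipped until at least
		 * one second's worth of credit has accumulated again.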
		 */
		tsleep(&vm_swapcache_sleep, 0, "csleep", hz / 10);
		vm_swapcache_curburst += vm_swapcache_accrate / 10;
		if (vm_swapcache_curburst > vm_swapcache_maxburst)
			vm_swapcache_curburst = vm_swapcache_maxburst;
		if (vm_swapcache_curburst < vm_swapcache_accrate)
			continue;

		/*
		 * Don't load any more into the cache once we have exceeded
		 * 2/3 of available swap space.  XXX need to start cleaning
		 * it out, though vnode recycling will accomplish that to
		 * some degree.
		 */
		if (vm_swap_cache_use > vm_swap_size * 2 / 3)
			continue;

		/*
		 * Calculate the number of pages to test.  We don't want
		 * to get into a cpu-bound loop.
		 */
		count = vmstats.v_inactive_count;
		if (count > vm_swapcache_maxlaunder)
			count = vm_swapcache_maxlaunder;

		/*
		 * Scan the inactive queue from our marker to locate
		 * suitable pages to push to the swap cache.
		 *
		 * We are looking for clean vnode-backed pages.
		 */
		m = &marker;
		while ((m = TAILQ_NEXT(m, pageq)) != NULL && count--) {
			if (m->flags & PG_MARKER) {
				++count;
				continue;
			}
			if (vm_swapcache_curburst < 0)
				break;
			if (m->flags & (PG_SWAPPED | PG_BUSY | PG_UNMANAGED))
				continue;
			if (m->busy || m->hold_count || m->wire_count)
				continue;
			if (m->valid != VM_PAGE_BITS_ALL)
				continue;
			if (m->dirty & m->valid)
				continue;
			if ((object = m->object) == NULL)
				continue;
			if (object->type != OBJT_VNODE ||
			    (object->flags & OBJ_DEAD)) {
				continue;
			}
			vm_page_test_dirty(m);
			if (m->dirty & m->valid)
				continue;
			vp = object->handle;
			if (vp == NULL)
				continue;
			switch(vp->v_type) {
			case VREG:
				if (vm_swapcache_data_enable == 0)
					continue;
				break;
			case VCHR:
				if (vm_swapcache_meta_enable == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * Ok, move the marker and soft-busy the page.
			 */
			TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
			TAILQ_INSERT_AFTER(INACTIVE_LIST, m, &marker, pageq);

			/*
			 * Assign swap and initiate I/O
			 */
			vm_swapcached_flush(m);

			/*
			 * Setup for next loop using marker.
			 */
			m = &marker;
		}
		TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
		if (m)
			TAILQ_INSERT_BEFORE(m, &marker, pageq);
		else
			TAILQ_INSERT_HEAD(INACTIVE_LIST, &marker, pageq);
	}
	TAILQ_REMOVE(INACTIVE_LIST, &marker, pageq);
	crit_exit();
}

/*
 * Flush the specified page using the swap_pager.
 */
static
void
vm_swapcached_flush(vm_page_t m)
{
	vm_object_t object;
	int rtvals;

	vm_page_io_start(m);
	vm_page_protect(m, VM_PROT_READ);

	object = m->object;
	vm_object_pip_add(object, 1);
	swap_pager_putpages(object, &m, 1, FALSE, &rtvals);
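	/*
	 * Account for the write and debit the burst budget regardless
	 * of the eventual disposition of the I/O.  curburst may go
	 * negative here, which stalls the scan loop above until enough
	 * budget has accumulated again.
	 */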
	vm_swapcache_write_count += PAGE_SIZE;
	vm_swapcache_curburst -= PAGE_SIZE;

	if (rtvals != VM_PAGER_PEND) {
		vm_object_pip_wakeup(object);
		vm_page_io_finish(m);
	}
}