/*
 * Copyright (c) 1998 David Greenman.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_sfbuf.c,v 1.15 2007/04/30 07:18:53 dillon Exp $
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sfbuf.h>
#include <sys/globaldata.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <sys/thread2.h>

static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_BOOT2_MACHDEP, SI_ORDER_ANY, sf_buf_init, NULL)

LIST_HEAD(sf_buf_list, sf_buf);

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RD, &nsfbufs, 0,
	"Maximum number of sf_bufs available to the system");

/*
 * A hash table of active sendfile(2) buffers
 */
static struct sf_buf_list *sf_buf_hashtable;
static u_long sf_buf_hashmask;

static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
static u_int sf_buf_alloc_want;

static vm_offset_t sf_base;
static struct sf_buf *sf_bufs;

static int sfbuf_quick = 1;
SYSCTL_INT(_debug, OID_AUTO, sfbuf_quick, CTLFLAG_RW, &sfbuf_quick, 0,
	"Allow cheaper cpu-local TLB invalidation for SFB_CPUPRIVATE mappings");
static int nsffree;
SYSCTL_INT(_kern_ipc, OID_AUTO, nsffree, CTLFLAG_RD, &nsffree, 0,
	"Number of free sf_bufs available to the system");

/*
 * Hash a vm_page pointer to a chain in the active-buffer hash table.
 * The pointer is mixed with a page-shifted copy of itself and masked
 * down to the table size.
 */
static __inline
int
sf_buf_hash(vm_page_t m)
{
	int hv;

	hv = ((int)(intptr_t)m / sizeof(vm_page_t)) + ((int)(intptr_t)m >> 12);
	return(hv & sf_buf_hashmask);
}
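#if 0
/*
 * Minimal sketch (hypothetical, not compiled in) of the hash lookup
 * pattern used by sf_buf_alloc() below.  It leans on hashinit()
 * conventionally sizing the table to a power of 2 and storing
 * (buckets - 1) in sf_buf_hashmask, which is why sf_buf_hash() can
 * reduce its hash value with a mask rather than a modulus.
 */
static struct sf_buf *
sf_buf_lookup(struct vm_page *m)
{
	struct sf_buf_list *chain;
	struct sf_buf *sf;

	chain = &sf_buf_hashtable[sf_buf_hash(m)];
	LIST_FOREACH(sf, chain, list_entry) {
		if (sf->m == m)
			return (sf);	/* existing (possibly free) mapping */
	}
	return (NULL);
}
#endif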
/*
 * Allocate the pool of sf_bufs (sendfile(2) buffers, or "super-fast"
 * buffers if you prefer :-)).
 */
static void
sf_buf_init(void *arg)
{
	int i;

	sf_buf_hashtable = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
	TAILQ_INIT(&sf_buf_freelist);
	sf_base = kmem_alloc_nofault(&kernel_map, nsfbufs * PAGE_SIZE);
	sf_bufs = kmalloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
			  M_WAITOK | M_ZERO);
	for (i = 0; i < nsfbufs; i++) {
		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
		sf_bufs[i].flags |= SFBA_ONFREEQ;
		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
		++nsffree;
	}
}

/*
 * Get an sf_buf from the freelist.  Will block if none are available.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
	struct sf_buf_list *hash_chain;
	struct sf_buf *sf;
	globaldata_t gd;
	int error;
	int pflags;

	gd = mycpu;
	crit_enter();
	hash_chain = &sf_buf_hashtable[sf_buf_hash(m)];
	LIST_FOREACH(sf, hash_chain, list_entry) {
		if (sf->m == m) {
			/*
			 * Cache hit.
			 *
			 * We must invalidate the TLB entry based on whether
			 * it need only be valid on the local cpu
			 * (SFB_CPUPRIVATE) or on all cpus.  This is
			 * conditionalized so in most cases no system-wide
			 * invalidation is needed.
			 *
			 * Note: we do not remove the entry from the
			 * freelist on the 0->1 transition.
			 */
			++sf->refcnt;
			if ((flags & SFB_CPUPRIVATE) && sfbuf_quick) {
				if ((sf->cpumask & gd->gd_cpumask) == 0) {
					pmap_kenter_sync_quick(sf->kva);
					sf->cpumask |= gd->gd_cpumask;
				}
			} else {
				if (sf->cpumask != (cpumask_t)-1) {
					pmap_kenter_sync(sf->kva);
					sf->cpumask = (cpumask_t)-1;
				}
			}
			goto done;	/* found existing mapping */
		}
	}

	/*
	 * Didn't find an old mapping.  Get a buffer off the freelist.  We
	 * may have to remove and skip buffers with non-zero ref counts
	 * that were lazily left on the freelist.
	 */
	for (;;) {
		if ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
			pflags = (flags & SFB_CATCH) ? PCATCH : 0;
			++sf_buf_alloc_want;
			error = tsleep(&sf_buf_freelist, pflags, "sfbufa", 0);
			--sf_buf_alloc_want;
			if (error)
				goto done;	/* sf is NULL here */
		} else {
			/*
			 * In addition to locating an sf_buf to reuse we may
			 * have to lazily remove referenced sf_bufs that the
			 * 0->1 cache-hit path above left on the freelist.
			 * Either way the sf_buf is unlinked.
			 *
			 * We are finished when we find an sf_buf with a
			 * refcnt of 0.  We theoretically do not have to
			 * remove it from the freelist, but doing so
			 * preserves LRU operation for both (1) the
			 * never-before-seen case and (2) the case of a
			 * buffer accidentally recycled because prior cached
			 * uses did not remove it.
			 */
			KKASSERT(sf->flags & SFBA_ONFREEQ);
			TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
			--nsffree;
			sf->flags &= ~SFBA_ONFREEQ;
			if (sf->refcnt == 0)
				break;
		}
	}
	if (sf->m != NULL)	/* remove previous mapping from hash table */
		LIST_REMOVE(sf, list_entry);
	LIST_INSERT_HEAD(hash_chain, sf, list_entry);
	sf->refcnt = 1;
	sf->m = m;
	if ((flags & SFB_CPUPRIVATE) && sfbuf_quick) {
		pmap_kenter_quick(sf->kva, sf->m->phys_addr);
		sf->cpumask = gd->gd_cpumask;
	} else {
		pmap_kenter(sf->kva, sf->m->phys_addr);
		sf->cpumask = (cpumask_t)-1;
	}
done:
	crit_exit();
	return (sf);
}
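#if 0
/*
 * Hypothetical consumer (an illustrative sketch, not part of the API):
 * map a page, copy out of the mapping, then drop the reference.
 * SFB_CATCH makes the sleep in sf_buf_alloc() interruptible by a
 * signal; since the tsleep() error is not propagated, the sketch
 * collapses a NULL return to EINTR.  len is assumed <= PAGE_SIZE.
 */
static int
sf_buf_copy_example(struct vm_page *m, void *dst, size_t len)
{
	struct sf_buf *sf;

	if ((sf = sf_buf_alloc(m, SFB_CATCH)) == NULL)
		return (EINTR);		/* interrupted while waiting */
	bcopy((void *)sf->kva, dst, len);
	sf_buf_free(sf);
	return (0);
}
#endif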
#if 0

/*
 * Add a reference to a buffer (currently unused)
 */
void
sf_buf_ref(struct sf_buf *sf)
{
	if (sf->refcnt == 0)
		panic("sf_buf_ref: referencing a free sf_buf");
	crit_enter();
	sf->refcnt++;
	crit_exit();
}

#endif

/*
 * Drop a reference to an sf_buf.  When the last reference goes away the
 * buffer is returned to the freelist for reuse; the page mapping itself
 * is left intact so a later allocation of the same page can take the
 * cache-hit path.  Note that the sf_buf's removal from the freelist is
 * lazy, so it may in fact already be on the freelist.  This is the
 * optimal (and most likely) scenario.
 *
 * The critical section taken here protects the refcnt and freelist
 * manipulation.
 */
void
sf_buf_free(struct sf_buf *sf)
{
	if (sf->refcnt == 0)
		panic("sf_buf_free: freeing free sf_buf");
	crit_enter();
	sf->refcnt--;
	if (sf->refcnt == 0 && (sf->flags & SFBA_ONFREEQ) == 0) {
		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
		++nsffree;
		sf->flags |= SFBA_ONFREEQ;
		if (sf_buf_alloc_want > 0)
			wakeup_one(&sf_buf_freelist);
	}
	crit_exit();
}
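#if 0
/*
 * Illustrative sequence (hypothetical, not compiled in) showing the
 * lazy-free optimization.  After sf_buf_free() drops the last
 * reference the buffer stays hashed and goes to the freelist tail.  A
 * prompt re-allocation of the same page takes the cache-hit path in
 * sf_buf_alloc(), which bumps refcnt without touching the freelist;
 * the allocation loop unlinks and skips such buffers only if it ever
 * pops them.  Assumes m has no pre-existing sf_buf mapping.
 */
static void
sf_buf_recycle_example(struct vm_page *m)
{
	struct sf_buf *sf1, *sf2;

	sf1 = sf_buf_alloc(m, 0);	/* fresh mapping, refcnt 1 */
	sf_buf_free(sf1);		/* refcnt 0, SFBA_ONFREEQ set */
	sf2 = sf_buf_alloc(m, 0);	/* cache hit: sf2 == sf1, */
					/* still flagged SFBA_ONFREEQ */
	sf_buf_free(sf2);		/* refcnt 0, already on freelist */
}
#endif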