1 /* 2 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Hiten Pandya <hmp@backplane.com>. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1991 Regents of the University of California. 37 * All rights reserved. 38 * 39 * This code is derived from software contributed to Berkeley by 40 * The Mach Operating System project at Carnegie-Mellon University. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 67 * $DragonFly: src/sys/vm/vm_contig.c,v 1.21 2006/12/28 21:24:02 dillon Exp $ 68 */ 69 70 /* 71 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 72 * All rights reserved. 73 * 74 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /* 98 * Contiguous memory allocation API. 99 */ 100 101 #include <sys/param.h> 102 #include <sys/systm.h> 103 #include <sys/malloc.h> 104 #include <sys/proc.h> 105 #include <sys/lock.h> 106 #include <sys/vmmeter.h> 107 #include <sys/vnode.h> 108 109 #include <vm/vm.h> 110 #include <vm/vm_param.h> 111 #include <vm/vm_kern.h> 112 #include <vm/pmap.h> 113 #include <vm/vm_map.h> 114 #include <vm/vm_object.h> 115 #include <vm/vm_page.h> 116 #include <vm/vm_pageout.h> 117 #include <vm/vm_pager.h> 118 #include <vm/vm_extern.h> 119 120 #include <sys/thread2.h> 121 #include <vm/vm_page2.h> 122 123 /* 124 * vm_contig_pg_clean: 125 * 126 * Do a thorough cleanup of the specified 'queue', which can be either 127 * PQ_ACTIVE or PQ_INACTIVE by doing a walkthrough. If the page is not 128 * marked dirty, it is shoved into the page cache, provided no one has 129 * currently aqcuired it, otherwise localized action per object type 130 * is taken for cleanup: 131 * 132 * In the OBJT_VNODE case, the whole page range is cleaned up 133 * using the vm_object_page_clean() routine, by specyfing a 134 * start and end of '0'. 135 * 136 * Otherwise if the object is of any other type, the generic 137 * pageout (daemon) flush routine is invoked. 138 * 139 * We must be in a critical section. 140 */ 141 static int 142 vm_contig_pg_clean(int queue) 143 { 144 vm_object_t object; 145 vm_page_t m, m_tmp, next; 146 147 for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) { 148 KASSERT(m->queue == queue, 149 ("vm_contig_clean: page %p's queue is not %d", 150 m, queue)); 151 next = TAILQ_NEXT(m, pageq); 152 153 if (vm_page_sleep_busy(m, TRUE, "vpctw0")) 154 return (TRUE); 155 156 vm_page_test_dirty(m); 157 if (m->dirty) { 158 object = m->object; 159 if (object->type == OBJT_VNODE) { 160 vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY); 161 vm_object_page_clean(object, 0, 0, OBJPC_SYNC); 162 vn_unlock(((struct vnode *)object->handle)); 163 return (TRUE); 164 } else if (object->type == OBJT_SWAP || 165 object->type == OBJT_DEFAULT) { 166 m_tmp = m; 167 vm_pageout_flush(&m_tmp, 1, 0); 168 return (TRUE); 169 } 170 } 171 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 172 vm_page_cache(m); 173 } 174 return (FALSE); 175 } 176 177 /* 178 * vm_contig_pg_flush: 179 * 180 * Attempt to flush (count) pages from the given page queue. This may or 181 * may not succeed. Take up to <count> passes and delay 1/20 of a second 182 * between each pass. 183 */ 184 static void 185 vm_contig_pg_flush(int queue, int count) 186 { 187 while (count > 0) { 188 if (!vm_contig_pg_clean(queue)) 189 break; 190 --count; 191 } 192 } 193 /* 194 * vm_contig_pg_alloc: 195 * 196 * Allocate contiguous pages from the VM. This function does not 197 * map the allocated pages into the kernel map, otherwise it is 198 * impossible to make large allocations (i.e. >2G). 199 * 200 * Malloc()'s data structures have been used for collection of 201 * statistics and for allocations of less than a page. 202 */ 203 static int 204 vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high, 205 unsigned long alignment, unsigned long boundary, int mflags) 206 { 207 int i, start, pass; 208 vm_offset_t phys; 209 vm_page_t pga = vm_page_array; 210 vm_page_t m; 211 int pqtype; 212 213 size = round_page(size); 214 if (size == 0) 215 panic("vm_contig_pg_alloc: size must not be 0"); 216 if ((alignment & (alignment - 1)) != 0) 217 panic("vm_contig_pg_alloc: alignment must be a power of 2"); 218 if ((boundary & (boundary - 1)) != 0) 219 panic("vm_contig_pg_alloc: boundary must be a power of 2"); 220 221 start = 0; 222 crit_enter(); 223 224 /* 225 * Three passes (0, 1, 2). Each pass scans the VM page list for 226 * free or cached pages. After each pass if the entire scan failed 227 * we attempt to flush inactive pages and reset the start index back 228 * to 0. For passes 1 and 2 we also attempt to flush active pages. 229 */ 230 for (pass = 0; pass < 3; pass++) { 231 /* 232 * Find first page in array that is free, within range, 233 * aligned, and such that the boundary won't be crossed. 234 */ 235 again: 236 for (i = start; i < vmstats.v_page_count; i++) { 237 m = &pga[i]; 238 phys = VM_PAGE_TO_PHYS(m); 239 pqtype = m->queue - m->pc; 240 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 241 (phys >= low) && (phys < high) && 242 ((phys & (alignment - 1)) == 0) && 243 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) && 244 m->busy == 0 && m->wire_count == 0 && 245 m->hold_count == 0 && (m->flags & PG_BUSY) == 0 246 247 ) { 248 break; 249 } 250 } 251 252 /* 253 * If we cannot find the page in the given range, or we have 254 * crossed the boundary, call the vm_contig_pg_clean() function 255 * for flushing out the queues, and returning it back to 256 * normal state. 257 */ 258 if ((i == vmstats.v_page_count) || 259 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 260 261 /* 262 * Best effort flush of all inactive pages. 263 * This is quite quick, for now stall all 264 * callers, even if they've specified M_NOWAIT. 265 */ 266 vm_contig_pg_flush(PQ_INACTIVE, 267 vmstats.v_inactive_count); 268 269 crit_exit(); /* give interrupts a chance */ 270 crit_enter(); 271 272 /* 273 * Best effort flush of active pages. 274 * 275 * This is very, very slow. 276 * Only do this if the caller has agreed to M_WAITOK. 277 * 278 * If enough pages are flushed, we may succeed on 279 * next (final) pass, if not the caller, contigmalloc(), 280 * will fail in the index < 0 case. 281 */ 282 if (pass > 0 && (mflags & M_WAITOK)) { 283 vm_contig_pg_flush (PQ_ACTIVE, 284 vmstats.v_active_count); 285 } 286 287 /* 288 * We're already too high in the address space 289 * to succeed, reset to 0 for the next iteration. 290 */ 291 start = 0; 292 crit_exit(); /* give interrupts a chance */ 293 crit_enter(); 294 continue; /* next pass */ 295 } 296 start = i; 297 298 /* 299 * Check successive pages for contiguous and free. 300 * 301 * (still in critical section) 302 */ 303 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 304 m = &pga[i]; 305 pqtype = m->queue - m->pc; 306 if ((VM_PAGE_TO_PHYS(&m[0]) != 307 (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) || 308 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) || 309 m->busy || m->wire_count || 310 m->hold_count || (m->flags & PG_BUSY) 311 ) { 312 start++; 313 goto again; 314 } 315 } 316 317 /* 318 * (still in critical section) 319 */ 320 for (i = start; i < (start + size / PAGE_SIZE); i++) { 321 m = &pga[i]; 322 pqtype = m->queue - m->pc; 323 if (pqtype == PQ_CACHE) { 324 vm_page_busy(m); 325 vm_page_free(m); 326 } 327 KKASSERT(m->object == NULL); 328 vm_page_unqueue_nowakeup(m); 329 m->valid = VM_PAGE_BITS_ALL; 330 if (m->flags & PG_ZERO) 331 vm_page_zero_count--; 332 /* Don't clear the PG_ZERO flag, we'll need it later. */ 333 m->flags &= PG_ZERO; 334 KASSERT(m->dirty == 0, 335 ("vm_contig_pg_alloc: page %p was dirty", m)); 336 m->wire_count = 0; 337 m->busy = 0; 338 } 339 340 /* 341 * Our job is done, return the index page of vm_page_array. 342 */ 343 crit_exit(); 344 return (start); /* aka &pga[start] */ 345 } 346 347 /* 348 * Failed. 349 */ 350 crit_exit(); 351 return (-1); 352 } 353 354 /* 355 * vm_contig_pg_free: 356 * 357 * Remove pages previously allocated by vm_contig_pg_alloc, and 358 * assume all references to the pages have been removed, and that 359 * it is OK to add them back to the free list. 360 */ 361 void 362 vm_contig_pg_free(int start, u_long size) 363 { 364 vm_page_t pga = vm_page_array; 365 vm_page_t m; 366 int i; 367 368 size = round_page(size); 369 if (size == 0) 370 panic("vm_contig_pg_free: size must not be 0"); 371 372 for (i = start; i < (start + size / PAGE_SIZE); i++) { 373 m = &pga[i]; 374 vm_page_busy(m); 375 vm_page_free(m); 376 } 377 } 378 379 /* 380 * vm_contig_pg_kmap: 381 * 382 * Map previously allocated (vm_contig_pg_alloc) range of pages from 383 * vm_page_array[] into the KVA. Once mapped, the pages are part of 384 * the Kernel, and are to free'ed with kmem_free(&kernel_map, addr, size). 385 */ 386 vm_offset_t 387 vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) 388 { 389 vm_offset_t addr, tmp_addr; 390 vm_page_t pga = vm_page_array; 391 int i, count; 392 393 size = round_page(size); 394 if (size == 0) 395 panic("vm_contig_pg_kmap: size must not be 0"); 396 397 crit_enter(); 398 399 /* 400 * We've found a contiguous chunk that meets our requirements. 401 * Allocate KVM, and assign phys pages and return a kernel VM 402 * pointer. 403 */ 404 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 405 vm_map_lock(map); 406 if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr) != 407 KERN_SUCCESS) { 408 /* 409 * XXX We almost never run out of kernel virtual 410 * space, so we don't make the allocated memory 411 * above available. 412 */ 413 vm_map_unlock(map); 414 vm_map_entry_release(count); 415 crit_exit(); 416 return (0); 417 } 418 419 /* 420 * kernel_object maps 1:1 to kernel_map. 421 */ 422 vm_object_reference(&kernel_object); 423 vm_map_insert(map, &count, 424 &kernel_object, addr, 425 addr, addr + size, 426 VM_MAPTYPE_NORMAL, 427 VM_PROT_ALL, VM_PROT_ALL, 428 0); 429 vm_map_unlock(map); 430 vm_map_entry_release(count); 431 432 tmp_addr = addr; 433 for (i = start; i < (start + size / PAGE_SIZE); i++) { 434 vm_page_t m = &pga[i]; 435 vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr)); 436 if ((flags & M_ZERO) && !(m->flags & PG_ZERO)) 437 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 438 m->flags = 0; 439 tmp_addr += PAGE_SIZE; 440 } 441 vm_map_wire(map, addr, addr + size, 0); 442 443 crit_exit(); 444 return (addr); 445 } 446 447 void * 448 contigmalloc( 449 unsigned long size, /* should be size_t here and for malloc() */ 450 struct malloc_type *type, 451 int flags, 452 vm_paddr_t low, 453 vm_paddr_t high, 454 unsigned long alignment, 455 unsigned long boundary) 456 { 457 return contigmalloc_map(size, type, flags, low, high, alignment, 458 boundary, &kernel_map); 459 } 460 461 void * 462 contigmalloc_map( 463 unsigned long size, /* should be size_t here and for malloc() */ 464 struct malloc_type *type, 465 int flags, 466 vm_paddr_t low, 467 vm_paddr_t high, 468 unsigned long alignment, 469 unsigned long boundary, 470 vm_map_t map) 471 { 472 int index; 473 void *rv; 474 475 index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags); 476 if (index < 0) { 477 kprintf("contigmalloc_map: failed in index < 0 case!\n"); 478 return NULL; 479 } 480 481 rv = (void *)vm_contig_pg_kmap(index, size, map, flags); 482 if (!rv) 483 vm_contig_pg_free(index, size); 484 485 return rv; 486 } 487 488 void 489 contigfree(void *addr, unsigned long size, struct malloc_type *type) 490 { 491 kmem_free(&kernel_map, (vm_offset_t)addr, size); 492 } 493 494 vm_offset_t 495 vm_page_alloc_contig( 496 vm_offset_t size, 497 vm_paddr_t low, 498 vm_paddr_t high, 499 vm_offset_t alignment) 500 { 501 return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low, 502 high, alignment, 0ul, &kernel_map)); 503 } 504