1 /* 2 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Hiten Pandya <hmp@backplane.com>. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1991 Regents of the University of California. 37 * All rights reserved. 38 * 39 * This code is derived from software contributed to Berkeley by 40 * The Mach Operating System project at Carnegie-Mellon University. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 67 * $DragonFly: src/sys/vm/vm_contig.c,v 1.14 2005/06/03 22:58:09 dillon Exp $ 68 */ 69 70 /* 71 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 72 * All rights reserved. 73 * 74 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /* 98 * Contiguous memory allocation API. 99 */ 100 101 #include <sys/param.h> 102 #include <sys/systm.h> 103 #include <sys/malloc.h> 104 #include <sys/proc.h> 105 #include <sys/lock.h> 106 #include <sys/vmmeter.h> 107 #include <sys/vnode.h> 108 109 #include <vm/vm.h> 110 #include <vm/vm_param.h> 111 #include <vm/vm_kern.h> 112 #include <vm/pmap.h> 113 #include <vm/vm_map.h> 114 #include <vm/vm_object.h> 115 #include <vm/vm_page.h> 116 #include <vm/vm_pageout.h> 117 #include <vm/vm_pager.h> 118 #include <vm/vm_extern.h> 119 120 #include <sys/thread2.h> 121 #include <vm/vm_page2.h> 122 123 /* 124 * vm_contig_pg_clean: 125 * 126 * Do a thorough cleanup of the specified 'queue', which can be either 127 * PQ_ACTIVE or PQ_INACTIVE by doing a walkthrough. If the page is not 128 * marked dirty, it is shoved into the page cache, provided no one has 129 * currently aqcuired it, otherwise localized action per object type 130 * is taken for cleanup: 131 * 132 * In the OBJT_VNODE case, the whole page range is cleaned up 133 * using the vm_object_page_clean() routine, by specyfing a 134 * start and end of '0'. 135 * 136 * Otherwise if the object is of any other type, the generic 137 * pageout (daemon) flush routine is invoked. 138 * 139 * We must be in a critical section. 140 */ 141 static int 142 vm_contig_pg_clean(int queue) 143 { 144 vm_object_t object; 145 vm_page_t m, m_tmp, next; 146 147 for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) { 148 KASSERT(m->queue == queue, 149 ("vm_contig_clean: page %p's queue is not %d", 150 m, queue)); 151 next = TAILQ_NEXT(m, pageq); 152 153 if (vm_page_sleep_busy(m, TRUE, "vpctw0")) 154 return (TRUE); 155 156 vm_page_test_dirty(m); 157 if (m->dirty) { 158 object = m->object; 159 if (object->type == OBJT_VNODE) { 160 vn_lock(object->handle, 161 LK_EXCLUSIVE | LK_RETRY, curthread); 162 vm_object_page_clean(object, 0, 0, OBJPC_SYNC); 163 VOP_UNLOCK(((struct vnode *)object->handle), 164 0, curthread); 165 return (TRUE); 166 } else if (object->type == OBJT_SWAP || 167 object->type == OBJT_DEFAULT) { 168 m_tmp = m; 169 vm_pageout_flush(&m_tmp, 1, 0); 170 return (TRUE); 171 } 172 } 173 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 174 vm_page_cache(m); 175 } 176 return (FALSE); 177 } 178 179 /* 180 * vm_contig_pg_flush: 181 * 182 * Attempt to flush (count) pages from the given page queue. This may or 183 * may not succeed. Take up to <count> passes and delay 1/20 of a second 184 * between each pass. 185 */ 186 static void 187 vm_contig_pg_flush(int queue, int count) 188 { 189 while (count > 0) { 190 if (!vm_contig_pg_clean(queue)) 191 break; 192 --count; 193 } 194 } 195 /* 196 * vm_contig_pg_alloc: 197 * 198 * Allocate contiguous pages from the VM. This function does not 199 * map the allocated pages into the kernel map, otherwise it is 200 * impossible to make large allocations (i.e. >2G). 201 * 202 * Malloc()'s data structures have been used for collection of 203 * statistics and for allocations of less than a page. 204 */ 205 static int 206 vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high, 207 unsigned long alignment, unsigned long boundary, int mflags) 208 { 209 int i, start, pass; 210 vm_offset_t phys; 211 vm_page_t pga = vm_page_array; 212 vm_page_t m; 213 int pqtype; 214 215 size = round_page(size); 216 if (size == 0) 217 panic("vm_contig_pg_alloc: size must not be 0"); 218 if ((alignment & (alignment - 1)) != 0) 219 panic("vm_contig_pg_alloc: alignment must be a power of 2"); 220 if ((boundary & (boundary - 1)) != 0) 221 panic("vm_contig_pg_alloc: boundary must be a power of 2"); 222 223 start = 0; 224 crit_enter(); 225 226 /* 227 * Three passes (0, 1, 2). Each pass scans the VM page list for 228 * free or cached pages. After each pass if the entire scan failed 229 * we attempt to flush inactive pages and reset the start index back 230 * to 0. For passes 1 and 2 we also attempt to flush active pages. 231 */ 232 for (pass = 0; pass < 3; pass++) { 233 /* 234 * Find first page in array that is free, within range, 235 * aligned, and such that the boundary won't be crossed. 236 */ 237 again: 238 for (i = start; i < vmstats.v_page_count; i++) { 239 m = &pga[i]; 240 phys = VM_PAGE_TO_PHYS(m); 241 pqtype = m->queue - m->pc; 242 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 243 (phys >= low) && (phys < high) && 244 ((phys & (alignment - 1)) == 0) && 245 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) && 246 m->busy == 0 && m->wire_count == 0 && 247 m->hold_count == 0 && (m->flags & PG_BUSY) == 0 248 249 ) { 250 break; 251 } 252 } 253 254 /* 255 * If we cannot find the page in the given range, or we have 256 * crossed the boundary, call the vm_contig_pg_clean() function 257 * for flushing out the queues, and returning it back to 258 * normal state. 259 */ 260 if ((i == vmstats.v_page_count) || 261 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 262 263 /* 264 * Best effort flush of all inactive pages. 265 * This is quite quick, for now stall all 266 * callers, even if they've specified M_NOWAIT. 267 */ 268 vm_contig_pg_flush(PQ_INACTIVE, 269 vmstats.v_inactive_count); 270 271 crit_exit(); /* give interrupts a chance */ 272 crit_enter(); 273 274 /* 275 * Best effort flush of active pages. 276 * 277 * This is very, very slow. 278 * Only do this if the caller has agreed to M_WAITOK. 279 * 280 * If enough pages are flushed, we may succeed on 281 * next (final) pass, if not the caller, contigmalloc(), 282 * will fail in the index < 0 case. 283 */ 284 if (pass > 0 && (mflags & M_WAITOK)) { 285 vm_contig_pg_flush (PQ_ACTIVE, 286 vmstats.v_active_count); 287 } 288 289 /* 290 * We're already too high in the address space 291 * to succeed, reset to 0 for the next iteration. 292 */ 293 start = 0; 294 crit_exit(); /* give interrupts a chance */ 295 crit_enter(); 296 continue; /* next pass */ 297 } 298 start = i; 299 300 /* 301 * Check successive pages for contiguous and free. 302 * 303 * (still in critical section) 304 */ 305 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 306 m = &pga[i]; 307 pqtype = m->queue - m->pc; 308 if ((VM_PAGE_TO_PHYS(&m[0]) != 309 (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) || 310 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) || 311 m->busy || m->wire_count || 312 m->hold_count || (m->flags & PG_BUSY) 313 ) { 314 start++; 315 goto again; 316 } 317 } 318 319 /* 320 * (still in critical section) 321 */ 322 for (i = start; i < (start + size / PAGE_SIZE); i++) { 323 m = &pga[i]; 324 pqtype = m->queue - m->pc; 325 if (pqtype == PQ_CACHE) { 326 vm_page_busy(m); 327 vm_page_free(m); 328 } 329 KKASSERT(m->object == NULL); 330 vm_page_unqueue_nowakeup(m); 331 m->valid = VM_PAGE_BITS_ALL; 332 if (m->flags & PG_ZERO) 333 vm_page_zero_count--; 334 /* Don't clear the PG_ZERO flag, we'll need it later. */ 335 m->flags &= PG_ZERO; 336 KASSERT(m->dirty == 0, 337 ("vm_contig_pg_alloc: page %p was dirty", m)); 338 m->wire_count = 0; 339 m->busy = 0; 340 } 341 342 /* 343 * Our job is done, return the index page of vm_page_array. 344 */ 345 crit_exit(); 346 return (start); /* aka &pga[start] */ 347 } 348 349 /* 350 * Failed. 351 */ 352 crit_exit(); 353 return (-1); 354 } 355 356 /* 357 * vm_contig_pg_free: 358 * 359 * Remove pages previously allocated by vm_contig_pg_alloc, and 360 * assume all references to the pages have been removed, and that 361 * it is OK to add them back to the free list. 362 */ 363 void 364 vm_contig_pg_free(int start, u_long size) 365 { 366 vm_page_t pga = vm_page_array; 367 vm_page_t m; 368 int i; 369 370 size = round_page(size); 371 if (size == 0) 372 panic("vm_contig_pg_free: size must not be 0"); 373 374 for (i = start; i < (start + size / PAGE_SIZE); i++) { 375 m = &pga[i]; 376 vm_page_busy(m); 377 vm_page_free(m); 378 } 379 } 380 381 /* 382 * vm_contig_pg_kmap: 383 * 384 * Map previously allocated (vm_contig_pg_alloc) range of pages from 385 * vm_page_array[] into the KVA. Once mapped, the pages are part of 386 * the Kernel, and are to free'ed with kmem_free(kernel_map, addr, size). 387 */ 388 vm_offset_t 389 vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) 390 { 391 vm_offset_t addr, tmp_addr; 392 vm_page_t pga = vm_page_array; 393 int i, count; 394 395 size = round_page(size); 396 if (size == 0) 397 panic("vm_contig_pg_kmap: size must not be 0"); 398 399 crit_enter(); 400 401 /* 402 * We've found a contiguous chunk that meets our requirements. 403 * Allocate KVM, and assign phys pages and return a kernel VM 404 * pointer. 405 */ 406 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 407 vm_map_lock(map); 408 if (vm_map_findspace(map, vm_map_min(map), size, 1, &addr) != 409 KERN_SUCCESS) { 410 /* 411 * XXX We almost never run out of kernel virtual 412 * space, so we don't make the allocated memory 413 * above available. 414 */ 415 vm_map_unlock(map); 416 vm_map_entry_release(count); 417 crit_exit(); 418 return (0); 419 } 420 vm_object_reference(kernel_object); 421 vm_map_insert(map, &count, 422 kernel_object, addr - VM_MIN_KERNEL_ADDRESS, 423 addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); 424 vm_map_unlock(map); 425 vm_map_entry_release(count); 426 427 tmp_addr = addr; 428 for (i = start; i < (start + size / PAGE_SIZE); i++) { 429 vm_page_t m = &pga[i]; 430 vm_page_insert(m, kernel_object, 431 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); 432 if ((flags & M_ZERO) && !(m->flags & PG_ZERO)) 433 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 434 m->flags = 0; 435 tmp_addr += PAGE_SIZE; 436 } 437 vm_map_wire(map, addr, addr + size, 0); 438 439 crit_exit(); 440 return (addr); 441 } 442 443 void * 444 contigmalloc( 445 unsigned long size, /* should be size_t here and for malloc() */ 446 struct malloc_type *type, 447 int flags, 448 vm_paddr_t low, 449 vm_paddr_t high, 450 unsigned long alignment, 451 unsigned long boundary) 452 { 453 return contigmalloc_map(size, type, flags, low, high, alignment, 454 boundary, kernel_map); 455 } 456 457 void * 458 contigmalloc_map( 459 unsigned long size, /* should be size_t here and for malloc() */ 460 struct malloc_type *type, 461 int flags, 462 vm_paddr_t low, 463 vm_paddr_t high, 464 unsigned long alignment, 465 unsigned long boundary, 466 vm_map_t map) 467 { 468 int index; 469 void *rv; 470 471 index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags); 472 if (index < 0) { 473 printf("contigmalloc_map: failed in index < 0 case!"); 474 return NULL; 475 } 476 477 rv = (void *)vm_contig_pg_kmap(index, size, map, flags); 478 if (!rv) 479 vm_contig_pg_free(index, size); 480 481 return rv; 482 } 483 484 void 485 contigfree(void *addr, unsigned long size, struct malloc_type *type) 486 { 487 kmem_free(kernel_map, (vm_offset_t)addr, size); 488 } 489 490 vm_offset_t 491 vm_page_alloc_contig( 492 vm_offset_t size, 493 vm_paddr_t low, 494 vm_paddr_t high, 495 vm_offset_t alignment) 496 { 497 return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low, 498 high, alignment, 0ul, kernel_map)); 499 } 500