/*
 * Copyright (c) 1997, 1998 John S. Dyson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 *
 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $
 *
 * Copyright (c) 2003-2017,2019 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header");

#define ZONE_ERROR_INVALID	0
#define ZONE_ERROR_NOTFREE	1
#define ZONE_ERROR_ALREADYFREE	2

#define ZONE_ROUNDING		32

#define ZENTRY_FREE		0x12342378

long zone_burst = 128;

static void *zget(vm_zone_t z, int *tryagainp);
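
/*
 * Illustrative sketch only (not compiled): how a free item is threaded onto
 * a free list.  The first pointer-sized word of a free item links to the
 * next free item, and under INVARIANTS the second word holds the
 * ZENTRY_FREE magic so zalloc()/zfree() can detect corrupted or doubly
 * freed entries.  The structure name "example_item" is hypothetical.
 */
#if 0
struct example_item {
        void    *zfree_next;    /* ((void **)item)[0]: next item on free list */
        void    *zfree_magic;   /* ((void **)item)[1]: ZENTRY_FREE when free */
        /*
         * Everything past the first two longwords is left untouched by the
         * zone code and therefore stays stable across alloc/free cycles.
         */
};
#endif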
/*
 * Return an item from the specified zone.  This function is non-blocking for
 * ZONE_INTERRUPT zones.
 *
 * No requirements.
 */
void *
zalloc(vm_zone_t z)
{
        globaldata_t gd = mycpu;
        vm_zpcpu_t *zpcpu;
        void *item;
        int tryagain;
        long n;

#ifdef INVARIANTS
        if (z == NULL)
                zerror(ZONE_ERROR_INVALID);
#endif
        zpcpu = &z->zpcpu[gd->gd_cpuid];
retry:
        /*
         * Avoid spinlock contention by allocating from a per-cpu queue
         */
        if (zpcpu->zfreecnt > 0) {
                crit_enter_gd(gd);
                if (zpcpu->zfreecnt > 0) {
                        item = zpcpu->zitems;
#ifdef INVARIANTS
                        KASSERT(item != NULL,
                                ("zitems_pcpu unexpectedly NULL"));
                        if (((void **)item)[1] != (void *)ZENTRY_FREE)
                                zerror(ZONE_ERROR_NOTFREE);
                        ((void **)item)[1] = NULL;
#endif
                        zpcpu->zitems = ((void **)item)[0];
                        --zpcpu->zfreecnt;
                        ++zpcpu->znalloc;
                        crit_exit_gd(gd);

                        return item;
                }
                crit_exit_gd(gd);
        }

        /*
         * Per-zone spinlock for the remainder.  Always load at least one
         * item.
         */
        spin_lock(&z->zspin);
        if (z->zfreecnt > z->zfreemin) {
                n = zone_burst;
                do {
                        item = z->zitems;
#ifdef INVARIANTS
                        KASSERT(item != NULL, ("zitems unexpectedly NULL"));
                        if (((void **)item)[1] != (void *)ZENTRY_FREE)
                                zerror(ZONE_ERROR_NOTFREE);
#endif
                        z->zitems = ((void **)item)[0];
                        --z->zfreecnt;
                        ((void **)item)[0] = zpcpu->zitems;
                        zpcpu->zitems = item;
                        ++zpcpu->zfreecnt;
                } while (--n > 0 && z->zfreecnt > z->zfreemin);
                spin_unlock(&z->zspin);
                goto retry;
        } else {
                spin_unlock(&z->zspin);
                tryagain = 0;
                item = zget(z, &tryagain);
                if (tryagain)
                        goto retry;

                /*
                 * PANICFAIL allows the caller to assume that the zalloc()
                 * will always succeed.  If it doesn't, we panic here.
                 */
                if (item == NULL && (z->zflags & ZONE_PANICFAIL))
                        panic("zalloc(%s) failed", z->zname);
        }
        return item;
}

/*
 * Free an item to the specified zone.
 *
 * No requirements.
 */
void
zfree(vm_zone_t z, void *item)
{
        globaldata_t gd = mycpu;
        vm_zpcpu_t *zpcpu;
        void *tail_item;
        long count;
        long zmax;

        zpcpu = &z->zpcpu[gd->gd_cpuid];

        /*
         * Avoid spinlock contention by freeing into a per-cpu queue
         */
        zmax = z->zmax_pcpu;
        if (zmax < 1024)
                zmax = 1024;

        /*
         * Add to pcpu cache
         */
        crit_enter_gd(gd);
        ((void **)item)[0] = zpcpu->zitems;
#ifdef INVARIANTS
        if (((void **)item)[1] == (void *)ZENTRY_FREE)
                zerror(ZONE_ERROR_ALREADYFREE);
        ((void **)item)[1] = (void *)ZENTRY_FREE;
#endif
        zpcpu->zitems = item;
        ++zpcpu->zfreecnt;

        if (zpcpu->zfreecnt < zmax) {
                crit_exit_gd(gd);
                return;
        }

        /*
         * Hysteresis: move zmax (recalculated below) items back to the
         * zone's global pool.
         */
        zmax = zmax / 2;
        if (zmax > zone_burst)
                zmax = zone_burst;
        tail_item = item;
        count = 1;

        while (count < zmax) {
                tail_item = ((void **)tail_item)[0];
                ++count;
        }
        zpcpu->zitems = ((void **)tail_item)[0];
        zpcpu->zfreecnt -= count;

        /*
         * Per-zone spinlock for the remainder.
         *
         * Also implement hysteresis by freeing a number of pcpu
         * entries.
         */
        spin_lock(&z->zspin);
        ((void **)tail_item)[0] = z->zitems;
        z->zitems = item;
        z->zfreecnt += count;
        spin_unlock(&z->zspin);

        crit_exit_gd(gd);
}

/*
 * This file comprises a very simple zone allocator.  This is used
 * in lieu of the malloc allocator, where needed or more optimal.
 *
 * Note that the initial implementation of this had coloring, and it
 * yielded absolutely no improvement (actually a performance degradation).
 *
 * Note also that the zones are type stable.  The only restriction is
 * that the first two longwords of a data structure can be changed
 * between allocations.  Any data that must be stable between allocations
 * must reside in areas after the first two longwords.
 *
 * zinitna, zinit, zbootinit are the initialization routines.
 * zalloc and zfree are the allocation/free routines.
 */
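
/*
 * Illustrative sketch only (not compiled): what a typical consumer of this
 * allocator looks like.  The zone is created once at initialization time
 * and items are then allocated and freed from it.  The names "foo",
 * "struct foo" and "foo_zone" below are hypothetical.
 */
#if 0
static vm_zone_t foo_zone;

static void
foo_init(void)
{
        /*
         * Without ZONE_INTERRUPT the nentries argument does not limit the
         * zone, so 0 is passed here.
         */
        foo_zone = zinit("foo", sizeof(struct foo), 0, 0);
}

static struct foo *
foo_alloc(void)
{
        /* May return NULL unless the zone was created with ZONE_PANICFAIL */
        return zalloc(foo_zone);
}

static void
foo_release(struct foo *fp)
{
        zfree(foo_zone, fp);
}
#endif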

LIST_HEAD(zlist, vm_zone) zlist = LIST_HEAD_INITIALIZER(zlist);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static vm_pindex_t zone_kmem_pages, zone_kern_pages;
static long zone_kmem_kvaspace;

/*
 * Create a zone, but don't allocate the zone structure.  If the
 * zone had been previously created by the zone boot code, initialize
 * various parts of the zone code.
 *
 * If waits are not allowed during allocation (e.g. during interrupt
 * code), allocate the kernel virtual space up front and fill in
 * pages only when needed.
 *
 * Arguments:
 *	z		pointer to zone structure.
 *	name		name of zone.
 *	size		size of zone entries.
 *	nentries	number of zone entries allocated (ZONE_INTERRUPT only).
 *	flags		ZONE_INTERRUPT -- items can be allocated at interrupt time.
 *
 * Note that when using ZONE_INTERRUPT, the size of the zone is limited
 * by the nentries argument.  The size of the memory allocatable is
 * unlimited if ZONE_INTERRUPT is not set.
 *
 * No requirements.
 */
int
zinitna(vm_zone_t z, char *name, size_t size, long nentries, uint32_t flags)
{
        size_t totsize;

        /*
         * Only zones created with zinit() are destroyable.
         */
        if (z->zflags & ZONE_DESTROYABLE)
                panic("zinitna: can't create destroyable zone");

        /*
         * NOTE: We can only adjust zsize if we previously did not
         *	 use zbootinit().
         */
        if ((z->zflags & ZONE_BOOT) == 0) {
                z->zsize = roundup2(size, ZONE_ROUNDING);
                spin_init(&z->zspin, "zinitna");
                lockinit(&z->zgetlk, "zgetlk", 0, LK_CANRECURSE);

                z->zfreecnt = 0;
                z->ztotal = 0;
                z->zmax = 0;
                z->zname = name;
                z->zitems = NULL;

                lwkt_gettoken(&vm_token);
                LIST_INSERT_HEAD(&zlist, z, zlink);
                lwkt_reltoken(&vm_token);

                bzero(z->zpcpu, sizeof(z->zpcpu));
        }

        z->zkmvec = NULL;
        z->zkmcur = z->zkmmax = 0;
        z->zflags |= flags;

        /*
         * If we cannot wait, allocate KVA space up front, and we will fill
         * in pages as needed.  This is particularly required when creating
         * an allocation space for map entries in kernel_map, because we
         * do not want to go into a recursion deadlock with
         * vm_map_entry_reserve().
         */
        if (z->zflags & ZONE_INTERRUPT) {
                totsize = round_page((size_t)z->zsize * nentries);
                atomic_add_long(&zone_kmem_kvaspace, totsize);

                z->zkva = kmem_alloc_pageable(&kernel_map, totsize,
                                              VM_SUBSYS_ZALLOC);
                if (z->zkva == 0) {
                        LIST_REMOVE(z, zlink);
                        return 0;
                }

                z->zpagemax = totsize / PAGE_SIZE;
                z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT |
                                VM_ALLOC_NORMAL | VM_ALLOC_RETRY;
                z->zmax += nentries;

                /*
                 * Set reasonable pcpu cache bounds.  Low-memory systems
                 * might try to cache too little, large-memory systems
                 * might try to cache more than necessary.
                 *
                 * In particular, pvzone can wind up being excessive and
                 * waste memory unnecessarily.
                 */
                z->zmax_pcpu = z->zmax / ncpus / 64;
                if (z->zmax_pcpu < 1024)
                        z->zmax_pcpu = 1024;
                if (z->zmax_pcpu * z->zsize > 16*1024*1024)
                        z->zmax_pcpu = 16*1024*1024 / z->zsize;
        } else {
                z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM;
                z->zmax = 0;
                z->zmax_pcpu = 8192;
        }

        if (z->zsize > PAGE_SIZE)
                z->zfreemin = 1;
        else
                z->zfreemin = PAGE_SIZE / z->zsize;

        z->zpagecount = 0;

        /*
         * Reduce kernel_map spam by allocating in chunks.
         */
        z->zalloc = ZONE_MAXPGLOAD;

        /*
         * Populate the interrupt zone at creation time rather than
         * on first allocation, as this is a potentially long operation.
         */
        if (z->zflags & ZONE_INTERRUPT) {
                void *buf;

                buf = zget(z, NULL);
                if (buf)
                        zfree(z, buf);
        }

        return 1;
}
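
/*
 * Illustrative sketch only (not compiled): how an interrupt-safe zone is
 * typically set up with zinitna().  The caller supplies the zone structure
 * itself and an upper bound on the number of entries; the KVA for that
 * bound is reserved immediately and pages are filled in later by zget().
 * The names "bar", "struct bar" and "bar_zone_store" are hypothetical.
 */
#if 0
static struct vm_zone bar_zone_store;

static void
bar_zone_init(long nentries)
{
        bzero(&bar_zone_store, sizeof(bar_zone_store));
        if (zinitna(&bar_zone_store, "bar", sizeof(struct bar),
                    nentries, ZONE_INTERRUPT) == 0)
                panic("bar_zone_init: unable to reserve KVA");
}
#endif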

/*
 * Subroutine same as zinitna, except the zone data structure is allocated
 * automatically by kmalloc.  This routine should normally be used, except
 * in certain tricky startup conditions in the VM system -- then
 * zbootinit and zinitna can be used.  Zinit is the standard zone
 * initialization call.
 *
 * No requirements.
 */
vm_zone_t
zinit(char *name, size_t size, long nentries, uint32_t flags)
{
        vm_zone_t z;

        z = (vm_zone_t)kmalloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT);
        if (z == NULL)
                return NULL;

        z->zflags = 0;
        if (zinitna(z, name, size, nentries, flags & ~ZONE_DESTROYABLE) == 0) {
                kfree(z, M_ZONE);
                return NULL;
        }

        if (flags & ZONE_DESTROYABLE)
                z->zflags |= ZONE_DESTROYABLE;

        return z;
}

/*
 * Initialize a zone before the system is fully up.  This routine should
 * only be called before full VM startup.
 *
 * Called from the low level boot code only.
 */
void
zbootinit(vm_zone_t z, char *name, size_t size, void *item, long nitems)
{
        long i;

        spin_init(&z->zspin, "zbootinit");
        lockinit(&z->zgetlk, "zgetlk", 0, LK_CANRECURSE);
        bzero(z->zpcpu, sizeof(z->zpcpu));
        z->zname = name;
        z->zsize = size;
        z->zpagemax = 0;
        z->zflags = ZONE_BOOT;
        z->zfreemin = 0;
        z->zallocflag = 0;
        z->zpagecount = 0;
        z->zalloc = 0;

        bzero(item, (size_t)nitems * z->zsize);
        z->zitems = NULL;
        for (i = 0; i < nitems; i++) {
                ((void **)item)[0] = z->zitems;
#ifdef INVARIANTS
                ((void **)item)[1] = (void *)ZENTRY_FREE;
#endif
                z->zitems = item;
                item = (uint8_t *)item + z->zsize;
        }
        z->zfreecnt = nitems;
        z->zmax = nitems;
        z->ztotal = nitems;

        lwkt_gettoken(&vm_token);
        LIST_INSERT_HEAD(&zlist, z, zlink);
        lwkt_reltoken(&vm_token);
}
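
/*
 * Illustrative sketch only (not compiled): the early-boot pattern that
 * zbootinit() supports.  A statically allocated item array carries the
 * zone until the VM system is up, after which zinitna() is called on the
 * same zone (the ZONE_BOOT flag makes it skip re-initialization) to enable
 * dynamic refills.  The names "baz", "struct baz", "baz_zone_store",
 * "baz_bootstrap" and NBAZ_BOOT are hypothetical.
 */
#if 0
static struct vm_zone baz_zone_store;
static struct baz baz_bootstrap[NBAZ_BOOT];

static void
baz_zone_bootinit(void)
{
        zbootinit(&baz_zone_store, "baz", sizeof(struct baz),
                  baz_bootstrap, NBAZ_BOOT);
}

static void
baz_zone_init_finish(void)
{
        /* Safe to call later; ZONE_BOOT was already set by zbootinit(). */
        zinitna(&baz_zone_store, "baz", sizeof(struct baz), 0, 0);
}
#endif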

/*
 * Release all resources owned by zone created with zinit().
 *
 * No requirements.
 */
void
zdestroy(vm_zone_t z)
{
        vm_pindex_t i;

        if (z == NULL)
                panic("zdestroy: null zone");
        if ((z->zflags & ZONE_DESTROYABLE) == 0)
                panic("zdestroy: undestroyable zone");

        lwkt_gettoken(&vm_token);
        LIST_REMOVE(z, zlink);
        lwkt_reltoken(&vm_token);

        /*
         * Release virtual mappings, physical memory and update sysctl stats.
         */
        KKASSERT((z->zflags & ZONE_INTERRUPT) == 0);
        for (i = 0; i < z->zkmcur; i++) {
                kmem_free(&kernel_map, z->zkmvec[i],
                          (size_t)z->zalloc * PAGE_SIZE);
                atomic_subtract_long(&zone_kern_pages, z->zalloc);
        }
        if (z->zkmvec != NULL)
                kfree(z->zkmvec, M_ZONE);

        spin_uninit(&z->zspin);
        kfree(z, M_ZONE);
}

/*
 * void *zalloc(vm_zone_t zone) --
 *	Returns an item from a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 *
 * void zfree(vm_zone_t zone, void *item) --
 *	Frees an item back to a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 */

/*
 * Internal zone routine.  Not to be called from external (non vm_zone) code.
 *
 * This function may return NULL.
 *
 * No requirements.
 */
static void *
zget(vm_zone_t z, int *tryagainp)
{
        vm_page_t pgs[ZONE_MAXPGLOAD];
        vm_page_t m;
        long nitems;
        long savezpc;
        size_t nbytes;
        size_t noffset;
        void *item;
        vm_pindex_t npages;
        vm_pindex_t nalloc;
        vm_pindex_t i;

        if (z == NULL)
                panic("zget: null zone");

        /*
         * We need an encompassing per-zone lock for zget() refills.
         *
         * Without this we wind up locking on the vm_map inside kmem_alloc*()
         * prior to any entries actually being added to the zone, potentially
         * exhausting the per-cpu cache of vm_map_entry's when multiple threads
         * are blocked on the same lock on the same cpu.
         */
        if ((z->zflags & ZONE_INTERRUPT) == 0) {
                if (lockmgr(&z->zgetlk, LK_EXCLUSIVE | LK_SLEEPFAIL)) {
                        *tryagainp = 1;
                        return NULL;
                }
        }

        if (z->zflags & ZONE_INTERRUPT) {
                /*
                 * Interrupt zones do not mess with the kernel_map, they
                 * simply populate an existing mapping.
                 *
                 * First allocate as many pages as we can, stopping at
                 * our limit or if the page allocation fails.  Try to
                 * avoid exhausting the interrupt free minimum by backing
                 * off to normal page allocations after a certain point.
                 */
                for (i = 0; i < ZONE_MAXPGLOAD && i < z->zalloc; ++i) {
                        if (i < 4) {
                                m = vm_page_alloc(NULL,
                                                  mycpu->gd_rand_incr++,
                                                  z->zallocflag);
                        } else {
                                m = vm_page_alloc(NULL,
                                                  mycpu->gd_rand_incr++,
                                                  VM_ALLOC_NORMAL |
                                                  VM_ALLOC_SYSTEM);
                        }
                        if (m == NULL)
                                break;
                        pgs[i] = m;
                }
                nalloc = i;

                /*
                 * Account for the pages.
                 *
                 * NOTE! Do not allow overlap with a prior page as it
                 *	 may still be undergoing allocation on another
                 *	 cpu.
                 */
                spin_lock(&z->zspin);
                noffset = (size_t)z->zpagecount * PAGE_SIZE;
                /* noffset -= noffset % z->zsize; */
                savezpc = z->zpagecount;

                /*
                 * Track total memory use and kmem offset.
                 */
                if (z->zpagecount + nalloc > z->zpagemax)
                        z->zpagecount = z->zpagemax;
                else
                        z->zpagecount += nalloc;

                item = (char *)z->zkva + noffset;
                npages = z->zpagecount - savezpc;
                nitems = ((size_t)(savezpc + npages) * PAGE_SIZE - noffset) /
                         z->zsize;
                atomic_add_long(&zone_kmem_pages, npages);
                spin_unlock(&z->zspin);

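                /*
                 * Worked example of the item count above (illustrative
                 * numbers only): with zsize = 192, PAGE_SIZE = 4096,
                 * savezpc = 10 and nalloc = npages = 4, noffset is 40960
                 * and nitems = (14 * 4096 - 40960) / 192 = 16384 / 192 = 85.
                 * Items may straddle page boundaries because the new pages
                 * are virtually contiguous in the reserved KVA.
                 */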
                /*
                 * Enter the pages into the reserved KVA space.
                 */
                for (i = 0; i < npages; ++i) {
                        vm_offset_t zkva;

                        m = pgs[i];
                        KKASSERT(m->queue == PQ_NONE);
                        m->valid = VM_PAGE_BITS_ALL;
                        vm_page_wire(m);
                        vm_page_wakeup(m);

                        zkva = z->zkva + (size_t)(savezpc + i) * PAGE_SIZE;
                        pmap_kenter(zkva, VM_PAGE_TO_PHYS(m));
                        bzero((void *)zkva, PAGE_SIZE);
                }
                for (i = npages; i < nalloc; ++i) {
                        m = pgs[i];
                        vm_page_free(m);
                }
        } else if (z->zflags & ZONE_SPECIAL) {
                /*
                 * The special zone is the one used for vm_map_entry_t's.
                 * We have to avoid an infinite recursion in
                 * vm_map_entry_reserve() by using vm_map_entry_kreserve()
                 * instead.  The map entries are pre-reserved by the kernel
                 * by vm_map_entry_reserve_cpu_init().
                 */
                nbytes = (size_t)z->zalloc * PAGE_SIZE;
                z->zpagecount += z->zalloc;	/* Track total memory use */

                item = (void *)kmem_alloc3(&kernel_map, nbytes,
                                           VM_SUBSYS_ZALLOC, KM_KRESERVE);

                /* note: z might be modified due to blocking */
                if (item != NULL) {
                        atomic_add_long(&zone_kern_pages, z->zalloc);
                        bzero(item, nbytes);
                } else {
                        nbytes = 0;
                }
                nitems = nbytes / z->zsize;
        } else {
                /*
                 * Otherwise allocate KVA from the kernel_map.
                 */
                nbytes = (size_t)z->zalloc * PAGE_SIZE;
                z->zpagecount += z->zalloc;	/* Track total memory use */

                item = (void *)kmem_alloc3(&kernel_map, nbytes,
                                           VM_SUBSYS_ZALLOC, 0);

                /* note: z might be modified due to blocking */
                if (item != NULL) {
                        atomic_add_long(&zone_kern_pages, z->zalloc);
                        bzero(item, nbytes);

                        if (z->zflags & ZONE_DESTROYABLE) {
                                if (z->zkmcur == z->zkmmax) {
                                        z->zkmmax =
                                            z->zkmmax==0 ? 1 : z->zkmmax*2;
                                        z->zkmvec = krealloc(z->zkmvec,
                                            z->zkmmax * sizeof(z->zkmvec[0]),
                                            M_ZONE, M_WAITOK);
                                }
                                z->zkmvec[z->zkmcur++] = (vm_offset_t)item;
                        }
                } else {
                        nbytes = 0;
                }
                nitems = nbytes / z->zsize;
        }

        /*
         * Enter any new pages into the pool, reserving one, or get the
         * item from the existing pool.
         */
        spin_lock(&z->zspin);
        z->ztotal += nitems;

        /*
         * The zone code may need to allocate kernel memory, which can
         * recurse zget() infinitely if we do not handle it properly.
         * We deal with this by directly repopulating the pcpu vm_map_entry
         * cache.
         */
        if (nitems > 1 && (z->zflags & ZONE_SPECIAL)) {
                struct globaldata *gd = mycpu;
                vm_map_entry_t entry;

                /*
                 * Make sure we have enough structures in gd_vme_base to handle
                 * the reservation request.
                 *
                 * The critical section protects access to the per-cpu gd.
                 */
                crit_enter();
                while (gd->gd_vme_avail < 2 && nitems > 1) {
                        entry = item;
                        MAPENT_FREELIST(entry) = gd->gd_vme_base;
                        gd->gd_vme_base = entry;
                        atomic_add_int(&gd->gd_vme_avail, 1);
                        item = (uint8_t *)item + z->zsize;
                        --nitems;
                }
                crit_exit();
        }

        if (nitems != 0) {
                /*
                 * Enter pages into the pool saving one for immediate
                 * allocation.
                 */
                nitems -= 1;
                for (i = 0; i < nitems; i++) {
                        ((void **)item)[0] = z->zitems;
#ifdef INVARIANTS
                        ((void **)item)[1] = (void *)ZENTRY_FREE;
#endif
                        z->zitems = item;
                        item = (uint8_t *)item + z->zsize;
                }
                z->zfreecnt += nitems;
                ++z->znalloc;
        } else if (z->zfreecnt > 0) {
                /*
                 * Get an item from the existing pool.
                 */
                item = z->zitems;
                z->zitems = ((void **)item)[0];
#ifdef INVARIANTS
                if (((void **)item)[1] != (void *)ZENTRY_FREE)
                        zerror(ZONE_ERROR_NOTFREE);
                ((void **)item)[1] = NULL;
#endif
                --z->zfreecnt;
                ++z->znalloc;
        } else {
                /*
                 * No items available.
                 */
                item = NULL;
        }
        spin_unlock(&z->zspin);

        /*
         * Release the per-zone global lock after the items have been
         * added.  Any other threads blocked in zget()'s zgetlk will
         * then retry rather than potentially exhaust the per-cpu cache
         * of vm_map_entry structures doing their own kmem_alloc() calls,
         * or allocating excessive amounts of space unnecessarily.
         */
        if ((z->zflags & ZONE_INTERRUPT) == 0)
                lockmgr(&z->zgetlk, LK_RELEASE);

        return item;
}

/*
 * No requirements.
 */
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
        vm_zone_t curzone;
        char tmpbuf[128];
        char tmpname[14];
        int error = 0;

        ksnprintf(tmpbuf, sizeof(tmpbuf),
                  "\nITEM            SIZE     LIMIT    USED    FREE  REQUESTS\n");
        error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf));
        if (error)
                return (error);

        lwkt_gettoken(&vm_token);
        LIST_FOREACH(curzone, &zlist, zlink) {
                size_t i;
                size_t len;
                int offset;
                long freecnt;
                long znalloc;
                int n;

                len = strlen(curzone->zname);
                if (len >= (sizeof(tmpname) - 1))
                        len = (sizeof(tmpname) - 1);
                for (i = 0; i < sizeof(tmpname) - 1; i++)
                        tmpname[i] = ' ';
                tmpname[i] = 0;
                memcpy(tmpname, curzone->zname, len);
                tmpname[len] = ':';
                offset = 0;
                if (curzone == LIST_FIRST(&zlist)) {
                        offset = 1;
                        tmpbuf[0] = '\n';
                }
                freecnt = curzone->zfreecnt;
                znalloc = curzone->znalloc;
                for (n = 0; n < ncpus; ++n) {
                        freecnt += curzone->zpcpu[n].zfreecnt;
                        znalloc += curzone->zpcpu[n].znalloc;
                }

                ksnprintf(tmpbuf + offset, sizeof(tmpbuf) - offset,
                          "%s %6.6lu, %8.8lu, %6.6lu, %6.6lu, %8.8lu\n",
                          tmpname, curzone->zsize, curzone->zmax,
                          (curzone->ztotal - freecnt),
                          freecnt, znalloc);

                len = strlen((char *)tmpbuf);
                if (LIST_NEXT(curzone, zlink) == NULL)
                        tmpbuf[len - 1] = 0;

                error = SYSCTL_OUT(req, tmpbuf, len);

                if (error)
                        break;
        }
        lwkt_reltoken(&vm_token);
        return (error);
}

#if defined(INVARIANTS)

/*
 * Debugging only.
 */
void
zerror(int error)
{
        char *msg;

        switch (error) {
        case ZONE_ERROR_INVALID:
                msg = "zone: invalid zone";
                break;
        case ZONE_ERROR_NOTFREE:
                msg = "zone: entry not free";
                break;
        case ZONE_ERROR_ALREADYFREE:
                msg = "zone: freeing free entry";
                break;
        default:
                msg = "zone: invalid error";
                break;
        }
        panic("%s", msg);
}

#endif

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, \
           NULL, 0, sysctl_vm_zone, "A", "Zone Info");

SYSCTL_LONG(_vm, OID_AUTO, zone_kmem_pages,
            CTLFLAG_RD, &zone_kmem_pages, 0,
            "Number of interrupt safe pages allocated by zone");
SYSCTL_LONG(_vm, OID_AUTO, zone_burst,
            CTLFLAG_RW, &zone_burst, 0,
            "Burst from depot to pcpu cache");
SYSCTL_LONG(_vm, OID_AUTO, zone_kmem_kvaspace,
            CTLFLAG_RD, &zone_kmem_kvaspace, 0,
            "KVA space allocated by zone");
SYSCTL_LONG(_vm, OID_AUTO, zone_kern_pages,
            CTLFLAG_RD, &zone_kern_pages, 0,
            "Number of non-interrupt safe pages allocated by zone");