1 /* 2 * Copyright (c) 1997, 1998 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Absolutely no warranty of function or purpose is made by the author 12 * John S. Dyson. 13 * 14 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $ 15 * $DragonFly: src/sys/vm/vm_zone.c,v 1.28 2008/01/23 17:35:48 nth Exp $ 16 */ 17 18 #include <sys/param.h> 19 #include <sys/queue.h> 20 #include <sys/systm.h> 21 #include <sys/kernel.h> 22 #include <sys/lock.h> 23 #include <sys/malloc.h> 24 #include <sys/sysctl.h> 25 #include <sys/vmmeter.h> 26 27 #include <vm/vm.h> 28 #include <vm/vm_object.h> 29 #include <vm/vm_page.h> 30 #include <vm/vm_map.h> 31 #include <vm/vm_kern.h> 32 #include <vm/vm_extern.h> 33 #include <vm/vm_zone.h> 34 #include <sys/spinlock2.h> /* XXX */ 35 36 static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header"); 37 38 #define ZONE_ERROR_INVALID 0 39 #define ZONE_ERROR_NOTFREE 1 40 #define ZONE_ERROR_ALREADYFREE 2 41 42 #define ZONE_ROUNDING 32 43 44 #define ZENTRY_FREE 0x12342378 45 46 static void *zget(vm_zone_t z); 47 48 /* 49 * Return an item from the specified zone. This function is non-blocking for 50 * ZONE_INTERRUPT zones. 51 */ 52 void * 53 zalloc(vm_zone_t z) 54 { 55 void *item; 56 57 #ifdef INVARIANTS 58 if (z == NULL) 59 zerror(ZONE_ERROR_INVALID); 60 #endif 61 spin_lock_wr(&z->zlock); 62 if (z->zfreecnt > z->zfreemin) { 63 item = z->zitems; 64 #ifdef INVARIANTS 65 KASSERT(item != NULL, ("zitems unexpectedly NULL")); 66 if (((void **) item)[1] != (void *) ZENTRY_FREE) 67 zerror(ZONE_ERROR_NOTFREE); 68 ((void **) item)[1] = 0; 69 #endif 70 z->zitems = ((void **) item)[0]; 71 z->zfreecnt--; 72 z->znalloc++; 73 spin_unlock_wr(&z->zlock); 74 } else { 75 spin_unlock_wr(&z->zlock); 76 item = zget(z); 77 /* 78 * PANICFAIL allows the caller to assume that the zalloc() 79 * will always succeed. If it doesn't, we panic here. 80 */ 81 if (item == NULL && (z->zflags & ZONE_PANICFAIL)) 82 panic("zalloc(%s) failed", z->zname); 83 } 84 return item; 85 } 86 87 /* 88 * Free an item to the specified zone. 89 */ 90 void 91 zfree(vm_zone_t z, void *item) 92 { 93 94 spin_lock_wr(&z->zlock); 95 ((void **) item)[0] = z->zitems; 96 #ifdef INVARIANTS 97 if (((void **) item)[1] == (void *) ZENTRY_FREE) 98 zerror(ZONE_ERROR_ALREADYFREE); 99 ((void **) item)[1] = (void *) ZENTRY_FREE; 100 #endif 101 z->zitems = item; 102 z->zfreecnt++; 103 spin_unlock_wr(&z->zlock); 104 } 105 106 /* 107 * This file comprises a very simple zone allocator. This is used 108 * in lieu of the malloc allocator, where needed or more optimal. 109 * 110 * Note that the initial implementation of this had coloring, and 111 * absolutely no improvement (actually perf degradation) occurred. 112 * 113 * Note also that the zones are type stable. The only restriction is 114 * that the first two longwords of a data structure can be changed 115 * between allocations. Any data that must be stable between allocations 116 * must reside in areas after the first two longwords. 117 * 118 * zinitna, zinit, zbootinit are the initialization routines. 119 * zalloc, zfree, are the allocation/free routines. 120 */ 121 122 LIST_HEAD(zlist, vm_zone) zlist = LIST_HEAD_INITIALIZER(zlist); 123 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); 124 static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace; 125 126 /* 127 * Create a zone, but don't allocate the zone structure. If the 128 * zone had been previously created by the zone boot code, initialize 129 * various parts of the zone code. 130 * 131 * If waits are not allowed during allocation (e.g. during interrupt 132 * code), a-priori allocate the kernel virtual space, and allocate 133 * only pages when needed. 134 * 135 * Arguments: 136 * z pointer to zone structure. 137 * obj pointer to VM object (opt). 138 * name name of zone. 139 * size size of zone entries. 140 * nentries number of zone entries allocated (only ZONE_INTERRUPT.) 141 * flags ZONE_INTERRUPT -- items can be allocated at interrupt time. 142 * zalloc number of pages allocated when memory is needed. 143 * 144 * Note that when using ZONE_INTERRUPT, the size of the zone is limited 145 * by the nentries argument. The size of the memory allocatable is 146 * unlimited if ZONE_INTERRUPT is not set. 147 * 148 */ 149 int 150 zinitna(vm_zone_t z, vm_object_t obj, char *name, int size, 151 int nentries, int flags, int zalloc) 152 { 153 int totsize; 154 155 /* 156 * Only zones created with zinit() are destroyable. 157 */ 158 if (z->zflags & ZONE_DESTROYABLE) 159 panic("zinitna: can't create destroyable zone"); 160 161 if ((z->zflags & ZONE_BOOT) == 0) { 162 z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1); 163 spin_init(&z->zlock); 164 z->zfreecnt = 0; 165 z->ztotal = 0; 166 z->zmax = 0; 167 z->zname = name; 168 z->znalloc = 0; 169 z->zitems = NULL; 170 171 LIST_INSERT_HEAD(&zlist, z, zlink); 172 } 173 174 z->zkmvec = NULL; 175 z->zkmcur = z->zkmmax = 0; 176 z->zflags |= flags; 177 178 /* 179 * If we cannot wait, allocate KVA space up front, and we will fill 180 * in pages as needed. This is particularly required when creating 181 * an allocation space for map entries in kernel_map, because we 182 * do not want to go into a recursion deadlock with 183 * vm_map_entry_reserve(). 184 */ 185 if (z->zflags & ZONE_INTERRUPT) { 186 totsize = round_page(z->zsize * nentries); 187 zone_kmem_kvaspace += totsize; 188 189 z->zkva = kmem_alloc_pageable(&kernel_map, totsize); 190 if (z->zkva == 0) { 191 LIST_REMOVE(z, zlink); 192 return 0; 193 } 194 195 z->zpagemax = totsize / PAGE_SIZE; 196 if (obj == NULL) { 197 z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax); 198 } else { 199 z->zobj = obj; 200 _vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj); 201 } 202 z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT; 203 z->zmax += nentries; 204 } else { 205 z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM; 206 z->zmax = 0; 207 } 208 209 210 if (z->zsize > PAGE_SIZE) 211 z->zfreemin = 1; 212 else 213 z->zfreemin = PAGE_SIZE / z->zsize; 214 215 z->zpagecount = 0; 216 if (zalloc) 217 z->zalloc = zalloc; 218 else 219 z->zalloc = 1; 220 221 /* 222 * Populate the interrrupt zone at creation time rather than 223 * on first allocation, as this is a potentially long operation. 224 */ 225 if (z->zflags & ZONE_INTERRUPT) { 226 void *buf; 227 228 buf = zget(z); 229 zfree(z, buf); 230 } 231 232 return 1; 233 } 234 235 /* 236 * Subroutine same as zinitna, except zone data structure is allocated 237 * automatically by malloc. This routine should normally be used, except 238 * in certain tricky startup conditions in the VM system -- then 239 * zbootinit and zinitna can be used. Zinit is the standard zone 240 * initialization call. 241 */ 242 vm_zone_t 243 zinit(char *name, int size, int nentries, int flags, int zalloc) 244 { 245 vm_zone_t z; 246 247 z = (vm_zone_t) kmalloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT); 248 if (z == NULL) 249 return NULL; 250 251 z->zflags = 0; 252 if (zinitna(z, NULL, name, size, nentries, 253 flags & ~ZONE_DESTROYABLE, zalloc) == 0) { 254 kfree(z, M_ZONE); 255 return NULL; 256 } 257 258 if (flags & ZONE_DESTROYABLE) 259 z->zflags |= ZONE_DESTROYABLE; 260 261 return z; 262 } 263 264 /* 265 * Initialize a zone before the system is fully up. This routine should 266 * only be called before full VM startup. 267 */ 268 void 269 zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems) 270 { 271 int i; 272 273 z->zname = name; 274 z->zsize = size; 275 z->zpagemax = 0; 276 z->zobj = NULL; 277 z->zflags = ZONE_BOOT; 278 z->zfreemin = 0; 279 z->zallocflag = 0; 280 z->zpagecount = 0; 281 z->zalloc = 0; 282 z->znalloc = 0; 283 spin_init(&z->zlock); 284 285 bzero(item, nitems * z->zsize); 286 z->zitems = NULL; 287 for (i = 0; i < nitems; i++) { 288 ((void **) item)[0] = z->zitems; 289 #ifdef INVARIANTS 290 ((void **) item)[1] = (void *) ZENTRY_FREE; 291 #endif 292 z->zitems = item; 293 item = (uint8_t *)item + z->zsize; 294 } 295 z->zfreecnt = nitems; 296 z->zmax = nitems; 297 z->ztotal = nitems; 298 299 LIST_INSERT_HEAD(&zlist, z, zlink); 300 } 301 302 /* 303 * Release all resources owned by zone created with zinit(). 304 */ 305 void 306 zdestroy(vm_zone_t z) 307 { 308 int i; 309 310 if (z == NULL) 311 panic("zdestroy: null zone"); 312 if ((z->zflags & ZONE_DESTROYABLE) == 0) 313 panic("zdestroy: undestroyable zone"); 314 315 LIST_REMOVE(z, zlink); 316 317 /* 318 * Release virtual mappings, physical memory and update sysctl stats. 319 */ 320 if (z->zflags & ZONE_INTERRUPT) { 321 /* 322 * Free the mapping. 323 */ 324 kmem_free(&kernel_map, z->zkva, z->zpagemax*PAGE_SIZE); 325 atomic_subtract_int(&zone_kmem_kvaspace, z->zpagemax*PAGE_SIZE); 326 /* 327 * Free the backing object and physical pages. 328 */ 329 vm_object_deallocate(z->zobj); 330 atomic_subtract_int(&zone_kmem_pages, z->zpagecount); 331 } else { 332 for (i=0; i < z->zkmcur; i++) { 333 kmem_free(&kernel_map, z->zkmvec[i], 334 z->zalloc*PAGE_SIZE); 335 atomic_subtract_int(&zone_kern_pages, z->zalloc); 336 } 337 if (z->zkmvec != NULL) 338 kfree(z->zkmvec, M_ZONE); 339 } 340 341 spin_uninit(&z->zlock); 342 kfree(z, M_ZONE); 343 } 344 345 346 /* 347 * void *zalloc(vm_zone_t zone) -- 348 * Returns an item from a specified zone. May not be called from a 349 * FAST interrupt or IPI function. 350 * 351 * void zfree(vm_zone_t zone, void *item) -- 352 * Frees an item back to a specified zone. May not be called from a 353 * FAST interrupt or IPI function. 354 */ 355 356 /* 357 * Internal zone routine. Not to be called from external (non vm_zone) code. 358 */ 359 static void * 360 zget(vm_zone_t z) 361 { 362 int i; 363 vm_page_t m; 364 int nitems, nbytes; 365 void *item; 366 367 if (z == NULL) 368 panic("zget: null zone"); 369 370 if (z->zflags & ZONE_INTERRUPT) { 371 /* 372 * Interrupt zones do not mess with the kernel_map, they 373 * simply populate an existing mapping. 374 */ 375 nbytes = z->zpagecount * PAGE_SIZE; 376 nbytes -= nbytes % z->zsize; 377 item = (char *) z->zkva + nbytes; 378 for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax)); 379 i++) { 380 vm_offset_t zkva; 381 382 m = vm_page_alloc(z->zobj, z->zpagecount, 383 z->zallocflag); 384 /* note: z might be modified due to blocking */ 385 if (m == NULL) 386 break; 387 388 /* 389 * Unbusy page so it can freed in zdestroy(). Make 390 * sure it is not on any queue and so can not be 391 * recycled under our feet. 392 */ 393 KKASSERT(m->queue == PQ_NONE); 394 vm_page_flag_clear(m, PG_BUSY); 395 396 zkva = z->zkva + z->zpagecount * PAGE_SIZE; 397 pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); /* YYY */ 398 bzero((void *)zkva, PAGE_SIZE); 399 z->zpagecount++; 400 zone_kmem_pages++; 401 vmstats.v_wire_count++; 402 } 403 nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize; 404 } else if (z->zflags & ZONE_SPECIAL) { 405 /* 406 * The special zone is the one used for vm_map_entry_t's. 407 * We have to avoid an infinite recursion in 408 * vm_map_entry_reserve() by using vm_map_entry_kreserve() 409 * instead. The map entries are pre-reserved by the kernel 410 * by vm_map_entry_reserve_cpu_init(). 411 */ 412 nbytes = z->zalloc * PAGE_SIZE; 413 414 item = (void *)kmem_alloc3(&kernel_map, nbytes, KM_KRESERVE); 415 416 /* note: z might be modified due to blocking */ 417 if (item != NULL) { 418 zone_kern_pages += z->zalloc; /* not MP-safe XXX */ 419 bzero(item, nbytes); 420 } else { 421 nbytes = 0; 422 } 423 nitems = nbytes / z->zsize; 424 } else { 425 /* 426 * Otherwise allocate KVA from the kernel_map. 427 */ 428 nbytes = z->zalloc * PAGE_SIZE; 429 430 item = (void *)kmem_alloc3(&kernel_map, nbytes, 0); 431 432 /* note: z might be modified due to blocking */ 433 if (item != NULL) { 434 zone_kern_pages += z->zalloc; /* not MP-safe XXX */ 435 bzero(item, nbytes); 436 437 if (z->zflags & ZONE_DESTROYABLE) { 438 if (z->zkmcur == z->zkmmax) { 439 z->zkmmax = 440 z->zkmmax==0 ? 1 : z->zkmmax*2; 441 z->zkmvec = krealloc(z->zkmvec, 442 z->zkmmax * sizeof(z->zkmvec[0]), 443 M_ZONE, M_WAITOK); 444 } 445 z->zkmvec[z->zkmcur++] = (vm_offset_t)item; 446 } 447 } else { 448 nbytes = 0; 449 } 450 nitems = nbytes / z->zsize; 451 } 452 453 spin_lock_wr(&z->zlock); 454 z->ztotal += nitems; 455 /* 456 * Save one for immediate allocation 457 */ 458 if (nitems != 0) { 459 nitems -= 1; 460 for (i = 0; i < nitems; i++) { 461 ((void **) item)[0] = z->zitems; 462 #ifdef INVARIANTS 463 ((void **) item)[1] = (void *) ZENTRY_FREE; 464 #endif 465 z->zitems = item; 466 item = (uint8_t *)item + z->zsize; 467 } 468 z->zfreecnt += nitems; 469 z->znalloc++; 470 } else if (z->zfreecnt > 0) { 471 item = z->zitems; 472 z->zitems = ((void **) item)[0]; 473 #ifdef INVARIANTS 474 if (((void **) item)[1] != (void *) ZENTRY_FREE) 475 zerror(ZONE_ERROR_NOTFREE); 476 ((void **) item)[1] = 0; 477 #endif 478 z->zfreecnt--; 479 z->znalloc++; 480 } else { 481 item = NULL; 482 } 483 spin_unlock_wr(&z->zlock); 484 485 /* 486 * A special zone may have used a kernel-reserved vm_map_entry. If 487 * so we have to be sure to recover our reserve so we don't run out. 488 * We will panic if we run out. 489 */ 490 if (z->zflags & ZONE_SPECIAL) 491 vm_map_entry_reserve(0); 492 493 return item; 494 } 495 496 static int 497 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 498 { 499 int error=0; 500 vm_zone_t curzone; 501 char tmpbuf[128]; 502 char tmpname[14]; 503 504 ksnprintf(tmpbuf, sizeof(tmpbuf), 505 "\nITEM SIZE LIMIT USED FREE REQUESTS\n"); 506 error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf)); 507 if (error) 508 return (error); 509 510 LIST_FOREACH(curzone, &zlist, zlink) { 511 int i; 512 int len; 513 int offset; 514 515 len = strlen(curzone->zname); 516 if (len >= (sizeof(tmpname) - 1)) 517 len = (sizeof(tmpname) - 1); 518 for(i = 0; i < sizeof(tmpname) - 1; i++) 519 tmpname[i] = ' '; 520 tmpname[i] = 0; 521 memcpy(tmpname, curzone->zname, len); 522 tmpname[len] = ':'; 523 offset = 0; 524 if (curzone == LIST_FIRST(&zlist)) { 525 offset = 1; 526 tmpbuf[0] = '\n'; 527 } 528 529 ksnprintf(tmpbuf + offset, sizeof(tmpbuf) - offset, 530 "%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n", 531 tmpname, curzone->zsize, curzone->zmax, 532 (curzone->ztotal - curzone->zfreecnt), 533 curzone->zfreecnt, curzone->znalloc); 534 535 len = strlen((char *)tmpbuf); 536 if (LIST_NEXT(curzone, zlink) == NULL) 537 tmpbuf[len - 1] = 0; 538 539 error = SYSCTL_OUT(req, tmpbuf, len); 540 541 if (error) 542 return (error); 543 } 544 return (0); 545 } 546 547 #if defined(INVARIANTS) 548 void 549 zerror(int error) 550 { 551 char *msg; 552 553 switch (error) { 554 case ZONE_ERROR_INVALID: 555 msg = "zone: invalid zone"; 556 break; 557 case ZONE_ERROR_NOTFREE: 558 msg = "zone: entry not free"; 559 break; 560 case ZONE_ERROR_ALREADYFREE: 561 msg = "zone: freeing free entry"; 562 break; 563 default: 564 msg = "zone: invalid error"; 565 break; 566 } 567 panic(msg); 568 } 569 #endif 570 571 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, \ 572 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 573 574 SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, 575 CTLFLAG_RD, &zone_kmem_pages, 0, "Number of interrupt safe pages allocated by zone"); 576 SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, 577 CTLFLAG_RD, &zone_kmem_kvaspace, 0, "KVA space allocated by zone"); 578 SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, 579 CTLFLAG_RD, &zone_kern_pages, 0, "Number of non-interrupt safe pages allocated by zone"); 580