/* LWIP service - mempool.c - memory pool management and slab allocation */
/*
 * This module should be considered a replacement for lwIP's PBUF_POOL and
 * custom-pools functionality.  lwIP's PBUF_POOL system allows a PBUF_POOL type
 * allocation for a moderately large amount of memory, for example for a full-
 * sized packet, to be turned into a chain of "pbuf" buffers, each of a static
 * size.  Most of lwIP can deal with such pbuf chains, because many other types
 * of allocations also end up consisting of pbuf chains.  However, lwIP will
 * never use PBUF_POOL for its own memory allocations, and will use PBUF_RAM
 * allocations instead.  Such PBUF_RAM allocations always return one single
 * pbuf with a contiguous memory area.  lwIP's custom pools support allows such
 * PBUF_RAM allocations to draw from user-defined pools of statically allocated
 * memory, as an alternative to turning such allocations into malloc() calls.
 *
 * However, lwIP itself does not offer a way to combine these two pool systems:
 * the PBUF_POOL buffer pool and the custom pools are completely separate.  We
 * want to be able to draw both kinds of memory from the same pool.  This is
 * the first reason that we are using our own memory pools.  The second is
 * something that lwIP could never offer anyway: we would like to provide a
 * certain amount of static/preallocated memory for those types of allocations,
 * but optionally also add a much larger amount of dynamic memory when needed.
 *
 * In order to make this module work, we do not use PBUF_POOL anywhere.
 * Instead, we use chained static-sized PBUF_RAM allocations for all types of
 * allocations that we manage ourselves--see pchain_alloc().  We tell lwIP to
 * use the functions in this module to do the malloc-type allocations for those
 * PBUF_RAM buffers.  As such, this module manages all PBUF_RAM allocations,
 * both from our own code and from lwIP.  Note that we do still use lwIP's own
 * pools for various lwIP structures.  We do want to keep the isolation
 * provided by the use of such pools, even though that means that we have to
 * provision some of those pools for the worst case, resulting in some memory
 * overhead that is unnecessary for the common case.
 *
 * With the PBUF_RAM allocation redirection system in place, this module has to
 * manage the memory for those allocations.  It does this based on the
 * assertion that there are three main classes of PBUF_RAM allocation sizes:
 *
 * - "large" allocations: these are allocations for up to MEMPOOL_BUFSIZE bytes
 *   of PBUF_RAM data, where MEMPOOL_BUFSIZE is the allocation granularity that
 *   we have picked for the individual buffers in larger chains.  It is set to
 *   512 bytes right now, mainly to keep pbuf chains for full-sized ethernet
 *   packets short, which has many performance advantages.  Since the pbuf
 *   header itself also takes some space (16 bytes, right now), this results in
 *   allocations seen by mempool_malloc() of up to just over 512 bytes.
 * - "small" allocations: these are allocations mostly for packet headers, as
 *   needed by lwIP to prepend to (mainly TCP) packet data that we give to it.
 *   The size of these allocations varies, but most are 76 bytes (80 bytes if
 *   we ever add VLAN support), plus once again the pbuf header.
 * - "excessive" allocations: these are allocations larger than the maximum
 *   we have configured, effectively requesting contiguous memory of (possibly
 *   far) more than 512 bytes.
 *   We do not make such allocations ourselves, as we only ever create pbuf
 *   chains.  Thus, any such allocations come from lwIP.  There are a few
 *   locations in lwIP that attempt to make those kinds of allocations, but we
 *   replace one important case in the lwIP code with a chained allocation,
 *   (currently) leaving only one case: allocation of ICMP ping reply packets.
 *   In this module, we outright *deny* any excessive allocations.
 *   Practically, that means that no replies are generated for requests
 *   exceeding around 460 bytes, which is in fact not bad, especially since we
 *   have multicast ICMP ping replying enabled.  If any new cases of excessive
 *   allocations are added to lwIP in the future, we will have to deal with
 *   those on a case-by-case basis, but for now this should be all.
 *
 * This module caters to the first two types of allocations.  For large buffer
 * allocations, it provides a standard slab allocator, with a hardcoded slab
 * size of MEMPOOL_LARGE_COUNT buffers with a 512-byte data area each.  One
 * slab is allocated at service start-up; additional slabs up to a configured
 * maximum are allocated on demand.  Once fallen out of use, all but one slab
 * will be freed after a while, using a timer.  The current per-slab count of
 * 512 large buffers, combined with the buffer size of 512 plus the pbuf header
 * plus a bit of extra overhead, results in about 266 KB per slab.
 *
 * For small buffer allocations, there are two facilities.  First, there is a
 * static pool of small buffers.  This pool currently provides 256 small-sized
 * buffers, mainly in order to allow packet headers to be produced even in low-
 * memory conditions.  In addition, small buffers may be formed by allocating
 * and then splitting up one large buffer.  The module is currently configured
 * to split one large buffer into four small buffers, which yields a small
 * buffer size of just over 100 bytes--enough for the packet headers while
 * leaving little slack on either side.
 *
 * It is important to note that large and small buffer allocations are freed up
 * through the same function, with no information on the original allocation
 * size.  As a result, we have to distinguish between large and small buffers
 * using a unified system.  In particular, this module prepends each of its
 * allocations by a single pointer, which points to a header structure that is
 * at the very beginning of the slab that contains the allocated buffer.  That
 * header structure contains information about the type of slab (large or
 * small) as well as some accounting information used by both types.
 *
 * For large-buffer slabs, this header is part of a larger structure with, for
 * example, the slab's list of free buffers.  This larger structure is then
 * followed by the actual buffers in the slab.
 *
 * For small-buffer slabs, the header is followed directly by the actual small
 * buffers.  Thus, when a large buffer is split up into four small buffers, the
 * data area of that large buffer consists of a small-type slab header and four
 * small buffers.  The large buffer itself is simply considered in use, as
 * though it was allocated for regular data.  This nesting approach saves a lot
 * of memory for small allocations, at the cost of a bit more computation.
 *
 * It should be noted that all allocations should be (and are) pointer-aligned.
 * Normally lwIP would check for this, but we cannot tell lwIP the platform
 * pointer size without hardcoding that size.  This module performs proper
 * alignment of all buffers itself though, regardless of the pointer size.
 */
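
/*
 * For reference, a minimal sketch of how the malloc-type redirection described
 * above could be wired up through lwIP's standard hooks.  This is an
 * illustration only: lwIP's MEM_LIBC_MALLOC option and mem_clib_* macros are
 * real, but the exact configuration used by this service lives elsewhere
 * (e.g. in its lwipopts.h) and may differ in detail:
 *
 *	#define MEM_LIBC_MALLOC		1
 *	#define mem_clib_malloc		mempool_malloc
 *	#define mem_clib_calloc		mempool_calloc
 *	#define mem_clib_free		mempool_free
 */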

#include "lwip.h"

#include <sys/mman.h>

/* Alignment to pointer sizes. */
#define MEMPOOL_ALIGN_DOWN(s)	((s) & ~(sizeof(void *) - 1))
#define MEMPOOL_ALIGN_UP(s)	MEMPOOL_ALIGN_DOWN((s) + sizeof(void *) - 1)

/* Large buffers: per-slab count and data area size. */
#define MEMPOOL_LARGE_COUNT	512
#define MEMPOOL_LARGE_SIZE \
	(MEMPOOL_ALIGN_UP(sizeof(struct pbuf)) + MEMPOOL_BUFSIZE)

/* Small buffers: per-slab count and data area size. */
#define MEMPOOL_SMALL_COUNT	4
#define MEMPOOL_SMALL_SIZE \
	(MEMPOOL_ALIGN_DOWN(MEMPOOL_LARGE_SIZE / MEMPOOL_SMALL_COUNT) - \
	sizeof(struct mempool_header))

/* Memory pool slab header, part of both small and large slabs. */
struct mempool_header {
	union {
		struct {
			uint8_t mhui_flags;
			uint32_t mhui_inuse;
		} mhu_info;
		void *mhu_align;	/* force pointer alignment */
	} mh_u;
};
#define mh_flags	mh_u.mhu_info.mhui_flags
#define mh_inuse	mh_u.mhu_info.mhui_inuse

/* Header flags. */
#define MHF_SMALL	0x01	/* slab is for small buffers, not large ones */
#define MHF_STATIC	0x02	/* small slab is statically allocated */
#define MHF_MARKED	0x04	/* large empty slab is up for deallocation */

/*
 * Large buffer.  When allocated, mlb_header points to the (header of) the
 * containing large slab, and mlb_data is returned for arbitrary use by the
 * user of the buffer.  When free, mlb_header is NULL and instead mlb_header2
 * points to the containing slab (allowing for double-free detection), and the
 * buffer is on the slab's free list by using mlb_next.
 */
struct mempool_large_buf {
	struct mempool_header *mlb_header;
	union {
		struct {
			struct mempool_header *mlbuf_header2;
			LIST_ENTRY(mempool_large_buf) mlbuf_next;
		} mlbu_free;
		char mlbu_data[MEMPOOL_LARGE_SIZE];
	} mlb_u;
};
#define mlb_header2	mlb_u.mlbu_free.mlbuf_header2
#define mlb_next	mlb_u.mlbu_free.mlbuf_next
#define mlb_data	mlb_u.mlbu_data

/* Small buffer.  Same idea, different size. */
struct mempool_small_buf {
	struct mempool_header *msb_header;
	union {
		struct {
			struct mempool_header *msbuf_header2;
			TAILQ_ENTRY(mempool_small_buf) msbuf_next;
		} msbu_free;
		char msbu_data[MEMPOOL_SMALL_SIZE];
	} msb_u;
};
#define msb_header2	msb_u.msbu_free.msbuf_header2
#define msb_next	msb_u.msbu_free.msbuf_next
#define msb_data	msb_u.msbu_data

/*
 * A large slab, including header, other per-slab fields, and large buffers.
 * Each of these structures is on exactly one of three slab lists, depending
 * on whether all its buffers are free (empty), some but not all of its buffers
 * are in use (partial), or all of its buffers are in use (full).  The mls_next
 * field is used for that list.  The mls_free field is the per-slab list of
 * free buffers.
 */
struct mempool_large_slab {
	struct mempool_header mls_header;	/* MUST be first */
	LIST_ENTRY(mempool_large_slab) mls_next;
	LIST_HEAD(, mempool_large_buf) mls_free;
	struct mempool_large_buf mls_buf[MEMPOOL_LARGE_COUNT];
};
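
/*
 * To illustrate the "prepended pointer" scheme described in the file comment:
 * both buffer types store a pointer to their slab header at offset zero, right
 * before the data area that is handed out.  A minimal sketch of how the slab
 * header can be recovered from a data pointer (mempool_free() below does
 * effectively this, using memcpy()):
 *
 *	struct mempool_header *mh;
 *
 *	memcpy(&mh, (char *)ptr - sizeof(mh), sizeof(mh));
 */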

/* The three slab lists for large slabs, as described above. */
static LIST_HEAD(, mempool_large_slab) mempool_empty_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_partial_slabs;
static LIST_HEAD(, mempool_large_slab) mempool_full_slabs;

/*
 * A small slab, including header and small buffers.  We use unified free lists
 * for small buffers, and these small slabs are not part of any lists
 * themselves, so we need neither of the two fields from large slabs for that.
 */
struct mempool_small_slab {
	struct mempool_header mss_header;	/* MUST be first */
	struct mempool_small_buf mss_buf[MEMPOOL_SMALL_COUNT];
};

/*
 * The free lists for static small buffers (from the static pool, see below)
 * and dynamic small buffers (as obtained by splitting large buffers).
 */
static TAILQ_HEAD(, mempool_small_buf) mempool_small_static_freelist;
static TAILQ_HEAD(, mempool_small_buf) mempool_small_dynamic_freelist;

/*
 * A static pool of small buffers.  Small buffers are somewhat more important
 * than large buffers, because they are used for packet headers.  The purpose
 * of this static pool is to be able to make progress even if all large buffers
 * are allocated for data, typically in the case that the system is low on
 * memory.  Note that the number of static small buffers is the given number of
 * small slabs multiplied by MEMPOOL_SMALL_COUNT, hence the division.
 */
#define MEMPOOL_SMALL_SLABS	(256 / MEMPOOL_SMALL_COUNT)

static struct mempool_small_slab mempool_small_pool[MEMPOOL_SMALL_SLABS];

/*
 * The following setting (mempool_max_slabs) can be changed through sysctl(7).
 * As such it may be set by userland to a completely arbitrary value and must
 * be sanity-checked before any actual use.  The default is picked such that
 * all TCP sockets can fill up their send and receive queues: (TCP_SNDBUF_DEF +
 * TCP_RCVBUF_DEF) * NR_TCPSOCK / (MEMPOOL_BUFSIZE * MEMPOOL_LARGE_COUNT) =
 * (32768 + 32768) * 256 / (512 * 512) = 64.  We put in the resulting number
 * rather than the formula because not all those definitions are public.
 */
#define MEMPOOL_DEFAULT_MAX_SLABS	64	/* about 17 MB of memory */

static int mempool_max_slabs;	/* maximum number of large slabs */
static int mempool_nr_slabs;	/* current number of large slabs */

static int mempool_nr_large;	/* current number of large buffers */
static int mempool_used_large;	/* large buffers currently in use */
static int mempool_used_small;	/* small buffers currently in use */

/*
 * Number of clock ticks between timer invocations.  The timer is used to
 * deallocate unused slabs.
 */
#define MEMPOOL_TIMER_TICKS	(10 * sys_hz())

static minix_timer_t mempool_timer;

static int mempool_defer_alloc;	/* allocation failed, defer next try */

/* The CTL_MINIX MINIX_LWIP "mempool" subtree.  Dynamically numbered. */
static struct rmib_node minix_lwip_mempool_table[] = {
	RMIB_INTPTR(RMIB_RW, &mempool_max_slabs, "slab_max",
	    "Maximum number of memory slabs (configurable)"),
	RMIB_INTPTR(RMIB_RO, &mempool_nr_slabs, "slab_num",
	    "Current number of memory slabs"),
	RMIB_INT(RMIB_RO, sizeof(struct mempool_large_slab), "slab_size",
	    "Byte size of a single memory slab"),
	RMIB_INT(RMIB_RO, MEMPOOL_LARGE_COUNT, "slab_bufs",
	    "Number of large buffers per memory slab"),
	RMIB_INTPTR(RMIB_RO, &mempool_nr_large, "large_num",
	    "Current total number of large buffers"),
	RMIB_INTPTR(RMIB_RO, &mempool_used_large, "large_used",
	    "Current number of used large buffers"),
	RMIB_INT(RMIB_RO, MEMPOOL_LARGE_SIZE, "large_size",
	    "Byte size of a single large buffer"),
	RMIB_INTPTR(RMIB_RO, &mempool_used_small, "small_used",
	    "Current number of used small buffers"),
	RMIB_INT(RMIB_RO, MEMPOOL_SMALL_SIZE, "small_size",
	    "Byte size of a single small buffer"),
};

static struct rmib_node minix_lwip_mempool_node =
    RMIB_NODE(RMIB_RO, minix_lwip_mempool_table, "mempool",
	"Memory pool settings");
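
/*
 * As a purely illustrative (and hypothetical) example of the nodes registered
 * above: once this subtree is registered under minix.lwip, the slab limit
 * could be inspected and raised from userland roughly as follows, the exact
 * output format depending on the sysctl(8) utility in use:
 *
 *	$ sysctl minix.lwip.mempool.slab_max
 *	minix.lwip.mempool.slab_max = 64
 *	$ sysctl -w minix.lwip.mempool.slab_max=128
 */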

/*
 * Initialize the given "slab" of small buffers.  The slab may either come from
 * the statically allocated pool ('is_static' is TRUE) or a single large buffer
 * that we aim to chop up into small buffers.
 */
static void
mempool_prepare_small(struct mempool_small_slab * mss, int is_static)
{
	struct mempool_small_buf *msb;
	unsigned int count;

	mss->mss_header.mh_flags = MHF_SMALL | ((is_static) ? MHF_STATIC : 0);
	mss->mss_header.mh_inuse = 0;

	msb = mss->mss_buf;

	for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
		msb->msb_header = NULL;
		msb->msb_header2 = &mss->mss_header;

		if (is_static)
			TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
			    msb_next);
		else
			TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
			    msb_next);
	}
}

/*
 * Allocate a new slab for large buffers, if allowed by policy and possible.
 */
static void
mempool_new_slab(void)
{
	struct mempool_large_slab *mls;
	struct mempool_large_buf *mlb;
	unsigned int count;

	/*
	 * See if allocating a new slab would result in overrunning the
	 * configured maximum number of large buffers.  Round the maximum,
	 * which is probably what the user intended.
	 */
	if (mempool_cur_buffers() + MEMPOOL_LARGE_COUNT / 2 >
	    mempool_max_buffers()) {
		assert(mempool_nr_slabs > 0);

		return;
	}

	/*
	 * If a previous allocation failed earlier during this timer interval,
	 * do not try again now.
	 */
	if (mempool_defer_alloc)
		return;

	/*
	 * Allocate the slab.  Preallocate the memory, or we might crash later
	 * during low-memory conditions.  If allocation fails, simply do
	 * nothing further.  The caller will check the free lists.
	 */
	mls = (struct mempool_large_slab *)mmap(NULL,
	    sizeof(struct mempool_large_slab), PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE | MAP_PREALLOC, -1, 0);

	if (mls == MAP_FAILED) {
		if (mempool_nr_slabs == 0)
			panic("unable to allocate initial memory pool");

		/*
		 * Do not keep hammering VM with mmap requests when the system
		 * is out of memory.  Try again after the next timer tick.
		 */
		mempool_defer_alloc = TRUE;

		return;
	}

	/* Initialize the new slab. */
	mls->mls_header.mh_flags = 0;
	mls->mls_header.mh_inuse = 0;

	mlb = mls->mls_buf;

	LIST_INIT(&mls->mls_free);

	for (count = 0; count < MEMPOOL_LARGE_COUNT; count++, mlb++) {
		mlb->mlb_header = NULL;
		mlb->mlb_header2 = &mls->mls_header;

		LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);
	}

	LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);

	mempool_nr_slabs++;
	mempool_nr_large += MEMPOOL_LARGE_COUNT;
}

/*
 * Deallocate a slab for large buffers, if allowed.
 */
static void
mempool_destroy_slab(struct mempool_large_slab * mls)
{

	assert(mempool_nr_slabs > 0);

	assert(!(mls->mls_header.mh_flags & MHF_SMALL));
	assert(mls->mls_header.mh_inuse == 0);

	/* Never deallocate the last large slab. */
	if (mempool_nr_slabs == 1)
		return;

	LIST_REMOVE(mls, mls_next);

	if (munmap(mls, sizeof(*mls)) != 0)
		panic("munmap failed: %d", -errno);

	assert(mempool_nr_large > MEMPOOL_LARGE_COUNT);
	mempool_nr_large -= MEMPOOL_LARGE_COUNT;
	mempool_nr_slabs--;
}

/*
 * Regular timer.  Deallocate empty slabs already marked for deallocation, and
 * mark any other empty slabs for deallocation.
 */
static void
mempool_tick(int arg __unused)
{
	struct mempool_large_slab *mls, *tmls;

	/*
	 * Go through all the empty slabs, destroying marked slabs and marking
	 * unmarked slabs.
	 */
	LIST_FOREACH_SAFE(mls, &mempool_empty_slabs, mls_next, tmls) {
		if (mls->mls_header.mh_flags & MHF_MARKED)
			mempool_destroy_slab(mls);
		else
			mls->mls_header.mh_flags |= MHF_MARKED;
	}

	/*
	 * If allocation failed during the last interval, allow a new attempt
	 * during the next.
	 */
	mempool_defer_alloc = FALSE;

	/* Set the next timer. */
	set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);
}

/*
 * Initialize the memory pool module.
 */
void
mempool_init(void)
{
	unsigned int slot;

	/* These checks are for absolutely essential points. */
	assert(sizeof(void *) == MEM_ALIGNMENT);
	assert(sizeof(struct mempool_small_slab) <= MEMPOOL_LARGE_SIZE);
	assert(offsetof(struct mempool_small_buf, msb_data) == sizeof(void *));
	assert(offsetof(struct mempool_large_buf, mlb_data) == sizeof(void *));

	/* Initialize module-local variables. */
	LIST_INIT(&mempool_empty_slabs);
	LIST_INIT(&mempool_partial_slabs);
	LIST_INIT(&mempool_full_slabs);

	TAILQ_INIT(&mempool_small_static_freelist);
	TAILQ_INIT(&mempool_small_dynamic_freelist);

	mempool_max_slabs = MEMPOOL_DEFAULT_MAX_SLABS;
	mempool_nr_slabs = 0;

	mempool_nr_large = 0;
	mempool_used_large = 0;
	mempool_used_small = 0;

	mempool_defer_alloc = FALSE;

	/* Initialize the static pool of small buffers. */
	for (slot = 0; slot < __arraycount(mempool_small_pool); slot++)
		mempool_prepare_small(&mempool_small_pool[slot],
		    TRUE /*is_static*/);

	/*
	 * Allocate one large slab.  The service needs at least one large slab
	 * for basic operation, and therefore will never deallocate the last.
	 */
	mempool_new_slab();

	/* Set a regular low-frequency timer to deallocate unused slabs. */
	set_timer(&mempool_timer, MEMPOOL_TIMER_TICKS, mempool_tick, 0);

	/* Register the minix.lwip.mempool subtree. */
	mibtree_register_lwip(&minix_lwip_mempool_node);
}

/*
 * Return the total number of large buffers currently in the system, regardless
 * of allocation status.
 */
unsigned int
mempool_cur_buffers(void)
{

	return mempool_nr_large;
}

/*
 * Return the maximum number of large buffers that the system has been allowed
 * to allocate.  Note that due to low-memory conditions, this maximum may not
 * be allocated in practice even when desired.
 */
unsigned int
mempool_max_buffers(void)
{

	if (mempool_max_slabs <= 1)
		return MEMPOOL_LARGE_COUNT;

	if ((size_t)mempool_max_slabs >
	    INT_MAX / sizeof(struct mempool_large_slab))
		return INT_MAX / sizeof(struct mempool_large_slab);

	return (size_t)mempool_max_slabs * MEMPOOL_LARGE_COUNT;
}

/*
 * Allocate a large buffer, either by taking one off a free list or by
 * allocating a new large slab.  On success, return a pointer to the data area
 * of the large buffer.  This data area is exactly MEMPOOL_LARGE_SIZE bytes in
 * size.  If no large buffer could be allocated, return NULL.
 */
static void *
mempool_alloc_large(void)
{
	struct mempool_large_slab *mls;
	struct mempool_large_buf *mlb;

	/*
	 * Find a large slab that has free large blocks.  As is standard for
	 * slab allocation, favor partially used slabs over empty slabs for
	 * eventual consolidation.  If both lists are empty, try allocating a
	 * new slab.  If that fails, we are out of memory, and return NULL.
	 */
	if (!LIST_EMPTY(&mempool_partial_slabs))
		mls = LIST_FIRST(&mempool_partial_slabs);
	else {
		if (LIST_EMPTY(&mempool_empty_slabs)) {
			mempool_new_slab();

			if (LIST_EMPTY(&mempool_empty_slabs))
				return NULL;	/* out of memory */
		}

		mls = LIST_FIRST(&mempool_empty_slabs);
	}

	/* Allocate a block from the slab that we picked. */
	assert(mls != NULL);
	assert(!LIST_EMPTY(&mls->mls_free));

	mlb = LIST_FIRST(&mls->mls_free);
	LIST_REMOVE(mlb, mlb_next);

	assert(mlb->mlb_header == NULL);
	assert(mlb->mlb_header2 == &mls->mls_header);

	mlb->mlb_header = &mls->mls_header;

	/*
	 * Adjust accounting for the large slab, which may involve moving it
	 * to another list.
	 */
	assert(mls->mls_header.mh_inuse < MEMPOOL_LARGE_COUNT);
	mls->mls_header.mh_inuse++;

	if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_full_slabs, mls, mls_next);
	} else if (mls->mls_header.mh_inuse == 1) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
	}

	assert(mempool_used_large < mempool_nr_large);
	mempool_used_large++;

	/* Return the block's data area. */
	return (void *)mlb->mlb_data;
}
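
/*
 * For clarity, a summary (added here as a descriptive note, not code) of the
 * slab list transitions performed by mempool_alloc_large() above and
 * mempool_free() below, keyed on the slab's in-use count:
 *
 *	empty slabs   --(inuse 0 -> 1)----------------->  partial slabs
 *	partial slabs --(inuse reaches COUNT)---------->  full slabs
 *	full slabs    --(inuse drops to COUNT - 1)----->  partial slabs
 *	partial slabs --(inuse 1 -> 0)----------------->  empty slabs
 */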

/*
 * Allocate a small buffer, either by taking one off a free list or by
 * allocating a large buffer and splitting it up in new free small buffers.  On
 * success, return a pointer to the data area of the small buffer.  This data
 * area is exactly MEMPOOL_SMALL_SIZE bytes in size.  If no small buffer could
 * be allocated, return NULL.
 */
static void *
mempool_alloc_small(void)
{
	struct mempool_small_slab *mss;
	struct mempool_small_buf *msb;
	struct mempool_header *mh;

	/*
	 * Find a free small block and take it off the free list.  Try the
	 * static free list before the dynamic one, so that after a peak in
	 * buffer usage we are likely to be able to free up the dynamic slabs
	 * quickly.  If both lists are empty, try allocating a large block to
	 * divvy up into small blocks.  If that fails, we are out of memory.
	 */
	if (!TAILQ_EMPTY(&mempool_small_static_freelist)) {
		msb = TAILQ_FIRST(&mempool_small_static_freelist);

		TAILQ_REMOVE(&mempool_small_static_freelist, msb, msb_next);
	} else {
		if (TAILQ_EMPTY(&mempool_small_dynamic_freelist)) {
			mss =
			    (struct mempool_small_slab *)mempool_alloc_large();

			if (mss == NULL)
				return NULL;	/* out of memory */

			/* Initialize the small slab, including its blocks. */
			mempool_prepare_small(mss, FALSE /*is_static*/);
		}

		msb = TAILQ_FIRST(&mempool_small_dynamic_freelist);
		assert(msb != NULL);

		TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb, msb_next);
	}

	/* Mark the small block as allocated, and return its data area. */
	assert(msb != NULL);

	assert(msb->msb_header == NULL);
	assert(msb->msb_header2 != NULL);

	mh = msb->msb_header2;
	msb->msb_header = mh;

	assert(mh->mh_inuse < MEMPOOL_SMALL_COUNT);
	mh->mh_inuse++;

	mempool_used_small++;

	return (void *)msb->msb_data;
}

/*
 * Memory pool wrapper function for malloc() calls from lwIP.
 */
void *
mempool_malloc(size_t size)
{

	/*
	 * It is currently expected that there will be allocation attempts for
	 * sizes larger than our large size, in particular for ICMP ping
	 * replies as described elsewhere.  As such, we cannot print any
	 * warnings here.  For now, refusing these excessive allocations should
	 * not be a problem in practice.
	 */
	if (size > MEMPOOL_LARGE_SIZE)
		return NULL;

	if (size <= MEMPOOL_SMALL_SIZE)
		return mempool_alloc_small();
	else
		return mempool_alloc_large();
}
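
/*
 * To make the size classes concrete with the current numbers from the file
 * comment (approximate, shown for illustration only): a request for a pbuf
 * header plus packet headers (16 + ~76 bytes) falls at or below
 * MEMPOOL_SMALL_SIZE and is served from the small pool; a request for a pbuf
 * header plus MEMPOOL_BUFSIZE of data (16 + 512 bytes) fits MEMPOOL_LARGE_SIZE
 * exactly and is served from the large pool; anything above MEMPOOL_LARGE_SIZE
 * is denied with NULL.
 */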

/*
 * Memory pool wrapper function for free() calls from lwIP.
 */
void
mempool_free(void * ptr)
{
	struct mempool_large_slab *mls;
	struct mempool_large_buf *mlb;
	struct mempool_small_slab *mss;
	struct mempool_small_buf *msb;
	struct mempool_header *mh;
	unsigned int count;

	/*
	 * Get a pointer to the slab header, which is right before the data
	 * area for both large and small buffers.  This pointer is NULL if the
	 * buffer is free, which would indicate that something is very wrong.
	 */
	ptr = (void *)((char *)ptr - sizeof(mh));

	memcpy(&mh, ptr, sizeof(mh));

	if (mh == NULL)
		panic("mempool_free called on unallocated object!");

	/*
	 * If the slab header says that the slab is for small buffers, deal
	 * with that case first.  If we free up the last small buffer of a
	 * dynamically allocated small slab, we also free up the entire small
	 * slab, which is in fact the data area of a large buffer.
	 */
	if (mh->mh_flags & MHF_SMALL) {
		/*
		 * Move the small buffer onto the appropriate small free list.
		 */
		msb = (struct mempool_small_buf *)ptr;

		msb->msb_header2 = mh;
		msb->msb_header = NULL;

		/*
		 * Simple heuristic, unless the buffer is static: favor reuse
		 * of small buffers in containers that are already in use
		 * for other small buffers as well, for consolidation.
		 */
		if (mh->mh_flags & MHF_STATIC)
			TAILQ_INSERT_HEAD(&mempool_small_static_freelist, msb,
			    msb_next);
		else if (mh->mh_inuse > 1)
			TAILQ_INSERT_HEAD(&mempool_small_dynamic_freelist, msb,
			    msb_next);
		else
			TAILQ_INSERT_TAIL(&mempool_small_dynamic_freelist, msb,
			    msb_next);

		assert(mh->mh_inuse > 0);
		mh->mh_inuse--;

		assert(mempool_used_small > 0);
		mempool_used_small--;

		/*
		 * If the small buffer is statically allocated, or it was not
		 * the last allocated small buffer in its containing large
		 * buffer, then we are done.
		 */
		if (mh->mh_inuse > 0 || (mh->mh_flags & MHF_STATIC))
			return;

		/*
		 * Otherwise, free the containing large buffer as well.  First,
		 * remove all its small buffers from the free list.
		 */
		mss = (struct mempool_small_slab *)mh;
		msb = mss->mss_buf;

		for (count = 0; count < MEMPOOL_SMALL_COUNT; count++, msb++) {
			assert(msb->msb_header == NULL);
			assert(msb->msb_header2 == mh);

			TAILQ_REMOVE(&mempool_small_dynamic_freelist, msb,
			    msb_next);
		}

		/* Then, fall through to the large-buffer free code. */
		ptr = (void *)((char *)mh - sizeof(mh));

		memcpy(&mh, ptr, sizeof(mh));

		assert(mh != NULL);
		assert(!(mh->mh_flags & MHF_SMALL));
	}

	/*
	 * Move the large buffer onto the free list of the large slab to which
	 * it belongs.
	 */
	mls = (struct mempool_large_slab *)mh;
	mlb = (struct mempool_large_buf *)ptr;

	mlb->mlb_header2 = &mls->mls_header;
	mlb->mlb_header = NULL;

	LIST_INSERT_HEAD(&mls->mls_free, mlb, mlb_next);

	/*
	 * Adjust accounting for the large slab, which may involve moving it
	 * to another list.
	 */
	assert(mls->mls_header.mh_inuse > 0);
	mls->mls_header.mh_inuse--;

	if (mls->mls_header.mh_inuse == 0) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_empty_slabs, mls, mls_next);

		mls->mls_header.mh_flags &= ~MHF_MARKED;
	} else if (mls->mls_header.mh_inuse == MEMPOOL_LARGE_COUNT - 1) {
		LIST_REMOVE(mls, mls_next);

		LIST_INSERT_HEAD(&mempool_partial_slabs, mls, mls_next);
	}

	assert(mempool_used_large > 0);
	mempool_used_large--;
}

/*
 * Memory pool wrapper function for calloc() calls from lwIP.
 */
void *
mempool_calloc(size_t num, size_t size)
{
	void *ptr;
	size_t total;

	/*
	 * Standard overflow check.  This can be improved, but it doesn't have
	 * to be, because in practice lwIP never calls calloc() anyway.
	 */
	if (num > 0 && size > 0 && (size_t)-1 / size < num)
		return NULL;

	total = num * size;

	if ((ptr = mempool_malloc(total)) == NULL)
		return NULL;

	memset(ptr, 0, total);

	return ptr;
}