1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004, 2005, 5 * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_param.h" 34 35 #include <sys/param.h> 36 #include <sys/malloc.h> 37 #include <sys/types.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/domain.h> 41 #include <sys/eventhandler.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/mutex.h> 45 #include <sys/protosw.h> 46 #include <sys/smp.h> 47 #include <sys/sysctl.h> 48 49 #include <vm/vm.h> 50 #include <vm/vm_extern.h> 51 #include <vm/vm_kern.h> 52 #include <vm/vm_page.h> 53 #include <vm/vm_map.h> 54 #include <vm/uma.h> 55 #include <vm/uma_dbg.h> 56 57 /* 58 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 59 * Zones. 60 * 61 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 62 * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 63 * administrator so desires. 64 * 65 * Mbufs are allocated from a UMA Master Zone called the Mbuf 66 * Zone. 67 * 68 * Additionally, FreeBSD provides a Packet Zone, which it 69 * configures as a Secondary Zone to the Mbuf Master Zone, 70 * thus sharing backend Slab kegs with the Mbuf Master Zone. 71 * 72 * Thus common-case allocations and locking are simplified: 73 * 74 * m_clget() m_getcl() 75 * | | 76 * | .------------>[(Packet Cache)] m_get(), m_gethdr() 77 * | | [ Packet ] | 78 * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 79 * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 80 * | \________ | 81 * [ Cluster Keg ] \ / 82 * | [ Mbuf Keg ] 83 * [ Cluster Slabs ] | 84 * | [ Mbuf Slabs ] 85 * \____________(VM)_________________/ 86 * 87 * 88 * Whenever an object is allocated with uma_zalloc() out of 89 * one of the Zones its _ctor_ function is executed. The same 90 * for any deallocation through uma_zfree() the _dtor_ function 91 * is executed. 92 * 93 * Caches are per-CPU and are filled from the Master Zone. 
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with the
 * _zfini_ functions and freed back to the global memory pool.
 *
 */

/*
 * Per-zone object limits.  They are seeded by tunable_mbinit(), applied to
 * the zones in mbuf_init(), and can be raised at run time through the
 * kern.ipc sysctl handlers below.
 */
int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
    "Maximum real memory allocatable to various mbuf types");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 *
 * Computes the boot-time value of maxmbufmem and of every per-zone limit.
 * Each value may be preset through the tunable of the same name; limits
 * still at zero after the fetch receive a default derived from maxmbufmem.
 * The 'dummy' argument is not used.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	/* The 3/4 ceiling is applied after the fetch, so it caps the tunable. */
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	/* Default: enough 2k clusters to fill 1/4 of maxmbufmem. */
	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	/* Default: enough page size clusters to fill 1/4 of maxmbufmem. */
	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	/* Default: enough 9k clusters to fill 1/6 of maxmbufmem. */
	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	/* Default: enough 16k clusters to fill 1/6 of maxmbufmem. */
	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.  A smaller (or unset) nmbufs
	 * is replaced by the larger of that sum and the number of mbufs
	 * that fit in 1/5 of maxmbufmem.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);

/*
 * Handler for the kern.ipc.nmbclusters sysctl.
 *
 * A read reports the current nmbclusters.  A write that changes the value
 * is accepted only when it raises the limit and nmbufs is at least the sum
 * of the current cluster limits; any other change fails with EINVAL.  On
 * success the value stored is the one returned by uma_zone_set_max() for
 * zone_clust, and the nmbclusters_change event is invoked.
 *
 * NOTE(review): the nmbufs comparison uses the limits in force before the
 * update, not the requested value -- confirm that this is intended.
 */
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	/* Work on a copy so a rejected request leaves the limit untouched. */
	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

/*
 * Handler for the kern.ipc.nmbjumbop sysctl.  Same acceptance rule as
 * sysctl_nmbclusters(): the limit may only grow, and only while nmbufs
 * covers the sum of the current cluster limits.  Applies the new limit to
 * zone_jumbop; no event is invoked.
 */
static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

/*
 * Handler for the kern.ipc.nmbjumbo9 sysctl.  Same acceptance rule as
 * sysctl_nmbclusters(); applies the new limit to zone_jumbo9.
 */
static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

/*
 * Handler for the kern.ipc.nmbjumbo16 sysctl.  Same acceptance rule as
 * sysctl_nmbclusters(); applies the new limit to zone_jumbo16.
 */
static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

/*
 * Handler for the kern.ipc.nmbufs sysctl.
 *
 * Unlike the cluster handlers there is no comparison against the cluster
 * limits: any write that raises nmbufs is accepted, a write that lowers it
 * fails with EINVAL.  On success the value stored is the one returned by
 * uma_zone_set_max() for zone_mbuf, and the nmbufs_change event is invoked.
 */
static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

/*
 * Zones from which we allocate.  All of them are created in mbuf_init().
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);
static void	mb_reclaim(uma_zone_t, int);
static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);

/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);

/*
 * Initialize FreeBSD Network buffer allocation.
 *
 * Creates the mbuf, cluster, packet and jumbo zones.  For every zone
 * except the packet zone, a non-zero limit computed by tunable_mbinit()
 * is applied, a "limit reached" warning is set, and mb_reclaim() is
 * installed as the action to take when the limit is hit.  The 'dummy'
 * argument is not used.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	/* uma_zone_set_max() returns the limit it actually applied. */
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
	uma_zone_set_maxaction(zone_mbuf, mb_reclaim);

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
	uma_zone_set_maxaction(zone_clust, mb_reclaim);

	/* The packet zone is a secondary zone layered on zone_mbuf. */
	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make jumbo frame zone too.  Page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
	uma_zone_set_maxaction(zone_jumbop, mb_reclaim);

	/* The 9k and 16k zones get their slabs from mbuf_jumbo_alloc(). */
	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
	uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
	uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
387 */ 388 static void * 389 mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) 390 { 391 392 /* Inform UMA that this allocator uses kernel_map/object. */ 393 *flags = UMA_SLAB_KERNEL; 394 return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 395 (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 396 } 397 398 /* 399 * Constructor for Mbuf master zone. 400 * 401 * The 'arg' pointer points to a mb_args structure which 402 * contains call-specific information required to support the 403 * mbuf allocation API. See mbuf.h. 404 */ 405 static int 406 mb_ctor_mbuf(void *mem, int size, void *arg, int how) 407 { 408 struct mbuf *m; 409 struct mb_args *args; 410 int error; 411 int flags; 412 short type; 413 414 #ifdef INVARIANTS 415 trash_ctor(mem, size, arg, how); 416 #endif 417 args = (struct mb_args *)arg; 418 type = args->type; 419 420 /* 421 * The mbuf is initialized later. The caller has the 422 * responsibility to set up any MAC labels too. 423 */ 424 if (type == MT_NOINIT) 425 return (0); 426 427 m = (struct mbuf *)mem; 428 flags = args->flags; 429 MPASS((flags & M_NOFREE) == 0); 430 431 error = m_init(m, how, type, flags); 432 433 return (error); 434 } 435 436 /* 437 * The Mbuf master zone destructor. 438 */ 439 static void 440 mb_dtor_mbuf(void *mem, int size, void *arg) 441 { 442 struct mbuf *m; 443 unsigned long flags; 444 445 m = (struct mbuf *)mem; 446 flags = (unsigned long)arg; 447 448 KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 449 if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) 450 m_tag_delete_chain(m, NULL); 451 #ifdef INVARIANTS 452 trash_dtor(mem, size, arg); 453 #endif 454 } 455 456 /* 457 * The Mbuf Packet zone destructor. 
458 */ 459 static void 460 mb_dtor_pack(void *mem, int size, void *arg) 461 { 462 struct mbuf *m; 463 464 m = (struct mbuf *)mem; 465 if ((m->m_flags & M_PKTHDR) != 0) 466 m_tag_delete_chain(m, NULL); 467 468 /* Make sure we've got a clean cluster back. */ 469 KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 470 KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 471 KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 472 KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 473 KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 474 KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 475 KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 476 #ifdef INVARIANTS 477 trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 478 #endif 479 /* 480 * If there are processes blocked on zone_clust, waiting for pages 481 * to be freed up, * cause them to be woken up by draining the 482 * packet zone. We are exposed to a race here * (in the check for 483 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 484 * is deliberate. We don't want to acquire the zone lock for every 485 * mbuf free. 486 */ 487 if (uma_zone_exhausted_nolock(zone_clust)) 488 zone_drain(zone_pack); 489 } 490 491 /* 492 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 493 * 494 * Here the 'arg' pointer points to the Mbuf which we 495 * are configuring cluster storage for. If 'arg' is 496 * empty we allocate just the cluster without setting 497 * the mbuf to it. See mbuf.h. 
498 */ 499 static int 500 mb_ctor_clust(void *mem, int size, void *arg, int how) 501 { 502 struct mbuf *m; 503 504 #ifdef INVARIANTS 505 trash_ctor(mem, size, arg, how); 506 #endif 507 m = (struct mbuf *)arg; 508 if (m != NULL) { 509 m->m_ext.ext_buf = (char *)mem; 510 m->m_data = m->m_ext.ext_buf; 511 m->m_flags |= M_EXT; 512 m->m_ext.ext_free = NULL; 513 m->m_ext.ext_arg1 = NULL; 514 m->m_ext.ext_arg2 = NULL; 515 m->m_ext.ext_size = size; 516 m->m_ext.ext_type = m_gettype(size); 517 m->m_ext.ext_flags = EXT_FLAG_EMBREF; 518 m->m_ext.ext_count = 1; 519 } 520 521 return (0); 522 } 523 524 /* 525 * The Packet secondary zone's init routine, executed on the 526 * object's transition from mbuf keg slab to zone cache. 527 */ 528 static int 529 mb_zinit_pack(void *mem, int size, int how) 530 { 531 struct mbuf *m; 532 533 m = (struct mbuf *)mem; /* m is virgin. */ 534 if (uma_zalloc_arg(zone_clust, m, how) == NULL || 535 m->m_ext.ext_buf == NULL) 536 return (ENOMEM); 537 m->m_ext.ext_type = EXT_PACKET; /* Override. */ 538 #ifdef INVARIANTS 539 trash_init(m->m_ext.ext_buf, MCLBYTES, how); 540 #endif 541 return (0); 542 } 543 544 /* 545 * The Packet secondary zone's fini routine, executed on the 546 * object's transition from zone cache to keg slab. 547 */ 548 static void 549 mb_zfini_pack(void *mem, int size) 550 { 551 struct mbuf *m; 552 553 m = (struct mbuf *)mem; 554 #ifdef INVARIANTS 555 trash_fini(m->m_ext.ext_buf, MCLBYTES); 556 #endif 557 uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 558 #ifdef INVARIANTS 559 trash_dtor(mem, size, NULL); 560 #endif 561 } 562 563 /* 564 * The "packet" keg constructor. 
 *
 * 'arg' points to a struct mb_args giving the requested mbuf type and
 * flags.  The cluster was attached earlier by mb_zinit_pack(), so only
 * the mbuf header is (re)initialized here.  Returns the m_init() result.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error, flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;
	MPASS((flags & M_NOFREE) == 0);

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif

	error = m_init(m, how, type, flags);

	/*
	 * m_ext is already initialized.  Point the data area at the
	 * cluster and set M_EXT on top of the requested flags; this is
	 * done whether or not m_init() reported an error.
	 */
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (flags | M_EXT);

	return (error);
}

/*
 * This is the protocol drain routine.  Called by UMA whenever any of the
 * mbuf zones is close to its limit.  It is also registered for the
 * vm_lowmem event in mbuf_init().
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 *
 * Both arguments are unused.
 */
static void
mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);

	/* Call the pr_drain hook of every protocol in every domain. */
	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}

/*
 * Clean up after mbufs with M_EXT storage attached to them if the
 * reference count hits 1.
 *
 * Drops this mbuf's reference on its external storage.  When it was the
 * last reference, the storage is released according to its ext_type,
 * together with 'mref', the mbuf that holds the reference counter.  The
 * mbuf 'm' itself is freed as well unless it is marked M_NOFREE.
 */
void
mb_free_ext(struct mbuf *m)
{
	volatile u_int *refcnt;
	struct mbuf *mref;
	int freembuf;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));

	/* See if this is the mbuf that holds the embedded refcount. */
	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = &m->m_ext.ext_count;
		mref = m;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
		/* Recover the mbuf whose m_ext.ext_count is the counter. */
		mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
	}

	/*
	 * Check if the header is embedded in the cluster.  It is
	 * important that we can't touch any of the mbuf fields
	 * after we have freed the external storage, since mbuf
	 * could have been embedded in it.  For now, the mbufs
	 * embedded into the cluster are always of type EXT_EXTREF,
	 * and for this type we won't free the mref.
	 */
	if (m->m_flags & M_NOFREE) {
		freembuf = 0;
		KASSERT(m->m_ext.ext_type == EXT_EXTREF,
		    ("%s: no-free mbuf %p has wrong type", __func__, m));
	} else
		freembuf = 1;

	/*
	 * Free attached storage if this mbuf is the only reference to it.
	 * A plain read of 1 short-circuits the atomic operation and leaves
	 * the counter at 1; otherwise the counter is decremented and we
	 * are the last holder only if its previous value was 1.
	 */
	if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:
			/*
			 * The packet zone is special: the mbuf goes back with
			 * its cluster still attached.  If the atomic path
			 * above took the counter to 0, put it back to 1 --
			 * presumably so the cached pair is handed out again
			 * with a valid count (confirm against zone_pack users).
			 */
			if (*refcnt == 0)
				*refcnt = 1;
			uma_zfree(zone_pack, mref);
			break;
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_SFBUF:
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			/* Externally managed storage: use the owner's free hook. */
			KASSERT(mref->m_ext.ext_free != NULL,
				("%s: ext_free not set", __func__));
			mref->m_ext.ext_free(mref);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_EXTREF:
			/* The free hook is called on 'm'; mref is not freed here. */
			KASSERT(m->m_ext.ext_free != NULL,
				("%s: ext_free not set", __func__));
			m->m_ext.ext_free(m);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
				("%s: unknown ext_type", __func__));
		}
	}

	/*
	 * 'm' was only a reference holder (the counter lives in another
	 * mbuf), so it still has to be released on its own.
	 */
	if (freembuf && m != mref)
		uma_zfree(zone_mbuf, m);
}

/*
 * Official mbuf(9) allocation KPI for stack and drivers:
 *
 * m_get()	- a single mbuf without any attachments, sys/mbuf.h.
 * m_gethdr()	- a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
 * m_getcl()	- an mbuf + 2k cluster, sys/mbuf.h.
 * m_clget()	- attach cluster to already allocated mbuf.
 * m_cljget()	- attach jumbo cluster to already allocated mbuf.
 * m_get2()	- allocate minimum mbuf that would fit size argument.
 * m_getm2()	- allocate a chain of mbufs/clusters.
 * m_extadd()	- attach external cluster to mbuf.
 *
 * m_free()	- free single mbuf with its tags and ext, sys/mbuf.h.
 * m_freem()	- free chain of mbufs.
717 */ 718 719 int 720 m_clget(struct mbuf *m, int how) 721 { 722 723 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 724 __func__, m)); 725 m->m_ext.ext_buf = (char *)NULL; 726 uma_zalloc_arg(zone_clust, m, how); 727 /* 728 * On a cluster allocation failure, drain the packet zone and retry, 729 * we might be able to loosen a few clusters up on the drain. 730 */ 731 if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { 732 zone_drain(zone_pack); 733 uma_zalloc_arg(zone_clust, m, how); 734 } 735 MBUF_PROBE2(m__clget, m, how); 736 return (m->m_flags & M_EXT); 737 } 738 739 /* 740 * m_cljget() is different from m_clget() as it can allocate clusters without 741 * attaching them to an mbuf. In that case the return value is the pointer 742 * to the cluster of the requested size. If an mbuf was specified, it gets 743 * the cluster attached to it and the return value can be safely ignored. 744 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 745 */ 746 void * 747 m_cljget(struct mbuf *m, int how, int size) 748 { 749 uma_zone_t zone; 750 void *retval; 751 752 if (m != NULL) { 753 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 754 __func__, m)); 755 m->m_ext.ext_buf = NULL; 756 } 757 758 zone = m_getzone(size); 759 retval = uma_zalloc_arg(zone, m, how); 760 761 MBUF_PROBE4(m__cljget, m, how, size, retval); 762 763 return (retval); 764 } 765 766 /* 767 * m_get2() allocates minimum mbuf that would fit "size" argument. 
768 */ 769 struct mbuf * 770 m_get2(int size, int how, short type, int flags) 771 { 772 struct mb_args args; 773 struct mbuf *m, *n; 774 775 args.flags = flags; 776 args.type = type; 777 778 if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) 779 return (uma_zalloc_arg(zone_mbuf, &args, how)); 780 if (size <= MCLBYTES) 781 return (uma_zalloc_arg(zone_pack, &args, how)); 782 783 if (size > MJUMPAGESIZE) 784 return (NULL); 785 786 m = uma_zalloc_arg(zone_mbuf, &args, how); 787 if (m == NULL) 788 return (NULL); 789 790 n = uma_zalloc_arg(zone_jumbop, m, how); 791 if (n == NULL) { 792 uma_zfree(zone_mbuf, m); 793 return (NULL); 794 } 795 796 return (m); 797 } 798 799 /* 800 * m_getjcl() returns an mbuf with a cluster of the specified size attached. 801 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 802 */ 803 struct mbuf * 804 m_getjcl(int how, short type, int flags, int size) 805 { 806 struct mb_args args; 807 struct mbuf *m, *n; 808 uma_zone_t zone; 809 810 if (size == MCLBYTES) 811 return m_getcl(how, type, flags); 812 813 args.flags = flags; 814 args.type = type; 815 816 m = uma_zalloc_arg(zone_mbuf, &args, how); 817 if (m == NULL) 818 return (NULL); 819 820 zone = m_getzone(size); 821 n = uma_zalloc_arg(zone, m, how); 822 if (n == NULL) { 823 uma_zfree(zone_mbuf, m); 824 return (NULL); 825 } 826 return (m); 827 } 828 829 /* 830 * Allocate a given length worth of mbufs and/or clusters (whatever fits 831 * best) and return a pointer to the top of the allocated chain. If an 832 * existing mbuf chain is provided, then we will append the new chain 833 * to the existing one but still return the top of the newly allocated 834 * chain. 835 */ 836 struct mbuf * 837 m_getm2(struct mbuf *m, int len, int how, short type, int flags) 838 { 839 struct mbuf *mb, *nm = NULL, *mtail = NULL; 840 841 KASSERT(len >= 0, ("%s: len is < 0", __func__)); 842 843 /* Validate flags. 
*/ 844 flags &= (M_PKTHDR | M_EOR); 845 846 /* Packet header mbuf must be first in chain. */ 847 if ((flags & M_PKTHDR) && m != NULL) 848 flags &= ~M_PKTHDR; 849 850 /* Loop and append maximum sized mbufs to the chain tail. */ 851 while (len > 0) { 852 if (len > MCLBYTES) 853 mb = m_getjcl(how, type, (flags & M_PKTHDR), 854 MJUMPAGESIZE); 855 else if (len >= MINCLSIZE) 856 mb = m_getcl(how, type, (flags & M_PKTHDR)); 857 else if (flags & M_PKTHDR) 858 mb = m_gethdr(how, type); 859 else 860 mb = m_get(how, type); 861 862 /* Fail the whole operation if one mbuf can't be allocated. */ 863 if (mb == NULL) { 864 if (nm != NULL) 865 m_freem(nm); 866 return (NULL); 867 } 868 869 /* Book keeping. */ 870 len -= M_SIZE(mb); 871 if (mtail != NULL) 872 mtail->m_next = mb; 873 else 874 nm = mb; 875 mtail = mb; 876 flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 877 } 878 if (flags & M_EOR) 879 mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ 880 881 /* If mbuf was supplied, append new chain to the end of it. */ 882 if (m != NULL) { 883 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) 884 ; 885 mtail->m_next = nm; 886 mtail->m_flags &= ~M_EOR; 887 } else 888 m = nm; 889 890 return (m); 891 } 892 893 /*- 894 * Configure a provided mbuf to refer to the provided external storage 895 * buffer and setup a reference count for said buffer. 896 * 897 * Arguments: 898 * mb The existing mbuf to which to attach the provided buffer. 899 * buf The address of the provided external storage buffer. 900 * size The size of the provided buffer. 901 * freef A pointer to a routine that is responsible for freeing the 902 * provided external storage buffer. 903 * args A pointer to an argument structure (of any type) to be passed 904 * to the provided freef routine (may be NULL). 905 * flags Any other flags to be passed to the provided mbuf. 906 * type The type that the external storage buffer should be 907 * labeled with. 908 * 909 * Returns: 910 * Nothing. 
911 */ 912 void 913 m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef, 914 void *arg1, void *arg2, int flags, int type) 915 { 916 917 KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); 918 919 mb->m_flags |= (M_EXT | flags); 920 mb->m_ext.ext_buf = buf; 921 mb->m_data = mb->m_ext.ext_buf; 922 mb->m_ext.ext_size = size; 923 mb->m_ext.ext_free = freef; 924 mb->m_ext.ext_arg1 = arg1; 925 mb->m_ext.ext_arg2 = arg2; 926 mb->m_ext.ext_type = type; 927 928 if (type != EXT_EXTREF) { 929 mb->m_ext.ext_count = 1; 930 mb->m_ext.ext_flags = EXT_FLAG_EMBREF; 931 } else 932 mb->m_ext.ext_flags = 0; 933 } 934 935 /* 936 * Free an entire chain of mbufs and associated external buffers, if 937 * applicable. 938 */ 939 void 940 m_freem(struct mbuf *mb) 941 { 942 943 MBUF_PROBE1(m__freem, mb); 944 while (mb != NULL) 945 mb = m_free(mb); 946 } 947