/*
 * (MPSAFE)
 *
 * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $
 */

#include "opt_param.h"
#include "opt_mbuf_stress_test.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/objcache.h>
#include <sys/tree.h>
#include <sys/protosw.h>
#include <sys/uio.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/globaldata.h>

#include <sys/spinlock2.h>

#include <machine/atomic.h>
#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifdef INVARIANTS
#include <machine/cpu.h>
#endif

/*
 * mbuf cluster meta-data
 */
struct mbcluster {
	int32_t	mcl_refs;
	void	*mcl_data;
};

/*
 * mbuf tracking for debugging purposes
 */
#ifdef MBUF_DEBUG

static MALLOC_DEFINE(M_MTRACK, "mtrack", "mtrack");

struct mbtrack;
RB_HEAD(mbuf_rb_tree, mbtrack);
RB_PROTOTYPE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *);

struct mbtrack {
	RB_ENTRY(mbtrack) rb_node;
	int	trackid;
	struct mbuf *m;
};

static int
mbtrack_cmp(struct mbtrack *mb1, struct mbtrack *mb2)
{
	if (mb1->m < mb2->m)
		return (-1);
	if (mb1->m > mb2->m)
		return (1);
	return (0);
}

RB_GENERATE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *, m);

struct mbuf_rb_tree	mbuf_track_root;
static struct spinlock	mbuf_track_spin =
	SPINLOCK_INITIALIZER(mbuf_track_spin, "mbuf_track_spin");

static void
mbuftrack(struct mbuf *m)
{
	struct mbtrack *mbt;

	mbt = kmalloc(sizeof(*mbt), M_MTRACK, M_INTWAIT|M_ZERO);
	spin_lock(&mbuf_track_spin);
	mbt->m = m;
	if (mbuf_rb_tree_RB_INSERT(&mbuf_track_root, mbt)) {
		spin_unlock(&mbuf_track_spin);
		panic("%s: mbuf %p already being tracked", __func__, m);
	}
	spin_unlock(&mbuf_track_spin);
}

static void
mbufuntrack(struct mbuf *m)
{
	struct mbtrack *mbt;

	spin_lock(&mbuf_track_spin);
	mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
	if (mbt == NULL) {
		spin_unlock(&mbuf_track_spin);
		panic("%s: mbuf %p was not tracked", __func__, m);
	} else {
		mbuf_rb_tree_RB_REMOVE(&mbuf_track_root, mbt);
		spin_unlock(&mbuf_track_spin);
		kfree(mbt, M_MTRACK);
	}
}

void
mbuftrackid(struct mbuf *m, int trackid)
{
	struct mbtrack *mbt;
	struct mbuf *n;

	spin_lock(&mbuf_track_spin);
	while (m) {
		n = m->m_nextpkt;
		while (m) {
			mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
			if (mbt == NULL) {
				spin_unlock(&mbuf_track_spin);
				panic("%s: mbuf %p not tracked", __func__, m);
			}
			mbt->trackid = trackid;
			m = m->m_next;
		}
		m = n;
	}
	spin_unlock(&mbuf_track_spin);
}

static int
mbuftrack_callback(struct mbtrack *mbt, void *arg)
{
	struct sysctl_req *req = arg;
	char buf[64];
	int error;

	ksnprintf(buf, sizeof(buf), "mbuf %p track %d\n", mbt->m, mbt->trackid);

	spin_unlock(&mbuf_track_spin);
	error = SYSCTL_OUT(req, buf, strlen(buf));
	spin_lock(&mbuf_track_spin);
	if (error)
		return (-error);
	return (0);
}

static int
mbuftrack_show(SYSCTL_HANDLER_ARGS)
{
	int error;
	spin_lock(&mbuf_track_spin);
	error = mbuf_rb_tree_RB_SCAN(&mbuf_track_root, NULL,
				     mbuftrack_callback, req);
	spin_unlock(&mbuf_track_spin);
	return (-error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, showmbufs, CTLFLAG_RD|CTLTYPE_STRING,
	    0, 0, mbuftrack_show, "A", "Show all in-use mbufs");

#else

#define mbuftrack(m)
#define mbufuntrack(m)

#endif

static void mbinit(void *);
SYSINIT(mbuf, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, mbinit, NULL);

struct mbtypes_stat {
	u_long	stats[MT_NTYPES];
} __cachealign;

static struct mbtypes_stat	mbtypes[SMP_MAXCPU];

static struct mbstat mbstat[SMP_MAXCPU] __cachealign;
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	m_defragpackets;
int	m_defragbytes;
int	m_defraguseless;
int	m_defragfailure;
#ifdef MBUF_STRESS_TEST
int	m_defragrandomfailures;
#endif

struct objcache *mbuf_cache, *mbufphdr_cache;
struct objcache *mclmeta_cache, *mjclmeta_cache;
struct objcache *mbufcluster_cache, *mbufphdrcluster_cache;
struct objcache *mbufjcluster_cache, *mbufphdrjcluster_cache;

struct lock	mbupdate_lk = LOCK_INITIALIZER("mbupdate", 0, LK_CANRECURSE);

int		nmbclusters;
static int	nmbjclusters;
int		nmbufs;

static int	mjclph_cachefrac;
static int	mjcl_cachefrac;
static int	mclph_cachefrac;
static int	mcl_cachefrac;

SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
	   &max_linkhdr, 0, "Max size of a link-level header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
	   &max_protohdr, 0, "Max size of a protocol header");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0,
	   "Max size of link+protocol headers");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
	   &max_datalen, 0, "Max data payload size without headers");

static int do_mbstat(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT|CTLFLAG_RD,
	    0, 0, do_mbstat, "S,mbstat", "mbuf usage statistics");

static int do_mbtypes(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtypes, CTLTYPE_ULONG|CTLFLAG_RD,
	    0, 0, do_mbtypes, "LU", "");
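
/*
 * Note (illustrative, not from the original source): the handlers below
 * fold the per-cpu counters into a single snapshot for userland, which
 * typically reaches them through
 *
 *	sysctl kern.ipc.mbstat
 *
 * or netstat -m.  Summing without a lock is acceptable because the
 * counters are advisory statistics; a slightly torn snapshot is
 * harmless.
 */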
static int
do_mbstat(SYSCTL_HANDLER_ARGS)
{
	struct mbstat mbstat_total;
	struct mbstat *mbstat_totalp;
	int i;

	bzero(&mbstat_total, sizeof(mbstat_total));
	mbstat_totalp = &mbstat_total;

	for (i = 0; i < ncpus; i++) {
		mbstat_total.m_mbufs += mbstat[i].m_mbufs;
		mbstat_total.m_clusters += mbstat[i].m_clusters;
		mbstat_total.m_jclusters += mbstat[i].m_jclusters;
		mbstat_total.m_clfree += mbstat[i].m_clfree;
		mbstat_total.m_drops += mbstat[i].m_drops;
		mbstat_total.m_wait += mbstat[i].m_wait;
		mbstat_total.m_drain += mbstat[i].m_drain;
		mbstat_total.m_mcfail += mbstat[i].m_mcfail;
		mbstat_total.m_mpfail += mbstat[i].m_mpfail;
	}

	/*
	 * The following fields are not cumulative fields so just
	 * get their values once.
	 */
	mbstat_total.m_msize = mbstat[0].m_msize;
	mbstat_total.m_mclbytes = mbstat[0].m_mclbytes;
	mbstat_total.m_minclsize = mbstat[0].m_minclsize;
	mbstat_total.m_mlen = mbstat[0].m_mlen;
	mbstat_total.m_mhlen = mbstat[0].m_mhlen;

	return sysctl_handle_opaque(oidp, mbstat_totalp,
				    sizeof(mbstat_total), req);
}

static int
do_mbtypes(SYSCTL_HANDLER_ARGS)
{
	u_long totals[MT_NTYPES];
	int i, j;

	for (i = 0; i < MT_NTYPES; i++)
		totals[i] = 0;

	for (i = 0; i < ncpus; i++) {
		for (j = 0; j < MT_NTYPES; j++)
			totals[j] += mbtypes[i].stats[j];
	}

	return sysctl_handle_opaque(oidp, totals, sizeof(totals), req);
}

/*
 * These variables may be set as boot-time tunables or changed live.
 * Setting these values too low can deadlock your network.  Network
 * interfaces may also adjust nmbclusters and/or nmbjclusters to account
 * for preloading the hardware rings.
 */
static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS);
static int sysctl_nmbjclusters(SYSCTL_HANDLER_ARGS);
static int sysctl_nmbufs(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLTYPE_INT | CTLFLAG_RW,
	    0, 0, sysctl_nmbclusters, "I",
	    "Maximum number of mbuf clusters available");
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjclusters, CTLTYPE_INT | CTLFLAG_RW,
	    0, 0, sysctl_nmbjclusters, "I",
	    "Maximum number of mbuf jclusters available");
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT | CTLFLAG_RW,
	    0, 0, sysctl_nmbufs, "I",
	    "Maximum number of mbufs available");

SYSCTL_INT(_kern_ipc, OID_AUTO, mjclph_cachefrac, CTLFLAG_RD,
	   &mjclph_cachefrac, 0,
	   "Fraction of cacheable mbuf jclusters w/ pkthdr");
SYSCTL_INT(_kern_ipc, OID_AUTO, mjcl_cachefrac, CTLFLAG_RD,
	   &mjcl_cachefrac, 0,
	   "Fraction of cacheable mbuf jclusters");
SYSCTL_INT(_kern_ipc, OID_AUTO, mclph_cachefrac, CTLFLAG_RD,
	   &mclph_cachefrac, 0,
	   "Fraction of cacheable mbuf clusters w/ pkthdr");
SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_cachefrac, CTLFLAG_RD,
	   &mcl_cachefrac, 0, "Fraction of cacheable mbuf clusters");

SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
	   &m_defragpackets, 0, "Number of defragment packets");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
	   &m_defragbytes, 0, "Number of defragment bytes");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
	   &m_defraguseless, 0,
	   "Number of useless defragment mbuf chain operations");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
	   &m_defragfailure, 0,
	   "Number of failed defragment mbuf chain operations");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
	   &m_defragrandomfailures, 0, "");
#endif

static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl");
static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta");

static void m_reclaim (void);
static void m_mclref(void *arg);
static void m_mclfree(void *arg);
static void m_mjclfree(void *arg);

static void mbupdatelimits(void);

/*
 * Generally scale default mbufs to maxproc.
 *
 * NOTE: Default NMBUFS must take into account a possible DOS attack
 *	 using fd passing on unix domain sockets.
 */
#ifndef NMBCLUSTERS
#define NMBCLUSTERS	(512 + maxproc * 4)
#endif
#ifndef BASE_CACHEFRAC
#define BASE_CACHEFRAC	16
#endif
#ifndef MJCLPH_CACHEFRAC
#define MJCLPH_CACHEFRAC (BASE_CACHEFRAC * 2)
#endif
#ifndef MJCL_CACHEFRAC
#define MJCL_CACHEFRAC	(BASE_CACHEFRAC * 2)
#endif
#ifndef MCLPH_CACHEFRAC
#define MCLPH_CACHEFRAC	(BASE_CACHEFRAC * 2)
#endif
#ifndef MCL_CACHEFRAC
#define MCL_CACHEFRAC	(BASE_CACHEFRAC * 2)
#endif
#ifndef NMBJCLUSTERS
#define NMBJCLUSTERS	(NMBCLUSTERS / 4)
#endif
#ifndef NMBUFS
#define NMBUFS		(nmbclusters / 2 + maxfiles)
#endif

#define NMBCLUSTERS_MIN	(NMBCLUSTERS / 2)
#define NMBJCLUSTERS_MIN (NMBJCLUSTERS / 2)
#define NMBUFS_MIN	(NMBUFS / 2)

/*
 * Perform sanity checks of tunables declared above.
 */
static void
tunable_mbinit(void *dummy __unused)
{
	/*
	 * This has to be done before VM init.
	 */
	nmbclusters = NMBCLUSTERS;
	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	mjclph_cachefrac = MJCLPH_CACHEFRAC;
	TUNABLE_INT_FETCH("kern.ipc.mjclph_cachefrac", &mjclph_cachefrac);
	mjcl_cachefrac = MJCL_CACHEFRAC;
	TUNABLE_INT_FETCH("kern.ipc.mjcl_cachefrac", &mjcl_cachefrac);
	mclph_cachefrac = MCLPH_CACHEFRAC;
	TUNABLE_INT_FETCH("kern.ipc.mclph_cachefrac", &mclph_cachefrac);
	mcl_cachefrac = MCL_CACHEFRAC;
	TUNABLE_INT_FETCH("kern.ipc.mcl_cachefrac", &mcl_cachefrac);

	/*
	 * WARNING! each mcl cache feeds two mbuf caches, so the minimum
	 *	    cachefrac is 2.  For safety, use 3.
	 */
	if (mjclph_cachefrac < 3)
		mjclph_cachefrac = 3;
	if (mjcl_cachefrac < 3)
		mjcl_cachefrac = 3;
	if (mclph_cachefrac < 3)
		mclph_cachefrac = 3;
	if (mcl_cachefrac < 3)
		mcl_cachefrac = 3;

	nmbjclusters = NMBJCLUSTERS;
	TUNABLE_INT_FETCH("kern.ipc.nmbjclusters", &nmbjclusters);

	nmbufs = NMBUFS;
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);

	/* Sanity checks */
	if (nmbufs < nmbclusters * 2)
		nmbufs = nmbclusters * 2;
}
SYSINIT(tunable_mbinit, SI_BOOT1_TUNABLES, SI_ORDER_ANY,
	tunable_mbinit, NULL);

static void
mbinclimit(int *limit, int inc, int minlim)
{
	int new_limit;

	lockmgr(&mbupdate_lk, LK_EXCLUSIVE);

	new_limit = *limit + inc;
	if (new_limit < minlim)
		new_limit = minlim;

	if (*limit != new_limit) {
		*limit = new_limit;
		mbupdatelimits();
	}

	lockmgr(&mbupdate_lk, LK_RELEASE);
}

static int
mbsetlimit(int *limit, int new_limit, int minlim)
{
	if (new_limit < minlim)
		return EINVAL;

	lockmgr(&mbupdate_lk, LK_EXCLUSIVE);
	mbinclimit(limit, new_limit - *limit, minlim);
	lockmgr(&mbupdate_lk, LK_RELEASE);
	return 0;
}

static int
sysctl_mblimit(SYSCTL_HANDLER_ARGS, int *limit, int minlim)
{
	int error, value;

	value = *limit;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return error;

	return mbsetlimit(limit, value, minlim);
}
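
/*
 * Example (illustrative, not part of the original file): the limits
 * below can be seeded at boot from the loader, e.g. in
 * /boot/loader.conf:
 *
 *	kern.ipc.nmbclusters="131072"
 *
 * or adjusted on a running system:
 *
 *	sysctl kern.ipc.nmbclusters=131072
 *
 * Values below the *_MIN floors are rejected with EINVAL by
 * mbsetlimit().
 */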
/*
 * Sysctl support to update nmbclusters, nmbjclusters, and nmbufs.
 */
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	return sysctl_mblimit(oidp, arg1, arg2, req, &nmbclusters,
			      NMBCLUSTERS_MIN);
}

static int
sysctl_nmbjclusters(SYSCTL_HANDLER_ARGS)
{
	return sysctl_mblimit(oidp, arg1, arg2, req, &nmbjclusters,
			      NMBJCLUSTERS_MIN);
}

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	return sysctl_mblimit(oidp, arg1, arg2, req, &nmbufs, NMBUFS_MIN);
}

void
mcl_inclimit(int inc)
{
	mbinclimit(&nmbclusters, inc, NMBCLUSTERS_MIN);
}

void
mjcl_inclimit(int inc)
{
	mbinclimit(&nmbjclusters, inc, NMBJCLUSTERS_MIN);
}

void
mb_inclimit(int inc)
{
	mbinclimit(&nmbufs, inc, NMBUFS_MIN);
}
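
/*
 * Usage sketch (hypothetical driver, not from the original source):
 * a NIC that preloads its receive ring with one cluster per descriptor
 * may grow the global limits at attach time and shrink them at detach,
 * so the preloaded buffers do not eat into the system-wide budget:
 *
 *	mcl_inclimit(sc->rx_ndesc);	(attach)
 *	...
 *	mcl_inclimit(-sc->rx_ndesc);	(detach)
 *
 * where sc->rx_ndesc is this hypothetical driver's ring size.
 */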
/*
 * The mbuf object cache only guarantees that m_next and m_nextpkt are
 * NULL and that m_data points to the beginning of the data area.  In
 * particular, m_len and m_pkthdr.len are uninitialized.  It is the
 * responsibility of the caller to initialize those fields before use.
 */
static __inline boolean_t
mbuf_ctor(void *obj, void *private __unused, int ocflags __unused)
{
	struct mbuf *m = obj;

	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (TRUE);
}

/*
 * Initialize the mbuf and the packet header fields.
 */
static boolean_t
mbufphdr_ctor(void *obj, void *private __unused, int ocflags __unused)
{
	struct mbuf *m = obj;

	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR | M_PHCACHE;

	m->m_pkthdr.rcvif = NULL;	/* eliminate XXX JH */
	SLIST_INIT(&m->m_pkthdr.tags);
	m->m_pkthdr.csum_flags = 0;	/* eliminate XXX JH */
	m->m_pkthdr.fw_flags = 0;	/* eliminate XXX JH */

	return (TRUE);
}

/*
 * An mbcluster object consists of a 2K (MCLBYTES) cluster and a refcount.
 */
static boolean_t
mclmeta_ctor(void *obj, void *private __unused, int ocflags)
{
	struct mbcluster *cl = obj;
	void *buf;

	if (ocflags & M_NOWAIT)
		buf = kmalloc(MCLBYTES, M_MBUFCL, M_NOWAIT | M_ZERO);
	else
		buf = kmalloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO);
	if (buf == NULL)
		return (FALSE);
	cl->mcl_refs = 0;
	cl->mcl_data = buf;
	return (TRUE);
}

static boolean_t
mjclmeta_ctor(void *obj, void *private __unused, int ocflags)
{
	struct mbcluster *cl = obj;
	void *buf;

	if (ocflags & M_NOWAIT)
		buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_NOWAIT | M_ZERO);
	else
		buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_INTWAIT | M_ZERO);
	if (buf == NULL)
		return (FALSE);
	cl->mcl_refs = 0;
	cl->mcl_data = buf;
	return (TRUE);
}

static void
mclmeta_dtor(void *obj, void *private __unused)
{
	struct mbcluster *mcl = obj;

	KKASSERT(mcl->mcl_refs == 0);
	kfree(mcl->mcl_data, M_MBUFCL);
}

static void
linkjcluster(struct mbuf *m, struct mbcluster *cl, u_int size)
{
	/*
	 * Add the cluster to the mbuf.  The caller will detect that the
	 * mbuf now has an attached cluster.
	 */
	m->m_ext.ext_arg = cl;
	m->m_ext.ext_buf = cl->mcl_data;
	m->m_ext.ext_ref = m_mclref;
	if (size != MCLBYTES)
		m->m_ext.ext_free = m_mjclfree;
	else
		m->m_ext.ext_free = m_mclfree;
	m->m_ext.ext_size = size;
	atomic_add_int(&cl->mcl_refs, 1);

	m->m_data = m->m_ext.ext_buf;
	m->m_flags |= M_EXT | M_EXT_CLUSTER;
}

static void
linkcluster(struct mbuf *m, struct mbcluster *cl)
{
	linkjcluster(m, cl, MCLBYTES);
}

static boolean_t
mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
{
	struct mbuf *m = obj;
	struct mbcluster *cl;

	mbufphdr_ctor(obj, private, ocflags);
	cl = objcache_get(mclmeta_cache, ocflags);
	if (cl == NULL) {
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (FALSE);
	}
	m->m_flags |= M_CLCACHE;
	linkcluster(m, cl);
	return (TRUE);
}

static boolean_t
mbufphdrjcluster_ctor(void *obj, void *private, int ocflags)
{
	struct mbuf *m = obj;
	struct mbcluster *cl;

	mbufphdr_ctor(obj, private, ocflags);
	cl = objcache_get(mjclmeta_cache, ocflags);
	if (cl == NULL) {
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (FALSE);
	}
	m->m_flags |= M_CLCACHE;
	linkjcluster(m, cl, MJUMPAGESIZE);
	return (TRUE);
}

static boolean_t
mbufcluster_ctor(void *obj, void *private, int ocflags)
{
	struct mbuf *m = obj;
	struct mbcluster *cl;

	mbuf_ctor(obj, private, ocflags);
	cl = objcache_get(mclmeta_cache, ocflags);
	if (cl == NULL) {
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (FALSE);
	}
	m->m_flags |= M_CLCACHE;
	linkcluster(m, cl);
	return (TRUE);
}

static boolean_t
mbufjcluster_ctor(void *obj, void *private, int ocflags)
{
	struct mbuf *m = obj;
	struct mbcluster *cl;

	mbuf_ctor(obj, private, ocflags);
	cl = objcache_get(mjclmeta_cache, ocflags);
	if (cl == NULL) {
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (FALSE);
	}
	m->m_flags |= M_CLCACHE;
	linkjcluster(m, cl, MJUMPAGESIZE);
	return (TRUE);
}

/*
 * Used for both the cluster and cluster PHDR caches.
 *
 * The mbuf may have lost its cluster due to sharing, deal
 * with the situation by checking M_EXT.
 */
static void
mbufcluster_dtor(void *obj, void *private)
{
	struct mbuf *m = obj;
	struct mbcluster *mcl;

	if (m->m_flags & M_EXT) {
		KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0);
		mcl = m->m_ext.ext_arg;
		KKASSERT(mcl->mcl_refs == 1);
		mcl->mcl_refs = 0;
		if (m->m_flags & M_EXT && m->m_ext.ext_size != MCLBYTES)
			objcache_put(mjclmeta_cache, mcl);
		else
			objcache_put(mclmeta_cache, mcl);
	}
}

struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF };
struct objcache_malloc_args mclmeta_malloc_args =
	{ sizeof(struct mbcluster), M_MCLMETA };

static void
mbinit(void *dummy __unused)
{
	int limit, mb_limit, cl_limit, ncl_limit, jcl_limit, i;

	/*
	 * Initialize statistics
	 */
	for (i = 0; i < ncpus; i++) {
		mbstat[i].m_msize = MSIZE;
		mbstat[i].m_mclbytes = MCLBYTES;
		mbstat[i].m_mjumpagesize = MJUMPAGESIZE;
		mbstat[i].m_minclsize = MINCLSIZE;
		mbstat[i].m_mlen = MLEN;
		mbstat[i].m_mhlen = MHLEN;
	}

	/*
	 * Create object caches and save cluster limits, which will
	 * be used to adjust backing kmalloc pools' limit later.
	 */

	mb_limit = cl_limit = 0;

	limit = nmbufs;
	mbuf_cache = objcache_create("mbuf",
	    limit, nmbufs / BASE_CACHEFRAC,
	    mbuf_ctor, NULL, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	limit = nmbufs;
	mbufphdr_cache = objcache_create("mbuf pkthdr",
	    limit, nmbufs / BASE_CACHEFRAC,
	    mbufphdr_ctor, NULL, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	ncl_limit = nmbclusters;
	mclmeta_cache = objcache_create("mbuf cluster",
	    ncl_limit, nmbclusters / BASE_CACHEFRAC,
	    mclmeta_ctor, mclmeta_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);
	cl_limit += ncl_limit;

	jcl_limit = nmbjclusters;
	mjclmeta_cache = objcache_create("mbuf jcluster",
	    jcl_limit, nmbjclusters / BASE_CACHEFRAC,
	    mjclmeta_ctor, mclmeta_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);
	cl_limit += jcl_limit;

	limit = nmbclusters;
	mbufcluster_cache = objcache_create("mbuf+cl",
	    limit, nmbclusters / mcl_cachefrac,
	    mbufcluster_ctor, mbufcluster_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	limit = nmbclusters;
	mbufphdrcluster_cache = objcache_create("mbuf pkthdr+cl",
	    limit, nmbclusters / mclph_cachefrac,
	    mbufphdrcluster_ctor, mbufcluster_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	limit = nmbjclusters;
	mbufjcluster_cache = objcache_create("mbuf+jcl",
	    limit, nmbjclusters / mjcl_cachefrac,
	    mbufjcluster_ctor, mbufcluster_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	limit = nmbjclusters;
	mbufphdrjcluster_cache = objcache_create("mbuf pkthdr+jcl",
	    limit, nmbjclusters / mjclph_cachefrac,
	    mbufphdrjcluster_ctor, mbufcluster_dtor, NULL,
	    objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
	mb_limit += limit;

	/*
	 * Adjust backing kmalloc pools' limit
	 *
	 * NOTE: We raise the limit by another 1/8 to take the effect
	 *	 of loosememuse into account.
	 */
	cl_limit += cl_limit / 8;
	kmalloc_raise_limit(mclmeta_malloc_args.mtype,
			    mclmeta_malloc_args.objsize * (size_t)cl_limit);
	kmalloc_raise_limit(M_MBUFCL,
			    (MCLBYTES * (size_t)ncl_limit) +
			    (MJUMPAGESIZE * (size_t)jcl_limit));

	mb_limit += mb_limit / 8;
	kmalloc_raise_limit(mbuf_malloc_args.mtype,
			    mbuf_malloc_args.objsize * (size_t)mb_limit);
}

/*
 * Adjust mbuf limits after changes have been made
 *
 * Caller must hold mbupdate_lk
 */
static void
mbupdatelimits(void)
{
	int limit, mb_limit, cl_limit, ncl_limit, jcl_limit;

	KASSERT(lockstatus(&mbupdate_lk, curthread) != 0,
	    ("mbupdate_lk is not held"));

	/*
	 * Figure out adjustments to object caches after nmbufs, nmbclusters,
	 * or nmbjclusters has been modified.
	 */
	mb_limit = cl_limit = 0;

	limit = nmbufs;
	objcache_set_cluster_limit(mbuf_cache, limit);
	mb_limit += limit;

	limit = nmbufs;
	objcache_set_cluster_limit(mbufphdr_cache, limit);
	mb_limit += limit;

	ncl_limit = nmbclusters;
	objcache_set_cluster_limit(mclmeta_cache, ncl_limit);
	cl_limit += ncl_limit;

	jcl_limit = nmbjclusters;
	objcache_set_cluster_limit(mjclmeta_cache, jcl_limit);
	cl_limit += jcl_limit;

	limit = nmbclusters;
	objcache_set_cluster_limit(mbufcluster_cache, limit);
	mb_limit += limit;

	limit = nmbclusters;
	objcache_set_cluster_limit(mbufphdrcluster_cache, limit);
	mb_limit += limit;

	limit = nmbjclusters;
	objcache_set_cluster_limit(mbufjcluster_cache, limit);
	mb_limit += limit;

	limit = nmbjclusters;
	objcache_set_cluster_limit(mbufphdrjcluster_cache, limit);
	mb_limit += limit;

	/*
	 * Adjust backing kmalloc pools' limit
	 *
	 * NOTE: We raise the limit by another 1/8 to take the effect
	 *	 of loosememuse into account.
	 */
	cl_limit += cl_limit / 8;
	kmalloc_raise_limit(mclmeta_malloc_args.mtype,
			    mclmeta_malloc_args.objsize * (size_t)cl_limit);
	kmalloc_raise_limit(M_MBUFCL,
			    (MCLBYTES * (size_t)ncl_limit) +
			    (MJUMPAGESIZE * (size_t)jcl_limit));
	mb_limit += mb_limit / 8;
	kmalloc_raise_limit(mbuf_malloc_args.mtype,
			    mbuf_malloc_args.objsize * (size_t)mb_limit);
}

/*
 * Return the number of references to this mbuf's data.  0 is returned
 * if the mbuf is not M_EXT, a reference count is returned if it is
 * M_EXT | M_EXT_CLUSTER, and 99 is returned if it is a special M_EXT.
 */
int
m_sharecount(struct mbuf *m)
{
	switch (m->m_flags & (M_EXT | M_EXT_CLUSTER)) {
	case 0:
		return (0);
	case M_EXT:
		return (99);
	case M_EXT | M_EXT_CLUSTER:
		return (((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs);
	}
	/* NOTREACHED */
	return (0);		/* to shut up compiler */
}

/*
 * change mbuf to new type
 */
void
m_chtype(struct mbuf *m, int type)
{
	struct globaldata *gd = mycpu;

	++mbtypes[gd->gd_cpuid].stats[type];
	--mbtypes[gd->gd_cpuid].stats[m->m_type];
	m->m_type = type;
}

static void
m_reclaim(void)
{
	struct domain *dp;
	struct protosw *pr;

	kprintf("Debug: m_reclaim() called\n");

	SLIST_FOREACH(dp, &domains, dom_next) {
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
			if (pr->pr_drain)
				(*pr->pr_drain)();
		}
	}
	++mbstat[mycpu->gd_cpuid].m_drain;
}

static __inline void
updatestats(struct mbuf *m, int type)
{
	struct globaldata *gd = mycpu;

	m->m_type = type;
	mbuftrack(m);
#ifdef MBUF_DEBUG
	KASSERT(m->m_next == NULL, ("mbuf %p: bad m_next in get", m));
	KASSERT(m->m_nextpkt == NULL, ("mbuf %p: bad m_nextpkt in get", m));
#endif

	++mbtypes[gd->gd_cpuid].stats[type];
	++mbstat[gd->gd_cpuid].m_mbufs;
}

/*
 * Allocate an mbuf.
 */
struct mbuf *
m_get(int how, int type)
{
	struct mbuf *m;
	int ntries = 0;
	int ocf = MB_OCFLAG(how);

retryonce:

	m = objcache_get(mbuf_cache, ocf);

	if (m == NULL) {
		if ((ocf & M_WAITOK) && ntries++ == 0) {
			struct objcache *reclaimlist[] = {
				mbufphdr_cache,
				mbufcluster_cache,
				mbufphdrcluster_cache,
				mbufjcluster_cache,
				mbufphdrjcluster_cache
			};
			const int nreclaims = NELEM(reclaimlist);

			if (!objcache_reclaimlist(reclaimlist, nreclaims))
				m_reclaim();
			goto retryonce;
		}
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (NULL);
	}
#ifdef MBUF_DEBUG
	KASSERT(m->m_data == m->m_dat, ("mbuf %p: bad m_data in get", m));
#endif
	m->m_len = 0;

	updatestats(m, type);
	return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
	struct mbuf *m;
	int ocf = MB_OCFLAG(how);
	int ntries = 0;

retryonce:

	m = objcache_get(mbufphdr_cache, ocf);

	if (m == NULL) {
		if ((ocf & M_WAITOK) && ntries++ == 0) {
			struct objcache *reclaimlist[] = {
				mbuf_cache,
				mbufcluster_cache, mbufphdrcluster_cache,
				mbufjcluster_cache, mbufphdrjcluster_cache
			};
			const int nreclaims = NELEM(reclaimlist);

			if (!objcache_reclaimlist(reclaimlist, nreclaims))
				m_reclaim();
			goto retryonce;
		}
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (NULL);
	}
#ifdef MBUF_DEBUG
	KASSERT(m->m_data == m->m_pktdat, ("mbuf %p: bad m_data in get", m));
#endif
	m->m_len = 0;
	m->m_pkthdr.len = 0;

	updatestats(m, type);
	return (m);
}
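
/*
 * Usage sketch (illustrative, not from the original source): allocation
 * can fail even after the single reclaim retry above, so callers must
 * always check for NULL:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_NOWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	(fill in the data, then set m->m_len and m->m_pkthdr.len)
 */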
/*
 * Get an mbuf (not an mbuf cluster!) and zero it.
 *
 * Deprecated.
 */
struct mbuf *
m_getclr(int how, int type)
{
	struct mbuf *m;

	m = m_get(how, type);
	if (m != NULL)
		bzero(m->m_data, MLEN);
	return (m);
}

static struct mbuf *
m_getcl_cache(int how, short type, int flags, struct objcache *mbclc,
	      struct objcache *mbphclc, u_long *cl_stats)
{
	struct mbuf *m = NULL;
	int ocflags = MB_OCFLAG(how);
	int ntries = 0;

retryonce:

	if (flags & M_PKTHDR)
		m = objcache_get(mbphclc, ocflags);
	else
		m = objcache_get(mbclc, ocflags);

	if (m == NULL) {
		if ((ocflags & M_WAITOK) && ntries++ == 0) {
			struct objcache *reclaimlist[1];

			if (flags & M_PKTHDR)
				reclaimlist[0] = mbclc;
			else
				reclaimlist[0] = mbphclc;
			if (!objcache_reclaimlist(reclaimlist, 1))
				m_reclaim();
			goto retryonce;
		}
		++mbstat[mycpu->gd_cpuid].m_drops;
		return (NULL);
	}

#ifdef MBUF_DEBUG
	KASSERT(m->m_data == m->m_ext.ext_buf,
		("mbuf %p: bad m_data in get", m));
#endif
	m->m_type = type;
	m->m_len = 0;
	m->m_pkthdr.len = 0;	/* just do it unconditionally */

	mbuftrack(m);

	++mbtypes[mycpu->gd_cpuid].stats[type];
	++(*cl_stats);
	return (m);
}

struct mbuf *
m_getjcl(int how, short type, int flags, size_t size)
{
	struct objcache *mbclc, *mbphclc;
	u_long *cl_stats;

	switch (size) {
	case MCLBYTES:
		mbclc = mbufcluster_cache;
		mbphclc = mbufphdrcluster_cache;
		cl_stats = &mbstat[mycpu->gd_cpuid].m_clusters;
		break;

	default:
		mbclc = mbufjcluster_cache;
		mbphclc = mbufphdrjcluster_cache;
		cl_stats = &mbstat[mycpu->gd_cpuid].m_jclusters;
		break;
	}
	return m_getcl_cache(how, type, flags, mbclc, mbphclc, cl_stats);
}

/*
 * Returns an mbuf with an attached cluster.
 * Because many network drivers use this kind of buffer a lot, it is
 * convenient to keep a small pool of free buffers of this kind.
 * Even a small size such as 10 gives about 10% improvement in the
 * forwarding rate in a bridge or router.
 */
struct mbuf *
m_getcl(int how, short type, int flags)
{
	return m_getcl_cache(how, type, flags,
			     mbufcluster_cache, mbufphdrcluster_cache,
			     &mbstat[mycpu->gd_cpuid].m_clusters);
}

/*
 * Allocate a chain of the requested length.
 */
struct mbuf *
m_getc(int len, int how, int type)
{
	struct mbuf *n, *nfirst = NULL, **ntail = &nfirst;
	int nsize;

	while (len > 0) {
		n = m_getl(len, how, type, 0, &nsize);
		if (n == NULL)
			goto failed;
		n->m_len = 0;
		*ntail = n;
		ntail = &n->m_next;
		len -= nsize;
	}
	return (nfirst);

failed:
	m_freem(nfirst);
	return (NULL);
}
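
/*
 * Usage sketch (illustrative, not from the original source): a receive
 * or transmit path that wants a packet-header mbuf with a 2K cluster
 * already attached can allocate both in one shot:
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 *
 * and one backed by a page-sized jumbo cluster:
 *
 *	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
 *
 * Both return NULL on failure, like the other allocators above.
 */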
/*
 * Allocate len-worth of mbufs and/or mbuf clusters (whatever fits best)
 * and return a pointer to the head of the allocated chain.  If m0 is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m0.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 *
 * Deprecated.  Use m_getc() and m_cat() instead.
 */
struct mbuf *
m_getm(struct mbuf *m0, int len, int type, int how)
{
	struct mbuf *nfirst;

	nfirst = m_getc(len, how, type);

	if (m0 != NULL) {
		m_last(m0)->m_next = nfirst;
		return (m0);
	}

	return (nfirst);
}

/*
 * Adds a cluster to a normal mbuf, M_EXT is set on success.
 * Deprecated.  Use m_getcl() instead.
 */
void
m_mclget(struct mbuf *m, int how)
{
	struct mbcluster *mcl;

	KKASSERT((m->m_flags & M_EXT) == 0);
	mcl = objcache_get(mclmeta_cache, MB_OCFLAG(how));
	if (mcl != NULL) {
		linkcluster(m, mcl);
		++mbstat[mycpu->gd_cpuid].m_clusters;
	} else {
		++mbstat[mycpu->gd_cpuid].m_drops;
	}
}

/*
 * Updates to mbcluster must be MPSAFE.  Only an entity which already has
 * a reference to the cluster can ref it, so we are in no danger of
 * racing an add with a subtract.  But the operation must still be atomic
 * since multiple entities may have a reference on the cluster.
 *
 * m_mclfree() is almost the same but it must contend with two entities
 * freeing the cluster at the same time.
 */
static void
m_mclref(void *arg)
{
	struct mbcluster *mcl = arg;

	atomic_add_int(&mcl->mcl_refs, 1);
}

/*
 * When dereferencing a cluster we have to deal with a N->0 race, where
 * N entities free their references simultaneously.  To do this we use
 * atomic_fetchadd_int().
 */
static void
m_mclfree(void *arg)
{
	struct mbcluster *mcl = arg;

	if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1) {
		--mbstat[mycpu->gd_cpuid].m_clusters;
		objcache_put(mclmeta_cache, mcl);
	}
}

static void
m_mjclfree(void *arg)
{
	struct mbcluster *mcl = arg;

	if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1) {
		--mbstat[mycpu->gd_cpuid].m_jclusters;
		objcache_put(mjclmeta_cache, mcl);
	}
}

/*
 * Free a single mbuf and any associated external storage.  The successor,
 * if any, is returned.
 *
 * We do need to check non-first mbufs for m_aux, since some existing
 * code does not call M_PREPEND properly.
 * (example: call to bpf_mtap from drivers)
 */
struct mbuf *
#ifdef MBUF_DEBUG
_m_free(struct mbuf *m, const char *func)
#else
m_free(struct mbuf *m)
#endif
{
	struct mbuf *n;
	struct globaldata *gd = mycpu;

	KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m));
	KASSERT(M_TRAILINGSPACE(m) >= 0, ("overflowed mbuf %p", m));
	--mbtypes[gd->gd_cpuid].stats[m->m_type];

	n = m->m_next;

	/*
	 * Make sure the mbuf is in constructed state before returning it
	 * to the objcache.
	 */
	m->m_next = NULL;
	mbufuntrack(m);
#ifdef MBUF_DEBUG
	m->m_hdr.mh_lastfunc = func;
#endif
#ifdef notyet
	KKASSERT(m->m_nextpkt == NULL);
#else
	if (m->m_nextpkt != NULL) {
		static int afewtimes = 10;

		if (afewtimes-- > 0) {
			kprintf("mfree: m->m_nextpkt != NULL\n");
			print_backtrace(-1);
		}
		m->m_nextpkt = NULL;
	}
#endif
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);		/* eliminate XXX JH */
	}

	m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE);

	/*
	 * Clean the M_PKTHDR state so we can return the mbuf to its original
	 * cache.
	 * This is based on the PHCACHE flag which tells us whether
	 * the mbuf was originally allocated out of a packet-header cache
	 * or a non-packet-header cache.
	 */
	if (m->m_flags & M_PHCACHE) {
		m->m_flags |= M_PKTHDR;
		m->m_pkthdr.rcvif = NULL;	/* eliminate XXX JH */
		m->m_pkthdr.csum_flags = 0;	/* eliminate XXX JH */
		m->m_pkthdr.fw_flags = 0;	/* eliminate XXX JH */
		SLIST_INIT(&m->m_pkthdr.tags);
	}

	/*
	 * Handle remaining flags combinations.  M_CLCACHE tells us whether
	 * the mbuf was originally allocated from a cluster cache or not,
	 * and is totally separate from whether the mbuf is currently
	 * associated with a cluster.
	 */
	switch (m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) {
	case M_CLCACHE | M_EXT | M_EXT_CLUSTER:
		/*
		 * mbuf+cluster cache case.  The mbuf was allocated from the
		 * combined mbuf_cluster cache and can be returned to the
		 * cache if the cluster hasn't been shared.
		 */
		if (m_sharecount(m) == 1) {
			/*
			 * The cluster has not been shared, we can just
			 * reset the data pointer and return the mbuf
			 * to the cluster cache.  Note that the reference
			 * count is left intact (it is still associated with
			 * an mbuf).
			 */
			m->m_data = m->m_ext.ext_buf;
			if ((m->m_flags & M_EXT) &&
			    m->m_ext.ext_size != MCLBYTES) {
				if (m->m_flags & M_PHCACHE)
					objcache_put(mbufphdrjcluster_cache, m);
				else
					objcache_put(mbufjcluster_cache, m);
				--mbstat[mycpu->gd_cpuid].m_jclusters;
			} else {
				if (m->m_flags & M_PHCACHE)
					objcache_put(mbufphdrcluster_cache, m);
				else
					objcache_put(mbufcluster_cache, m);
				--mbstat[mycpu->gd_cpuid].m_clusters;
			}
		} else {
			/*
			 * Hell.  Someone else has a ref on this cluster,
			 * we have to disconnect it which means we can't
			 * put it back into the mbufcluster_cache, we
			 * have to destroy the mbuf.
			 *
			 * Other mbuf references to the cluster will typically
			 * be M_EXT | M_EXT_CLUSTER but without M_CLCACHE.
			 *
			 * XXX we could try to connect another cluster to it.
			 */
			m->m_ext.ext_free(m->m_ext.ext_arg);
			m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
			if (m->m_ext.ext_size == MCLBYTES) {
				if (m->m_flags & M_PHCACHE)
					objcache_dtor(mbufphdrcluster_cache, m);
				else
					objcache_dtor(mbufcluster_cache, m);
			} else {
				if (m->m_flags & M_PHCACHE)
					objcache_dtor(mbufphdrjcluster_cache, m);
				else
					objcache_dtor(mbufjcluster_cache, m);
			}
		}
		break;
	case M_EXT | M_EXT_CLUSTER:
	case M_EXT:
		/*
		 * Normal cluster association case, disconnect the cluster from
		 * the mbuf.  The cluster may or may not be custom.
		 */
		m->m_ext.ext_free(m->m_ext.ext_arg);
		m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
		/* FALLTHROUGH */
	case 0:
		/*
		 * return the mbuf to the mbuf cache.
		 */
		if (m->m_flags & M_PHCACHE) {
			m->m_data = m->m_pktdat;
			objcache_put(mbufphdr_cache, m);
		} else {
			m->m_data = m->m_dat;
			objcache_put(mbuf_cache, m);
		}
		--mbstat[mycpu->gd_cpuid].m_mbufs;
		break;
	default:
		if (!panicstr)
			panic("bad mbuf flags %p %08x", m, m->m_flags);
		break;
	}
	return (n);
}

#ifdef MBUF_DEBUG

void
_m_freem(struct mbuf *m, const char *func)
{
	while (m)
		m = _m_free(m, func);
}

#else

void
m_freem(struct mbuf *m)
{
	while (m)
		m = m_free(m);
}

#endif /* MBUF_DEBUG */

void
m_extadd(struct mbuf *m, void *buf, u_int size, void (*reff)(void *),
	 void (*freef)(void *), void *arg)
{
	m->m_ext.ext_arg = arg;
	m->m_ext.ext_buf = buf;
	m->m_ext.ext_ref = reff;
	m->m_ext.ext_free = freef;
	m->m_ext.ext_size = size;
	reff(arg);
	m->m_data = buf;
	m->m_flags |= M_EXT;
}

/*
 * mbuf utility routines
 */

/*
 * Lesser-used path for M_PREPEND: allocate new mbuf to prepend to chain and
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (m->m_flags & M_PKTHDR)
		mn = m_gethdr(how, m->m_type);
	else
		mn = m_get(how, m->m_type);
	if (mn == NULL) {
		m_freem(m);
		return (NULL);
	}
	if (m->m_flags & M_PKTHDR)
		M_MOVE_PKTHDR(mn, m);
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
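
/*
 * Usage sketch (illustrative, not from the original source): protocol
 * code normally reaches m_prepend() through the M_PREPEND() macro,
 * which first tries to use free leading space in the existing mbuf:
 *
 *	M_PREPEND(m, sizeof(struct myhdr), M_NOWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * where struct myhdr is a hypothetical header.  On failure the macro
 * leaves m NULL with the old chain freed, matching m_prepend() above.
 */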
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(const struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("%s: negative off %d", __func__, off));
	KASSERT(len >= 0, ("%s: negative len %d", __func__, len));
	if (off == 0 && (m->m_flags & M_PKTHDR))
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL,
		    ("%s: offset > size of mbuf chain", __func__));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("%s: length > size of mbuf chain", __func__));
			break;
		}
		/*
		 * Because we are sharing any cluster attachment below,
		 * be sure to get an mbuf that does not have a cluster
		 * associated with it.
		 */
		if (copyhdr)
			n = m_gethdr(wait, m->m_type);
		else
			n = m_get(wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			KKASSERT((n->m_flags & M_EXT) == 0);
			n->m_data = m->m_data + off;
			m->m_ext.ext_ref(m->m_ext.ext_arg);
			n->m_ext = m->m_ext;
			n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
		} else {
			bcopy(mtod(m, caddr_t) + off, mtod(n, caddr_t),
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == NULL)
		++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (top);
nospace:
	m_freem(top);
	++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (NULL);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies also have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	n = m_gethdr(how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	if (!m_dup_pkthdr(n, m, how))
		goto nospace;
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		KKASSERT((n->m_flags & M_EXT) == 0);
		n->m_data = m->m_data;
		m->m_ext.ext_ref(m->m_ext.ext_arg);
		n->m_ext = m->m_ext;
		n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
	} else {
		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
		bcopy(mtod(m, void *), mtod(n, void *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		o = m_get(how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			KKASSERT((n->m_flags & M_EXT) == 0);
			n->m_data = m->m_data;
			m->m_ext.ext_ref(m->m_ext.ext_arg);
			n->m_ext = m->m_ext;
			n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
		} else {
			bcopy(mtod(m, void *), mtod(n, void *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (NULL);
}
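
/*
 * Note (illustrative, not from the original source): because m_copym()
 * and m_copypacket() share cluster storage, their result must be
 * treated as read-only.  A caller that intends to modify the data
 * should use m_dup() below or run the copy through m_unshare():
 *
 *	n = m_copypacket(m, M_NOWAIT);	(read-only alias of m's data)
 *	w = m_dup(m, M_NOWAIT);		(independent, writable copy)
 */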
/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, void *_cp)
{
	caddr_t cp = _cp;
	unsigned count;

	KASSERT(off >= 0, ("%s: negative off %d", __func__, off));
	KASSERT(len >= 0, ("%s: negative len %d", __func__, len));
	while (off > 0) {
		KASSERT(m != NULL,
		    ("%s: offset > size of mbuf chain", __func__));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL,
		    ("%s: length > size of mbuf chain", __func__));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
	struct mbuf **p, *n, *top = NULL;
	int remain, moff, nsize, chunk;

	/* Sanity check */
	if (m == NULL)
		return (NULL);

	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		/* Get the next new mbuf */
		n = m_getl(remain, how, m->m_type, top == NULL ? M_PKTHDR : 0,
			   &nsize);
		if (n == NULL)
			goto nospace;
		if (top == NULL)
			if (!m_dup_pkthdr(n, m, how))
				goto nospace0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		n->m_len = 0;
		while (n->m_len < nsize && m != NULL) {
			chunk = min(nsize - n->m_len, m->m_len - moff);
			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
			("%s: bogus m_pkthdr.len", __func__));
	}
	return (top);

nospace:
	m_freem(top);
nospace0:
	++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (NULL);
}
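
/*
 * Usage sketch (illustrative, not from the original source): taking a
 * writable copy before rewriting a header in place, leaving the
 * original chain untouched:
 *
 *	n = m_dup(m, M_NOWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 *	eh = mtod(n, struct ether_header *);
 *	(modify eh freely; m still references the original clusters)
 */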
/*
 * Copy the non-packet mbuf data chain into a new set of mbufs, including
 * copying any mbuf clusters.  This is typically used to realign a data
 * chain by nfs_realign().
 *
 * The original chain is left intact.  how should be M_WAITOK or M_NOWAIT
 * and NULL can be returned if M_NOWAIT is passed.
 *
 * Be careful to use cluster mbufs, a large mbuf chain converted to non
 * cluster mbufs can exhaust our supply of mbufs.
 */
struct mbuf *
m_dup_data(struct mbuf *m, int how)
{
	struct mbuf **p, *n, *top = NULL;
	int mlen, moff, chunk, gsize, nsize;

	/* Degenerate case */
	if (m == NULL)
		return (NULL);

	/*
	 * Optimize the mbuf allocation but do not get too carried away.
	 */
	if (m->m_next || m->m_len > MLEN) {
		if (m->m_flags & M_EXT && m->m_ext.ext_size == MCLBYTES)
			gsize = MCLBYTES;
		else
			gsize = MJUMPAGESIZE;
	} else {
		gsize = MLEN;
	}

	/* Chain control */
	p = &top;
	n = NULL;
	nsize = 0;

	/*
	 * Scan the mbuf chain until nothing is left, the new mbuf chain
	 * will be allocated on the fly as needed.
	 */
	while (m) {
		mlen = m->m_len;
		moff = 0;

		while (mlen) {
			KKASSERT(m->m_type == MT_DATA);
			if (n == NULL) {
				n = m_getl(gsize, how, MT_DATA, 0, &nsize);
				if (n == NULL)
					goto nospace;
				n->m_len = 0;
				*p = n;
				p = &n->m_next;
			}
			chunk = imin(mlen, nsize);
			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			mlen -= chunk;
			moff += chunk;
			n->m_len += chunk;
			nsize -= chunk;
			if (nsize == 0)
				n = NULL;
		}
		m = m->m_next;
	}
	*p = NULL;
	return(top);
nospace:
	*p = NULL;
	m_freem(top);
	++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (NULL);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	m = m_last(m);
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	struct mbuf *m;
	int count, len = req_len;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
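
/*
 * Usage sketch (illustrative, not from the original source): stripping
 * an Ethernet header from the front of a packet and a 4-byte FCS from
 * the tail:
 *
 *	m_adj(m, ETHER_HDR_LEN);	(positive len trims the head)
 *	m_adj(m, -ETHER_CRC_LEN);	(negative len trims the tail)
 *
 * m_pkthdr.len is kept in sync when the chain has M_PKTHDR set.
 */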
/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	int adjust;

	if (m->m_flags & M_EXT)
		adjust = m->m_ext.ext_size - len;
	else if (m->m_flags & M_PKTHDR)
		adjust = MHLEN - len;
	else
		adjust = MLEN - len;
	m->m_data += rounddown2(adjust, sizeof(long));
}
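
/*
 * Usage sketch (illustrative, not from the original source): placing a
 * trailer structure at the end of a freshly allocated, otherwise empty
 * mbuf, longword aligned:
 *
 *	m = m_get(M_NOWAIT, MT_DATA);
 *	if (m != NULL) {
 *		m_align(m, sizeof(struct mytrailer));
 *		m->m_len = sizeof(struct mytrailer);
 *	}
 *
 * where struct mytrailer is a hypothetical type.  m_align() must only
 * be used on a newly-allocated mbuf, as noted above.
 */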

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work for a structure
 * of size len).  Returns the resulting mbuf chain on success, frees it and
 * returns NULL on failure.  If there is room, it will add up to
 * max_protohdr - len extra bytes to the contiguous region in an attempt to
 * avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if (!(n->m_flags & M_EXT) &&
	    n->m_data + len < &n->m_dat[MLEN] &&
	    n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		if (n->m_flags & M_PKTHDR)
			m = m_gethdr(M_NOWAIT, n->m_type);
		else
			m = m_get(M_NOWAIT, n->m_type);
		if (m == NULL)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m, n);
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	++mbstat[mycpu->gd_cpuid].m_mcfail;
	return (NULL);
}
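
/*
 * Usage sketch (illustrative only, not part of the build): the classic
 * pattern for parsing a protocol header -- ensure the first mbuf holds
 * at least sizeof(struct ip) contiguous bytes before dereferencing.
 * "struct ip" is assumed from <netinet/ip.h>; it is not otherwise used
 * in this file.
 */
#ifdef notdef
static struct mbuf *
example_m_pullup(struct mbuf *m)
{
	struct ip *ip;

	if (m->m_len < sizeof(struct ip)) {
		m = m_pullup(m, sizeof(struct ip));
		if (m == NULL)
			return (NULL);	/* chain was freed by m_pullup */
	}
	ip = mtod(m, struct ip *);
	/* ... inspect or rewrite the header ... */
	return (m);
}
#endif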

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else {
			MH_ALIGN(n, remain);
		}
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		KKASSERT((n->m_flags & M_EXT) == 0);
		n->m_data = m->m_data + len;
		m->m_ext.ext_ref(m->m_ext.ext_arg);
		n->m_ext = m->m_ext;
		n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 * Note: "offset" is ill-defined and always called as 0, so ignore it.
 */
struct mbuf *
m_devget(void *_buf, int len, int offset __unused, struct ifnet *ifp)
{
	struct mbuf *m, *mfirst = NULL, **mtail;
	caddr_t buf = _buf;
	int nsize, flags;

	KKASSERT(offset == 0);
	mtail = &mfirst;
	flags = M_PKTHDR;

	while (len > 0) {
		m = m_getl(len, M_NOWAIT, MT_DATA, flags, &nsize);
		if (m == NULL) {
			m_freem(mfirst);
			return (NULL);
		}
		m->m_len = min(len, nsize);

		if (flags & M_PKTHDR) {
			if (len + max_linkhdr <= nsize)
				m->m_data += max_linkhdr;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = len;
			flags = 0;
		}

		bcopy(buf, m->m_data, m->m_len);
		buf += m->m_len;
		len -= m->m_len;
		*mtail = m;
		mtail = &m->m_next;
	}

	return (mfirst);
}
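
/*
 * Usage sketch (illustrative only, not part of the build): how a
 * network driver might copy a received frame out of a device buffer
 * that cannot be mapped for zero-copy.  "rxbuf" and "rxlen" are
 * placeholder names, not symbols from this file.
 */
#ifdef notdef
static void
example_m_devget(struct ifnet *ifp, void *rxbuf, int rxlen)
{
	struct mbuf *m;

	/* copies rxlen bytes; leaves link-header room when it fits */
	m = m_devget(rxbuf, rxlen, 0, ifp);
	if (m == NULL)
		return;		/* out of mbufs; the frame is dropped */
	/* ... hand the chain to the stack's input path ... */
}
#endif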

/*
 * Routine to pad mbuf to the specified length 'padto'.
 */
int
m_devpad(struct mbuf *m, int padto)
{
	struct mbuf *last = NULL;
	int padlen;

	if (padto <= m->m_pkthdr.len)
		return 0;

	padlen = padto - m->m_pkthdr.len;

	/* if there's only the packet-header and we can pad there, use it. */
	if (m->m_pkthdr.len == m->m_len && M_TRAILINGSPACE(m) >= padlen) {
		last = m;
	} else {
		/*
		 * Walk packet chain to find last mbuf.  We will either
		 * pad there, or append a new mbuf and pad it.
		 */
		for (last = m; last->m_next != NULL; last = last->m_next)
			; /* EMPTY */

		/* `last' now points to last in chain. */
		if (M_TRAILINGSPACE(last) < padlen) {
			struct mbuf *n;

			/* Allocate new empty mbuf, pad it.  Compact later. */
			MGET(n, M_NOWAIT, MT_DATA);
			if (n == NULL)
				return ENOBUFS;
			n->m_len = 0;
			last->m_next = n;
			last = n;
		}
	}
	KKASSERT(M_TRAILINGSPACE(last) >= padlen);
	KKASSERT(M_WRITABLE(last));

	/* Now zero the pad area */
	bzero(mtod(last, caddr_t) + last->m_len, padlen);
	last->m_len += padlen;
	m->m_pkthdr.len += padlen;
	return 0;
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 *
 * Note that m0->m_len may be 0 (e.g., a newly allocated mbuf).
 */
static __inline int
_m_copyback2(struct mbuf *m0, int off, int len, const void *_cp, int how,
	     boolean_t allow_alloc)
{
	struct mbuf *m = m0, *n;
	c_caddr_t cp = _cp;
	int mlen, tlen, nsize, totlen = 0, error = ENOBUFS;

	KASSERT(off >= 0, ("%s: negative off %d", __func__, off));
	KASSERT(len >= 0, ("%s: negative len %d", __func__, len));

	if (m0 == NULL)
		return (0);

	while (off > m->m_len) {
		if (m->m_next == NULL && (tlen = M_TRAILINGSPACE(m)) > 0) {
			/* Use the trailing space of the last mbuf. */
			mlen = min(off - m->m_len, tlen);
			bzero(mtod(m, caddr_t) + m->m_len, mlen);
			m->m_len += mlen;
		}
		off -= m->m_len;
		totlen += m->m_len;
		if (m->m_next == NULL) {
			if (!allow_alloc)
				goto out;
			n = m_getl(off + len, how, m->m_type, 0, &nsize);
			if (n == NULL)
				goto out;
			n->m_len = min(nsize, off + len);
			bzero(mtod(n, void *), n->m_len);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		if (m->m_next == NULL &&
		    m->m_len < off + len &&
		    (tlen = M_TRAILINGSPACE(m)) > 0) {
			/* Use the trailing space of the last mbuf. */
			m->m_len += min(off + len - m->m_len, tlen);
		}
		mlen = min(m->m_len - off, len);
		bcopy(cp, mtod(m, caddr_t) + off, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;	/* account for 'off' before zeroing */
		off = 0;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			if (!allow_alloc)
				goto out;
			n = m_getl(len, how, m->m_type, 0, &nsize);
			if (n == NULL)
				goto out;
			n->m_len = min(nsize, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
	error = 0;

out:
	if ((m0->m_flags & M_PKTHDR) && (m0->m_pkthdr.len < totlen))
		m0->m_pkthdr.len = totlen;

	return (error);
}

int
m_copyback2(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	return _m_copyback2(m0, off, len, cp, how, TRUE);
}

/*
 * Similar to m_copyback2() but forbid mbuf expansion.  The caller must
 * ensure that the mbuf (chain) is big enough; otherwise, the copyback
 * fails with diagnostics printed to the console.
 */
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
	if (_m_copyback2(m0, off, len, cp, 0, FALSE) != 0) {
		kprintf("%s: unexpected mbuf expansion required, "
			"code path needs to be fixed:\n", __func__);
		print_backtrace(8);
	}
}
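
/*
 * Usage sketch (illustrative only, not part of the build): rewrite two
 * bytes at a fixed offset (say, a checksum field) and let m_copyback2()
 * extend the chain if the offset lies beyond its current end.  The
 * offset and value are inventions for the example.
 */
#ifdef notdef
static int
example_m_copyback2(struct mbuf *m)
{
	uint16_t sum = 0xffff;	/* placeholder value */

	/* may allocate; returns ENOBUFS if allocation fails */
	return (m_copyback2(m, 16, sizeof(sum), &sum, M_NOWAIT));
}
#endif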

/*
 * Append the specified data to the indicated mbuf chain.
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, const void *_cp)
{
	struct mbuf *m, *n;
	c_caddr_t cp = _cp;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space;
		remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_get(M_NOWAIT, m->m_type);
		if (n == NULL)
			break;
		n->m_len = min(MLEN, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len;
		remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
	int (*f)(void *, void *, u_int), void *arg)
{
	u_int count;
	int rval;

	KASSERT(off >= 0, ("%s: negative off %d", __func__, off));
	KASSERT(len >= 0, ("%s: negative len %d", __func__, len));
	while (off > 0) {
		KASSERT(m != NULL,
			("%s: offset > size of mbuf chain", __func__));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL,
			("%s: offset > size of mbuf chain", __func__));
		count = min(m->m_len - off, len);
		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);
		len -= count;
		off = 0;
		m = m->m_next;
	}
	return (0);
}

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search. */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;
			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data. */
					*off = m->m_len;
					return (m);
				}
				return (NULL);
			}
			m = m->m_next;
		}
	}
	return (NULL);
}

/*
 * Dump the contents of an mbuf chain to the console in hex.
 */
void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;
	char *hexstr;

	len = m->m_pkthdr.len;
	m2 = m;
	hexstr = kmalloc(HEX_NCPYLEN(len), M_TEMP, M_ZERO | M_WAITOK);
	while (len) {
		kprintf("%p %s\n", m2, hexncpy(m2->m_data, m2->m_len, hexstr,
			HEX_NCPYLEN(m2->m_len), "-"));
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	kfree(hexstr, M_TEMP);
}

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
	KASSERT((to->m_flags & M_PKTHDR), ("%s: not packet header", __func__));

	to->m_flags |= from->m_flags & M_COPYFLAGS;
	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
}
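
/*
 * Usage sketch (illustrative only, not part of the build): m_apply()
 * with a callback that sums bytes across a chain without linearizing
 * it.  The callback name and the trivial byte-sum are inventions for
 * the example.
 */
#ifdef notdef
static int
example_sum_cb(void *arg, void *data, u_int len)
{
	uint32_t *sum = arg;
	const uint8_t *p = data;
	u_int i;

	for (i = 0; i < len; ++i)
		*sum += p[i];
	return (0);	/* non-zero would abort the walk */
}

static uint32_t
example_m_apply(struct mbuf *m, int off, int len)
{
	uint32_t sum = 0;

	m_apply(m, off, len, example_sum_cb, &sum);
	return (sum);
}
#endif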

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
{
	KASSERT((to->m_flags & M_PKTHDR), ("%s: not packet header", __func__));

	to->m_flags = (from->m_flags & M_COPYFLAGS) |
		      (to->m_flags & ~M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;
	SLIST_INIT(&to->m_pkthdr.tags);
	return (m_tag_copy_chain(to, from, how));
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If an mbuf without M_PKTHDR is passed in, the original
 * chain will be returned unharmed.
 *
 * m_defrag_nofree doesn't free the passed in mbuf.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new;

	if ((m_new = m_defrag_nofree(m0, how)) == NULL)
		return (NULL);
	if (m_new != m0)
		m_freem(m0);
	return (m_new);
}

struct mbuf *
m_defrag_nofree(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length, nsize;

	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = karc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	m_final = m_getl(m0->m_pkthdr.len, how, MT_DATA, M_PKTHDR, &nsize);
	if (m_final == NULL)
		goto nospace;
	m_final->m_len = 0;	/* in case m0->m_pkthdr.len is zero */

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			m_new = m_getl(length, how, MT_DATA, 0, &nsize);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, void *));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
	if (m0->m_next == NULL)
		m_defraguseless++;
	m_defragpackets++;
	m_defragbytes += m_final->m_pkthdr.len;
	return (m_final);
nospace:
	m_defragfailure++;
	if (m_new)
		m_free(m_new);
	m_freem(m_final);
	return (NULL);
}
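
/*
 * Usage sketch (illustrative only, not part of the build): a driver
 * whose hardware supports only a limited number of DMA segments can
 * try to defragment a long transmit chain before giving up on it.
 * The drop-on-failure policy shown is an assumption about the caller.
 */
#ifdef notdef
static struct mbuf *
example_m_defrag(struct mbuf *m)
{
	struct mbuf *m_new;

	m_new = m_defrag(m, M_NOWAIT);
	if (m_new == NULL) {
		m_freem(m);	/* m_defrag left the original intact */
		return (NULL);
	}
	return (m_new);		/* original already freed if it changed */
}
#endif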

/*
 * Move data from uio into mbufs.
 */
struct mbuf *
m_uiomove(struct uio *uio)
{
	struct mbuf *m;			/* current working mbuf */
	struct mbuf *head = NULL;	/* result mbuf chain */
	struct mbuf **mp = &head;
	int flags = M_PKTHDR;
	int nsize;
	int error;
	int resid;

	do {
		if (uio->uio_resid > INT_MAX)
			resid = INT_MAX;
		else
			resid = (int)uio->uio_resid;
		m = m_getl(resid, M_WAITOK, MT_DATA, flags, &nsize);
		if (flags) {
			m->m_pkthdr.len = 0;
			/* Leave room for protocol headers. */
			if (resid < MHLEN)
				MH_ALIGN(m, resid);
			flags = 0;
		}
		m->m_len = imin(nsize, resid);
		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
		if (error) {
			m_free(m);
			goto failed;
		}
		*mp = m;
		mp = &m->m_next;
		head->m_pkthdr.len += m->m_len;
	} while (uio->uio_resid > 0);

	return (head);

failed:
	m_freem(head);
	return (NULL);
}

/*
 * Return the last mbuf in the chain.
 */
struct mbuf *
m_last(struct mbuf *m)
{
	while (m->m_next)
		m = m->m_next;
	return (m);
}

/*
 * Return the number of bytes in an mbuf chain.
 * If lastm is not NULL, also return the last mbuf.
 */
u_int
m_lengthm(struct mbuf *m, struct mbuf **lastm)
{
	u_int len = 0;
	struct mbuf *prev = m;

	while (m) {
		len += m->m_len;
		prev = m;
		m = m->m_next;
	}
	if (lastm != NULL)
		*lastm = prev;
	return (len);
}

/*
 * Like m_lengthm(), except also keep track of mbuf usage.
 */
u_int
m_countm(struct mbuf *m, struct mbuf **lastm, u_int *pmbcnt)
{
	u_int len = 0, mbcnt = 0;
	struct mbuf *prev = m;

	while (m) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		prev = m;
		m = m->m_next;
	}
	if (lastm != NULL)
		*lastm = prev;
	*pmbcnt = mbcnt;
	return (len);
}
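
/*
 * Usage sketch (illustrative only, not part of the build): account for
 * a chain's payload bytes and total mbuf storage footprint, e.g. when
 * charging a socket buffer.  The kprintf() merely shows the values.
 */
#ifdef notdef
static void
example_m_countm(struct mbuf *m)
{
	struct mbuf *last;
	u_int bytes, mbcnt;

	bytes = m_countm(m, &last, &mbcnt);
	kprintf("chain: %u payload bytes, %u bytes of mbuf storage, "
		"last mbuf %p\n", bytes, mbcnt, last);
}
#endif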