1 /* 2 * (MPSAFE) 3 * 4 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 5 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Jeffrey M. Hsu. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright (c) 1982, 1986, 1988, 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 65 * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $ 66 */ 67 68 #include "opt_param.h" 69 #include "opt_mbuf_stress_test.h" 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/file.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/kernel.h> 76 #include <sys/sysctl.h> 77 #include <sys/domain.h> 78 #include <sys/objcache.h> 79 #include <sys/tree.h> 80 #include <sys/protosw.h> 81 #include <sys/uio.h> 82 #include <sys/thread.h> 83 #include <sys/globaldata.h> 84 85 #include <sys/thread2.h> 86 #include <sys/spinlock2.h> 87 88 #include <machine/atomic.h> 89 #include <machine/limits.h> 90 91 #include <vm/vm.h> 92 #include <vm/vm_kern.h> 93 #include <vm/vm_extern.h> 94 95 #ifdef INVARIANTS 96 #include <machine/cpu.h> 97 #endif 98 99 /* 100 * mbuf cluster meta-data 101 */ 102 struct mbcluster { 103 int32_t mcl_refs; 104 void *mcl_data; 105 }; 106 107 /* 108 * mbuf tracking for debugging purposes 109 */ 110 #ifdef MBUF_DEBUG 111 112 static MALLOC_DEFINE(M_MTRACK, "mtrack", "mtrack"); 113 114 struct mbctrack; 115 RB_HEAD(mbuf_rb_tree, mbtrack); 116 RB_PROTOTYPE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *); 117 118 struct mbtrack { 119 RB_ENTRY(mbtrack) rb_node; 120 int trackid; 121 struct mbuf *m; 122 }; 123 124 static int 125 mbtrack_cmp(struct mbtrack *mb1, struct mbtrack *mb2) 126 { 127 if (mb1->m < mb2->m) 128 return(-1); 129 if (mb1->m > mb2->m) 130 return(1); 131 return(0); 132 } 133 134 RB_GENERATE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *, m); 135 136 struct mbuf_rb_tree mbuf_track_root; 137 static struct spinlock mbuf_track_spin = SPINLOCK_INITIALIZER(mbuf_track_spin, "mbuf_track_spin"); 138 139 static void 140 mbuftrack(struct mbuf *m) 141 { 142 struct mbtrack *mbt; 143 144 mbt = kmalloc(sizeof(*mbt), M_MTRACK, M_INTWAIT|M_ZERO); 145 spin_lock(&mbuf_track_spin); 146 mbt->m = m; 147 if (mbuf_rb_tree_RB_INSERT(&mbuf_track_root, mbt)) { 148 spin_unlock(&mbuf_track_spin); 149 panic("mbuftrack: mbuf %p already being tracked", m); 150 } 151 spin_unlock(&mbuf_track_spin); 152 } 153 154 static void 155 mbufuntrack(struct mbuf *m) 156 { 157 struct mbtrack *mbt; 158 159 spin_lock(&mbuf_track_spin); 160 mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m); 161 if (mbt == NULL) { 162 spin_unlock(&mbuf_track_spin); 163 panic("mbufuntrack: mbuf %p was not tracked", m); 164 } else { 165 mbuf_rb_tree_RB_REMOVE(&mbuf_track_root, mbt); 166 spin_unlock(&mbuf_track_spin); 167 kfree(mbt, M_MTRACK); 168 } 169 } 170 171 void 172 mbuftrackid(struct mbuf *m, int trackid) 173 { 174 struct mbtrack *mbt; 175 struct mbuf *n; 176 177 spin_lock(&mbuf_track_spin); 178 while (m) { 179 n = m->m_nextpkt; 180 while (m) { 181 mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m); 182 if (mbt == NULL) { 183 spin_unlock(&mbuf_track_spin); 184 panic("mbuftrackid: mbuf %p not tracked", m); 185 } 186 mbt->trackid = trackid; 187 m = m->m_next; 188 } 189 m = n; 190 } 191 spin_unlock(&mbuf_track_spin); 192 } 193 194 static int 195 mbuftrack_callback(struct mbtrack *mbt, void *arg) 196 { 197 struct sysctl_req *req = arg; 198 char buf[64]; 199 int error; 200 201 ksnprintf(buf, sizeof(buf), "mbuf %p track %d\n", mbt->m, mbt->trackid); 202 203 spin_unlock(&mbuf_track_spin); 204 error = SYSCTL_OUT(req, buf, strlen(buf)); 205 spin_lock(&mbuf_track_spin); 206 if (error) 207 return(-error); 208 return(0); 209 } 210 211 static int 212 mbuftrack_show(SYSCTL_HANDLER_ARGS) 213 { 214 int error; 215 216 spin_lock(&mbuf_track_spin); 217 error = mbuf_rb_tree_RB_SCAN(&mbuf_track_root, NULL, 218 mbuftrack_callback, req); 219 spin_unlock(&mbuf_track_spin); 220 return (-error); 221 } 222 SYSCTL_PROC(_kern_ipc, OID_AUTO, showmbufs, CTLFLAG_RD|CTLTYPE_STRING, 223 0, 0, mbuftrack_show, "A", "Show all in-use mbufs"); 224 225 #else 226 227 #define mbuftrack(m) 228 #define mbufuntrack(m) 229 230 #endif 231 232 static void mbinit(void *); 233 SYSINIT(mbuf, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, mbinit, NULL); 234 235 struct mbtypes_stat { 236 u_long stats[MT_NTYPES]; 237 } __cachealign; 238 239 static struct mbtypes_stat mbtypes[SMP_MAXCPU]; 240 241 static struct mbstat mbstat[SMP_MAXCPU] __cachealign; 242 int max_linkhdr; 243 int max_protohdr; 244 int max_hdr; 245 int max_datalen; 246 int m_defragpackets; 247 int m_defragbytes; 248 int m_defraguseless; 249 int m_defragfailure; 250 #ifdef MBUF_STRESS_TEST 251 int m_defragrandomfailures; 252 #endif 253 254 struct objcache *mbuf_cache, *mbufphdr_cache; 255 struct objcache *mclmeta_cache, *mjclmeta_cache; 256 struct objcache *mbufcluster_cache, *mbufphdrcluster_cache; 257 struct objcache *mbufjcluster_cache, *mbufphdrjcluster_cache; 258 259 struct lock mbupdate_lk = LOCK_INITIALIZER("mbupdate", 0, LK_CANRECURSE); 260 261 int nmbclusters; 262 static int nmbjclusters; 263 int nmbufs; 264 265 static int mjclph_cachefrac; 266 static int mjcl_cachefrac; 267 static int mclph_cachefrac; 268 static int mcl_cachefrac; 269 270 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 271 &max_linkhdr, 0, "Max size of a link-level header"); 272 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 273 &max_protohdr, 0, "Max size of a protocol header"); 274 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, 275 "Max size of link+protocol headers"); 276 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 277 &max_datalen, 0, "Max data payload size without headers"); 278 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 279 &mbuf_wait, 0, "Time in ticks to sleep after failed mbuf allocations"); 280 static int do_mbstat(SYSCTL_HANDLER_ARGS); 281 282 SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT|CTLFLAG_RD, 283 0, 0, do_mbstat, "S,mbstat", "mbuf usage statistics"); 284 285 static int do_mbtypes(SYSCTL_HANDLER_ARGS); 286 287 SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtypes, CTLTYPE_ULONG|CTLFLAG_RD, 288 0, 0, do_mbtypes, "LU", ""); 289 290 static int 291 do_mbstat(SYSCTL_HANDLER_ARGS) 292 { 293 struct mbstat mbstat_total; 294 struct mbstat *mbstat_totalp; 295 int i; 296 297 bzero(&mbstat_total, sizeof(mbstat_total)); 298 mbstat_totalp = &mbstat_total; 299 300 for (i = 0; i < ncpus; i++) 301 { 302 mbstat_total.m_mbufs += mbstat[i].m_mbufs; 303 mbstat_total.m_clusters += mbstat[i].m_clusters; 304 mbstat_total.m_jclusters += mbstat[i].m_jclusters; 305 mbstat_total.m_clfree += mbstat[i].m_clfree; 306 mbstat_total.m_drops += mbstat[i].m_drops; 307 mbstat_total.m_wait += mbstat[i].m_wait; 308 mbstat_total.m_drain += mbstat[i].m_drain; 309 mbstat_total.m_mcfail += mbstat[i].m_mcfail; 310 mbstat_total.m_mpfail += mbstat[i].m_mpfail; 311 312 } 313 /* 314 * The following fields are not cumulative fields so just 315 * get their values once. 316 */ 317 mbstat_total.m_msize = mbstat[0].m_msize; 318 mbstat_total.m_mclbytes = mbstat[0].m_mclbytes; 319 mbstat_total.m_minclsize = mbstat[0].m_minclsize; 320 mbstat_total.m_mlen = mbstat[0].m_mlen; 321 mbstat_total.m_mhlen = mbstat[0].m_mhlen; 322 323 return(sysctl_handle_opaque(oidp, mbstat_totalp, sizeof(mbstat_total), req)); 324 } 325 326 static int 327 do_mbtypes(SYSCTL_HANDLER_ARGS) 328 { 329 u_long totals[MT_NTYPES]; 330 int i, j; 331 332 for (i = 0; i < MT_NTYPES; i++) 333 totals[i] = 0; 334 335 for (i = 0; i < ncpus; i++) 336 { 337 for (j = 0; j < MT_NTYPES; j++) 338 totals[j] += mbtypes[i].stats[j]; 339 } 340 341 return(sysctl_handle_opaque(oidp, totals, sizeof(totals), req)); 342 } 343 344 /* 345 * The variables may be set as boot-time tunables or live. Setting these 346 * values too low can deadlock your network. Network interfaces may also 347 * adjust nmbclusters and/or nmbjclusters to account for preloading the 348 * hardware rings. 349 */ 350 static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS); 351 static int sysctl_nmbjclusters(SYSCTL_HANDLER_ARGS); 352 static int sysctl_nmbufs(SYSCTL_HANDLER_ARGS); 353 SYSCTL_PROC(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLTYPE_INT | CTLFLAG_RW, 354 0, 0, sysctl_nmbclusters, "I", 355 "Maximum number of mbuf clusters available"); 356 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjclusters, CTLTYPE_INT | CTLFLAG_RW, 357 0, 0, sysctl_nmbjclusters, "I", 358 "Maximum number of mbuf jclusters available"); 359 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT | CTLFLAG_RW, 360 0, 0, sysctl_nmbufs, "I", 361 "Maximum number of mbufs available"); 362 363 SYSCTL_INT(_kern_ipc, OID_AUTO, mjclph_cachefrac, CTLFLAG_RD, 364 &mjclph_cachefrac, 0, 365 "Fraction of cacheable mbuf jclusters w/ pkthdr"); 366 SYSCTL_INT(_kern_ipc, OID_AUTO, mjcl_cachefrac, CTLFLAG_RD, 367 &mjcl_cachefrac, 0, 368 "Fraction of cacheable mbuf jclusters"); 369 SYSCTL_INT(_kern_ipc, OID_AUTO, mclph_cachefrac, CTLFLAG_RD, 370 &mclph_cachefrac, 0, 371 "Fraction of cacheable mbuf clusters w/ pkthdr"); 372 SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_cachefrac, CTLFLAG_RD, 373 &mcl_cachefrac, 0, "Fraction of cacheable mbuf clusters"); 374 375 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, 376 &m_defragpackets, 0, "Number of defragment packets"); 377 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, 378 &m_defragbytes, 0, "Number of defragment bytes"); 379 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, 380 &m_defraguseless, 0, "Number of useless defragment mbuf chain operations"); 381 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, 382 &m_defragfailure, 0, "Number of failed defragment mbuf chain operations"); 383 #ifdef MBUF_STRESS_TEST 384 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, 385 &m_defragrandomfailures, 0, ""); 386 #endif 387 388 static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 389 static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl"); 390 static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta"); 391 392 static void m_reclaim (void); 393 static void m_mclref(void *arg); 394 static void m_mclfree(void *arg); 395 static void m_mjclfree(void *arg); 396 397 static void mbupdatelimits(void); 398 399 /* 400 * NOTE: Default NMBUFS must take into account a possible DOS attack 401 * using fd passing on unix domain sockets. 402 */ 403 #ifndef NMBCLUSTERS 404 #define NMBCLUSTERS (512 + maxusers * 16) 405 #endif 406 #ifndef MJCLPH_CACHEFRAC 407 #define MJCLPH_CACHEFRAC 16 408 #endif 409 #ifndef MJCL_CACHEFRAC 410 #define MJCL_CACHEFRAC 4 411 #endif 412 #ifndef MCLPH_CACHEFRAC 413 #define MCLPH_CACHEFRAC 16 414 #endif 415 #ifndef MCL_CACHEFRAC 416 #define MCL_CACHEFRAC 4 417 #endif 418 #ifndef NMBJCLUSTERS 419 #define NMBJCLUSTERS (NMBCLUSTERS / 2) 420 #endif 421 #ifndef NMBUFS 422 #define NMBUFS (nmbclusters * 2 + maxfiles) 423 #endif 424 425 #define NMBCLUSTERS_MIN (NMBCLUSTERS / 2) 426 #define NMBJCLUSTERS_MIN (NMBJCLUSTERS / 2) 427 #define NMBUFS_MIN ((NMBCLUSTERS * 2 + maxfiles) / 2) 428 429 /* 430 * Perform sanity checks of tunables declared above. 431 */ 432 static void 433 tunable_mbinit(void *dummy) 434 { 435 /* 436 * This has to be done before VM init. 437 */ 438 nmbclusters = NMBCLUSTERS; 439 TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 440 mjclph_cachefrac = MJCLPH_CACHEFRAC; 441 TUNABLE_INT_FETCH("kern.ipc.mjclph_cachefrac", &mjclph_cachefrac); 442 mjcl_cachefrac = MJCL_CACHEFRAC; 443 TUNABLE_INT_FETCH("kern.ipc.mjcl_cachefrac", &mjcl_cachefrac); 444 mclph_cachefrac = MCLPH_CACHEFRAC; 445 TUNABLE_INT_FETCH("kern.ipc.mclph_cachefrac", &mclph_cachefrac); 446 mcl_cachefrac = MCL_CACHEFRAC; 447 TUNABLE_INT_FETCH("kern.ipc.mcl_cachefrac", &mcl_cachefrac); 448 449 /* 450 * WARNING! each mcl cache feeds two mbuf caches, so the minimum 451 * cachefrac is 2. For safety, use 3. 452 */ 453 if (mjclph_cachefrac < 3) 454 mjclph_cachefrac = 3; 455 if (mjcl_cachefrac < 3) 456 mjcl_cachefrac = 3; 457 if (mclph_cachefrac < 3) 458 mclph_cachefrac = 3; 459 if (mcl_cachefrac < 3) 460 mcl_cachefrac = 3; 461 462 nmbjclusters = NMBJCLUSTERS; 463 TUNABLE_INT_FETCH("kern.ipc.nmbjclusters", &nmbjclusters); 464 465 nmbufs = NMBUFS; 466 TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 467 468 /* Sanity checks */ 469 if (nmbufs < nmbclusters * 2) 470 nmbufs = nmbclusters * 2; 471 } 472 SYSINIT(tunable_mbinit, SI_BOOT1_TUNABLES, SI_ORDER_ANY, 473 tunable_mbinit, NULL); 474 475 static void 476 mbinclimit(int *limit, int inc, int minlim) 477 { 478 int new_limit; 479 480 lockmgr(&mbupdate_lk, LK_EXCLUSIVE); 481 482 new_limit = *limit + inc; 483 if (new_limit < minlim) 484 new_limit = minlim; 485 486 if (*limit != new_limit) { 487 *limit = new_limit; 488 mbupdatelimits(); 489 } 490 491 lockmgr(&mbupdate_lk, LK_RELEASE); 492 } 493 494 static int 495 mbsetlimit(int *limit, int new_limit, int minlim) 496 { 497 if (new_limit < minlim) 498 return EINVAL; 499 500 lockmgr(&mbupdate_lk, LK_EXCLUSIVE); 501 mbinclimit(limit, new_limit - *limit, minlim); 502 lockmgr(&mbupdate_lk, LK_RELEASE); 503 return 0; 504 } 505 506 static int 507 sysctl_mblimit(SYSCTL_HANDLER_ARGS, int *limit, int minlim) 508 { 509 int error, value; 510 511 value = *limit; 512 error = sysctl_handle_int(oidp, &value, 0, req); 513 if (error || req->newptr == NULL) 514 return error; 515 516 return mbsetlimit(limit, value, minlim); 517 } 518 519 /* 520 * Sysctl support to update nmbclusters, nmbjclusters, and nmbufs. 521 */ 522 static int 523 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 524 { 525 return sysctl_mblimit(oidp, arg1, arg2, req, &nmbclusters, 526 NMBCLUSTERS_MIN); 527 } 528 529 static int 530 sysctl_nmbjclusters(SYSCTL_HANDLER_ARGS) 531 { 532 return sysctl_mblimit(oidp, arg1, arg2, req, &nmbjclusters, 533 NMBJCLUSTERS_MIN); 534 } 535 536 static int 537 sysctl_nmbufs(SYSCTL_HANDLER_ARGS) 538 { 539 return sysctl_mblimit(oidp, arg1, arg2, req, &nmbufs, NMBUFS_MIN); 540 } 541 542 void 543 mcl_inclimit(int inc) 544 { 545 mbinclimit(&nmbclusters, inc, NMBCLUSTERS_MIN); 546 } 547 548 void 549 mjcl_inclimit(int inc) 550 { 551 mbinclimit(&nmbjclusters, inc, NMBJCLUSTERS_MIN); 552 } 553 554 void 555 mb_inclimit(int inc) 556 { 557 mbinclimit(&nmbufs, inc, NMBUFS_MIN); 558 } 559 560 /* "number of clusters of pages" */ 561 #define NCL_INIT 1 562 563 #define NMB_INIT 16 564 565 /* 566 * The mbuf object cache only guarantees that m_next and m_nextpkt are 567 * NULL and that m_data points to the beginning of the data area. In 568 * particular, m_len and m_pkthdr.len are uninitialized. It is the 569 * responsibility of the caller to initialize those fields before use. 570 */ 571 static __inline boolean_t 572 mbuf_ctor(void *obj, void *private, int ocflags) 573 { 574 struct mbuf *m = obj; 575 576 m->m_next = NULL; 577 m->m_nextpkt = NULL; 578 m->m_data = m->m_dat; 579 m->m_flags = 0; 580 581 return (TRUE); 582 } 583 584 /* 585 * Initialize the mbuf and the packet header fields. 586 */ 587 static boolean_t 588 mbufphdr_ctor(void *obj, void *private, int ocflags) 589 { 590 struct mbuf *m = obj; 591 592 m->m_next = NULL; 593 m->m_nextpkt = NULL; 594 m->m_data = m->m_pktdat; 595 m->m_flags = M_PKTHDR | M_PHCACHE; 596 597 m->m_pkthdr.rcvif = NULL; /* eliminate XXX JH */ 598 SLIST_INIT(&m->m_pkthdr.tags); 599 m->m_pkthdr.csum_flags = 0; /* eliminate XXX JH */ 600 m->m_pkthdr.fw_flags = 0; /* eliminate XXX JH */ 601 602 return (TRUE); 603 } 604 605 /* 606 * A mbcluster object consists of 2K (MCLBYTES) cluster and a refcount. 607 */ 608 static boolean_t 609 mclmeta_ctor(void *obj, void *private, int ocflags) 610 { 611 struct mbcluster *cl = obj; 612 void *buf; 613 614 if (ocflags & M_NOWAIT) 615 buf = kmalloc(MCLBYTES, M_MBUFCL, M_NOWAIT | M_ZERO); 616 else 617 buf = kmalloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO); 618 if (buf == NULL) 619 return (FALSE); 620 cl->mcl_refs = 0; 621 cl->mcl_data = buf; 622 return (TRUE); 623 } 624 625 static boolean_t 626 mjclmeta_ctor(void *obj, void *private, int ocflags) 627 { 628 struct mbcluster *cl = obj; 629 void *buf; 630 631 if (ocflags & M_NOWAIT) 632 buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_NOWAIT | M_ZERO); 633 else 634 buf = kmalloc(MJUMPAGESIZE, M_MBUFCL, M_INTWAIT | M_ZERO); 635 if (buf == NULL) 636 return (FALSE); 637 cl->mcl_refs = 0; 638 cl->mcl_data = buf; 639 return (TRUE); 640 } 641 642 static void 643 mclmeta_dtor(void *obj, void *private) 644 { 645 struct mbcluster *mcl = obj; 646 647 KKASSERT(mcl->mcl_refs == 0); 648 kfree(mcl->mcl_data, M_MBUFCL); 649 } 650 651 static void 652 linkjcluster(struct mbuf *m, struct mbcluster *cl, uint size) 653 { 654 /* 655 * Add the cluster to the mbuf. The caller will detect that the 656 * mbuf now has an attached cluster. 657 */ 658 m->m_ext.ext_arg = cl; 659 m->m_ext.ext_buf = cl->mcl_data; 660 m->m_ext.ext_ref = m_mclref; 661 if (size != MCLBYTES) 662 m->m_ext.ext_free = m_mjclfree; 663 else 664 m->m_ext.ext_free = m_mclfree; 665 m->m_ext.ext_size = size; 666 atomic_add_int(&cl->mcl_refs, 1); 667 668 m->m_data = m->m_ext.ext_buf; 669 m->m_flags |= M_EXT | M_EXT_CLUSTER; 670 } 671 672 static void 673 linkcluster(struct mbuf *m, struct mbcluster *cl) 674 { 675 linkjcluster(m, cl, MCLBYTES); 676 } 677 678 static boolean_t 679 mbufphdrcluster_ctor(void *obj, void *private, int ocflags) 680 { 681 struct mbuf *m = obj; 682 struct mbcluster *cl; 683 684 mbufphdr_ctor(obj, private, ocflags); 685 cl = objcache_get(mclmeta_cache, ocflags); 686 if (cl == NULL) { 687 ++mbstat[mycpu->gd_cpuid].m_drops; 688 return (FALSE); 689 } 690 m->m_flags |= M_CLCACHE; 691 linkcluster(m, cl); 692 return (TRUE); 693 } 694 695 static boolean_t 696 mbufphdrjcluster_ctor(void *obj, void *private, int ocflags) 697 { 698 struct mbuf *m = obj; 699 struct mbcluster *cl; 700 701 mbufphdr_ctor(obj, private, ocflags); 702 cl = objcache_get(mjclmeta_cache, ocflags); 703 if (cl == NULL) { 704 ++mbstat[mycpu->gd_cpuid].m_drops; 705 return (FALSE); 706 } 707 m->m_flags |= M_CLCACHE; 708 linkjcluster(m, cl, MJUMPAGESIZE); 709 return (TRUE); 710 } 711 712 static boolean_t 713 mbufcluster_ctor(void *obj, void *private, int ocflags) 714 { 715 struct mbuf *m = obj; 716 struct mbcluster *cl; 717 718 mbuf_ctor(obj, private, ocflags); 719 cl = objcache_get(mclmeta_cache, ocflags); 720 if (cl == NULL) { 721 ++mbstat[mycpu->gd_cpuid].m_drops; 722 return (FALSE); 723 } 724 m->m_flags |= M_CLCACHE; 725 linkcluster(m, cl); 726 return (TRUE); 727 } 728 729 static boolean_t 730 mbufjcluster_ctor(void *obj, void *private, int ocflags) 731 { 732 struct mbuf *m = obj; 733 struct mbcluster *cl; 734 735 mbuf_ctor(obj, private, ocflags); 736 cl = objcache_get(mjclmeta_cache, ocflags); 737 if (cl == NULL) { 738 ++mbstat[mycpu->gd_cpuid].m_drops; 739 return (FALSE); 740 } 741 m->m_flags |= M_CLCACHE; 742 linkjcluster(m, cl, MJUMPAGESIZE); 743 return (TRUE); 744 } 745 746 /* 747 * Used for both the cluster and cluster PHDR caches. 748 * 749 * The mbuf may have lost its cluster due to sharing, deal 750 * with the situation by checking M_EXT. 751 */ 752 static void 753 mbufcluster_dtor(void *obj, void *private) 754 { 755 struct mbuf *m = obj; 756 struct mbcluster *mcl; 757 758 if (m->m_flags & M_EXT) { 759 KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0); 760 mcl = m->m_ext.ext_arg; 761 KKASSERT(mcl->mcl_refs == 1); 762 mcl->mcl_refs = 0; 763 if (m->m_flags & M_EXT && m->m_ext.ext_size != MCLBYTES) 764 objcache_put(mjclmeta_cache, mcl); 765 else 766 objcache_put(mclmeta_cache, mcl); 767 } 768 } 769 770 struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF }; 771 struct objcache_malloc_args mclmeta_malloc_args = 772 { sizeof(struct mbcluster), M_MCLMETA }; 773 774 /* ARGSUSED*/ 775 static void 776 mbinit(void *dummy) 777 { 778 int mb_limit, cl_limit, ncl_limit, jcl_limit; 779 int limit; 780 int i; 781 782 /* 783 * Initialize statistics 784 */ 785 for (i = 0; i < ncpus; i++) { 786 mbstat[i].m_msize = MSIZE; 787 mbstat[i].m_mclbytes = MCLBYTES; 788 mbstat[i].m_mjumpagesize = MJUMPAGESIZE; 789 mbstat[i].m_minclsize = MINCLSIZE; 790 mbstat[i].m_mlen = MLEN; 791 mbstat[i].m_mhlen = MHLEN; 792 } 793 794 /* 795 * Create object caches and save cluster limits, which will 796 * be used to adjust backing kmalloc pools' limit later. 797 */ 798 799 mb_limit = cl_limit = 0; 800 801 limit = nmbufs; 802 mbuf_cache = objcache_create("mbuf", 803 limit, nmbufs / 4, 804 mbuf_ctor, NULL, NULL, 805 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 806 mb_limit += limit; 807 808 limit = nmbufs; 809 mbufphdr_cache = objcache_create("mbuf pkt hdr", 810 limit, nmbufs / 4, 811 mbufphdr_ctor, NULL, NULL, 812 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 813 mb_limit += limit; 814 815 ncl_limit = nmbclusters; 816 mclmeta_cache = objcache_create("cluster mbuf", 817 ncl_limit, nmbclusters / 4, 818 mclmeta_ctor, mclmeta_dtor, NULL, 819 objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args); 820 cl_limit += ncl_limit; 821 822 jcl_limit = nmbjclusters; 823 mjclmeta_cache = objcache_create("jcluster mbuf", 824 jcl_limit, nmbjclusters / 4, 825 mjclmeta_ctor, mclmeta_dtor, NULL, 826 objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args); 827 cl_limit += jcl_limit; 828 829 limit = nmbclusters; 830 mbufcluster_cache = objcache_create("mbuf + cluster", 831 limit, nmbclusters / mcl_cachefrac, 832 mbufcluster_ctor, mbufcluster_dtor, NULL, 833 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 834 mb_limit += limit; 835 836 limit = nmbclusters; 837 mbufphdrcluster_cache = objcache_create("mbuf pkt hdr + cluster", 838 limit, nmbclusters / mclph_cachefrac, 839 mbufphdrcluster_ctor, mbufcluster_dtor, NULL, 840 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 841 mb_limit += limit; 842 843 limit = nmbjclusters; 844 mbufjcluster_cache = objcache_create("mbuf + jcluster", 845 limit, nmbjclusters / mjcl_cachefrac, 846 mbufjcluster_ctor, mbufcluster_dtor, NULL, 847 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 848 mb_limit += limit; 849 850 limit = nmbjclusters; 851 mbufphdrjcluster_cache = objcache_create("mbuf pkt hdr + jcluster", 852 limit, nmbjclusters / mjclph_cachefrac, 853 mbufphdrjcluster_ctor, mbufcluster_dtor, NULL, 854 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 855 mb_limit += limit; 856 857 /* 858 * Adjust backing kmalloc pools' limit 859 * 860 * NOTE: We raise the limit by another 1/8 to take the effect 861 * of loosememuse into account. 862 */ 863 cl_limit += cl_limit / 8; 864 kmalloc_raise_limit(mclmeta_malloc_args.mtype, 865 mclmeta_malloc_args.objsize * (size_t)cl_limit); 866 kmalloc_raise_limit(M_MBUFCL, 867 (MCLBYTES * (size_t)ncl_limit) + 868 (MJUMPAGESIZE * (size_t)jcl_limit)); 869 870 mb_limit += mb_limit / 8; 871 kmalloc_raise_limit(mbuf_malloc_args.mtype, 872 mbuf_malloc_args.objsize * (size_t)mb_limit); 873 } 874 875 /* 876 * Adjust mbuf limits after changes have been made 877 * 878 * Caller must hold mbupdate_lk 879 */ 880 static void 881 mbupdatelimits(void) 882 { 883 int mb_limit, cl_limit, ncl_limit, jcl_limit; 884 int limit; 885 886 KASSERT(lockstatus(&mbupdate_lk, curthread) != 0, 887 ("mbupdate_lk is not held")); 888 889 /* 890 * Figure out adjustments to object caches after nmbufs, nmbclusters, 891 * or nmbjclusters has been modified. 892 */ 893 mb_limit = cl_limit = 0; 894 895 limit = nmbufs; 896 objcache_set_cluster_limit(mbuf_cache, limit); 897 mb_limit += limit; 898 899 limit = nmbufs; 900 objcache_set_cluster_limit(mbufphdr_cache, limit); 901 mb_limit += limit; 902 903 ncl_limit = nmbclusters; 904 objcache_set_cluster_limit(mclmeta_cache, ncl_limit); 905 cl_limit += ncl_limit; 906 907 jcl_limit = nmbjclusters; 908 objcache_set_cluster_limit(mjclmeta_cache, jcl_limit); 909 cl_limit += jcl_limit; 910 911 limit = nmbclusters; 912 objcache_set_cluster_limit(mbufcluster_cache, limit); 913 mb_limit += limit; 914 915 limit = nmbclusters; 916 objcache_set_cluster_limit(mbufphdrcluster_cache, limit); 917 mb_limit += limit; 918 919 limit = nmbjclusters; 920 objcache_set_cluster_limit(mbufjcluster_cache, limit); 921 mb_limit += limit; 922 923 limit = nmbjclusters; 924 objcache_set_cluster_limit(mbufphdrjcluster_cache, limit); 925 mb_limit += limit; 926 927 /* 928 * Adjust backing kmalloc pools' limit 929 * 930 * NOTE: We raise the limit by another 1/8 to take the effect 931 * of loosememuse into account. 932 */ 933 cl_limit += cl_limit / 8; 934 kmalloc_raise_limit(mclmeta_malloc_args.mtype, 935 mclmeta_malloc_args.objsize * (size_t)cl_limit); 936 kmalloc_raise_limit(M_MBUFCL, 937 (MCLBYTES * (size_t)ncl_limit) + 938 (MJUMPAGESIZE * (size_t)jcl_limit)); 939 mb_limit += mb_limit / 8; 940 kmalloc_raise_limit(mbuf_malloc_args.mtype, 941 mbuf_malloc_args.objsize * (size_t)mb_limit); 942 } 943 944 /* 945 * Return the number of references to this mbuf's data. 0 is returned 946 * if the mbuf is not M_EXT, a reference count is returned if it is 947 * M_EXT | M_EXT_CLUSTER, and 99 is returned if it is a special M_EXT. 948 */ 949 int 950 m_sharecount(struct mbuf *m) 951 { 952 switch (m->m_flags & (M_EXT | M_EXT_CLUSTER)) { 953 case 0: 954 return (0); 955 case M_EXT: 956 return (99); 957 case M_EXT | M_EXT_CLUSTER: 958 return (((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs); 959 } 960 /* NOTREACHED */ 961 return (0); /* to shut up compiler */ 962 } 963 964 /* 965 * change mbuf to new type 966 */ 967 void 968 m_chtype(struct mbuf *m, int type) 969 { 970 struct globaldata *gd = mycpu; 971 972 ++mbtypes[gd->gd_cpuid].stats[type]; 973 --mbtypes[gd->gd_cpuid].stats[m->m_type]; 974 m->m_type = type; 975 } 976 977 static void 978 m_reclaim(void) 979 { 980 struct domain *dp; 981 struct protosw *pr; 982 983 kprintf("Debug: m_reclaim() called\n"); 984 985 SLIST_FOREACH(dp, &domains, dom_next) { 986 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 987 if (pr->pr_drain) 988 (*pr->pr_drain)(); 989 } 990 } 991 ++mbstat[mycpu->gd_cpuid].m_drain; 992 } 993 994 static __inline void 995 updatestats(struct mbuf *m, int type) 996 { 997 struct globaldata *gd = mycpu; 998 999 m->m_type = type; 1000 mbuftrack(m); 1001 #ifdef MBUF_DEBUG 1002 KASSERT(m->m_next == NULL, ("mbuf %p: bad m_next in get", m)); 1003 KASSERT(m->m_nextpkt == NULL, ("mbuf %p: bad m_nextpkt in get", m)); 1004 #endif 1005 1006 ++mbtypes[gd->gd_cpuid].stats[type]; 1007 ++mbstat[gd->gd_cpuid].m_mbufs; 1008 1009 } 1010 1011 /* 1012 * Allocate an mbuf. 1013 */ 1014 struct mbuf * 1015 m_get(int how, int type) 1016 { 1017 struct mbuf *m; 1018 int ntries = 0; 1019 int ocf = MB_OCFLAG(how); 1020 1021 retryonce: 1022 1023 m = objcache_get(mbuf_cache, ocf); 1024 1025 if (m == NULL) { 1026 if ((ocf & M_WAITOK) && ntries++ == 0) { 1027 struct objcache *reclaimlist[] = { 1028 mbufphdr_cache, 1029 mbufcluster_cache, 1030 mbufphdrcluster_cache, 1031 mbufjcluster_cache, 1032 mbufphdrjcluster_cache 1033 }; 1034 const int nreclaims = NELEM(reclaimlist); 1035 1036 if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf)) 1037 m_reclaim(); 1038 goto retryonce; 1039 } 1040 ++mbstat[mycpu->gd_cpuid].m_drops; 1041 return (NULL); 1042 } 1043 #ifdef MBUF_DEBUG 1044 KASSERT(m->m_data == m->m_dat, ("mbuf %p: bad m_data in get", m)); 1045 #endif 1046 m->m_len = 0; 1047 1048 updatestats(m, type); 1049 return (m); 1050 } 1051 1052 struct mbuf * 1053 m_gethdr(int how, int type) 1054 { 1055 struct mbuf *m; 1056 int ocf = MB_OCFLAG(how); 1057 int ntries = 0; 1058 1059 retryonce: 1060 1061 m = objcache_get(mbufphdr_cache, ocf); 1062 1063 if (m == NULL) { 1064 if ((ocf & M_WAITOK) && ntries++ == 0) { 1065 struct objcache *reclaimlist[] = { 1066 mbuf_cache, 1067 mbufcluster_cache, mbufphdrcluster_cache, 1068 mbufjcluster_cache, mbufphdrjcluster_cache 1069 }; 1070 const int nreclaims = NELEM(reclaimlist); 1071 1072 if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf)) 1073 m_reclaim(); 1074 goto retryonce; 1075 } 1076 ++mbstat[mycpu->gd_cpuid].m_drops; 1077 return (NULL); 1078 } 1079 #ifdef MBUF_DEBUG 1080 KASSERT(m->m_data == m->m_pktdat, ("mbuf %p: bad m_data in get", m)); 1081 #endif 1082 m->m_len = 0; 1083 m->m_pkthdr.len = 0; 1084 1085 updatestats(m, type); 1086 return (m); 1087 } 1088 1089 /* 1090 * Get a mbuf (not a mbuf cluster!) and zero it. 1091 * Deprecated. 1092 */ 1093 struct mbuf * 1094 m_getclr(int how, int type) 1095 { 1096 struct mbuf *m; 1097 1098 m = m_get(how, type); 1099 if (m != NULL) 1100 bzero(m->m_data, MLEN); 1101 return (m); 1102 } 1103 1104 static struct mbuf * 1105 m_getcl_cache(int how, short type, int flags, struct objcache *mbclc, 1106 struct objcache *mbphclc, u_long *cl_stats) 1107 { 1108 struct mbuf *m = NULL; 1109 int ocflags = MB_OCFLAG(how); 1110 int ntries = 0; 1111 1112 retryonce: 1113 1114 if (flags & M_PKTHDR) 1115 m = objcache_get(mbphclc, ocflags); 1116 else 1117 m = objcache_get(mbclc, ocflags); 1118 1119 if (m == NULL) { 1120 if ((ocflags & M_WAITOK) && ntries++ == 0) { 1121 struct objcache *reclaimlist[1]; 1122 1123 if (flags & M_PKTHDR) 1124 reclaimlist[0] = mbclc; 1125 else 1126 reclaimlist[0] = mbphclc; 1127 if (!objcache_reclaimlist(reclaimlist, 1, ocflags)) 1128 m_reclaim(); 1129 goto retryonce; 1130 } 1131 ++mbstat[mycpu->gd_cpuid].m_drops; 1132 return (NULL); 1133 } 1134 1135 #ifdef MBUF_DEBUG 1136 KASSERT(m->m_data == m->m_ext.ext_buf, 1137 ("mbuf %p: bad m_data in get", m)); 1138 #endif 1139 m->m_type = type; 1140 m->m_len = 0; 1141 m->m_pkthdr.len = 0; /* just do it unconditonally */ 1142 1143 mbuftrack(m); 1144 1145 ++mbtypes[mycpu->gd_cpuid].stats[type]; 1146 ++(*cl_stats); 1147 return (m); 1148 } 1149 1150 struct mbuf * 1151 m_getjcl(int how, short type, int flags, size_t size) 1152 { 1153 struct objcache *mbclc, *mbphclc; 1154 u_long *cl_stats; 1155 1156 switch (size) { 1157 case MCLBYTES: 1158 mbclc = mbufcluster_cache; 1159 mbphclc = mbufphdrcluster_cache; 1160 cl_stats = &mbstat[mycpu->gd_cpuid].m_clusters; 1161 break; 1162 1163 default: 1164 mbclc = mbufjcluster_cache; 1165 mbphclc = mbufphdrjcluster_cache; 1166 cl_stats = &mbstat[mycpu->gd_cpuid].m_jclusters; 1167 break; 1168 } 1169 return m_getcl_cache(how, type, flags, mbclc, mbphclc, cl_stats); 1170 } 1171 1172 /* 1173 * Returns an mbuf with an attached cluster. 1174 * Because many network drivers use this kind of buffers a lot, it is 1175 * convenient to keep a small pool of free buffers of this kind. 1176 * Even a small size such as 10 gives about 10% improvement in the 1177 * forwarding rate in a bridge or router. 1178 */ 1179 struct mbuf * 1180 m_getcl(int how, short type, int flags) 1181 { 1182 return m_getcl_cache(how, type, flags, 1183 mbufcluster_cache, mbufphdrcluster_cache, 1184 &mbstat[mycpu->gd_cpuid].m_clusters); 1185 } 1186 1187 /* 1188 * Allocate chain of requested length. 1189 */ 1190 struct mbuf * 1191 m_getc(int len, int how, int type) 1192 { 1193 struct mbuf *n, *nfirst = NULL, **ntail = &nfirst; 1194 int nsize; 1195 1196 while (len > 0) { 1197 n = m_getl(len, how, type, 0, &nsize); 1198 if (n == NULL) 1199 goto failed; 1200 n->m_len = 0; 1201 *ntail = n; 1202 ntail = &n->m_next; 1203 len -= nsize; 1204 } 1205 return (nfirst); 1206 1207 failed: 1208 m_freem(nfirst); 1209 return (NULL); 1210 } 1211 1212 /* 1213 * Allocate len-worth of mbufs and/or mbuf clusters (whatever fits best) 1214 * and return a pointer to the head of the allocated chain. If m0 is 1215 * non-null, then we assume that it is a single mbuf or an mbuf chain to 1216 * which we want len bytes worth of mbufs and/or clusters attached, and so 1217 * if we succeed in allocating it, we will just return a pointer to m0. 1218 * 1219 * If we happen to fail at any point during the allocation, we will free 1220 * up everything we have already allocated and return NULL. 1221 * 1222 * Deprecated. Use m_getc() and m_cat() instead. 1223 */ 1224 struct mbuf * 1225 m_getm(struct mbuf *m0, int len, int type, int how) 1226 { 1227 struct mbuf *nfirst; 1228 1229 nfirst = m_getc(len, how, type); 1230 1231 if (m0 != NULL) { 1232 m_last(m0)->m_next = nfirst; 1233 return (m0); 1234 } 1235 1236 return (nfirst); 1237 } 1238 1239 /* 1240 * Adds a cluster to a normal mbuf, M_EXT is set on success. 1241 * Deprecated. Use m_getcl() instead. 1242 */ 1243 void 1244 m_mclget(struct mbuf *m, int how) 1245 { 1246 struct mbcluster *mcl; 1247 1248 KKASSERT((m->m_flags & M_EXT) == 0); 1249 mcl = objcache_get(mclmeta_cache, MB_OCFLAG(how)); 1250 if (mcl != NULL) { 1251 linkcluster(m, mcl); 1252 ++mbstat[mycpu->gd_cpuid].m_clusters; 1253 } else { 1254 ++mbstat[mycpu->gd_cpuid].m_drops; 1255 } 1256 } 1257 1258 /* 1259 * Updates to mbcluster must be MPSAFE. Only an entity which already has 1260 * a reference to the cluster can ref it, so we are in no danger of 1261 * racing an add with a subtract. But the operation must still be atomic 1262 * since multiple entities may have a reference on the cluster. 1263 * 1264 * m_mclfree() is almost the same but it must contend with two entities 1265 * freeing the cluster at the same time. 1266 */ 1267 static void 1268 m_mclref(void *arg) 1269 { 1270 struct mbcluster *mcl = arg; 1271 1272 atomic_add_int(&mcl->mcl_refs, 1); 1273 } 1274 1275 /* 1276 * When dereferencing a cluster we have to deal with a N->0 race, where 1277 * N entities free their references simultaniously. To do this we use 1278 * atomic_fetchadd_int(). 1279 */ 1280 static void 1281 m_mclfree(void *arg) 1282 { 1283 struct mbcluster *mcl = arg; 1284 1285 if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1) { 1286 --mbstat[mycpu->gd_cpuid].m_clusters; 1287 objcache_put(mclmeta_cache, mcl); 1288 } 1289 } 1290 1291 static void 1292 m_mjclfree(void *arg) 1293 { 1294 struct mbcluster *mcl = arg; 1295 1296 if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1) { 1297 --mbstat[mycpu->gd_cpuid].m_jclusters; 1298 objcache_put(mjclmeta_cache, mcl); 1299 } 1300 } 1301 1302 /* 1303 * Free a single mbuf and any associated external storage. The successor, 1304 * if any, is returned. 1305 * 1306 * We do need to check non-first mbuf for m_aux, since some of existing 1307 * code does not call M_PREPEND properly. 1308 * (example: call to bpf_mtap from drivers) 1309 */ 1310 1311 #ifdef MBUF_DEBUG 1312 1313 struct mbuf * 1314 _m_free(struct mbuf *m, const char *func) 1315 1316 #else 1317 1318 struct mbuf * 1319 m_free(struct mbuf *m) 1320 1321 #endif 1322 { 1323 struct mbuf *n; 1324 struct globaldata *gd = mycpu; 1325 1326 KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m)); 1327 KASSERT(M_TRAILINGSPACE(m) >= 0, ("overflowed mbuf %p", m)); 1328 --mbtypes[gd->gd_cpuid].stats[m->m_type]; 1329 1330 n = m->m_next; 1331 1332 /* 1333 * Make sure the mbuf is in constructed state before returning it 1334 * to the objcache. 1335 */ 1336 m->m_next = NULL; 1337 mbufuntrack(m); 1338 #ifdef MBUF_DEBUG 1339 m->m_hdr.mh_lastfunc = func; 1340 #endif 1341 #ifdef notyet 1342 KKASSERT(m->m_nextpkt == NULL); 1343 #else 1344 if (m->m_nextpkt != NULL) { 1345 static int afewtimes = 10; 1346 1347 if (afewtimes-- > 0) { 1348 kprintf("mfree: m->m_nextpkt != NULL\n"); 1349 print_backtrace(-1); 1350 } 1351 m->m_nextpkt = NULL; 1352 } 1353 #endif 1354 if (m->m_flags & M_PKTHDR) { 1355 m_tag_delete_chain(m); /* eliminate XXX JH */ 1356 } 1357 1358 m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE); 1359 1360 /* 1361 * Clean the M_PKTHDR state so we can return the mbuf to its original 1362 * cache. This is based on the PHCACHE flag which tells us whether 1363 * the mbuf was originally allocated out of a packet-header cache 1364 * or a non-packet-header cache. 1365 */ 1366 if (m->m_flags & M_PHCACHE) { 1367 m->m_flags |= M_PKTHDR; 1368 m->m_pkthdr.rcvif = NULL; /* eliminate XXX JH */ 1369 m->m_pkthdr.csum_flags = 0; /* eliminate XXX JH */ 1370 m->m_pkthdr.fw_flags = 0; /* eliminate XXX JH */ 1371 SLIST_INIT(&m->m_pkthdr.tags); 1372 } 1373 1374 /* 1375 * Handle remaining flags combinations. M_CLCACHE tells us whether 1376 * the mbuf was originally allocated from a cluster cache or not, 1377 * and is totally separate from whether the mbuf is currently 1378 * associated with a cluster. 1379 */ 1380 switch(m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) { 1381 case M_CLCACHE | M_EXT | M_EXT_CLUSTER: 1382 /* 1383 * mbuf+cluster cache case. The mbuf was allocated from the 1384 * combined mbuf_cluster cache and can be returned to the 1385 * cache if the cluster hasn't been shared. 1386 */ 1387 if (m_sharecount(m) == 1) { 1388 /* 1389 * The cluster has not been shared, we can just 1390 * reset the data pointer and return the mbuf 1391 * to the cluster cache. Note that the reference 1392 * count is left intact (it is still associated with 1393 * an mbuf). 1394 */ 1395 m->m_data = m->m_ext.ext_buf; 1396 if (m->m_flags & M_EXT && m->m_ext.ext_size != MCLBYTES) { 1397 if (m->m_flags & M_PHCACHE) 1398 objcache_put(mbufphdrjcluster_cache, m); 1399 else 1400 objcache_put(mbufjcluster_cache, m); 1401 --mbstat[mycpu->gd_cpuid].m_jclusters; 1402 } else { 1403 if (m->m_flags & M_PHCACHE) 1404 objcache_put(mbufphdrcluster_cache, m); 1405 else 1406 objcache_put(mbufcluster_cache, m); 1407 --mbstat[mycpu->gd_cpuid].m_clusters; 1408 } 1409 } else { 1410 /* 1411 * Hell. Someone else has a ref on this cluster, 1412 * we have to disconnect it which means we can't 1413 * put it back into the mbufcluster_cache, we 1414 * have to destroy the mbuf. 1415 * 1416 * Other mbuf references to the cluster will typically 1417 * be M_EXT | M_EXT_CLUSTER but without M_CLCACHE. 1418 * 1419 * XXX we could try to connect another cluster to 1420 * it. 1421 */ 1422 m->m_ext.ext_free(m->m_ext.ext_arg); 1423 m->m_flags &= ~(M_EXT | M_EXT_CLUSTER); 1424 if (m->m_ext.ext_size == MCLBYTES) { 1425 if (m->m_flags & M_PHCACHE) 1426 objcache_dtor(mbufphdrcluster_cache, m); 1427 else 1428 objcache_dtor(mbufcluster_cache, m); 1429 } else { 1430 if (m->m_flags & M_PHCACHE) 1431 objcache_dtor(mbufphdrjcluster_cache, m); 1432 else 1433 objcache_dtor(mbufjcluster_cache, m); 1434 } 1435 } 1436 break; 1437 case M_EXT | M_EXT_CLUSTER: 1438 case M_EXT: 1439 /* 1440 * Normal cluster association case, disconnect the cluster from 1441 * the mbuf. The cluster may or may not be custom. 1442 */ 1443 m->m_ext.ext_free(m->m_ext.ext_arg); 1444 m->m_flags &= ~(M_EXT | M_EXT_CLUSTER); 1445 /* fall through */ 1446 case 0: 1447 /* 1448 * return the mbuf to the mbuf cache. 1449 */ 1450 if (m->m_flags & M_PHCACHE) { 1451 m->m_data = m->m_pktdat; 1452 objcache_put(mbufphdr_cache, m); 1453 } else { 1454 m->m_data = m->m_dat; 1455 objcache_put(mbuf_cache, m); 1456 } 1457 --mbstat[mycpu->gd_cpuid].m_mbufs; 1458 break; 1459 default: 1460 if (!panicstr) 1461 panic("bad mbuf flags %p %08x", m, m->m_flags); 1462 break; 1463 } 1464 return (n); 1465 } 1466 1467 #ifdef MBUF_DEBUG 1468 1469 void 1470 _m_freem(struct mbuf *m, const char *func) 1471 { 1472 while (m) 1473 m = _m_free(m, func); 1474 } 1475 1476 #else 1477 1478 void 1479 m_freem(struct mbuf *m) 1480 { 1481 while (m) 1482 m = m_free(m); 1483 } 1484 1485 #endif 1486 1487 void 1488 m_extadd(struct mbuf *m, caddr_t buf, u_int size, void (*reff)(void *), 1489 void (*freef)(void *), void *arg) 1490 { 1491 m->m_ext.ext_arg = arg; 1492 m->m_ext.ext_buf = buf; 1493 m->m_ext.ext_ref = reff; 1494 m->m_ext.ext_free = freef; 1495 m->m_ext.ext_size = size; 1496 reff(arg); 1497 m->m_data = buf; 1498 m->m_flags |= M_EXT; 1499 } 1500 1501 /* 1502 * mbuf utility routines 1503 */ 1504 1505 /* 1506 * Lesser-used path for M_PREPEND: allocate new mbuf to prepend to chain and 1507 * copy junk along. 1508 */ 1509 struct mbuf * 1510 m_prepend(struct mbuf *m, int len, int how) 1511 { 1512 struct mbuf *mn; 1513 1514 if (m->m_flags & M_PKTHDR) 1515 mn = m_gethdr(how, m->m_type); 1516 else 1517 mn = m_get(how, m->m_type); 1518 if (mn == NULL) { 1519 m_freem(m); 1520 return (NULL); 1521 } 1522 if (m->m_flags & M_PKTHDR) 1523 M_MOVE_PKTHDR(mn, m); 1524 mn->m_next = m; 1525 m = mn; 1526 if (len < MHLEN) 1527 MH_ALIGN(m, len); 1528 m->m_len = len; 1529 return (m); 1530 } 1531 1532 /* 1533 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 1534 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 1535 * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller. 1536 * Note that the copy is read-only, because clusters are not copied, 1537 * only their reference counts are incremented. 1538 */ 1539 struct mbuf * 1540 m_copym(const struct mbuf *m, int off0, int len, int wait) 1541 { 1542 struct mbuf *n, **np; 1543 int off = off0; 1544 struct mbuf *top; 1545 int copyhdr = 0; 1546 1547 KASSERT(off >= 0, ("m_copym, negative off %d", off)); 1548 KASSERT(len >= 0, ("m_copym, negative len %d", len)); 1549 if (off == 0 && (m->m_flags & M_PKTHDR)) 1550 copyhdr = 1; 1551 while (off > 0) { 1552 KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 1553 if (off < m->m_len) 1554 break; 1555 off -= m->m_len; 1556 m = m->m_next; 1557 } 1558 np = ⊤ 1559 top = NULL; 1560 while (len > 0) { 1561 if (m == NULL) { 1562 KASSERT(len == M_COPYALL, 1563 ("m_copym, length > size of mbuf chain")); 1564 break; 1565 } 1566 /* 1567 * Because we are sharing any cluster attachment below, 1568 * be sure to get an mbuf that does not have a cluster 1569 * associated with it. 1570 */ 1571 if (copyhdr) 1572 n = m_gethdr(wait, m->m_type); 1573 else 1574 n = m_get(wait, m->m_type); 1575 *np = n; 1576 if (n == NULL) 1577 goto nospace; 1578 if (copyhdr) { 1579 if (!m_dup_pkthdr(n, m, wait)) 1580 goto nospace; 1581 if (len == M_COPYALL) 1582 n->m_pkthdr.len -= off0; 1583 else 1584 n->m_pkthdr.len = len; 1585 copyhdr = 0; 1586 } 1587 n->m_len = min(len, m->m_len - off); 1588 if (m->m_flags & M_EXT) { 1589 KKASSERT((n->m_flags & M_EXT) == 0); 1590 n->m_data = m->m_data + off; 1591 m->m_ext.ext_ref(m->m_ext.ext_arg); 1592 n->m_ext = m->m_ext; 1593 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 1594 } else { 1595 bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 1596 (unsigned)n->m_len); 1597 } 1598 if (len != M_COPYALL) 1599 len -= n->m_len; 1600 off = 0; 1601 m = m->m_next; 1602 np = &n->m_next; 1603 } 1604 if (top == NULL) 1605 ++mbstat[mycpu->gd_cpuid].m_mcfail; 1606 return (top); 1607 nospace: 1608 m_freem(top); 1609 ++mbstat[mycpu->gd_cpuid].m_mcfail; 1610 return (NULL); 1611 } 1612 1613 /* 1614 * Copy an entire packet, including header (which must be present). 1615 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 1616 * Note that the copy is read-only, because clusters are not copied, 1617 * only their reference counts are incremented. 1618 * Preserve alignment of the first mbuf so if the creator has left 1619 * some room at the beginning (e.g. for inserting protocol headers) 1620 * the copies also have the room available. 1621 */ 1622 struct mbuf * 1623 m_copypacket(struct mbuf *m, int how) 1624 { 1625 struct mbuf *top, *n, *o; 1626 1627 n = m_gethdr(how, m->m_type); 1628 top = n; 1629 if (!n) 1630 goto nospace; 1631 1632 if (!m_dup_pkthdr(n, m, how)) 1633 goto nospace; 1634 n->m_len = m->m_len; 1635 if (m->m_flags & M_EXT) { 1636 KKASSERT((n->m_flags & M_EXT) == 0); 1637 n->m_data = m->m_data; 1638 m->m_ext.ext_ref(m->m_ext.ext_arg); 1639 n->m_ext = m->m_ext; 1640 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 1641 } else { 1642 n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 1643 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 1644 } 1645 1646 m = m->m_next; 1647 while (m) { 1648 o = m_get(how, m->m_type); 1649 if (!o) 1650 goto nospace; 1651 1652 n->m_next = o; 1653 n = n->m_next; 1654 1655 n->m_len = m->m_len; 1656 if (m->m_flags & M_EXT) { 1657 KKASSERT((n->m_flags & M_EXT) == 0); 1658 n->m_data = m->m_data; 1659 m->m_ext.ext_ref(m->m_ext.ext_arg); 1660 n->m_ext = m->m_ext; 1661 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 1662 } else { 1663 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 1664 } 1665 1666 m = m->m_next; 1667 } 1668 return top; 1669 nospace: 1670 m_freem(top); 1671 ++mbstat[mycpu->gd_cpuid].m_mcfail; 1672 return (NULL); 1673 } 1674 1675 /* 1676 * Copy data from an mbuf chain starting "off" bytes from the beginning, 1677 * continuing for "len" bytes, into the indicated buffer. 1678 */ 1679 void 1680 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) 1681 { 1682 unsigned count; 1683 1684 KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 1685 KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 1686 while (off > 0) { 1687 KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 1688 if (off < m->m_len) 1689 break; 1690 off -= m->m_len; 1691 m = m->m_next; 1692 } 1693 while (len > 0) { 1694 KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 1695 count = min(m->m_len - off, len); 1696 bcopy(mtod(m, caddr_t) + off, cp, count); 1697 len -= count; 1698 cp += count; 1699 off = 0; 1700 m = m->m_next; 1701 } 1702 } 1703 1704 /* 1705 * Copy a packet header mbuf chain into a completely new chain, including 1706 * copying any mbuf clusters. Use this instead of m_copypacket() when 1707 * you need a writable copy of an mbuf chain. 1708 */ 1709 struct mbuf * 1710 m_dup(struct mbuf *m, int how) 1711 { 1712 struct mbuf **p, *top = NULL; 1713 int remain, moff, nsize; 1714 1715 /* Sanity check */ 1716 if (m == NULL) 1717 return (NULL); 1718 KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__)); 1719 1720 /* While there's more data, get a new mbuf, tack it on, and fill it */ 1721 remain = m->m_pkthdr.len; 1722 moff = 0; 1723 p = ⊤ 1724 while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 1725 struct mbuf *n; 1726 1727 /* Get the next new mbuf */ 1728 n = m_getl(remain, how, m->m_type, top == NULL ? M_PKTHDR : 0, 1729 &nsize); 1730 if (n == NULL) 1731 goto nospace; 1732 if (top == NULL) 1733 if (!m_dup_pkthdr(n, m, how)) 1734 goto nospace0; 1735 1736 /* Link it into the new chain */ 1737 *p = n; 1738 p = &n->m_next; 1739 1740 /* Copy data from original mbuf(s) into new mbuf */ 1741 n->m_len = 0; 1742 while (n->m_len < nsize && m != NULL) { 1743 int chunk = min(nsize - n->m_len, m->m_len - moff); 1744 1745 bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 1746 moff += chunk; 1747 n->m_len += chunk; 1748 remain -= chunk; 1749 if (moff == m->m_len) { 1750 m = m->m_next; 1751 moff = 0; 1752 } 1753 } 1754 1755 /* Check correct total mbuf length */ 1756 KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 1757 ("%s: bogus m_pkthdr.len", __func__)); 1758 } 1759 return (top); 1760 1761 nospace: 1762 m_freem(top); 1763 nospace0: 1764 ++mbstat[mycpu->gd_cpuid].m_mcfail; 1765 return (NULL); 1766 } 1767 1768 /* 1769 * Copy the non-packet mbuf data chain into a new set of mbufs, including 1770 * copying any mbuf clusters. This is typically used to realign a data 1771 * chain by nfs_realign(). 1772 * 1773 * The original chain is left intact. how should be M_WAITOK or M_NOWAIT 1774 * and NULL can be returned if M_NOWAIT is passed. 1775 * 1776 * Be careful to use cluster mbufs, a large mbuf chain converted to non 1777 * cluster mbufs can exhaust our supply of mbufs. 1778 */ 1779 struct mbuf * 1780 m_dup_data(struct mbuf *m, int how) 1781 { 1782 struct mbuf **p, *n, *top = NULL; 1783 int mlen, moff, chunk, gsize, nsize; 1784 1785 /* 1786 * Degenerate case 1787 */ 1788 if (m == NULL) 1789 return (NULL); 1790 1791 /* 1792 * Optimize the mbuf allocation but do not get too carried away. 1793 */ 1794 if (m->m_next || m->m_len > MLEN) 1795 if (m->m_flags & M_EXT && m->m_ext.ext_size == MCLBYTES) 1796 gsize = MCLBYTES; 1797 else 1798 gsize = MJUMPAGESIZE; 1799 else 1800 gsize = MLEN; 1801 1802 /* Chain control */ 1803 p = ⊤ 1804 n = NULL; 1805 nsize = 0; 1806 1807 /* 1808 * Scan the mbuf chain until nothing is left, the new mbuf chain 1809 * will be allocated on the fly as needed. 1810 */ 1811 while (m) { 1812 mlen = m->m_len; 1813 moff = 0; 1814 1815 while (mlen) { 1816 KKASSERT(m->m_type == MT_DATA); 1817 if (n == NULL) { 1818 n = m_getl(gsize, how, MT_DATA, 0, &nsize); 1819 n->m_len = 0; 1820 if (n == NULL) 1821 goto nospace; 1822 *p = n; 1823 p = &n->m_next; 1824 } 1825 chunk = imin(mlen, nsize); 1826 bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 1827 mlen -= chunk; 1828 moff += chunk; 1829 n->m_len += chunk; 1830 nsize -= chunk; 1831 if (nsize == 0) 1832 n = NULL; 1833 } 1834 m = m->m_next; 1835 } 1836 *p = NULL; 1837 return(top); 1838 nospace: 1839 *p = NULL; 1840 m_freem(top); 1841 ++mbstat[mycpu->gd_cpuid].m_mcfail; 1842 return (NULL); 1843 } 1844 1845 /* 1846 * Concatenate mbuf chain n to m. 1847 * Both chains must be of the same type (e.g. MT_DATA). 1848 * Any m_pkthdr is not updated. 1849 */ 1850 void 1851 m_cat(struct mbuf *m, struct mbuf *n) 1852 { 1853 m = m_last(m); 1854 while (n) { 1855 if (m->m_flags & M_EXT || 1856 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { 1857 /* just join the two chains */ 1858 m->m_next = n; 1859 return; 1860 } 1861 /* splat the data from one into the other */ 1862 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 1863 (u_int)n->m_len); 1864 m->m_len += n->m_len; 1865 n = m_free(n); 1866 } 1867 } 1868 1869 void 1870 m_adj(struct mbuf *mp, int req_len) 1871 { 1872 int len = req_len; 1873 struct mbuf *m; 1874 int count; 1875 1876 if ((m = mp) == NULL) 1877 return; 1878 if (len >= 0) { 1879 /* 1880 * Trim from head. 1881 */ 1882 while (m != NULL && len > 0) { 1883 if (m->m_len <= len) { 1884 len -= m->m_len; 1885 m->m_len = 0; 1886 m = m->m_next; 1887 } else { 1888 m->m_len -= len; 1889 m->m_data += len; 1890 len = 0; 1891 } 1892 } 1893 m = mp; 1894 if (mp->m_flags & M_PKTHDR) 1895 m->m_pkthdr.len -= (req_len - len); 1896 } else { 1897 /* 1898 * Trim from tail. Scan the mbuf chain, 1899 * calculating its length and finding the last mbuf. 1900 * If the adjustment only affects this mbuf, then just 1901 * adjust and return. Otherwise, rescan and truncate 1902 * after the remaining size. 1903 */ 1904 len = -len; 1905 count = 0; 1906 for (;;) { 1907 count += m->m_len; 1908 if (m->m_next == NULL) 1909 break; 1910 m = m->m_next; 1911 } 1912 if (m->m_len >= len) { 1913 m->m_len -= len; 1914 if (mp->m_flags & M_PKTHDR) 1915 mp->m_pkthdr.len -= len; 1916 return; 1917 } 1918 count -= len; 1919 if (count < 0) 1920 count = 0; 1921 /* 1922 * Correct length for chain is "count". 1923 * Find the mbuf with last data, adjust its length, 1924 * and toss data from remaining mbufs on chain. 1925 */ 1926 m = mp; 1927 if (m->m_flags & M_PKTHDR) 1928 m->m_pkthdr.len = count; 1929 for (; m; m = m->m_next) { 1930 if (m->m_len >= count) { 1931 m->m_len = count; 1932 break; 1933 } 1934 count -= m->m_len; 1935 } 1936 while (m->m_next) 1937 (m = m->m_next) ->m_len = 0; 1938 } 1939 } 1940 1941 /* 1942 * Set the m_data pointer of a newly-allocated mbuf 1943 * to place an object of the specified size at the 1944 * end of the mbuf, longword aligned. 1945 */ 1946 void 1947 m_align(struct mbuf *m, int len) 1948 { 1949 int adjust; 1950 1951 if (m->m_flags & M_EXT) 1952 adjust = m->m_ext.ext_size - len; 1953 else if (m->m_flags & M_PKTHDR) 1954 adjust = MHLEN - len; 1955 else 1956 adjust = MLEN - len; 1957 m->m_data += adjust &~ (sizeof(long)-1); 1958 } 1959 1960 /* 1961 * Create a writable copy of the mbuf chain. While doing this 1962 * we compact the chain with a goal of producing a chain with 1963 * at most two mbufs. The second mbuf in this chain is likely 1964 * to be a cluster. The primary purpose of this work is to create 1965 * a writable packet for encryption, compression, etc. The 1966 * secondary goal is to linearize the data so the data can be 1967 * passed to crypto hardware in the most efficient manner possible. 1968 */ 1969 struct mbuf * 1970 m_unshare(struct mbuf *m0, int how) 1971 { 1972 struct mbuf *m, *mprev; 1973 struct mbuf *n, *mfirst, *mlast; 1974 int len, off; 1975 1976 mprev = NULL; 1977 for (m = m0; m != NULL; m = mprev->m_next) { 1978 /* 1979 * Regular mbufs are ignored unless there's a cluster 1980 * in front of it that we can use to coalesce. We do 1981 * the latter mainly so later clusters can be coalesced 1982 * also w/o having to handle them specially (i.e. convert 1983 * mbuf+cluster -> cluster). This optimization is heavily 1984 * influenced by the assumption that we're running over 1985 * Ethernet where MCLBYTES is large enough that the max 1986 * packet size will permit lots of coalescing into a 1987 * single cluster. This in turn permits efficient 1988 * crypto operations, especially when using hardware. 1989 */ 1990 if ((m->m_flags & M_EXT) == 0) { 1991 if (mprev && (mprev->m_flags & M_EXT) && 1992 m->m_len <= M_TRAILINGSPACE(mprev)) { 1993 /* XXX: this ignores mbuf types */ 1994 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 1995 mtod(m, caddr_t), m->m_len); 1996 mprev->m_len += m->m_len; 1997 mprev->m_next = m->m_next; /* unlink from chain */ 1998 m_free(m); /* reclaim mbuf */ 1999 } else { 2000 mprev = m; 2001 } 2002 continue; 2003 } 2004 /* 2005 * Writable mbufs are left alone (for now). 2006 */ 2007 if (M_WRITABLE(m)) { 2008 mprev = m; 2009 continue; 2010 } 2011 2012 /* 2013 * Not writable, replace with a copy or coalesce with 2014 * the previous mbuf if possible (since we have to copy 2015 * it anyway, we try to reduce the number of mbufs and 2016 * clusters so that future work is easier). 2017 */ 2018 KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); 2019 /* NB: we only coalesce into a cluster or larger */ 2020 if (mprev != NULL && (mprev->m_flags & M_EXT) && 2021 m->m_len <= M_TRAILINGSPACE(mprev)) { 2022 /* XXX: this ignores mbuf types */ 2023 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 2024 mtod(m, caddr_t), m->m_len); 2025 mprev->m_len += m->m_len; 2026 mprev->m_next = m->m_next; /* unlink from chain */ 2027 m_free(m); /* reclaim mbuf */ 2028 continue; 2029 } 2030 2031 /* 2032 * Allocate new space to hold the copy... 2033 */ 2034 /* XXX why can M_PKTHDR be set past the first mbuf? */ 2035 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 2036 /* 2037 * NB: if a packet header is present we must 2038 * allocate the mbuf separately from any cluster 2039 * because M_MOVE_PKTHDR will smash the data 2040 * pointer and drop the M_EXT marker. 2041 */ 2042 MGETHDR(n, how, m->m_type); 2043 if (n == NULL) { 2044 m_freem(m0); 2045 return (NULL); 2046 } 2047 M_MOVE_PKTHDR(n, m); 2048 MCLGET(n, how); 2049 if ((n->m_flags & M_EXT) == 0) { 2050 m_free(n); 2051 m_freem(m0); 2052 return (NULL); 2053 } 2054 } else { 2055 n = m_getcl(how, m->m_type, m->m_flags); 2056 if (n == NULL) { 2057 m_freem(m0); 2058 return (NULL); 2059 } 2060 } 2061 /* 2062 * ... and copy the data. We deal with jumbo mbufs 2063 * (i.e. m_len > MCLBYTES) by splitting them into 2064 * clusters. We could just malloc a buffer and make 2065 * it external but too many device drivers don't know 2066 * how to break up the non-contiguous memory when 2067 * doing DMA. 2068 */ 2069 len = m->m_len; 2070 off = 0; 2071 mfirst = n; 2072 mlast = NULL; 2073 for (;;) { 2074 int cc = min(len, MCLBYTES); 2075 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); 2076 n->m_len = cc; 2077 if (mlast != NULL) 2078 mlast->m_next = n; 2079 mlast = n; 2080 2081 len -= cc; 2082 if (len <= 0) 2083 break; 2084 off += cc; 2085 2086 n = m_getcl(how, m->m_type, m->m_flags); 2087 if (n == NULL) { 2088 m_freem(mfirst); 2089 m_freem(m0); 2090 return (NULL); 2091 } 2092 } 2093 n->m_next = m->m_next; 2094 if (mprev == NULL) 2095 m0 = mfirst; /* new head of chain */ 2096 else 2097 mprev->m_next = mfirst; /* replace old mbuf */ 2098 m_free(m); /* release old mbuf */ 2099 mprev = mfirst; 2100 } 2101 return (m0); 2102 } 2103 2104 /* 2105 * Rearrange an mbuf chain so that len bytes are contiguous 2106 * and in the data area of an mbuf (so that mtod will work for a structure 2107 * of size len). Returns the resulting mbuf chain on success, frees it and 2108 * returns null on failure. If there is room, it will add up to 2109 * max_protohdr-len extra bytes to the contiguous region in an attempt to 2110 * avoid being called next time. 2111 */ 2112 struct mbuf * 2113 m_pullup(struct mbuf *n, int len) 2114 { 2115 struct mbuf *m; 2116 int count; 2117 int space; 2118 2119 /* 2120 * If first mbuf has no cluster, and has room for len bytes 2121 * without shifting current data, pullup into it, 2122 * otherwise allocate a new mbuf to prepend to the chain. 2123 */ 2124 if (!(n->m_flags & M_EXT) && 2125 n->m_data + len < &n->m_dat[MLEN] && 2126 n->m_next) { 2127 if (n->m_len >= len) 2128 return (n); 2129 m = n; 2130 n = n->m_next; 2131 len -= m->m_len; 2132 } else { 2133 if (len > MHLEN) 2134 goto bad; 2135 if (n->m_flags & M_PKTHDR) 2136 m = m_gethdr(M_NOWAIT, n->m_type); 2137 else 2138 m = m_get(M_NOWAIT, n->m_type); 2139 if (m == NULL) 2140 goto bad; 2141 m->m_len = 0; 2142 if (n->m_flags & M_PKTHDR) 2143 M_MOVE_PKTHDR(m, n); 2144 } 2145 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 2146 do { 2147 count = min(min(max(len, max_protohdr), space), n->m_len); 2148 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 2149 (unsigned)count); 2150 len -= count; 2151 m->m_len += count; 2152 n->m_len -= count; 2153 space -= count; 2154 if (n->m_len) 2155 n->m_data += count; 2156 else 2157 n = m_free(n); 2158 } while (len > 0 && n); 2159 if (len > 0) { 2160 m_free(m); 2161 goto bad; 2162 } 2163 m->m_next = n; 2164 return (m); 2165 bad: 2166 m_freem(n); 2167 ++mbstat[mycpu->gd_cpuid].m_mcfail; 2168 return (NULL); 2169 } 2170 2171 /* 2172 * Partition an mbuf chain in two pieces, returning the tail -- 2173 * all but the first len0 bytes. In case of failure, it returns NULL and 2174 * attempts to restore the chain to its original state. 2175 * 2176 * Note that the resulting mbufs might be read-only, because the new 2177 * mbuf can end up sharing an mbuf cluster with the original mbuf if 2178 * the "breaking point" happens to lie within a cluster mbuf. Use the 2179 * M_WRITABLE() macro to check for this case. 2180 */ 2181 struct mbuf * 2182 m_split(struct mbuf *m0, int len0, int wait) 2183 { 2184 struct mbuf *m, *n; 2185 unsigned len = len0, remain; 2186 2187 for (m = m0; m && len > m->m_len; m = m->m_next) 2188 len -= m->m_len; 2189 if (m == NULL) 2190 return (NULL); 2191 remain = m->m_len - len; 2192 if (m0->m_flags & M_PKTHDR) { 2193 n = m_gethdr(wait, m0->m_type); 2194 if (n == NULL) 2195 return (NULL); 2196 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 2197 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 2198 m0->m_pkthdr.len = len0; 2199 if (m->m_flags & M_EXT) 2200 goto extpacket; 2201 if (remain > MHLEN) { 2202 /* m can't be the lead packet */ 2203 MH_ALIGN(n, 0); 2204 n->m_next = m_split(m, len, wait); 2205 if (n->m_next == NULL) { 2206 m_free(n); 2207 return (NULL); 2208 } else { 2209 n->m_len = 0; 2210 return (n); 2211 } 2212 } else 2213 MH_ALIGN(n, remain); 2214 } else if (remain == 0) { 2215 n = m->m_next; 2216 m->m_next = NULL; 2217 return (n); 2218 } else { 2219 n = m_get(wait, m->m_type); 2220 if (n == NULL) 2221 return (NULL); 2222 M_ALIGN(n, remain); 2223 } 2224 extpacket: 2225 if (m->m_flags & M_EXT) { 2226 KKASSERT((n->m_flags & M_EXT) == 0); 2227 n->m_data = m->m_data + len; 2228 m->m_ext.ext_ref(m->m_ext.ext_arg); 2229 n->m_ext = m->m_ext; 2230 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 2231 } else { 2232 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 2233 } 2234 n->m_len = remain; 2235 m->m_len = len; 2236 n->m_next = m->m_next; 2237 m->m_next = NULL; 2238 return (n); 2239 } 2240 2241 /* 2242 * Routine to copy from device local memory into mbufs. 2243 * Note: "offset" is ill-defined and always called as 0, so ignore it. 2244 */ 2245 struct mbuf * 2246 m_devget(char *buf, int len, int offset, struct ifnet *ifp) 2247 { 2248 struct mbuf *m, *mfirst = NULL, **mtail; 2249 int nsize, flags; 2250 2251 mtail = &mfirst; 2252 flags = M_PKTHDR; 2253 2254 while (len > 0) { 2255 m = m_getl(len, M_NOWAIT, MT_DATA, flags, &nsize); 2256 if (m == NULL) { 2257 m_freem(mfirst); 2258 return (NULL); 2259 } 2260 m->m_len = min(len, nsize); 2261 2262 if (flags & M_PKTHDR) { 2263 if (len + max_linkhdr <= nsize) 2264 m->m_data += max_linkhdr; 2265 m->m_pkthdr.rcvif = ifp; 2266 m->m_pkthdr.len = len; 2267 flags = 0; 2268 } 2269 2270 bcopy(buf, m->m_data, (unsigned)m->m_len); 2271 buf += m->m_len; 2272 len -= m->m_len; 2273 *mtail = m; 2274 mtail = &m->m_next; 2275 } 2276 2277 return (mfirst); 2278 } 2279 2280 /* 2281 * Routine to pad mbuf to the specified length 'padto'. 2282 */ 2283 int 2284 m_devpad(struct mbuf *m, int padto) 2285 { 2286 struct mbuf *last = NULL; 2287 int padlen; 2288 2289 if (padto <= m->m_pkthdr.len) 2290 return 0; 2291 2292 padlen = padto - m->m_pkthdr.len; 2293 2294 /* if there's only the packet-header and we can pad there, use it. */ 2295 if (m->m_pkthdr.len == m->m_len && M_TRAILINGSPACE(m) >= padlen) { 2296 last = m; 2297 } else { 2298 /* 2299 * Walk packet chain to find last mbuf. We will either 2300 * pad there, or append a new mbuf and pad it 2301 */ 2302 for (last = m; last->m_next != NULL; last = last->m_next) 2303 ; /* EMPTY */ 2304 2305 /* `last' now points to last in chain. */ 2306 if (M_TRAILINGSPACE(last) < padlen) { 2307 struct mbuf *n; 2308 2309 /* Allocate new empty mbuf, pad it. Compact later. */ 2310 MGET(n, M_NOWAIT, MT_DATA); 2311 if (n == NULL) 2312 return ENOBUFS; 2313 n->m_len = 0; 2314 last->m_next = n; 2315 last = n; 2316 } 2317 } 2318 KKASSERT(M_TRAILINGSPACE(last) >= padlen); 2319 KKASSERT(M_WRITABLE(last)); 2320 2321 /* Now zero the pad area */ 2322 bzero(mtod(last, char *) + last->m_len, padlen); 2323 last->m_len += padlen; 2324 m->m_pkthdr.len += padlen; 2325 return 0; 2326 } 2327 2328 /* 2329 * Copy data from a buffer back into the indicated mbuf chain, 2330 * starting "off" bytes from the beginning, extending the mbuf 2331 * chain if necessary. 2332 */ 2333 void 2334 m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) 2335 { 2336 int mlen; 2337 struct mbuf *m = m0, *n; 2338 int totlen = 0; 2339 2340 if (m0 == NULL) 2341 return; 2342 while (off > (mlen = m->m_len)) { 2343 off -= mlen; 2344 totlen += mlen; 2345 if (m->m_next == NULL) { 2346 n = m_getclr(M_NOWAIT, m->m_type); 2347 if (n == NULL) 2348 goto out; 2349 n->m_len = min(MLEN, len + off); 2350 m->m_next = n; 2351 } 2352 m = m->m_next; 2353 } 2354 while (len > 0) { 2355 mlen = min (m->m_len - off, len); 2356 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 2357 cp += mlen; 2358 len -= mlen; 2359 mlen += off; 2360 off = 0; 2361 totlen += mlen; 2362 if (len == 0) 2363 break; 2364 if (m->m_next == NULL) { 2365 n = m_get(M_NOWAIT, m->m_type); 2366 if (n == NULL) 2367 break; 2368 n->m_len = min(MLEN, len); 2369 m->m_next = n; 2370 } 2371 m = m->m_next; 2372 } 2373 out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 2374 m->m_pkthdr.len = totlen; 2375 } 2376 2377 /* 2378 * Append the specified data to the indicated mbuf chain, 2379 * Extend the mbuf chain if the new data does not fit in 2380 * existing space. 2381 * 2382 * Return 1 if able to complete the job; otherwise 0. 2383 */ 2384 int 2385 m_append(struct mbuf *m0, int len, c_caddr_t cp) 2386 { 2387 struct mbuf *m, *n; 2388 int remainder, space; 2389 2390 for (m = m0; m->m_next != NULL; m = m->m_next) 2391 ; 2392 remainder = len; 2393 space = M_TRAILINGSPACE(m); 2394 if (space > 0) { 2395 /* 2396 * Copy into available space. 2397 */ 2398 if (space > remainder) 2399 space = remainder; 2400 bcopy(cp, mtod(m, caddr_t) + m->m_len, space); 2401 m->m_len += space; 2402 cp += space, remainder -= space; 2403 } 2404 while (remainder > 0) { 2405 /* 2406 * Allocate a new mbuf; could check space 2407 * and allocate a cluster instead. 2408 */ 2409 n = m_get(M_NOWAIT, m->m_type); 2410 if (n == NULL) 2411 break; 2412 n->m_len = min(MLEN, remainder); 2413 bcopy(cp, mtod(n, caddr_t), n->m_len); 2414 cp += n->m_len, remainder -= n->m_len; 2415 m->m_next = n; 2416 m = n; 2417 } 2418 if (m0->m_flags & M_PKTHDR) 2419 m0->m_pkthdr.len += len - remainder; 2420 return (remainder == 0); 2421 } 2422 2423 /* 2424 * Apply function f to the data in an mbuf chain starting "off" bytes from 2425 * the beginning, continuing for "len" bytes. 2426 */ 2427 int 2428 m_apply(struct mbuf *m, int off, int len, 2429 int (*f)(void *, void *, u_int), void *arg) 2430 { 2431 u_int count; 2432 int rval; 2433 2434 KASSERT(off >= 0, ("m_apply, negative off %d", off)); 2435 KASSERT(len >= 0, ("m_apply, negative len %d", len)); 2436 while (off > 0) { 2437 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 2438 if (off < m->m_len) 2439 break; 2440 off -= m->m_len; 2441 m = m->m_next; 2442 } 2443 while (len > 0) { 2444 KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain")); 2445 count = min(m->m_len - off, len); 2446 rval = (*f)(arg, mtod(m, caddr_t) + off, count); 2447 if (rval) 2448 return (rval); 2449 len -= count; 2450 off = 0; 2451 m = m->m_next; 2452 } 2453 return (0); 2454 } 2455 2456 /* 2457 * Return a pointer to mbuf/offset of location in mbuf chain. 2458 */ 2459 struct mbuf * 2460 m_getptr(struct mbuf *m, int loc, int *off) 2461 { 2462 2463 while (loc >= 0) { 2464 /* Normal end of search. */ 2465 if (m->m_len > loc) { 2466 *off = loc; 2467 return (m); 2468 } else { 2469 loc -= m->m_len; 2470 if (m->m_next == NULL) { 2471 if (loc == 0) { 2472 /* Point at the end of valid data. */ 2473 *off = m->m_len; 2474 return (m); 2475 } 2476 return (NULL); 2477 } 2478 m = m->m_next; 2479 } 2480 } 2481 return (NULL); 2482 } 2483 2484 void 2485 m_print(const struct mbuf *m) 2486 { 2487 int len; 2488 const struct mbuf *m2; 2489 char *hexstr; 2490 2491 len = m->m_pkthdr.len; 2492 m2 = m; 2493 hexstr = kmalloc(HEX_NCPYLEN(len), M_TEMP, M_ZERO | M_WAITOK); 2494 while (len) { 2495 kprintf("%p %s\n", m2, hexncpy(m2->m_data, m2->m_len, hexstr, 2496 HEX_NCPYLEN(m2->m_len), "-")); 2497 len -= m2->m_len; 2498 m2 = m2->m_next; 2499 } 2500 kfree(hexstr, M_TEMP); 2501 return; 2502 } 2503 2504 /* 2505 * "Move" mbuf pkthdr from "from" to "to". 2506 * "from" must have M_PKTHDR set, and "to" must be empty. 2507 */ 2508 void 2509 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 2510 { 2511 KASSERT((to->m_flags & M_PKTHDR), ("m_move_pkthdr: not packet header")); 2512 2513 to->m_flags |= from->m_flags & M_COPYFLAGS; 2514 to->m_pkthdr = from->m_pkthdr; /* especially tags */ 2515 SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ 2516 } 2517 2518 /* 2519 * Duplicate "from"'s mbuf pkthdr in "to". 2520 * "from" must have M_PKTHDR set, and "to" must be empty. 2521 * In particular, this does a deep copy of the packet tags. 2522 */ 2523 int 2524 m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) 2525 { 2526 KASSERT((to->m_flags & M_PKTHDR), ("m_dup_pkthdr: not packet header")); 2527 2528 to->m_flags = (from->m_flags & M_COPYFLAGS) | 2529 (to->m_flags & ~M_COPYFLAGS); 2530 to->m_pkthdr = from->m_pkthdr; 2531 SLIST_INIT(&to->m_pkthdr.tags); 2532 return (m_tag_copy_chain(to, from, how)); 2533 } 2534 2535 /* 2536 * Defragment a mbuf chain, returning the shortest possible 2537 * chain of mbufs and clusters. If allocation fails and 2538 * this cannot be completed, NULL will be returned, but 2539 * the passed in chain will be unchanged. Upon success, 2540 * the original chain will be freed, and the new chain 2541 * will be returned. 2542 * 2543 * If a non-packet header is passed in, the original 2544 * mbuf (chain?) will be returned unharmed. 2545 * 2546 * m_defrag_nofree doesn't free the passed in mbuf. 2547 */ 2548 struct mbuf * 2549 m_defrag(struct mbuf *m0, int how) 2550 { 2551 struct mbuf *m_new; 2552 2553 if ((m_new = m_defrag_nofree(m0, how)) == NULL) 2554 return (NULL); 2555 if (m_new != m0) 2556 m_freem(m0); 2557 return (m_new); 2558 } 2559 2560 struct mbuf * 2561 m_defrag_nofree(struct mbuf *m0, int how) 2562 { 2563 struct mbuf *m_new = NULL, *m_final = NULL; 2564 int progress = 0, length, nsize; 2565 2566 if (!(m0->m_flags & M_PKTHDR)) 2567 return (m0); 2568 2569 #ifdef MBUF_STRESS_TEST 2570 if (m_defragrandomfailures) { 2571 int temp = karc4random() & 0xff; 2572 if (temp == 0xba) 2573 goto nospace; 2574 } 2575 #endif 2576 2577 m_final = m_getl(m0->m_pkthdr.len, how, MT_DATA, M_PKTHDR, &nsize); 2578 if (m_final == NULL) 2579 goto nospace; 2580 m_final->m_len = 0; /* in case m0->m_pkthdr.len is zero */ 2581 2582 if (m_dup_pkthdr(m_final, m0, how) == 0) 2583 goto nospace; 2584 2585 m_new = m_final; 2586 2587 while (progress < m0->m_pkthdr.len) { 2588 length = m0->m_pkthdr.len - progress; 2589 if (length > MCLBYTES) 2590 length = MCLBYTES; 2591 2592 if (m_new == NULL) { 2593 m_new = m_getl(length, how, MT_DATA, 0, &nsize); 2594 if (m_new == NULL) 2595 goto nospace; 2596 } 2597 2598 m_copydata(m0, progress, length, mtod(m_new, caddr_t)); 2599 progress += length; 2600 m_new->m_len = length; 2601 if (m_new != m_final) 2602 m_cat(m_final, m_new); 2603 m_new = NULL; 2604 } 2605 if (m0->m_next == NULL) 2606 m_defraguseless++; 2607 m_defragpackets++; 2608 m_defragbytes += m_final->m_pkthdr.len; 2609 return (m_final); 2610 nospace: 2611 m_defragfailure++; 2612 if (m_new) 2613 m_free(m_new); 2614 m_freem(m_final); 2615 return (NULL); 2616 } 2617 2618 /* 2619 * Move data from uio into mbufs. 2620 */ 2621 struct mbuf * 2622 m_uiomove(struct uio *uio) 2623 { 2624 struct mbuf *m; /* current working mbuf */ 2625 struct mbuf *head = NULL; /* result mbuf chain */ 2626 struct mbuf **mp = &head; 2627 int flags = M_PKTHDR; 2628 int nsize; 2629 int error; 2630 int resid; 2631 2632 do { 2633 if (uio->uio_resid > INT_MAX) 2634 resid = INT_MAX; 2635 else 2636 resid = (int)uio->uio_resid; 2637 m = m_getl(resid, M_WAITOK, MT_DATA, flags, &nsize); 2638 if (flags) { 2639 m->m_pkthdr.len = 0; 2640 /* Leave room for protocol headers. */ 2641 if (resid < MHLEN) 2642 MH_ALIGN(m, resid); 2643 flags = 0; 2644 } 2645 m->m_len = imin(nsize, resid); 2646 error = uiomove(mtod(m, caddr_t), m->m_len, uio); 2647 if (error) { 2648 m_free(m); 2649 goto failed; 2650 } 2651 *mp = m; 2652 mp = &m->m_next; 2653 head->m_pkthdr.len += m->m_len; 2654 } while (uio->uio_resid > 0); 2655 2656 return (head); 2657 2658 failed: 2659 m_freem(head); 2660 return (NULL); 2661 } 2662 2663 struct mbuf * 2664 m_last(struct mbuf *m) 2665 { 2666 while (m->m_next) 2667 m = m->m_next; 2668 return (m); 2669 } 2670 2671 /* 2672 * Return the number of bytes in an mbuf chain. 2673 * If lastm is not NULL, also return the last mbuf. 2674 */ 2675 u_int 2676 m_lengthm(struct mbuf *m, struct mbuf **lastm) 2677 { 2678 u_int len = 0; 2679 struct mbuf *prev = m; 2680 2681 while (m) { 2682 len += m->m_len; 2683 prev = m; 2684 m = m->m_next; 2685 } 2686 if (lastm != NULL) 2687 *lastm = prev; 2688 return (len); 2689 } 2690 2691 /* 2692 * Like m_lengthm(), except also keep track of mbuf usage. 2693 */ 2694 u_int 2695 m_countm(struct mbuf *m, struct mbuf **lastm, u_int *pmbcnt) 2696 { 2697 u_int len = 0, mbcnt = 0; 2698 struct mbuf *prev = m; 2699 2700 while (m) { 2701 len += m->m_len; 2702 mbcnt += MSIZE; 2703 if (m->m_flags & M_EXT) 2704 mbcnt += m->m_ext.ext_size; 2705 prev = m; 2706 m = m->m_next; 2707 } 2708 if (lastm != NULL) 2709 *lastm = prev; 2710 *pmbcnt = mbcnt; 2711 return (len); 2712 } 2713