/*
 * (MPSAFE)
 *
 * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $
 * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.70 2008/11/20 14:21:01 sephe Exp $
 */

#include "opt_param.h"
#include "opt_mbuf_stress_test.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/objcache.h>
#include <sys/tree.h>
#include <sys/protosw.h>
#include <sys/uio.h>
#include <sys/thread.h>
#include <sys/globaldata.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

#include <machine/atomic.h>
#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#ifdef INVARIANTS
#include <machine/cpu.h>
#endif

/*
 * mbuf cluster meta-data
 */
struct mbcluster {
        int32_t mcl_refs;
        void    *mcl_data;
};

/*
 * mbuf tracking for debugging purposes
 */
#ifdef MBUF_DEBUG

static MALLOC_DEFINE(M_MTRACK, "mtrack", "mtrack");

struct mbtrack;
RB_HEAD(mbuf_rb_tree, mbtrack);
RB_PROTOTYPE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *);

struct mbtrack {
        RB_ENTRY(mbtrack) rb_node;
        int trackid;
        struct mbuf *m;
};

static int
mbtrack_cmp(struct mbtrack *mb1, struct mbtrack *mb2)
{
        if (mb1->m < mb2->m)
                return(-1);
        if (mb1->m > mb2->m)
                return(1);
        return(0);
}

RB_GENERATE2(mbuf_rb_tree, mbtrack, rb_node, mbtrack_cmp, struct mbuf *, m);

struct mbuf_rb_tree    mbuf_track_root;
static struct spinlock mbuf_track_spin = SPINLOCK_INITIALIZER(mbuf_track_spin);

static void
mbuftrack(struct mbuf *m)
{
        struct mbtrack *mbt;

        mbt = kmalloc(sizeof(*mbt), M_MTRACK, M_INTWAIT|M_ZERO);
        spin_lock(&mbuf_track_spin);
        mbt->m = m;
        if (mbuf_rb_tree_RB_INSERT(&mbuf_track_root, mbt)) {
                spin_unlock(&mbuf_track_spin);
                panic("mbuftrack: mbuf %p already being tracked\n", m);
        }
        spin_unlock(&mbuf_track_spin);
}

static void
mbufuntrack(struct mbuf *m)
{
        struct mbtrack *mbt;

        spin_lock(&mbuf_track_spin);
        mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
        if (mbt == NULL) {
                spin_unlock(&mbuf_track_spin);
                panic("mbufuntrack: mbuf %p was not tracked\n", m);
        } else {
                mbuf_rb_tree_RB_REMOVE(&mbuf_track_root, mbt);
                spin_unlock(&mbuf_track_spin);
                kfree(mbt, M_MTRACK);
        }
}

void
mbuftrackid(struct mbuf *m, int trackid)
{
        struct mbtrack *mbt;
        struct mbuf *n;

        spin_lock(&mbuf_track_spin);
        while (m) {
                n = m->m_nextpkt;
                while (m) {
                        mbt = mbuf_rb_tree_RB_LOOKUP(&mbuf_track_root, m);
                        if (mbt == NULL) {
                                spin_unlock(&mbuf_track_spin);
                                panic("mbuftrackid: mbuf %p not tracked", m);
                        }
                        mbt->trackid = trackid;
                        m = m->m_next;
                }
                m = n;
        }
        spin_unlock(&mbuf_track_spin);
}

static int
mbuftrack_callback(struct mbtrack *mbt, void *arg)
{
        struct sysctl_req *req = arg;
        char buf[64];
        int error;

        ksnprintf(buf, sizeof(buf), "mbuf %p track %d\n", mbt->m, mbt->trackid);

        spin_unlock(&mbuf_track_spin);
        error = SYSCTL_OUT(req, buf, strlen(buf));
        spin_lock(&mbuf_track_spin);
        if (error)
                return(-error);
        return(0);
}

static int
mbuftrack_show(SYSCTL_HANDLER_ARGS)
{
        int error;

        spin_lock(&mbuf_track_spin);
        error = mbuf_rb_tree_RB_SCAN(&mbuf_track_root, NULL,
                                     mbuftrack_callback, req);
        spin_unlock(&mbuf_track_spin);
        return (-error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, showmbufs, CTLFLAG_RD|CTLTYPE_STRING,
            0, 0, mbuftrack_show, "A", "Show all in-use mbufs");

#else

#define mbuftrack(m)
#define mbufuntrack(m)

#endif

static void mbinit(void *);
SYSINIT(mbuf, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, mbinit, NULL)

static u_long   mbtypes[SMP_MAXCPU][MT_NTYPES];

static struct mbstat mbstat[SMP_MAXCPU];
int     max_linkhdr;
int     max_protohdr;
int     max_hdr;
int     max_datalen;
int     m_defragpackets;
int     m_defragbytes;
int     m_defraguseless;
int     m_defragfailure;
#ifdef MBUF_STRESS_TEST
int     m_defragrandomfailures;
#endif

struct objcache *mbuf_cache, *mbufphdr_cache;
struct objcache *mclmeta_cache;
struct objcache *mbufcluster_cache, *mbufphdrcluster_cache;

int     nmbclusters;
int     nmbufs;

SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
           &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
           &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
           &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
           &mbuf_wait, 0, "");
static int do_mbstat(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT|CTLFLAG_RD,
            0, 0, do_mbstat, "S,mbstat", "");

static int do_mbtypes(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_kern_ipc, OID_AUTO, mbtypes, CTLTYPE_ULONG|CTLFLAG_RD,
            0, 0, do_mbtypes, "LU", "");

static int
do_mbstat(SYSCTL_HANDLER_ARGS)
{
        struct mbstat mbstat_total;
        struct mbstat *mbstat_totalp;
        int i;

        bzero(&mbstat_total, sizeof(mbstat_total));
        mbstat_totalp = &mbstat_total;

        for (i = 0; i < ncpus; i++) {
                mbstat_total.m_mbufs += mbstat[i].m_mbufs;
                mbstat_total.m_clusters += mbstat[i].m_clusters;
                mbstat_total.m_spare += mbstat[i].m_spare;
                mbstat_total.m_clfree += mbstat[i].m_clfree;
                mbstat_total.m_drops += mbstat[i].m_drops;
                mbstat_total.m_wait += mbstat[i].m_wait;
                mbstat_total.m_drain += mbstat[i].m_drain;
                mbstat_total.m_mcfail += mbstat[i].m_mcfail;
                mbstat_total.m_mpfail += mbstat[i].m_mpfail;
        }
        /*
         * The following fields are not cumulative, so just
         * get their values once.
         */
        mbstat_total.m_msize = mbstat[0].m_msize;
        mbstat_total.m_mclbytes = mbstat[0].m_mclbytes;
        mbstat_total.m_minclsize = mbstat[0].m_minclsize;
        mbstat_total.m_mlen = mbstat[0].m_mlen;
        mbstat_total.m_mhlen = mbstat[0].m_mhlen;

        return(sysctl_handle_opaque(oidp, mbstat_totalp,
                                    sizeof(mbstat_total), req));
}

static int
do_mbtypes(SYSCTL_HANDLER_ARGS)
{
        u_long totals[MT_NTYPES];
        int i, j;

        for (i = 0; i < MT_NTYPES; i++)
                totals[i] = 0;

        for (i = 0; i < ncpus; i++) {
                for (j = 0; j < MT_NTYPES; j++)
                        totals[j] += mbtypes[i][j];
        }

        return(sysctl_handle_opaque(oidp, totals, sizeof(totals), req));
}

/*
 * These are read-only because we do not currently have any code
 * to adjust the objcache limits after the fact.  The variables
 * may only be set as boot-time tunables.
 */
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
           &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
           "Maximum number of mbufs available");

SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
           &m_defragpackets, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
           &m_defragbytes, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
           &m_defraguseless, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
           &m_defragfailure, 0, "");
#ifdef MBUF_STRESS_TEST
SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
           &m_defragrandomfailures, 0, "");
#endif

static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl");
static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta");

static void m_reclaim(void);
static void m_mclref(void *arg);
static void m_mclfree(void *arg);

#ifndef NMBCLUSTERS
#define NMBCLUSTERS     (512 + maxusers * 16)
#endif
#ifndef NMBUFS
#define NMBUFS          (nmbclusters * 2)
#endif

/*
 * Perform sanity checks of tunables declared above.
 */
static void
tunable_mbinit(void *dummy)
{
        /*
         * This has to be done before VM init.
         */
        nmbclusters = NMBCLUSTERS;
        TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
        nmbufs = NMBUFS;
        TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
        /* Sanity checks */
        if (nmbufs < nmbclusters * 2)
                nmbufs = nmbclusters * 2;
}
SYSINIT(tunable_mbinit, SI_BOOT1_TUNABLES, SI_ORDER_ANY,
        tunable_mbinit, NULL);

/* "number of clusters of pages" */
#define NCL_INIT        1

#define NMB_INIT        16

/*
 * The mbuf object cache only guarantees that m_next and m_nextpkt are
 * NULL and that m_data points to the beginning of the data area.  In
 * particular, m_len and m_pkthdr.len are uninitialized.  It is the
 * responsibility of the caller to initialize those fields before use.
 */
static boolean_t __inline
mbuf_ctor(void *obj, void *private, int ocflags)
{
        struct mbuf *m = obj;

        m->m_next = NULL;
        m->m_nextpkt = NULL;
        m->m_data = m->m_dat;
        m->m_flags = 0;

        return (TRUE);
}
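
/*
 * Illustrative sketch (added commentary, not part of the original
 * source): given the ctor contract above, a consumer pulling raw mbufs
 * from mbuf_cache must finish the initialization itself, which is what
 * m_get() later in this file does:
 *
 *	struct mbuf *m = objcache_get(mbuf_cache, MBTOM(MB_DONTWAIT));
 *	if (m != NULL)
 *		m->m_len = 0;
 */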

/*
 * Initialize the mbuf and the packet header fields.
 */
static boolean_t
mbufphdr_ctor(void *obj, void *private, int ocflags)
{
        struct mbuf *m = obj;

        m->m_next = NULL;
        m->m_nextpkt = NULL;
        m->m_data = m->m_pktdat;
        m->m_flags = M_PKTHDR | M_PHCACHE;

        m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
        SLIST_INIT(&m->m_pkthdr.tags);
        m->m_pkthdr.csum_flags = 0;     /* eliminate XXX JH */
        m->m_pkthdr.fw_flags = 0;       /* eliminate XXX JH */

        return (TRUE);
}

/*
 * An mbcluster object consists of a 2K (MCLBYTES) cluster and a refcount.
 */
static boolean_t
mclmeta_ctor(void *obj, void *private, int ocflags)
{
        struct mbcluster *cl = obj;
        void *buf;

        if (ocflags & M_NOWAIT)
                buf = kmalloc(MCLBYTES, M_MBUFCL, M_NOWAIT | M_ZERO);
        else
                buf = kmalloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO);
        if (buf == NULL)
                return (FALSE);
        cl->mcl_refs = 0;
        cl->mcl_data = buf;
        return (TRUE);
}

static void
mclmeta_dtor(void *obj, void *private)
{
        struct mbcluster *mcl = obj;

        KKASSERT(mcl->mcl_refs == 0);
        kfree(mcl->mcl_data, M_MBUFCL);
}

static void
linkcluster(struct mbuf *m, struct mbcluster *cl)
{
        /*
         * Add the cluster to the mbuf.  The caller will detect that the
         * mbuf now has an attached cluster.
         */
        m->m_ext.ext_arg = cl;
        m->m_ext.ext_buf = cl->mcl_data;
        m->m_ext.ext_ref = m_mclref;
        m->m_ext.ext_free = m_mclfree;
        m->m_ext.ext_size = MCLBYTES;
        atomic_add_int(&cl->mcl_refs, 1);

        m->m_data = m->m_ext.ext_buf;
        m->m_flags |= M_EXT | M_EXT_CLUSTER;
}

static boolean_t
mbufphdrcluster_ctor(void *obj, void *private, int ocflags)
{
        struct mbuf *m = obj;
        struct mbcluster *cl;

        mbufphdr_ctor(obj, private, ocflags);
        cl = objcache_get(mclmeta_cache, ocflags);
        if (cl == NULL) {
                ++mbstat[mycpu->gd_cpuid].m_drops;
                return (FALSE);
        }
        m->m_flags |= M_CLCACHE;
        linkcluster(m, cl);
        return (TRUE);
}

static boolean_t
mbufcluster_ctor(void *obj, void *private, int ocflags)
{
        struct mbuf *m = obj;
        struct mbcluster *cl;

        mbuf_ctor(obj, private, ocflags);
        cl = objcache_get(mclmeta_cache, ocflags);
        if (cl == NULL) {
                ++mbstat[mycpu->gd_cpuid].m_drops;
                return (FALSE);
        }
        m->m_flags |= M_CLCACHE;
        linkcluster(m, cl);
        return (TRUE);
}

/*
 * Used for both the cluster and cluster PHDR caches.
 *
 * The mbuf may have lost its cluster due to sharing; deal
 * with the situation by checking M_EXT.
 */
static void
mbufcluster_dtor(void *obj, void *private)
{
        struct mbuf *m = obj;
        struct mbcluster *mcl;

        if (m->m_flags & M_EXT) {
                KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0);
                mcl = m->m_ext.ext_arg;
                KKASSERT(mcl->mcl_refs == 1);
                mcl->mcl_refs = 0;
                objcache_put(mclmeta_cache, mcl);
        }
}

struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF };
struct objcache_malloc_args mclmeta_malloc_args =
        { sizeof(struct mbcluster), M_MCLMETA };

/* ARGSUSED*/
static void
mbinit(void *dummy)
{
        int mb_limit, cl_limit;
        int limit;
        int i;

        /*
         * Initialize statistics
         */
        for (i = 0; i < ncpus; i++) {
                atomic_set_long_nonlocked(&mbstat[i].m_msize, MSIZE);
                atomic_set_long_nonlocked(&mbstat[i].m_mclbytes, MCLBYTES);
                atomic_set_long_nonlocked(&mbstat[i].m_minclsize, MINCLSIZE);
                atomic_set_long_nonlocked(&mbstat[i].m_mlen, MLEN);
                atomic_set_long_nonlocked(&mbstat[i].m_mhlen, MHLEN);
        }

        /*
         * Create object caches and save cluster limits, which will
         * be used to adjust the backing kmalloc pools' limits later.
         */
        mb_limit = cl_limit = 0;

        limit = nmbufs;
        mbuf_cache = objcache_create("mbuf", &limit, 0,
            mbuf_ctor, NULL, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        mb_limit += limit;

        limit = nmbufs;
        mbufphdr_cache = objcache_create("mbuf pkt hdr", &limit, 64,
            mbufphdr_ctor, NULL, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        mb_limit += limit;

        cl_limit = nmbclusters;
        mclmeta_cache = objcache_create("cluster mbuf", &cl_limit, 0,
            mclmeta_ctor, mclmeta_dtor, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args);

        limit = nmbclusters;
        mbufcluster_cache = objcache_create("mbuf + cluster", &limit, 0,
            mbufcluster_ctor, mbufcluster_dtor, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        mb_limit += limit;

        limit = nmbclusters;
        mbufphdrcluster_cache = objcache_create("mbuf pkt hdr + cluster",
            &limit, 64, mbufphdrcluster_ctor, mbufcluster_dtor, NULL,
            objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args);
        mb_limit += limit;

        /*
         * Adjust backing kmalloc pools' limit
         *
         * NOTE: We raise the limit by another 1/8 to take the effect
         * of loosememuse into account.
         */
        cl_limit += cl_limit / 8;
        kmalloc_raise_limit(mclmeta_malloc_args.mtype,
                            mclmeta_malloc_args.objsize * cl_limit);
        kmalloc_raise_limit(M_MBUFCL, MCLBYTES * cl_limit);

        mb_limit += mb_limit / 8;
        kmalloc_raise_limit(mbuf_malloc_args.mtype,
                            mbuf_malloc_args.objsize * mb_limit);
}
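
/*
 * Example (hypothetical values, not from the original source): because
 * the limits above are boot-time tunables, they would be raised from
 * /boot/loader.conf rather than at run-time, e.g.:
 *
 *	kern.ipc.nmbclusters="65536"
 *	kern.ipc.nmbufs="131072"
 *
 * tunable_mbinit() fetches these before mbinit() sizes the object
 * caches and the backing kmalloc pools.
 */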

/*
 * Return the number of references to this mbuf's data.  0 is returned
 * if the mbuf is not M_EXT, a reference count is returned if it is
 * M_EXT | M_EXT_CLUSTER, and 99 is returned if it is a special M_EXT.
 */
int
m_sharecount(struct mbuf *m)
{
        switch (m->m_flags & (M_EXT | M_EXT_CLUSTER)) {
        case 0:
                return (0);
        case M_EXT:
                return (99);
        case M_EXT | M_EXT_CLUSTER:
                return (((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs);
        }
        /* NOTREACHED */
        return (0);             /* to shut up compiler */
}

/*
 * Change mbuf to new type.
 */
void
m_chtype(struct mbuf *m, int type)
{
        struct globaldata *gd = mycpu;

        atomic_add_long_nonlocked(&mbtypes[gd->gd_cpuid][type], 1);
        atomic_subtract_long_nonlocked(&mbtypes[gd->gd_cpuid][m->m_type], 1);
        atomic_set_short_nonlocked(&m->m_type, type);
}

static void
m_reclaim(void)
{
        struct domain *dp;
        struct protosw *pr;

        kprintf("Debug: m_reclaim() called\n");

        SLIST_FOREACH(dp, &domains, dom_next) {
                for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
                        if (pr->pr_drain)
                                (*pr->pr_drain)();
                }
        }
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_drain, 1);
}

static void __inline
updatestats(struct mbuf *m, int type)
{
        struct globaldata *gd = mycpu;

        m->m_type = type;
        mbuftrack(m);
#ifdef MBUF_DEBUG
        KASSERT(m->m_next == NULL, ("mbuf %p: bad m_next in get", m));
        KASSERT(m->m_nextpkt == NULL, ("mbuf %p: bad m_nextpkt in get", m));
#endif

        atomic_add_long_nonlocked(&mbtypes[gd->gd_cpuid][type], 1);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mbufs, 1);
}

/*
 * Allocate an mbuf.
 */
struct mbuf *
m_get(int how, int type)
{
        struct mbuf *m;
        int ntries = 0;
        int ocf = MBTOM(how);

retryonce:

        m = objcache_get(mbuf_cache, ocf);

        if (m == NULL) {
                if ((how & MB_TRYWAIT) && ntries++ == 0) {
                        struct objcache *reclaimlist[] = {
                                mbufphdr_cache,
                                mbufcluster_cache,
                                mbufphdrcluster_cache
                        };
                        const int nreclaims = __arysize(reclaimlist);

                        if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
                                m_reclaim();
                        goto retryonce;
                }
                ++mbstat[mycpu->gd_cpuid].m_drops;
                return (NULL);
        }
#ifdef MBUF_DEBUG
        KASSERT(m->m_data == m->m_dat, ("mbuf %p: bad m_data in get", m));
#endif
        m->m_len = 0;

        updatestats(m, type);
        return (m);
}

struct mbuf *
m_gethdr(int how, int type)
{
        struct mbuf *m;
        int ocf = MBTOM(how);
        int ntries = 0;

retryonce:

        m = objcache_get(mbufphdr_cache, ocf);

        if (m == NULL) {
                if ((how & MB_TRYWAIT) && ntries++ == 0) {
                        struct objcache *reclaimlist[] = {
                                mbuf_cache,
                                mbufcluster_cache, mbufphdrcluster_cache
                        };
                        const int nreclaims = __arysize(reclaimlist);

                        if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf))
                                m_reclaim();
                        goto retryonce;
                }
                ++mbstat[mycpu->gd_cpuid].m_drops;
                return (NULL);
        }
#ifdef MBUF_DEBUG
        KASSERT(m->m_data == m->m_pktdat, ("mbuf %p: bad m_data in get", m));
#endif
        m->m_len = 0;
        m->m_pkthdr.len = 0;

        updatestats(m, type);
        return (m);
}
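
/*
 * Usage sketch (illustrative only, not part of the original source):
 * a typical caller allocates a packet header mbuf, then sets the
 * length fields to describe the data it places in the buffer:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(MB_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	bcopy(src, mtod(m, caddr_t), size);
 *	m->m_len = m->m_pkthdr.len = size;
 *
 * (src and size are assumptions of the sketch.)
 */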

/*
 * Get an mbuf (not an mbuf cluster!) and zero it.
 * Deprecated.
 */
struct mbuf *
m_getclr(int how, int type)
{
        struct mbuf *m;

        m = m_get(how, type);
        if (m != NULL)
                bzero(m->m_data, MLEN);
        return (m);
}

/*
 * Returns an mbuf with an attached cluster.
 * Because many network drivers use this kind of buffer a lot, it is
 * convenient to keep a small pool of free buffers of this kind.
 * Even a small size such as 10 gives about 10% improvement in the
 * forwarding rate in a bridge or router.
 */
struct mbuf *
m_getcl(int how, short type, int flags)
{
        struct mbuf *m;
        int ocflags = MBTOM(how);
        int ntries = 0;

retryonce:

        if (flags & M_PKTHDR)
                m = objcache_get(mbufphdrcluster_cache, ocflags);
        else
                m = objcache_get(mbufcluster_cache, ocflags);

        if (m == NULL) {
                if ((how & MB_TRYWAIT) && ntries++ == 0) {
                        struct objcache *reclaimlist[1];

                        if (flags & M_PKTHDR)
                                reclaimlist[0] = mbufcluster_cache;
                        else
                                reclaimlist[0] = mbufphdrcluster_cache;
                        if (!objcache_reclaimlist(reclaimlist, 1, ocflags))
                                m_reclaim();
                        goto retryonce;
                }
                ++mbstat[mycpu->gd_cpuid].m_drops;
                return (NULL);
        }

#ifdef MBUF_DEBUG
        KASSERT(m->m_data == m->m_ext.ext_buf,
                ("mbuf %p: bad m_data in get", m));
#endif
        m->m_type = type;
        m->m_len = 0;
        m->m_pkthdr.len = 0;    /* just do it unconditionally */

        mbuftrack(m);

        atomic_add_long_nonlocked(&mbtypes[mycpu->gd_cpuid][type], 1);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters, 1);
        return (m);
}
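
/*
 * Usage sketch (illustrative only, not from the original source): the
 * classic consumer of m_getcl() is a driver receive path that needs a
 * packet header mbuf backed by a full 2K cluster in one shot:
 *
 *	m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_len = m->m_pkthdr.len = MCLBYTES;
 *	(hand m->m_data to the DMA engine, trim with m_adj() later)
 */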

/*
 * Allocate a chain of the requested length.
 */
struct mbuf *
m_getc(int len, int how, int type)
{
        struct mbuf *n, *nfirst = NULL, **ntail = &nfirst;
        int nsize;

        while (len > 0) {
                n = m_getl(len, how, type, 0, &nsize);
                if (n == NULL)
                        goto failed;
                n->m_len = 0;
                *ntail = n;
                ntail = &n->m_next;
                len -= nsize;
        }
        return (nfirst);

failed:
        m_freem(nfirst);
        return (NULL);
}

/*
 * Allocate len-worth of mbufs and/or mbuf clusters (whatever fits best)
 * and return a pointer to the head of the allocated chain.  If m0 is
 * non-null, then we assume that it is a single mbuf or an mbuf chain to
 * which we want len bytes worth of mbufs and/or clusters attached, and so
 * if we succeed in allocating it, we will just return a pointer to m0.
 *
 * If we happen to fail at any point during the allocation, we will free
 * up everything we have already allocated and return NULL.
 *
 * Deprecated.  Use m_getc() and m_cat() instead.
 */
struct mbuf *
m_getm(struct mbuf *m0, int len, int type, int how)
{
        struct mbuf *nfirst;

        nfirst = m_getc(len, how, type);

        if (m0 != NULL) {
                m_last(m0)->m_next = nfirst;
                return (m0);
        }

        return (nfirst);
}

/*
 * Add a cluster to a normal mbuf; M_EXT is set on success.
 * Deprecated.  Use m_getcl() instead.
 */
void
m_mclget(struct mbuf *m, int how)
{
        struct mbcluster *mcl;

        KKASSERT((m->m_flags & M_EXT) == 0);
        mcl = objcache_get(mclmeta_cache, MBTOM(how));
        if (mcl != NULL) {
                linkcluster(m, mcl);
                atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_clusters,
                                          1);
        } else {
                ++mbstat[mycpu->gd_cpuid].m_drops;
        }
}

/*
 * Updates to mbcluster must be MPSAFE.  Only an entity which already has
 * a reference to the cluster can ref it, so we are in no danger of
 * racing an add with a subtract.  But the operation must still be atomic
 * since multiple entities may have a reference on the cluster.
 *
 * m_mclfree() is almost the same but it must contend with two entities
 * freeing the cluster at the same time.
 */
static void
m_mclref(void *arg)
{
        struct mbcluster *mcl = arg;

        atomic_add_int(&mcl->mcl_refs, 1);
}

/*
 * When dereferencing a cluster we have to deal with a N->0 race, where
 * N entities free their references simultaneously.  To do this we use
 * atomic_fetchadd_int().
 */
static void
m_mclfree(void *arg)
{
        struct mbcluster *mcl = arg;

        if (atomic_fetchadd_int(&mcl->mcl_refs, -1) == 1)
                objcache_put(mclmeta_cache, mcl);
}

/*
 * Free a single mbuf and any associated external storage.  The successor,
 * if any, is returned.
 *
 * We do need to check non-first mbufs for m_aux, since some existing
 * code does not call M_PREPEND properly.
 * (example: call to bpf_mtap from drivers)
 */
#ifdef MBUF_DEBUG

struct mbuf *
_m_free(struct mbuf *m, const char *func)

#else

struct mbuf *
m_free(struct mbuf *m)

#endif
{
        struct mbuf *n;
        struct globaldata *gd = mycpu;

        KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m));
        KASSERT(M_TRAILINGSPACE(m) >= 0, ("overflowed mbuf %p", m));
        atomic_subtract_long_nonlocked(&mbtypes[gd->gd_cpuid][m->m_type], 1);

        n = m->m_next;

        /*
         * Make sure the mbuf is in constructed state before returning it
         * to the objcache.
         */
        m->m_next = NULL;
        mbufuntrack(m);
#ifdef MBUF_DEBUG
        m->m_hdr.mh_lastfunc = func;
#endif
#ifdef notyet
        KKASSERT(m->m_nextpkt == NULL);
#else
        if (m->m_nextpkt != NULL) {
                static int afewtimes = 10;

                if (afewtimes-- > 0) {
                        kprintf("mfree: m->m_nextpkt != NULL\n");
                        print_backtrace(-1);
                }
                m->m_nextpkt = NULL;
        }
#endif
        if (m->m_flags & M_PKTHDR) {
                m_tag_delete_chain(m);          /* eliminate XXX JH */
        }

        m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE);

        /*
         * Clean the M_PKTHDR state so we can return the mbuf to its original
         * cache.  This is based on the PHCACHE flag which tells us whether
         * the mbuf was originally allocated out of a packet-header cache
         * or a non-packet-header cache.
         */
        if (m->m_flags & M_PHCACHE) {
                m->m_flags |= M_PKTHDR;
                m->m_pkthdr.rcvif = NULL;       /* eliminate XXX JH */
                m->m_pkthdr.csum_flags = 0;     /* eliminate XXX JH */
                m->m_pkthdr.fw_flags = 0;       /* eliminate XXX JH */
                SLIST_INIT(&m->m_pkthdr.tags);
        }

        /*
         * Handle remaining flags combinations.  M_CLCACHE tells us whether
         * the mbuf was originally allocated from a cluster cache or not,
         * and is totally separate from whether the mbuf is currently
         * associated with a cluster.
         */
        switch(m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) {
        case M_CLCACHE | M_EXT | M_EXT_CLUSTER:
                /*
                 * mbuf+cluster cache case.  The mbuf was allocated from the
                 * combined mbuf_cluster cache and can be returned to the
                 * cache if the cluster hasn't been shared.
                 */
                if (m_sharecount(m) == 1) {
                        /*
                         * The cluster has not been shared, we can just
                         * reset the data pointer and return the mbuf
                         * to the cluster cache.  Note that the reference
                         * count is left intact (it is still associated with
                         * an mbuf).
                         */
                        m->m_data = m->m_ext.ext_buf;
                        if (m->m_flags & M_PHCACHE)
                                objcache_put(mbufphdrcluster_cache, m);
                        else
                                objcache_put(mbufcluster_cache, m);
                        atomic_subtract_long_nonlocked(
                            &mbstat[mycpu->gd_cpuid].m_clusters, 1);
                } else {
                        /*
                         * Hell.  Someone else has a ref on this cluster,
                         * we have to disconnect it which means we can't
                         * put it back into the mbufcluster_cache, we
                         * have to destroy the mbuf.
                         *
                         * Other mbuf references to the cluster will typically
                         * be M_EXT | M_EXT_CLUSTER but without M_CLCACHE.
                         *
                         * XXX we could try to connect another cluster to
                         * it.
                         */
                        m->m_ext.ext_free(m->m_ext.ext_arg);
                        m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
                        if (m->m_flags & M_PHCACHE)
                                objcache_dtor(mbufphdrcluster_cache, m);
                        else
                                objcache_dtor(mbufcluster_cache, m);
                }
                break;
        case M_EXT | M_EXT_CLUSTER:
                /*
                 * Normal cluster associated with an mbuf that was allocated
                 * from the normal mbuf pool rather than the cluster pool.
                 * The cluster has to be independently disassociated from
                 * the mbuf.
                 */
                if (m_sharecount(m) == 1)
                        atomic_subtract_long_nonlocked(
                            &mbstat[mycpu->gd_cpuid].m_clusters, 1);
                /* fall through */
        case M_EXT:
                /*
                 * Normal cluster association case, disconnect the cluster
                 * from the mbuf.  The cluster may or may not be custom.
                 */
                m->m_ext.ext_free(m->m_ext.ext_arg);
                m->m_flags &= ~(M_EXT | M_EXT_CLUSTER);
                /* fall through */
        case 0:
                /*
                 * Return the mbuf to the mbuf cache.
                 */
                if (m->m_flags & M_PHCACHE) {
                        m->m_data = m->m_pktdat;
                        objcache_put(mbufphdr_cache, m);
                } else {
                        m->m_data = m->m_dat;
                        objcache_put(mbuf_cache, m);
                }
                atomic_subtract_long_nonlocked(
                    &mbstat[mycpu->gd_cpuid].m_mbufs, 1);
                break;
        default:
                if (!panicstr)
                        panic("bad mbuf flags %p %08x\n", m, m->m_flags);
                break;
        }
        return (n);
}

#ifdef MBUF_DEBUG

void
_m_freem(struct mbuf *m, const char *func)
{
        while (m)
                m = _m_free(m, func);
}

#else

void
m_freem(struct mbuf *m)
{
        while (m)
                m = m_free(m);
}

#endif

/*
 * mbuf utility routines
 */

/*
 * Lesser-used path for M_PREPEND: allocate a new mbuf to prepend to the
 * chain and copy the junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
        struct mbuf *mn;

        if (m->m_flags & M_PKTHDR)
                mn = m_gethdr(how, m->m_type);
        else
                mn = m_get(how, m->m_type);
        if (mn == NULL) {
                m_freem(m);
                return (NULL);
        }
        if (m->m_flags & M_PKTHDR)
                M_MOVE_PKTHDR(mn, m);
        mn->m_next = m;
        m = mn;
        if (len < MHLEN)
                MH_ALIGN(m, len);
        m->m_len = len;
        return (m);
}

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
 * the mbuf chain.  The wait parameter is a choice of MB_WAIT/MB_DONTWAIT
 * from the caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(const struct mbuf *m, int off0, int len, int wait)
{
        struct mbuf *n, **np;
        int off = off0;
        struct mbuf *top;
        int copyhdr = 0;

        KASSERT(off >= 0, ("m_copym, negative off %d", off));
        KASSERT(len >= 0, ("m_copym, negative len %d", len));
        if (off == 0 && (m->m_flags & M_PKTHDR))
                copyhdr = 1;
        while (off > 0) {
                KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
                if (off < m->m_len)
                        break;
                off -= m->m_len;
                m = m->m_next;
        }
        np = &top;
        top = NULL;
        while (len > 0) {
                if (m == NULL) {
                        KASSERT(len == M_COPYALL,
                                ("m_copym, length > size of mbuf chain"));
                        break;
                }
                /*
                 * Because we are sharing any cluster attachment below,
                 * be sure to get an mbuf that does not have a cluster
                 * associated with it.
                 */
                if (copyhdr)
                        n = m_gethdr(wait, m->m_type);
                else
                        n = m_get(wait, m->m_type);
                *np = n;
                if (n == NULL)
                        goto nospace;
                if (copyhdr) {
                        if (!m_dup_pkthdr(n, m, wait))
                                goto nospace;
                        if (len == M_COPYALL)
                                n->m_pkthdr.len -= off0;
                        else
                                n->m_pkthdr.len = len;
                        copyhdr = 0;
                }
                n->m_len = min(len, m->m_len - off);
                if (m->m_flags & M_EXT) {
                        KKASSERT((n->m_flags & M_EXT) == 0);
                        n->m_data = m->m_data + off;
                        m->m_ext.ext_ref(m->m_ext.ext_arg);
                        n->m_ext = m->m_ext;
                        n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
                } else {
                        bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
                              (unsigned)n->m_len);
                }
                if (len != M_COPYALL)
                        len -= n->m_len;
                off = 0;
                m = m->m_next;
                np = &n->m_next;
        }
        if (top == NULL)
                atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (top);
nospace:
        m_freem(top);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (NULL);
}
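
/*
 * Semantics sketch (illustrative only, not from the original source):
 * because m_copym() shares cluster storage rather than copying it, the
 * result must be treated as read-only:
 *
 *	n = m_copym(m, 0, M_COPYALL, MB_DONTWAIT);
 *
 * Writing through n can corrupt m's data when a cluster is shared; use
 * m_dup() further below when a writable copy is required.
 */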

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 * Preserve alignment of the first mbuf so if the creator has left
 * some room at the beginning (e.g. for inserting protocol headers)
 * the copies also have the room available.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
        struct mbuf *top, *n, *o;

        n = m_gethdr(how, m->m_type);
        top = n;
        if (!n)
                goto nospace;

        if (!m_dup_pkthdr(n, m, how))
                goto nospace;
        n->m_len = m->m_len;
        if (m->m_flags & M_EXT) {
                KKASSERT((n->m_flags & M_EXT) == 0);
                n->m_data = m->m_data;
                m->m_ext.ext_ref(m->m_ext.ext_arg);
                n->m_ext = m->m_ext;
                n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
        } else {
                n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
                bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
        }

        m = m->m_next;
        while (m) {
                o = m_get(how, m->m_type);
                if (!o)
                        goto nospace;

                n->m_next = o;
                n = n->m_next;

                n->m_len = m->m_len;
                if (m->m_flags & M_EXT) {
                        KKASSERT((n->m_flags & M_EXT) == 0);
                        n->m_data = m->m_data;
                        m->m_ext.ext_ref(m->m_ext.ext_arg);
                        n->m_ext = m->m_ext;
                        n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
                } else {
                        bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
                }

                m = m->m_next;
        }
        return top;
nospace:
        m_freem(top);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
{
        unsigned count;

        KASSERT(off >= 0, ("m_copydata, negative off %d", off));
        KASSERT(len >= 0, ("m_copydata, negative len %d", len));
        while (off > 0) {
                KASSERT(m != NULL,
                        ("m_copydata, offset > size of mbuf chain"));
                if (off < m->m_len)
                        break;
                off -= m->m_len;
                m = m->m_next;
        }
        while (len > 0) {
                KASSERT(m != NULL,
                        ("m_copydata, length > size of mbuf chain"));
                count = min(m->m_len - off, len);
                bcopy(mtod(m, caddr_t) + off, cp, count);
                len -= count;
                cp += count;
                off = 0;
                m = m->m_next;
        }
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(struct mbuf *m, int how)
{
        struct mbuf **p, *top = NULL;
        int remain, moff, nsize;

        /* Sanity check */
        if (m == NULL)
                return (NULL);
        KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__));

        /* While there's more data, get a new mbuf, tack it on, and fill it */
        remain = m->m_pkthdr.len;
        moff = 0;
        p = &top;
        while (remain > 0 || top == NULL) {     /* allow m->m_pkthdr.len == 0 */
                struct mbuf *n;

                /* Get the next new mbuf */
                n = m_getl(remain, how, m->m_type, top == NULL ? M_PKTHDR : 0,
                           &nsize);
                if (n == NULL)
                        goto nospace;
                if (top == NULL)
                        if (!m_dup_pkthdr(n, m, how))
                                goto nospace0;

                /* Link it into the new chain */
                *p = n;
                p = &n->m_next;

                /* Copy data from original mbuf(s) into new mbuf */
                n->m_len = 0;
                while (n->m_len < nsize && m != NULL) {
                        int chunk = min(nsize - n->m_len, m->m_len - moff);

                        bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
                        moff += chunk;
                        n->m_len += chunk;
                        remain -= chunk;
                        if (moff == m->m_len) {
                                m = m->m_next;
                                moff = 0;
                        }
                }

                /* Check correct total mbuf length */
                KASSERT((remain > 0 && m != NULL) ||
                        (remain == 0 && m == NULL),
                        ("%s: bogus m_pkthdr.len", __func__));
        }
        return (top);

nospace:
        m_freem(top);
nospace0:
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (NULL);
}

/*
 * Copy the non-packet mbuf data chain into a new set of mbufs, including
 * copying any mbuf clusters.  This is typically used to realign a data
 * chain by nfs_realign().
 *
 * The original chain is left intact.  how should be MB_WAIT or MB_DONTWAIT
 * and NULL can be returned if MB_DONTWAIT is passed.
 *
 * Be careful to use cluster mbufs: a large mbuf chain converted to
 * non-cluster mbufs can exhaust our supply of mbufs.
 */
struct mbuf *
m_dup_data(struct mbuf *m, int how)
{
        struct mbuf **p, *n, *top = NULL;
        int mlen, moff, chunk, gsize, nsize;

        /*
         * Degenerate case
         */
        if (m == NULL)
                return (NULL);

        /*
         * Optimize the mbuf allocation but do not get too carried away.
         */
        if (m->m_next || m->m_len > MLEN)
                gsize = MCLBYTES;
        else
                gsize = MLEN;

        /* Chain control */
        p = &top;
        n = NULL;
        nsize = 0;

        /*
         * Scan the mbuf chain until nothing is left, the new mbuf chain
         * will be allocated on the fly as needed.
         */
        while (m) {
                mlen = m->m_len;
                moff = 0;

                while (mlen) {
                        KKASSERT(m->m_type == MT_DATA);
                        if (n == NULL) {
                                n = m_getl(gsize, how, MT_DATA, 0, &nsize);
                                if (n == NULL)
                                        goto nospace;
                                n->m_len = 0;
                                *p = n;
                                p = &n->m_next;
                        }
                        chunk = imin(mlen, nsize);
                        bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
                        mlen -= chunk;
                        moff += chunk;
                        n->m_len += chunk;
                        nsize -= chunk;
                        if (nsize == 0)
                                n = NULL;
                }
                m = m->m_next;
        }
        *p = NULL;
        return(top);
nospace:
        *p = NULL;
        m_freem(top);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (NULL);
}
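
/*
 * Usage sketch (illustrative only, not from the original source):
 * unlike m_copym()/m_copypacket(), the two routines above copy cluster
 * data as well, so the result is always writable:
 *
 *	n = m_dup(m, MB_DONTWAIT);		(packet header chains)
 *	n = m_dup_data(m, MB_DONTWAIT);		(data-only chains)
 */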

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
        m = m_last(m);
        while (n) {
                if (m->m_flags & M_EXT ||
                    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
                        /* just join the two chains */
                        m->m_next = n;
                        return;
                }
                /* splat the data from one into the other */
                bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
                      (u_int)n->m_len);
                m->m_len += n->m_len;
                n = m_free(n);
        }
}

/*
 * Trim req_len bytes from the chain: from the head if req_len is
 * positive, from the tail if it is negative.
 */
void
m_adj(struct mbuf *mp, int req_len)
{
        int len = req_len;
        struct mbuf *m;
        int count;

        if ((m = mp) == NULL)
                return;
        if (len >= 0) {
                /*
                 * Trim from head.
                 */
                while (m != NULL && len > 0) {
                        if (m->m_len <= len) {
                                len -= m->m_len;
                                m->m_len = 0;
                                m = m->m_next;
                        } else {
                                m->m_len -= len;
                                m->m_data += len;
                                len = 0;
                        }
                }
                m = mp;
                if (mp->m_flags & M_PKTHDR)
                        m->m_pkthdr.len -= (req_len - len);
        } else {
                /*
                 * Trim from tail.  Scan the mbuf chain,
                 * calculating its length and finding the last mbuf.
                 * If the adjustment only affects this mbuf, then just
                 * adjust and return.  Otherwise, rescan and truncate
                 * after the remaining size.
                 */
                len = -len;
                count = 0;
                for (;;) {
                        count += m->m_len;
                        if (m->m_next == NULL)
                                break;
                        m = m->m_next;
                }
                if (m->m_len >= len) {
                        m->m_len -= len;
                        if (mp->m_flags & M_PKTHDR)
                                mp->m_pkthdr.len -= len;
                        return;
                }
                count -= len;
                if (count < 0)
                        count = 0;
                /*
                 * Correct length for chain is "count".
                 * Find the mbuf with last data, adjust its length,
                 * and toss data from remaining mbufs on chain.
                 */
                m = mp;
                if (m->m_flags & M_PKTHDR)
                        m->m_pkthdr.len = count;
                for (; m; m = m->m_next) {
                        if (m->m_len >= count) {
                                m->m_len = count;
                                break;
                        }
                        count -= m->m_len;
                }
                while (m->m_next)
                        (m = m->m_next)->m_len = 0;
        }
}
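
/*
 * Usage sketch (illustrative only): m_adj() trims from the head with a
 * positive count and from the tail with a negative one, e.g. stripping
 * an encapsulation header and a trailer (hdrlen and trailerlen are
 * assumptions of the sketch):
 *
 *	m_adj(m, hdrlen);
 *	m_adj(m, -trailerlen);
 */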

/*
 * Set the m_data pointer of a newly-allocated mbuf
 * to place an object of the specified size at the
 * end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
        int adjust;

        if (m->m_flags & M_EXT)
                adjust = m->m_ext.ext_size - len;
        else if (m->m_flags & M_PKTHDR)
                adjust = MHLEN - len;
        else
                adjust = MLEN - len;
        m->m_data += adjust &~ (sizeof(long)-1);
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work for a structure
 * of size len).  Returns the resulting mbuf chain on success, frees it and
 * returns null on failure.  If there is room, it will add up to
 * max_protohdr-len extra bytes to the contiguous region in an attempt to
 * avoid being called next time.
 */
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
        struct mbuf *m;
        int count;
        int space;

        /*
         * If first mbuf has no cluster, and has room for len bytes
         * without shifting current data, pullup into it,
         * otherwise allocate a new mbuf to prepend to the chain.
         */
        if (!(n->m_flags & M_EXT) &&
            n->m_data + len < &n->m_dat[MLEN] &&
            n->m_next) {
                if (n->m_len >= len)
                        return (n);
                m = n;
                n = n->m_next;
                len -= m->m_len;
        } else {
                if (len > MHLEN)
                        goto bad;
                if (n->m_flags & M_PKTHDR)
                        m = m_gethdr(MB_DONTWAIT, n->m_type);
                else
                        m = m_get(MB_DONTWAIT, n->m_type);
                if (m == NULL)
                        goto bad;
                m->m_len = 0;
                if (n->m_flags & M_PKTHDR)
                        M_MOVE_PKTHDR(m, n);
        }
        space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
        do {
                count = min(min(max(len, max_protohdr), space), n->m_len);
                bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
                      (unsigned)count);
                len -= count;
                m->m_len += count;
                n->m_len -= count;
                space -= count;
                if (n->m_len)
                        n->m_data += count;
                else
                        n = m_free(n);
        } while (len > 0 && n);
        if (len > 0) {
                m_free(m);
                goto bad;
        }
        m->m_next = n;
        return (m);
bad:
        m_freem(n);
        atomic_add_long_nonlocked(&mbstat[mycpu->gd_cpuid].m_mcfail, 1);
        return (NULL);
}
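
/*
 * Usage sketch (illustrative only): the canonical m_pullup() pattern in
 * protocol input paths makes a header contiguous before using mtod():
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;		(the chain was freed by m_pullup)
 *	ip = mtod(m, struct ip *);
 */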

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf.  Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
        struct mbuf *m, *n;
        unsigned len = len0, remain;

        for (m = m0; m && len > m->m_len; m = m->m_next)
                len -= m->m_len;
        if (m == NULL)
                return (NULL);
        remain = m->m_len - len;
        if (m0->m_flags & M_PKTHDR) {
                n = m_gethdr(wait, m0->m_type);
                if (n == NULL)
                        return (NULL);
                n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
                n->m_pkthdr.len = m0->m_pkthdr.len - len0;
                m0->m_pkthdr.len = len0;
                if (m->m_flags & M_EXT)
                        goto extpacket;
                if (remain > MHLEN) {
                        /* m can't be the lead packet */
                        MH_ALIGN(n, 0);
                        n->m_next = m_split(m, len, wait);
                        if (n->m_next == NULL) {
                                m_free(n);
                                return (NULL);
                        } else {
                                n->m_len = 0;
                                return (n);
                        }
                } else {
                        MH_ALIGN(n, remain);
                }
        } else if (remain == 0) {
                n = m->m_next;
                m->m_next = NULL;
                return (n);
        } else {
                n = m_get(wait, m->m_type);
                if (n == NULL)
                        return (NULL);
                M_ALIGN(n, remain);
        }
extpacket:
        if (m->m_flags & M_EXT) {
                KKASSERT((n->m_flags & M_EXT) == 0);
                n->m_data = m->m_data + len;
                m->m_ext.ext_ref(m->m_ext.ext_arg);
                n->m_ext = m->m_ext;
                n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER);
        } else {
                bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
        }
        n->m_len = remain;
        m->m_len = len;
        n->m_next = m->m_next;
        m->m_next = NULL;
        return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 * Note: "offset" is ill-defined and always called as 0, so ignore it.
 */
struct mbuf *
m_devget(char *buf, int len, int offset, struct ifnet *ifp,
    void (*copy)(volatile const void *from, volatile void *to, size_t length))
{
        struct mbuf *m, *mfirst = NULL, **mtail;
        int nsize, flags;

        if (copy == NULL)
                copy = bcopy;
        mtail = &mfirst;
        flags = M_PKTHDR;

        while (len > 0) {
                m = m_getl(len, MB_DONTWAIT, MT_DATA, flags, &nsize);
                if (m == NULL) {
                        m_freem(mfirst);
                        return (NULL);
                }
                m->m_len = min(len, nsize);

                if (flags & M_PKTHDR) {
                        if (len + max_linkhdr <= nsize)
                                m->m_data += max_linkhdr;
                        m->m_pkthdr.rcvif = ifp;
                        m->m_pkthdr.len = len;
                        flags = 0;
                }

                copy(buf, m->m_data, (unsigned)m->m_len);
                buf += m->m_len;
                len -= m->m_len;
                *mtail = m;
                mtail = &m->m_next;
        }

        return (mfirst);
}

/*
 * Routine to pad mbuf to the specified length 'padto'.
 */
int
m_devpad(struct mbuf *m, int padto)
{
        struct mbuf *last = NULL;
        int padlen;

        if (padto <= m->m_pkthdr.len)
                return 0;

        padlen = padto - m->m_pkthdr.len;

        /* if there's only the packet-header and we can pad there, use it. */
        if (m->m_pkthdr.len == m->m_len && M_TRAILINGSPACE(m) >= padlen) {
                last = m;
        } else {
                /*
                 * Walk packet chain to find last mbuf.  We will either
                 * pad there, or append a new mbuf and pad it.
                 */
                for (last = m; last->m_next != NULL; last = last->m_next)
                        ; /* EMPTY */

                /* `last' now points to last in chain. */
                if (M_TRAILINGSPACE(last) < padlen) {
                        struct mbuf *n;

                        /* Allocate new empty mbuf, pad it.  Compact later. */
                        MGET(n, MB_DONTWAIT, MT_DATA);
                        if (n == NULL)
                                return ENOBUFS;
                        n->m_len = 0;
                        last->m_next = n;
                        last = n;
                }
        }
        KKASSERT(M_TRAILINGSPACE(last) >= padlen);
        KKASSERT(M_WRITABLE(last));

        /* Now zero the pad area */
        bzero(mtod(last, char *) + last->m_len, padlen);
        last->m_len += padlen;
        m->m_pkthdr.len += padlen;
        return 0;
}

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
{
        int mlen;
        struct mbuf *m = m0, *n;
        int totlen = 0;

        if (m0 == NULL)
                return;
        while (off > (mlen = m->m_len)) {
                off -= mlen;
                totlen += mlen;
                if (m->m_next == NULL) {
                        n = m_getclr(MB_DONTWAIT, m->m_type);
                        if (n == NULL)
                                goto out;
                        n->m_len = min(MLEN, len + off);
                        m->m_next = n;
                }
                m = m->m_next;
        }
        while (len > 0) {
                mlen = min(m->m_len - off, len);
                bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
                cp += mlen;
                len -= mlen;
                mlen += off;
                off = 0;
                totlen += mlen;
                if (len == 0)
                        break;
                if (m->m_next == NULL) {
                        n = m_get(MB_DONTWAIT, m->m_type);
                        if (n == NULL)
                                break;
                        n->m_len = min(MLEN, len);
                        m->m_next = n;
                }
                m = m->m_next;
        }
out:
        if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
                m->m_pkthdr.len = totlen;
}
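
/*
 * Usage sketch (illustrative only): m_copyback() overwrites bytes at an
 * arbitrary offset, extending the chain if needed, e.g. patching a
 * 16-bit field hdroff bytes into the packet (hdroff and the value are
 * assumptions of the sketch):
 *
 *	uint16_t val = htons(0x1234);
 *
 *	m_copyback(m, hdroff, sizeof(val), (caddr_t)&val);
 */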

/*
 * Append the specified data to the indicated mbuf chain.
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
int
m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
        struct mbuf *m, *n;
        int remainder, space;

        for (m = m0; m->m_next != NULL; m = m->m_next)
                ;
        remainder = len;
        space = M_TRAILINGSPACE(m);
        if (space > 0) {
                /*
                 * Copy into available space.
                 */
                if (space > remainder)
                        space = remainder;
                bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
                m->m_len += space;
                cp += space, remainder -= space;
        }
        while (remainder > 0) {
                /*
                 * Allocate a new mbuf; could check space
                 * and allocate a cluster instead.
                 */
                n = m_get(MB_DONTWAIT, m->m_type);
                if (n == NULL)
                        break;
                n->m_len = min(MLEN, remainder);
                bcopy(cp, mtod(n, caddr_t), n->m_len);
                cp += n->m_len, remainder -= n->m_len;
                m->m_next = n;
                m = n;
        }
        if (m0->m_flags & M_PKTHDR)
                m0->m_pkthdr.len += len - remainder;
        return (remainder == 0);
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
        int (*f)(void *, void *, u_int), void *arg)
{
        u_int count;
        int rval;

        KASSERT(off >= 0, ("m_apply, negative off %d", off));
        KASSERT(len >= 0, ("m_apply, negative len %d", len));
        while (off > 0) {
                KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
                if (off < m->m_len)
                        break;
                off -= m->m_len;
                m = m->m_next;
        }
        while (len > 0) {
                KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
                count = min(m->m_len - off, len);
                rval = (*f)(arg, mtod(m, caddr_t) + off, count);
                if (rval)
                        return (rval);
                len -= count;
                off = 0;
                m = m->m_next;
        }
        return (0);
}
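
/*
 * Usage sketch (illustrative only, sum_cb is a hypothetical callback):
 * m_apply() visits each contiguous region of the chain in turn, so a
 * caller can feed an accumulator without linearizing the chain first:
 *
 *	static int
 *	sum_cb(void *arg, void *data, u_int len)
 *	{
 *		(accumulate len bytes at data into *(uint32_t *)arg)
 *		return (0);	(non-zero aborts the walk)
 *	}
 *
 *	error = m_apply(m, 0, m->m_pkthdr.len, sum_cb, &sum);
 */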

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
        while (loc >= 0) {
                /* Normal end of search. */
                if (m->m_len > loc) {
                        *off = loc;
                        return (m);
                } else {
                        loc -= m->m_len;
                        if (m->m_next == NULL) {
                                if (loc == 0) {
                                        /* Point at the end of valid data. */
                                        *off = m->m_len;
                                        return (m);
                                }
                                return (NULL);
                        }
                        m = m->m_next;
                }
        }
        return (NULL);
}

void
m_print(const struct mbuf *m)
{
        int len;
        const struct mbuf *m2;

        len = m->m_pkthdr.len;
        m2 = m;
        while (len) {
                kprintf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
                len -= m2->m_len;
                m2 = m2->m_next;
        }
}

/*
 * "Move" mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{
        KASSERT((to->m_flags & M_PKTHDR),
                ("m_move_pkthdr: not packet header"));

        to->m_flags |= from->m_flags & M_COPYFLAGS;
        to->m_pkthdr = from->m_pkthdr;          /* especially tags */
        SLIST_INIT(&from->m_pkthdr.tags);       /* purge tags from src */
}

/*
 * Duplicate "from"'s mbuf pkthdr in "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 * In particular, this does a deep copy of the packet tags.
 */
int
m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
{
        KASSERT((to->m_flags & M_PKTHDR),
                ("m_dup_pkthdr: not packet header"));

        to->m_flags = (from->m_flags & M_COPYFLAGS) |
                      (to->m_flags & ~M_COPYFLAGS);
        to->m_pkthdr = from->m_pkthdr;
        SLIST_INIT(&to->m_pkthdr.tags);
        return (m_tag_copy_chain(to, from, how));
}

/*
 * Defragment an mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain?) will be returned unharmed.
 *
 * m_defrag_nofree doesn't free the passed in mbuf.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
        struct mbuf *m_new;

        if ((m_new = m_defrag_nofree(m0, how)) == NULL)
                return (NULL);
        if (m_new != m0)
                m_freem(m0);
        return (m_new);
}

struct mbuf *
m_defrag_nofree(struct mbuf *m0, int how)
{
        struct mbuf *m_new = NULL, *m_final = NULL;
        int progress = 0, length, nsize;

        if (!(m0->m_flags & M_PKTHDR))
                return (m0);

#ifdef MBUF_STRESS_TEST
        if (m_defragrandomfailures) {
                int temp = karc4random() & 0xff;
                if (temp == 0xba)
                        goto nospace;
        }
#endif

        m_final = m_getl(m0->m_pkthdr.len, how, MT_DATA, M_PKTHDR, &nsize);
        if (m_final == NULL)
                goto nospace;
        m_final->m_len = 0;     /* in case m0->m_pkthdr.len is zero */

        if (m_dup_pkthdr(m_final, m0, how) == 0)
                goto nospace;

        m_new = m_final;

        while (progress < m0->m_pkthdr.len) {
                length = m0->m_pkthdr.len - progress;
                if (length > MCLBYTES)
                        length = MCLBYTES;

                if (m_new == NULL) {
                        m_new = m_getl(length, how, MT_DATA, 0, &nsize);
                        if (m_new == NULL)
                                goto nospace;
                }

                m_copydata(m0, progress, length, mtod(m_new, caddr_t));
                progress += length;
                m_new->m_len = length;
                if (m_new != m_final)
                        m_cat(m_final, m_new);
                m_new = NULL;
        }
        if (m0->m_next == NULL)
                m_defraguseless++;
        m_defragpackets++;
        m_defragbytes += m_final->m_pkthdr.len;
        return (m_final);
nospace:
        m_defragfailure++;
        if (m_new)
                m_free(m_new);
        m_freem(m_final);
        return (NULL);
}
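
/*
 * Usage sketch (illustrative only): a driver whose DMA hardware only
 * handles a limited number of segments might compact an over-long
 * transmit chain (the decision of when to defragment is driver-specific
 * and assumed here):
 *
 *	m_new = m_defrag(m, MB_DONTWAIT);
 *	if (m_new == NULL) {
 *		m_freem(m);	(m itself is unchanged on failure)
 *		return (ENOBUFS);
 *	}
 *	m = m_new;
 */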

/*
 * Move data from uio into mbufs.
 */
struct mbuf *
m_uiomove(struct uio *uio)
{
        struct mbuf *m;                 /* current working mbuf */
        struct mbuf *head = NULL;       /* result mbuf chain */
        struct mbuf **mp = &head;
        int flags = M_PKTHDR;
        int nsize;
        int error;
        int resid;

        do {
                if (uio->uio_resid > INT_MAX)
                        resid = INT_MAX;
                else
                        resid = (int)uio->uio_resid;
                m = m_getl(resid, MB_WAIT, MT_DATA, flags, &nsize);
                if (flags) {
                        m->m_pkthdr.len = 0;
                        /* Leave room for protocol headers. */
                        if (resid < MHLEN)
                                MH_ALIGN(m, resid);
                        flags = 0;
                }
                m->m_len = imin(nsize, resid);
                error = uiomove(mtod(m, caddr_t), m->m_len, uio);
                if (error) {
                        m_free(m);
                        goto failed;
                }
                *mp = m;
                mp = &m->m_next;
                head->m_pkthdr.len += m->m_len;
        } while (uio->uio_resid > 0);

        return (head);

failed:
        m_freem(head);
        return (NULL);
}

struct mbuf *
m_last(struct mbuf *m)
{
        while (m->m_next)
                m = m->m_next;
        return (m);
}

/*
 * Return the number of bytes in an mbuf chain.
 * If lastm is not NULL, also return the last mbuf.
 */
u_int
m_lengthm(struct mbuf *m, struct mbuf **lastm)
{
        u_int len = 0;
        struct mbuf *prev = m;

        while (m) {
                len += m->m_len;
                prev = m;
                m = m->m_next;
        }
        if (lastm != NULL)
                *lastm = prev;
        return (len);
}

/*
 * Like m_lengthm(), except also keep track of mbuf usage.
 */
u_int
m_countm(struct mbuf *m, struct mbuf **lastm, u_int *pmbcnt)
{
        u_int len = 0, mbcnt = 0;
        struct mbuf *prev = m;

        while (m) {
                len += m->m_len;
                mbcnt += MSIZE;
                if (m->m_flags & M_EXT)
                        mbcnt += m->m_ext.ext_size;
                prev = m;
                m = m->m_next;
        }
        if (lastm != NULL)
                *lastm = prev;
        *pmbcnt = mbcnt;
        return (len);
}