1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 36 * 37 * License terms: all terms for the DragonFly license above plus the following: 38 * 39 * 4. All advertising materials mentioning features or use of this software 40 * must display the following acknowledgement: 41 * 42 * This product includes software developed by Jeffrey M. Hsu 43 * for the DragonFly Project. 44 * 45 * This requirement may be waived with permission from Jeffrey Hsu. 46 * This requirement will sunset and may be removed on July 8 2005, 47 * after which the standard DragonFly license (as shown above) will 48 * apply. 49 */ 50 51 /* 52 * Copyright (c) 1982, 1986, 1988, 1991, 1993 53 * The Regents of the University of California. All rights reserved. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 3. All advertising materials mentioning features or use of this software 64 * must display the following acknowledgement: 65 * This product includes software developed by the University of 66 * California, Berkeley and its contributors. 67 * 4. Neither the name of the University nor the names of its contributors 68 * may be used to endorse or promote products derived from this software 69 * without specific prior written permission. 70 * 71 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 72 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 74 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 75 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 76 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 77 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 78 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 79 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 80 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 81 * SUCH DAMAGE. 82 * 83 * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 84 * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $ 85 * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.55 2006/01/21 19:05:42 dillon Exp $ 86 */ 87 88 #include "opt_param.h" 89 #include "opt_ddb.h" 90 #include "opt_mbuf_stress_test.h" 91 #include <sys/param.h> 92 #include <sys/systm.h> 93 #include <sys/malloc.h> 94 #include <sys/mbuf.h> 95 #include <sys/kernel.h> 96 #include <sys/sysctl.h> 97 #include <sys/domain.h> 98 #include <sys/objcache.h> 99 #include <sys/protosw.h> 100 #include <sys/uio.h> 101 #include <sys/thread.h> 102 #include <sys/globaldata.h> 103 #include <sys/serialize.h> 104 #include <sys/thread2.h> 105 106 #include <vm/vm.h> 107 #include <vm/vm_kern.h> 108 #include <vm/vm_extern.h> 109 110 #ifdef INVARIANTS 111 #include <machine/cpu.h> 112 #endif 113 114 /* 115 * mbuf cluster meta-data 116 */ 117 struct mbcluster { 118 int32_t mcl_refs; 119 void *mcl_data; 120 struct lwkt_serialize mcl_serializer; 121 }; 122 123 static void mbinit(void *); 124 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) 125 126 static u_long mbtypes[MT_NTYPES]; 127 128 struct mbstat mbstat; 129 int max_linkhdr; 130 int max_protohdr; 131 int max_hdr; 132 int max_datalen; 133 int m_defragpackets; 134 int m_defragbytes; 135 int m_defraguseless; 136 int m_defragfailure; 137 #ifdef MBUF_STRESS_TEST 138 int m_defragrandomfailures; 139 #endif 140 141 struct objcache *mbuf_cache, *mbufphdr_cache; 142 struct objcache *mclmeta_cache; 143 struct objcache *mbufcluster_cache, *mbufphdrcluster_cache; 144 145 int nmbclusters; 146 int nmbufs; 147 148 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, 149 &max_linkhdr, 0, ""); 150 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, 151 &max_protohdr, 0, ""); 152 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); 153 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, 154 &max_datalen, 0, ""); 155 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, 156 &mbuf_wait, 0, ""); 157 SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, ""); 158 SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, 159 sizeof(mbtypes), "LU", ""); 160 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RW, 161 &nmbclusters, 0, "Maximum number of mbuf clusters available"); 162 SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RW, &nmbufs, 0, 163 "Maximum number of mbufs available"); 164 165 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, 166 &m_defragpackets, 0, ""); 167 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, 168 &m_defragbytes, 0, ""); 169 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, 170 &m_defraguseless, 0, ""); 171 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, 172 &m_defragfailure, 0, ""); 173 #ifdef MBUF_STRESS_TEST 174 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, 175 &m_defragrandomfailures, 0, ""); 176 #endif 177 178 static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); 179 static MALLOC_DEFINE(M_MBUFCL, "mbufcl", "mbufcl"); 180 static MALLOC_DEFINE(M_MCLMETA, "mclmeta", "mclmeta"); 181 182 static void m_reclaim (void); 183 static void m_mclref(void *arg); 184 static void m_mclfree(void *arg); 185 186 #ifndef NMBCLUSTERS 187 #define NMBCLUSTERS (512 + maxusers * 16) 188 #endif 189 #ifndef NMBUFS 190 #define NMBUFS (nmbclusters * 2) 191 #endif 192 193 /* 194 * Perform sanity checks of tunables declared above. 195 */ 196 static void 197 tunable_mbinit(void *dummy) 198 { 199 200 /* 201 * This has to be done before VM init. 202 */ 203 nmbclusters = NMBCLUSTERS; 204 TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 205 nmbufs = NMBUFS; 206 TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 207 /* Sanity checks */ 208 if (nmbufs < nmbclusters * 2) 209 nmbufs = nmbclusters * 2; 210 211 return; 212 } 213 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); 214 215 /* "number of clusters of pages" */ 216 #define NCL_INIT 1 217 218 #define NMB_INIT 16 219 220 /* 221 * The mbuf object cache only guarantees that m_next and m_nextpkt are 222 * NULL and that m_data points to the beginning of the data area. In 223 * particular, m_len and m_pkthdr.len are uninitialized. It is the 224 * responsibility of the caller to initialize those fields before use. 225 */ 226 227 static boolean_t __inline 228 mbuf_ctor(void *obj, void *private, int ocflags) 229 { 230 struct mbuf *m = obj; 231 232 m->m_next = NULL; 233 m->m_nextpkt = NULL; 234 m->m_data = m->m_dat; 235 m->m_flags = 0; 236 237 return (TRUE); 238 } 239 240 /* 241 * Initialize the mbuf and the packet header fields. 242 */ 243 static boolean_t 244 mbufphdr_ctor(void *obj, void *private, int ocflags) 245 { 246 struct mbuf *m = obj; 247 248 m->m_next = NULL; 249 m->m_nextpkt = NULL; 250 m->m_data = m->m_pktdat; 251 m->m_flags = M_PKTHDR | M_PHCACHE; 252 253 m->m_pkthdr.rcvif = NULL; /* eliminate XXX JH */ 254 SLIST_INIT(&m->m_pkthdr.tags); 255 m->m_pkthdr.csum_flags = 0; /* eliminate XXX JH */ 256 m->m_pkthdr.fw_flags = 0; /* eliminate XXX JH */ 257 258 return (TRUE); 259 } 260 261 /* 262 * A mbcluster object consists of 2K (MCLBYTES) cluster and a refcount. 263 */ 264 static boolean_t 265 mclmeta_ctor(void *obj, void *private, int ocflags) 266 { 267 struct mbcluster *cl = obj; 268 void *buf; 269 270 if (ocflags & M_NOWAIT) 271 buf = malloc(MCLBYTES, M_MBUFCL, M_NOWAIT | M_ZERO); 272 else 273 buf = malloc(MCLBYTES, M_MBUFCL, M_INTWAIT | M_ZERO); 274 if (buf == NULL) 275 return (FALSE); 276 cl->mcl_refs = 0; 277 cl->mcl_data = buf; 278 lwkt_serialize_init(&cl->mcl_serializer); 279 return (TRUE); 280 } 281 282 static void 283 mclmeta_dtor(void *obj, void *private) 284 { 285 struct mbcluster *mcl = obj; 286 287 KKASSERT(mcl->mcl_refs == 0); 288 free(mcl->mcl_data, M_MBUFCL); 289 } 290 291 static void 292 linkcluster(struct mbuf *m, struct mbcluster *cl) 293 { 294 /* 295 * Add the cluster to the mbuf. The caller will detect that the 296 * mbuf now has an attached cluster. 297 */ 298 m->m_ext.ext_arg = cl; 299 m->m_ext.ext_buf = cl->mcl_data; 300 m->m_ext.ext_ref = m_mclref; 301 m->m_ext.ext_free = m_mclfree; 302 m->m_ext.ext_size = MCLBYTES; 303 atomic_add_int(&cl->mcl_refs, 1); 304 305 m->m_data = m->m_ext.ext_buf; 306 m->m_flags |= M_EXT | M_EXT_CLUSTER; 307 } 308 309 static boolean_t 310 mbufphdrcluster_ctor(void *obj, void *private, int ocflags) 311 { 312 struct mbuf *m = obj; 313 struct mbcluster *cl; 314 315 mbufphdr_ctor(obj, private, ocflags); 316 cl = objcache_get(mclmeta_cache, ocflags); 317 if (cl == NULL) 318 return (FALSE); 319 m->m_flags |= M_CLCACHE; 320 linkcluster(m, cl); 321 return (TRUE); 322 } 323 324 static boolean_t 325 mbufcluster_ctor(void *obj, void *private, int ocflags) 326 { 327 struct mbuf *m = obj; 328 struct mbcluster *cl; 329 330 mbuf_ctor(obj, private, ocflags); 331 cl = objcache_get(mclmeta_cache, ocflags); 332 if (cl == NULL) 333 return (FALSE); 334 m->m_flags |= M_CLCACHE; 335 linkcluster(m, cl); 336 return (TRUE); 337 } 338 339 /* 340 * Used for both the cluster and cluster PHDR caches. 341 * 342 * The mbuf may have lost its cluster due to sharing, deal 343 * with the situation by checking M_EXT. 344 */ 345 static void 346 mbufcluster_dtor(void *obj, void *private) 347 { 348 struct mbuf *m = obj; 349 struct mbcluster *mcl; 350 351 if (m->m_flags & M_EXT) { 352 KKASSERT((m->m_flags & M_EXT_CLUSTER) != 0); 353 mcl = m->m_ext.ext_arg; 354 KKASSERT(mcl->mcl_refs == 1); 355 mcl->mcl_refs = 0; 356 objcache_put(mclmeta_cache, mcl); 357 } 358 } 359 360 struct objcache_malloc_args mbuf_malloc_args = { MSIZE, M_MBUF }; 361 struct objcache_malloc_args mclmeta_malloc_args = 362 { sizeof(struct mbcluster), M_MCLMETA }; 363 364 /* ARGSUSED*/ 365 static void 366 mbinit(void *dummy) 367 { 368 mbstat.m_msize = MSIZE; 369 mbstat.m_mclbytes = MCLBYTES; 370 mbstat.m_minclsize = MINCLSIZE; 371 mbstat.m_mlen = MLEN; 372 mbstat.m_mhlen = MHLEN; 373 374 mbuf_cache = objcache_create("mbuf", nmbufs, 0, 375 mbuf_ctor, null_dtor, NULL, 376 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 377 mbufphdr_cache = objcache_create("mbuf pkt hdr", nmbufs, 64, 378 mbufphdr_ctor, null_dtor, NULL, 379 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 380 mclmeta_cache = objcache_create("cluster mbuf", nmbclusters , 0, 381 mclmeta_ctor, mclmeta_dtor, NULL, 382 objcache_malloc_alloc, objcache_malloc_free, &mclmeta_malloc_args); 383 mbufcluster_cache = objcache_create("mbuf + cluster", nmbclusters, 0, 384 mbufcluster_ctor, mbufcluster_dtor, NULL, 385 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 386 mbufphdrcluster_cache = objcache_create("mbuf pkt hdr + cluster", 387 nmbclusters, 64, mbufphdrcluster_ctor, mbufcluster_dtor, NULL, 388 objcache_malloc_alloc, objcache_malloc_free, &mbuf_malloc_args); 389 return; 390 } 391 392 /* 393 * Return the number of references to this mbuf's data. 0 is returned 394 * if the mbuf is not M_EXT, a reference count is returned if it is 395 * M_EXT | M_EXT_CLUSTER, and 99 is returned if it is a special M_EXT. 396 */ 397 int 398 m_sharecount(struct mbuf *m) 399 { 400 switch (m->m_flags & (M_EXT | M_EXT_CLUSTER)) { 401 case 0: 402 return (0); 403 case M_EXT: 404 return (99); 405 case M_EXT | M_EXT_CLUSTER: 406 return (((struct mbcluster *)m->m_ext.ext_arg)->mcl_refs); 407 } 408 /* NOTREACHED */ 409 return (0); /* to shut up compiler */ 410 } 411 412 /* 413 * change mbuf to new type 414 */ 415 void 416 m_chtype(struct mbuf *m, int type) 417 { 418 crit_enter(); 419 ++mbtypes[type]; 420 --mbtypes[m->m_type]; 421 m->m_type = type; 422 crit_exit(); 423 } 424 425 static void 426 m_reclaim(void) 427 { 428 struct domain *dp; 429 struct protosw *pr; 430 431 crit_enter(); 432 SLIST_FOREACH(dp, &domains, dom_next) { 433 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 434 if (pr->pr_drain) 435 (*pr->pr_drain)(); 436 } 437 } 438 crit_exit(); 439 mbstat.m_drain++; 440 } 441 442 static void __inline 443 updatestats(struct mbuf *m, int type) 444 { 445 m->m_type = type; 446 447 crit_enter(); 448 ++mbtypes[type]; 449 ++mbstat.m_mbufs; 450 crit_exit(); 451 } 452 453 /* 454 * Allocate an mbuf. 455 */ 456 struct mbuf * 457 m_get(int how, int type) 458 { 459 struct mbuf *m; 460 int ntries = 0; 461 int ocf = MBTOM(how); 462 463 retryonce: 464 465 m = objcache_get(mbuf_cache, ocf); 466 467 if (m == NULL) { 468 if ((how & MB_TRYWAIT) && ntries++ == 0) { 469 struct objcache *reclaimlist[] = { 470 mbufphdr_cache, 471 mbufcluster_cache, mbufphdrcluster_cache 472 }; 473 const int nreclaims = __arysize(reclaimlist); 474 475 if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf)) 476 m_reclaim(); 477 goto retryonce; 478 } 479 return (NULL); 480 } 481 482 updatestats(m, type); 483 return (m); 484 } 485 486 struct mbuf * 487 m_gethdr(int how, int type) 488 { 489 struct mbuf *m; 490 int ocf = MBTOM(how); 491 int ntries = 0; 492 493 retryonce: 494 495 m = objcache_get(mbufphdr_cache, ocf); 496 497 if (m == NULL) { 498 if ((how & MB_TRYWAIT) && ntries++ == 0) { 499 struct objcache *reclaimlist[] = { 500 mbuf_cache, 501 mbufcluster_cache, mbufphdrcluster_cache 502 }; 503 const int nreclaims = __arysize(reclaimlist); 504 505 if (!objcache_reclaimlist(reclaimlist, nreclaims, ocf)) 506 m_reclaim(); 507 goto retryonce; 508 } 509 return (NULL); 510 } 511 512 updatestats(m, type); 513 return (m); 514 } 515 516 /* 517 * Get a mbuf (not a mbuf cluster!) and zero it. 518 * Deprecated. 519 */ 520 struct mbuf * 521 m_getclr(int how, int type) 522 { 523 struct mbuf *m; 524 525 m = m_get(how, type); 526 if (m != NULL) 527 bzero(m->m_data, MLEN); 528 return (m); 529 } 530 531 /* 532 * Returns an mbuf with an attached cluster. 533 * Because many network drivers use this kind of buffers a lot, it is 534 * convenient to keep a small pool of free buffers of this kind. 535 * Even a small size such as 10 gives about 10% improvement in the 536 * forwarding rate in a bridge or router. 537 */ 538 struct mbuf * 539 m_getcl(int how, short type, int flags) 540 { 541 struct mbuf *m; 542 int ocflags = MBTOM(how); 543 int ntries = 0; 544 545 retryonce: 546 547 if (flags & M_PKTHDR) 548 m = objcache_get(mbufphdrcluster_cache, ocflags); 549 else 550 m = objcache_get(mbufcluster_cache, ocflags); 551 552 if (m == NULL) { 553 if ((how & MB_TRYWAIT) && ntries++ == 0) { 554 struct objcache *reclaimlist[1]; 555 556 if (flags & M_PKTHDR) 557 reclaimlist[0] = mbufcluster_cache; 558 else 559 reclaimlist[0] = mbufphdrcluster_cache; 560 if (!objcache_reclaimlist(reclaimlist, 1, ocflags)) 561 m_reclaim(); 562 goto retryonce; 563 } 564 return (NULL); 565 } 566 567 m->m_type = type; 568 569 crit_enter(); 570 ++mbtypes[type]; 571 ++mbstat.m_clusters; 572 crit_exit(); 573 return (m); 574 } 575 576 /* 577 * Allocate chain of requested length. 578 */ 579 struct mbuf * 580 m_getc(int len, int how, int type) 581 { 582 struct mbuf *n, *nfirst = NULL, **ntail = &nfirst; 583 int nsize; 584 585 while (len > 0) { 586 n = m_getl(len, how, type, 0, &nsize); 587 if (n == NULL) 588 goto failed; 589 n->m_len = 0; 590 *ntail = n; 591 ntail = &n->m_next; 592 len -= nsize; 593 } 594 return (nfirst); 595 596 failed: 597 m_freem(nfirst); 598 return (NULL); 599 } 600 601 /* 602 * Allocate len-worth of mbufs and/or mbuf clusters (whatever fits best) 603 * and return a pointer to the head of the allocated chain. If m0 is 604 * non-null, then we assume that it is a single mbuf or an mbuf chain to 605 * which we want len bytes worth of mbufs and/or clusters attached, and so 606 * if we succeed in allocating it, we will just return a pointer to m0. 607 * 608 * If we happen to fail at any point during the allocation, we will free 609 * up everything we have already allocated and return NULL. 610 * 611 * Deprecated. Use m_getc() and m_cat() instead. 612 */ 613 struct mbuf * 614 m_getm(struct mbuf *m0, int len, int type, int how) 615 { 616 struct mbuf *nfirst; 617 618 nfirst = m_getc(len, how, type); 619 620 if (m0 != NULL) { 621 m_last(m0)->m_next = nfirst; 622 return (m0); 623 } 624 625 return (nfirst); 626 } 627 628 /* 629 * Adds a cluster to a normal mbuf, M_EXT is set on success. 630 * Deprecated. Use m_getcl() instead. 631 */ 632 void 633 m_mclget(struct mbuf *m, int how) 634 { 635 struct mbcluster *mcl; 636 637 KKASSERT((m->m_flags & M_EXT) == 0); 638 mcl = objcache_get(mclmeta_cache, MBTOM(how)); 639 if (mcl != NULL) { 640 linkcluster(m, mcl); 641 crit_enter(); 642 ++mbstat.m_clusters; 643 /* leave the m_mbufs count intact for original mbuf */ 644 crit_exit(); 645 } 646 } 647 648 /* 649 * Updates to mbcluster must be MPSAFE. Only an entity which already has 650 * a reference to the cluster can ref it, so we are in no danger of 651 * racing an add with a subtract. But the operation must still be atomic 652 * since multiple entities may have a reference on the cluster. 653 * 654 * m_mclfree() is almost the same but it must contend with two entities 655 * freeing the cluster at the same time. If there is only one reference 656 * count we are the only entity referencing the cluster and no further 657 * locking is required. Otherwise we must protect against a race to 0 658 * with the serializer. 659 */ 660 static void 661 m_mclref(void *arg) 662 { 663 struct mbcluster *mcl = arg; 664 665 atomic_add_int(&mcl->mcl_refs, 1); 666 } 667 668 static void 669 m_mclfree(void *arg) 670 { 671 struct mbcluster *mcl = arg; 672 673 if (mcl->mcl_refs == 1) { 674 mcl->mcl_refs = 0; 675 objcache_put(mclmeta_cache, mcl); 676 } else { 677 lwkt_serialize_enter(&mcl->mcl_serializer); 678 if (mcl->mcl_refs > 1) { 679 atomic_subtract_int(&mcl->mcl_refs, 1); 680 lwkt_serialize_exit(&mcl->mcl_serializer); 681 } else { 682 lwkt_serialize_exit(&mcl->mcl_serializer); 683 KKASSERT(mcl->mcl_refs == 1); 684 mcl->mcl_refs = 0; 685 objcache_put(mclmeta_cache, mcl); 686 } 687 } 688 } 689 690 extern void db_print_backtrace(void); 691 692 /* 693 * Free a single mbuf and any associated external storage. The successor, 694 * if any, is returned. 695 * 696 * We do need to check non-first mbuf for m_aux, since some of existing 697 * code does not call M_PREPEND properly. 698 * (example: call to bpf_mtap from drivers) 699 */ 700 struct mbuf * 701 m_free(struct mbuf *m) 702 { 703 struct mbuf *n; 704 705 KASSERT(m->m_type != MT_FREE, ("freeing free mbuf %p", m)); 706 --mbtypes[m->m_type]; 707 708 n = m->m_next; 709 710 /* 711 * Make sure the mbuf is in constructed state before returning it 712 * to the objcache. 713 */ 714 m->m_next = NULL; 715 #ifdef notyet 716 KKASSERT(m->m_nextpkt == NULL); 717 #else 718 if (m->m_nextpkt != NULL) { 719 #ifdef DDB 720 static int afewtimes = 10; 721 722 if (afewtimes-- > 0) { 723 printf("mfree: m->m_nextpkt != NULL\n"); 724 db_print_backtrace(); 725 } 726 #endif 727 m->m_nextpkt = NULL; 728 } 729 #endif 730 if (m->m_flags & M_PKTHDR) { 731 m_tag_delete_chain(m); /* eliminate XXX JH */ 732 } 733 734 m->m_flags &= (M_EXT | M_EXT_CLUSTER | M_CLCACHE | M_PHCACHE); 735 736 /* 737 * Clean the M_PKTHDR state so we can return the mbuf to its original 738 * cache. This is based on the PHCACHE flag which tells us whether 739 * the mbuf was originally allocated out of a packet-header cache 740 * or a non-packet-header cache. 741 */ 742 if (m->m_flags & M_PHCACHE) { 743 m->m_flags |= M_PKTHDR; 744 m->m_pkthdr.rcvif = NULL; /* eliminate XXX JH */ 745 m->m_pkthdr.csum_flags = 0; /* eliminate XXX JH */ 746 m->m_pkthdr.fw_flags = 0; /* eliminate XXX JH */ 747 SLIST_INIT(&m->m_pkthdr.tags); 748 } 749 750 /* 751 * Handle remaining flags combinations. M_CLCACHE tells us whether 752 * the mbuf was originally allocated from a cluster cache or not, 753 * and is totally separate from whether the mbuf is currently 754 * associated with a cluster. 755 */ 756 crit_enter(); 757 switch(m->m_flags & (M_CLCACHE | M_EXT | M_EXT_CLUSTER)) { 758 case M_CLCACHE | M_EXT | M_EXT_CLUSTER: 759 /* 760 * mbuf+cluster cache case. The mbuf was allocated from the 761 * combined mbuf_cluster cache and can be returned to the 762 * cache if the cluster hasn't been shared. 763 */ 764 if (m_sharecount(m) == 1) { 765 /* 766 * The cluster has not been shared, we can just 767 * reset the data pointer and return the mbuf 768 * to the cluster cache. Note that the reference 769 * count is left intact (it is still associated with 770 * an mbuf). 771 */ 772 m->m_data = m->m_ext.ext_buf; 773 if (m->m_flags & M_PHCACHE) 774 objcache_put(mbufphdrcluster_cache, m); 775 else 776 objcache_put(mbufcluster_cache, m); 777 --mbstat.m_clusters; 778 } else { 779 /* 780 * Hell. Someone else has a ref on this cluster, 781 * we have to disconnect it which means we can't 782 * put it back into the mbufcluster_cache, we 783 * have to destroy the mbuf. 784 * 785 * Other mbuf references to the cluster will typically 786 * be M_EXT | M_EXT_CLUSTER but without M_CLCACHE. 787 * 788 * XXX we could try to connect another cluster to 789 * it. 790 */ 791 m->m_ext.ext_free(m->m_ext.ext_arg); 792 m->m_flags &= ~(M_EXT | M_EXT_CLUSTER); 793 if (m->m_flags & M_PHCACHE) 794 objcache_dtor(mbufphdrcluster_cache, m); 795 else 796 objcache_dtor(mbufcluster_cache, m); 797 } 798 break; 799 case M_EXT | M_EXT_CLUSTER: 800 /* 801 * Normal cluster associated with an mbuf that was allocated 802 * from the normal mbuf pool rather then the cluster pool. 803 * The cluster has to be independantly disassociated from the 804 * mbuf. 805 */ 806 if (m_sharecount(m) == 1) 807 --mbstat.m_clusters; 808 /* fall through */ 809 case M_EXT: 810 /* 811 * Normal cluster association case, disconnect the cluster from 812 * the mbuf. The cluster may or may not be custom. 813 */ 814 m->m_ext.ext_free(m->m_ext.ext_arg); 815 m->m_flags &= ~(M_EXT | M_EXT_CLUSTER); 816 /* fall through */ 817 case 0: 818 /* 819 * return the mbuf to the mbuf cache. 820 */ 821 if (m->m_flags & M_PHCACHE) { 822 m->m_data = m->m_pktdat; 823 objcache_put(mbufphdr_cache, m); 824 } else { 825 m->m_data = m->m_dat; 826 objcache_put(mbuf_cache, m); 827 } 828 --mbstat.m_mbufs; 829 break; 830 default: 831 if (!panicstr) 832 panic("bad mbuf flags %p %08x\n", m, m->m_flags); 833 break; 834 } 835 crit_exit(); 836 return (n); 837 } 838 839 void 840 m_freem(struct mbuf *m) 841 { 842 crit_enter(); 843 while (m) 844 m = m_free(m); 845 crit_exit(); 846 } 847 848 /* 849 * mbuf utility routines 850 */ 851 852 /* 853 * Lesser-used path for M_PREPEND: allocate new mbuf to prepend to chain and 854 * copy junk along. 855 */ 856 struct mbuf * 857 m_prepend(struct mbuf *m, int len, int how) 858 { 859 struct mbuf *mn; 860 861 if (m->m_flags & M_PKTHDR) 862 mn = m_gethdr(how, m->m_type); 863 else 864 mn = m_get(how, m->m_type); 865 if (mn == NULL) { 866 m_freem(m); 867 return (NULL); 868 } 869 if (m->m_flags & M_PKTHDR) 870 M_MOVE_PKTHDR(mn, m); 871 mn->m_next = m; 872 m = mn; 873 if (len < MHLEN) 874 MH_ALIGN(m, len); 875 m->m_len = len; 876 return (m); 877 } 878 879 /* 880 * Make a copy of an mbuf chain starting "off0" bytes from the beginning, 881 * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. 882 * The wait parameter is a choice of MB_WAIT/MB_DONTWAIT from caller. 883 * Note that the copy is read-only, because clusters are not copied, 884 * only their reference counts are incremented. 885 */ 886 struct mbuf * 887 m_copym(const struct mbuf *m, int off0, int len, int wait) 888 { 889 struct mbuf *n, **np; 890 int off = off0; 891 struct mbuf *top; 892 int copyhdr = 0; 893 894 KASSERT(off >= 0, ("m_copym, negative off %d", off)); 895 KASSERT(len >= 0, ("m_copym, negative len %d", len)); 896 if (off == 0 && m->m_flags & M_PKTHDR) 897 copyhdr = 1; 898 while (off > 0) { 899 KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); 900 if (off < m->m_len) 901 break; 902 off -= m->m_len; 903 m = m->m_next; 904 } 905 np = ⊤ 906 top = 0; 907 while (len > 0) { 908 if (m == NULL) { 909 KASSERT(len == M_COPYALL, 910 ("m_copym, length > size of mbuf chain")); 911 break; 912 } 913 /* 914 * Because we are sharing any cluster attachment below, 915 * be sure to get an mbuf that does not have a cluster 916 * associated with it. 917 */ 918 if (copyhdr) 919 n = m_gethdr(wait, m->m_type); 920 else 921 n = m_get(wait, m->m_type); 922 *np = n; 923 if (n == NULL) 924 goto nospace; 925 if (copyhdr) { 926 if (!m_dup_pkthdr(n, m, wait)) 927 goto nospace; 928 if (len == M_COPYALL) 929 n->m_pkthdr.len -= off0; 930 else 931 n->m_pkthdr.len = len; 932 copyhdr = 0; 933 } 934 n->m_len = min(len, m->m_len - off); 935 if (m->m_flags & M_EXT) { 936 KKASSERT((n->m_flags & M_EXT) == 0); 937 n->m_data = m->m_data + off; 938 m->m_ext.ext_ref(m->m_ext.ext_arg); 939 n->m_ext = m->m_ext; 940 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 941 } else { 942 bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), 943 (unsigned)n->m_len); 944 } 945 if (len != M_COPYALL) 946 len -= n->m_len; 947 off = 0; 948 m = m->m_next; 949 np = &n->m_next; 950 } 951 if (top == NULL) 952 mbstat.m_mcfail++; 953 return (top); 954 nospace: 955 m_freem(top); 956 mbstat.m_mcfail++; 957 return (NULL); 958 } 959 960 /* 961 * Copy an entire packet, including header (which must be present). 962 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 963 * Note that the copy is read-only, because clusters are not copied, 964 * only their reference counts are incremented. 965 * Preserve alignment of the first mbuf so if the creator has left 966 * some room at the beginning (e.g. for inserting protocol headers) 967 * the copies also have the room available. 968 */ 969 struct mbuf * 970 m_copypacket(struct mbuf *m, int how) 971 { 972 struct mbuf *top, *n, *o; 973 974 n = m_gethdr(how, m->m_type); 975 top = n; 976 if (!n) 977 goto nospace; 978 979 if (!m_dup_pkthdr(n, m, how)) 980 goto nospace; 981 n->m_len = m->m_len; 982 if (m->m_flags & M_EXT) { 983 KKASSERT((n->m_flags & M_EXT) == 0); 984 n->m_data = m->m_data; 985 m->m_ext.ext_ref(m->m_ext.ext_arg); 986 n->m_ext = m->m_ext; 987 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 988 } else { 989 n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); 990 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 991 } 992 993 m = m->m_next; 994 while (m) { 995 o = m_get(how, m->m_type); 996 if (!o) 997 goto nospace; 998 999 n->m_next = o; 1000 n = n->m_next; 1001 1002 n->m_len = m->m_len; 1003 if (m->m_flags & M_EXT) { 1004 KKASSERT((n->m_flags & M_EXT) == 0); 1005 n->m_data = m->m_data; 1006 m->m_ext.ext_ref(m->m_ext.ext_arg); 1007 n->m_ext = m->m_ext; 1008 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 1009 } else { 1010 bcopy(mtod(m, char *), mtod(n, char *), n->m_len); 1011 } 1012 1013 m = m->m_next; 1014 } 1015 return top; 1016 nospace: 1017 m_freem(top); 1018 mbstat.m_mcfail++; 1019 return (NULL); 1020 } 1021 1022 /* 1023 * Copy data from an mbuf chain starting "off" bytes from the beginning, 1024 * continuing for "len" bytes, into the indicated buffer. 1025 */ 1026 void 1027 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) 1028 { 1029 unsigned count; 1030 1031 KASSERT(off >= 0, ("m_copydata, negative off %d", off)); 1032 KASSERT(len >= 0, ("m_copydata, negative len %d", len)); 1033 while (off > 0) { 1034 KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); 1035 if (off < m->m_len) 1036 break; 1037 off -= m->m_len; 1038 m = m->m_next; 1039 } 1040 while (len > 0) { 1041 KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); 1042 count = min(m->m_len - off, len); 1043 bcopy(mtod(m, caddr_t) + off, cp, count); 1044 len -= count; 1045 cp += count; 1046 off = 0; 1047 m = m->m_next; 1048 } 1049 } 1050 1051 /* 1052 * Copy a packet header mbuf chain into a completely new chain, including 1053 * copying any mbuf clusters. Use this instead of m_copypacket() when 1054 * you need a writable copy of an mbuf chain. 1055 */ 1056 struct mbuf * 1057 m_dup(struct mbuf *m, int how) 1058 { 1059 struct mbuf **p, *top = NULL; 1060 int remain, moff, nsize; 1061 1062 /* Sanity check */ 1063 if (m == NULL) 1064 return (NULL); 1065 KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__)); 1066 1067 /* While there's more data, get a new mbuf, tack it on, and fill it */ 1068 remain = m->m_pkthdr.len; 1069 moff = 0; 1070 p = ⊤ 1071 while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ 1072 struct mbuf *n; 1073 1074 /* Get the next new mbuf */ 1075 n = m_getl(remain, how, m->m_type, top == NULL ? M_PKTHDR : 0, 1076 &nsize); 1077 if (n == NULL) 1078 goto nospace; 1079 if (top == NULL) 1080 if (!m_dup_pkthdr(n, m, how)) 1081 goto nospace0; 1082 1083 /* Link it into the new chain */ 1084 *p = n; 1085 p = &n->m_next; 1086 1087 /* Copy data from original mbuf(s) into new mbuf */ 1088 n->m_len = 0; 1089 while (n->m_len < nsize && m != NULL) { 1090 int chunk = min(nsize - n->m_len, m->m_len - moff); 1091 1092 bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); 1093 moff += chunk; 1094 n->m_len += chunk; 1095 remain -= chunk; 1096 if (moff == m->m_len) { 1097 m = m->m_next; 1098 moff = 0; 1099 } 1100 } 1101 1102 /* Check correct total mbuf length */ 1103 KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), 1104 ("%s: bogus m_pkthdr.len", __func__)); 1105 } 1106 return (top); 1107 1108 nospace: 1109 m_freem(top); 1110 nospace0: 1111 mbstat.m_mcfail++; 1112 return (NULL); 1113 } 1114 1115 /* 1116 * Concatenate mbuf chain n to m. 1117 * Both chains must be of the same type (e.g. MT_DATA). 1118 * Any m_pkthdr is not updated. 1119 */ 1120 void 1121 m_cat(struct mbuf *m, struct mbuf *n) 1122 { 1123 m = m_last(m); 1124 while (n) { 1125 if (m->m_flags & M_EXT || 1126 m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { 1127 /* just join the two chains */ 1128 m->m_next = n; 1129 return; 1130 } 1131 /* splat the data from one into the other */ 1132 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 1133 (u_int)n->m_len); 1134 m->m_len += n->m_len; 1135 n = m_free(n); 1136 } 1137 } 1138 1139 void 1140 m_adj(struct mbuf *mp, int req_len) 1141 { 1142 int len = req_len; 1143 struct mbuf *m; 1144 int count; 1145 1146 if ((m = mp) == NULL) 1147 return; 1148 if (len >= 0) { 1149 /* 1150 * Trim from head. 1151 */ 1152 while (m != NULL && len > 0) { 1153 if (m->m_len <= len) { 1154 len -= m->m_len; 1155 m->m_len = 0; 1156 m = m->m_next; 1157 } else { 1158 m->m_len -= len; 1159 m->m_data += len; 1160 len = 0; 1161 } 1162 } 1163 m = mp; 1164 if (mp->m_flags & M_PKTHDR) 1165 m->m_pkthdr.len -= (req_len - len); 1166 } else { 1167 /* 1168 * Trim from tail. Scan the mbuf chain, 1169 * calculating its length and finding the last mbuf. 1170 * If the adjustment only affects this mbuf, then just 1171 * adjust and return. Otherwise, rescan and truncate 1172 * after the remaining size. 1173 */ 1174 len = -len; 1175 count = 0; 1176 for (;;) { 1177 count += m->m_len; 1178 if (m->m_next == (struct mbuf *)0) 1179 break; 1180 m = m->m_next; 1181 } 1182 if (m->m_len >= len) { 1183 m->m_len -= len; 1184 if (mp->m_flags & M_PKTHDR) 1185 mp->m_pkthdr.len -= len; 1186 return; 1187 } 1188 count -= len; 1189 if (count < 0) 1190 count = 0; 1191 /* 1192 * Correct length for chain is "count". 1193 * Find the mbuf with last data, adjust its length, 1194 * and toss data from remaining mbufs on chain. 1195 */ 1196 m = mp; 1197 if (m->m_flags & M_PKTHDR) 1198 m->m_pkthdr.len = count; 1199 for (; m; m = m->m_next) { 1200 if (m->m_len >= count) { 1201 m->m_len = count; 1202 break; 1203 } 1204 count -= m->m_len; 1205 } 1206 while (m->m_next) 1207 (m = m->m_next) ->m_len = 0; 1208 } 1209 } 1210 1211 /* 1212 * Rearrange an mbuf chain so that len bytes are contiguous 1213 * and in the data area of an mbuf (so that mtod will work for a structure 1214 * of size len). Returns the resulting mbuf chain on success, frees it and 1215 * returns null on failure. If there is room, it will add up to 1216 * max_protohdr-len extra bytes to the contiguous region in an attempt to 1217 * avoid being called next time. 1218 */ 1219 struct mbuf * 1220 m_pullup(struct mbuf *n, int len) 1221 { 1222 struct mbuf *m; 1223 int count; 1224 int space; 1225 1226 /* 1227 * If first mbuf has no cluster, and has room for len bytes 1228 * without shifting current data, pullup into it, 1229 * otherwise allocate a new mbuf to prepend to the chain. 1230 */ 1231 if (!(n->m_flags & M_EXT) && 1232 n->m_data + len < &n->m_dat[MLEN] && 1233 n->m_next) { 1234 if (n->m_len >= len) 1235 return (n); 1236 m = n; 1237 n = n->m_next; 1238 len -= m->m_len; 1239 } else { 1240 if (len > MHLEN) 1241 goto bad; 1242 if (n->m_flags & M_PKTHDR) 1243 m = m_gethdr(MB_DONTWAIT, n->m_type); 1244 else 1245 m = m_get(MB_DONTWAIT, n->m_type); 1246 if (m == NULL) 1247 goto bad; 1248 m->m_len = 0; 1249 if (n->m_flags & M_PKTHDR) 1250 M_MOVE_PKTHDR(m, n); 1251 } 1252 space = &m->m_dat[MLEN] - (m->m_data + m->m_len); 1253 do { 1254 count = min(min(max(len, max_protohdr), space), n->m_len); 1255 bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, 1256 (unsigned)count); 1257 len -= count; 1258 m->m_len += count; 1259 n->m_len -= count; 1260 space -= count; 1261 if (n->m_len) 1262 n->m_data += count; 1263 else 1264 n = m_free(n); 1265 } while (len > 0 && n); 1266 if (len > 0) { 1267 m_free(m); 1268 goto bad; 1269 } 1270 m->m_next = n; 1271 return (m); 1272 bad: 1273 m_freem(n); 1274 mbstat.m_mpfail++; 1275 return (NULL); 1276 } 1277 1278 /* 1279 * Partition an mbuf chain in two pieces, returning the tail -- 1280 * all but the first len0 bytes. In case of failure, it returns NULL and 1281 * attempts to restore the chain to its original state. 1282 * 1283 * Note that the resulting mbufs might be read-only, because the new 1284 * mbuf can end up sharing an mbuf cluster with the original mbuf if 1285 * the "breaking point" happens to lie within a cluster mbuf. Use the 1286 * M_WRITABLE() macro to check for this case. 1287 */ 1288 struct mbuf * 1289 m_split(struct mbuf *m0, int len0, int wait) 1290 { 1291 struct mbuf *m, *n; 1292 unsigned len = len0, remain; 1293 1294 for (m = m0; m && len > m->m_len; m = m->m_next) 1295 len -= m->m_len; 1296 if (m == NULL) 1297 return (NULL); 1298 remain = m->m_len - len; 1299 if (m0->m_flags & M_PKTHDR) { 1300 n = m_gethdr(wait, m0->m_type); 1301 if (n == NULL) 1302 return (NULL); 1303 n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; 1304 n->m_pkthdr.len = m0->m_pkthdr.len - len0; 1305 m0->m_pkthdr.len = len0; 1306 if (m->m_flags & M_EXT) 1307 goto extpacket; 1308 if (remain > MHLEN) { 1309 /* m can't be the lead packet */ 1310 MH_ALIGN(n, 0); 1311 n->m_next = m_split(m, len, wait); 1312 if (n->m_next == NULL) { 1313 m_free(n); 1314 return (NULL); 1315 } else { 1316 n->m_len = 0; 1317 return (n); 1318 } 1319 } else 1320 MH_ALIGN(n, remain); 1321 } else if (remain == 0) { 1322 n = m->m_next; 1323 m->m_next = 0; 1324 return (n); 1325 } else { 1326 n = m_get(wait, m->m_type); 1327 if (n == NULL) 1328 return (NULL); 1329 M_ALIGN(n, remain); 1330 } 1331 extpacket: 1332 if (m->m_flags & M_EXT) { 1333 KKASSERT((n->m_flags & M_EXT) == 0); 1334 n->m_data = m->m_data + len; 1335 m->m_ext.ext_ref(m->m_ext.ext_arg); 1336 n->m_ext = m->m_ext; 1337 n->m_flags |= m->m_flags & (M_EXT | M_EXT_CLUSTER); 1338 } else { 1339 bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); 1340 } 1341 n->m_len = remain; 1342 m->m_len = len; 1343 n->m_next = m->m_next; 1344 m->m_next = 0; 1345 return (n); 1346 } 1347 1348 /* 1349 * Routine to copy from device local memory into mbufs. 1350 * Note: "offset" is ill-defined and always called as 0, so ignore it. 1351 */ 1352 struct mbuf * 1353 m_devget(char *buf, int len, int offset, struct ifnet *ifp, 1354 void (*copy)(volatile const void *from, volatile void *to, size_t length)) 1355 { 1356 struct mbuf *m, *mfirst = NULL, **mtail; 1357 int nsize, flags; 1358 1359 if (copy == NULL) 1360 copy = bcopy; 1361 mtail = &mfirst; 1362 flags = M_PKTHDR; 1363 1364 while (len > 0) { 1365 m = m_getl(len, MB_DONTWAIT, MT_DATA, flags, &nsize); 1366 if (m == NULL) { 1367 m_freem(mfirst); 1368 return (NULL); 1369 } 1370 m->m_len = min(len, nsize); 1371 1372 if (flags & M_PKTHDR) { 1373 if (len + max_linkhdr <= nsize) 1374 m->m_data += max_linkhdr; 1375 m->m_pkthdr.rcvif = ifp; 1376 m->m_pkthdr.len = len; 1377 flags = 0; 1378 } 1379 1380 copy(buf, m->m_data, (unsigned)m->m_len); 1381 buf += m->m_len; 1382 len -= m->m_len; 1383 *mtail = m; 1384 mtail = &m->m_next; 1385 } 1386 1387 return (mfirst); 1388 } 1389 1390 /* 1391 * Copy data from a buffer back into the indicated mbuf chain, 1392 * starting "off" bytes from the beginning, extending the mbuf 1393 * chain if necessary. 1394 */ 1395 void 1396 m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) 1397 { 1398 int mlen; 1399 struct mbuf *m = m0, *n; 1400 int totlen = 0; 1401 1402 if (m0 == NULL) 1403 return; 1404 while (off > (mlen = m->m_len)) { 1405 off -= mlen; 1406 totlen += mlen; 1407 if (m->m_next == NULL) { 1408 n = m_getclr(MB_DONTWAIT, m->m_type); 1409 if (n == NULL) 1410 goto out; 1411 n->m_len = min(MLEN, len + off); 1412 m->m_next = n; 1413 } 1414 m = m->m_next; 1415 } 1416 while (len > 0) { 1417 mlen = min (m->m_len - off, len); 1418 bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); 1419 cp += mlen; 1420 len -= mlen; 1421 mlen += off; 1422 off = 0; 1423 totlen += mlen; 1424 if (len == 0) 1425 break; 1426 if (m->m_next == NULL) { 1427 n = m_get(MB_DONTWAIT, m->m_type); 1428 if (n == NULL) 1429 break; 1430 n->m_len = min(MLEN, len); 1431 m->m_next = n; 1432 } 1433 m = m->m_next; 1434 } 1435 out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) 1436 m->m_pkthdr.len = totlen; 1437 } 1438 1439 void 1440 m_print(const struct mbuf *m) 1441 { 1442 int len; 1443 const struct mbuf *m2; 1444 1445 len = m->m_pkthdr.len; 1446 m2 = m; 1447 while (len) { 1448 printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); 1449 len -= m2->m_len; 1450 m2 = m2->m_next; 1451 } 1452 return; 1453 } 1454 1455 /* 1456 * "Move" mbuf pkthdr from "from" to "to". 1457 * "from" must have M_PKTHDR set, and "to" must be empty. 1458 */ 1459 void 1460 m_move_pkthdr(struct mbuf *to, struct mbuf *from) 1461 { 1462 KASSERT((to->m_flags & M_PKTHDR), ("m_move_pkthdr: not packet header")); 1463 1464 to->m_flags |= from->m_flags & M_COPYFLAGS; 1465 to->m_pkthdr = from->m_pkthdr; /* especially tags */ 1466 SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ 1467 } 1468 1469 /* 1470 * Duplicate "from"'s mbuf pkthdr in "to". 1471 * "from" must have M_PKTHDR set, and "to" must be empty. 1472 * In particular, this does a deep copy of the packet tags. 1473 */ 1474 int 1475 m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how) 1476 { 1477 KASSERT((to->m_flags & M_PKTHDR), ("m_dup_pkthdr: not packet header")); 1478 1479 to->m_flags = (from->m_flags & M_COPYFLAGS) | 1480 (to->m_flags & ~M_COPYFLAGS); 1481 to->m_pkthdr = from->m_pkthdr; 1482 SLIST_INIT(&to->m_pkthdr.tags); 1483 return (m_tag_copy_chain(to, from, how)); 1484 } 1485 1486 /* 1487 * Defragment a mbuf chain, returning the shortest possible 1488 * chain of mbufs and clusters. If allocation fails and 1489 * this cannot be completed, NULL will be returned, but 1490 * the passed in chain will be unchanged. Upon success, 1491 * the original chain will be freed, and the new chain 1492 * will be returned. 1493 * 1494 * If a non-packet header is passed in, the original 1495 * mbuf (chain?) will be returned unharmed. 1496 * 1497 * m_defrag_nofree doesn't free the passed in mbuf. 1498 */ 1499 struct mbuf * 1500 m_defrag(struct mbuf *m0, int how) 1501 { 1502 struct mbuf *m_new; 1503 1504 if ((m_new = m_defrag_nofree(m0, how)) == NULL) 1505 return (NULL); 1506 if (m_new != m0) 1507 m_freem(m0); 1508 return (m_new); 1509 } 1510 1511 struct mbuf * 1512 m_defrag_nofree(struct mbuf *m0, int how) 1513 { 1514 struct mbuf *m_new = NULL, *m_final = NULL; 1515 int progress = 0, length, nsize; 1516 1517 if (!(m0->m_flags & M_PKTHDR)) 1518 return (m0); 1519 1520 #ifdef MBUF_STRESS_TEST 1521 if (m_defragrandomfailures) { 1522 int temp = arc4random() & 0xff; 1523 if (temp == 0xba) 1524 goto nospace; 1525 } 1526 #endif 1527 1528 m_final = m_getl(m0->m_pkthdr.len, how, MT_DATA, M_PKTHDR, &nsize); 1529 if (m_final == NULL) 1530 goto nospace; 1531 m_final->m_len = 0; /* in case m0->m_pkthdr.len is zero */ 1532 1533 if (m_dup_pkthdr(m_final, m0, how) == NULL) 1534 goto nospace; 1535 1536 m_new = m_final; 1537 1538 while (progress < m0->m_pkthdr.len) { 1539 length = m0->m_pkthdr.len - progress; 1540 if (length > MCLBYTES) 1541 length = MCLBYTES; 1542 1543 if (m_new == NULL) { 1544 m_new = m_getl(length, how, MT_DATA, 0, &nsize); 1545 if (m_new == NULL) 1546 goto nospace; 1547 } 1548 1549 m_copydata(m0, progress, length, mtod(m_new, caddr_t)); 1550 progress += length; 1551 m_new->m_len = length; 1552 if (m_new != m_final) 1553 m_cat(m_final, m_new); 1554 m_new = NULL; 1555 } 1556 if (m0->m_next == NULL) 1557 m_defraguseless++; 1558 m_defragpackets++; 1559 m_defragbytes += m_final->m_pkthdr.len; 1560 return (m_final); 1561 nospace: 1562 m_defragfailure++; 1563 if (m_new) 1564 m_free(m_new); 1565 m_freem(m_final); 1566 return (NULL); 1567 } 1568 1569 /* 1570 * Move data from uio into mbufs. 1571 */ 1572 struct mbuf * 1573 m_uiomove(struct uio *uio) 1574 { 1575 struct mbuf *m; /* current working mbuf */ 1576 struct mbuf *head = NULL; /* result mbuf chain */ 1577 struct mbuf **mp = &head; 1578 int resid = uio->uio_resid, nsize, flags = M_PKTHDR, error; 1579 1580 do { 1581 m = m_getl(resid, MB_WAIT, MT_DATA, flags, &nsize); 1582 if (flags) { 1583 m->m_pkthdr.len = 0; 1584 /* Leave room for protocol headers. */ 1585 if (resid < MHLEN) 1586 MH_ALIGN(m, resid); 1587 flags = 0; 1588 } 1589 m->m_len = min(nsize, resid); 1590 error = uiomove(mtod(m, caddr_t), m->m_len, uio); 1591 if (error) { 1592 m_free(m); 1593 goto failed; 1594 } 1595 *mp = m; 1596 mp = &m->m_next; 1597 head->m_pkthdr.len += m->m_len; 1598 resid -= m->m_len; 1599 } while (resid > 0); 1600 1601 return (head); 1602 1603 failed: 1604 m_freem(head); 1605 return (NULL); 1606 } 1607 1608 struct mbuf * 1609 m_last(struct mbuf *m) 1610 { 1611 while (m->m_next) 1612 m = m->m_next; 1613 return (m); 1614 } 1615 1616 /* 1617 * Return the number of bytes in an mbuf chain. 1618 * If lastm is not NULL, also return the last mbuf. 1619 */ 1620 u_int 1621 m_lengthm(struct mbuf *m, struct mbuf **lastm) 1622 { 1623 u_int len = 0; 1624 struct mbuf *prev = m; 1625 1626 while (m) { 1627 len += m->m_len; 1628 prev = m; 1629 m = m->m_next; 1630 } 1631 if (lastm != NULL) 1632 *lastm = prev; 1633 return (len); 1634 } 1635 1636 /* 1637 * Like m_lengthm(), except also keep track of mbuf usage. 1638 */ 1639 u_int 1640 m_countm(struct mbuf *m, struct mbuf **lastm, u_int *pmbcnt) 1641 { 1642 u_int len = 0, mbcnt = 0; 1643 struct mbuf *prev = m; 1644 1645 while (m) { 1646 len += m->m_len; 1647 mbcnt += MSIZE; 1648 if (m->m_flags & M_EXT) 1649 mbcnt += m->m_ext.ext_size; 1650 prev = m; 1651 m = m->m_next; 1652 } 1653 if (lastm != NULL) 1654 *lastm = prev; 1655 *pmbcnt = mbcnt; 1656 return (len); 1657 } 1658