xref: /dragonfly/sys/kern/uipc_mbuf.c (revision 1de703da)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.51.2.24 2003/04/15 06:59:29 silby Exp $
35  * $DragonFly: src/sys/kern/uipc_mbuf.c,v 1.2 2003/06/17 04:28:41 dillon Exp $
36  */
37 
38 #include "opt_param.h"
39 #include "opt_mbuf_stress_test.h"
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/domain.h>
47 #include <sys/protosw.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_kern.h>
51 #include <vm/vm_extern.h>
52 
53 #ifdef INVARIANTS
54 #include <machine/cpu.h>
55 #endif
56 
57 static void mbinit __P((void *));
58 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)
59 
60 struct mbuf *mbutl;
61 char	*mclrefcnt;
62 struct mbstat mbstat;
63 u_long	mbtypes[MT_NTYPES];
64 struct mbuf *mmbfree;
65 union mcluster *mclfree;
66 int	max_linkhdr;
67 int	max_protohdr;
68 int	max_hdr;
69 int	max_datalen;
70 int	m_defragpackets;
71 int	m_defragbytes;
72 int	m_defraguseless;
73 int	m_defragfailure;
74 #ifdef MBUF_STRESS_TEST
75 int	m_defragrandomfailures;
76 #endif
77 
78 int	nmbclusters;
79 int	nmbufs;
80 u_int	m_mballoc_wid = 0;
81 u_int	m_clalloc_wid = 0;
82 
83 SYSCTL_DECL(_kern_ipc);
84 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
85 	   &max_linkhdr, 0, "");
86 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
87 	   &max_protohdr, 0, "");
88 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
89 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
90 	   &max_datalen, 0, "");
91 SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
92 	   &mbuf_wait, 0, "");
93 SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, "");
94 SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
95 	   sizeof(mbtypes), "LU", "");
96 SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
97 	   &nmbclusters, 0, "Maximum number of mbuf clusters available");
98 SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
99 	   "Maximum number of mbufs available");
100 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
101 	   &m_defragpackets, 0, "");
102 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
103 	   &m_defragbytes, 0, "");
104 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
105 	   &m_defraguseless, 0, "");
106 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
107 	   &m_defragfailure, 0, "");
108 #ifdef MBUF_STRESS_TEST
109 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
110 	   &m_defragrandomfailures, 0, "");
111 #endif
112 
113 static void	m_reclaim __P((void));
114 
115 #ifndef NMBCLUSTERS
116 #define NMBCLUSTERS	(512 + maxusers * 16)
117 #endif
118 #ifndef NMBUFS
119 #define NMBUFS		(nmbclusters * 4)
120 #endif
121 
122 /*
123  * Perform sanity checks of tunables declared above.
124  */
125 static void
126 tunable_mbinit(void *dummy)
127 {
128 
129 	/*
130 	 * This has to be done before VM init.
131 	 */
132 	nmbclusters = NMBCLUSTERS;
133 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
134 	nmbufs = NMBUFS;
135 	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
136 	/* Sanity checks */
137 	if (nmbufs < nmbclusters * 2)
138 		nmbufs = nmbclusters * 2;
139 
140 	return;
141 }
142 SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
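
/*
 * Illustrative note (not part of the original source): because the values
 * above are fetched with TUNABLE_INT_FETCH(), they can be overridden from
 * the boot-time kernel environment, typically via /boot/loader.conf, e.g.
 *
 *	kern.ipc.nmbclusters="32768"
 *	kern.ipc.nmbufs="131072"
 *
 * tunable_mbinit() then enforces nmbufs >= 2 * nmbclusters.
 */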
143 
144 /* "number of clusters of pages" */
145 #define NCL_INIT	1
146 
147 #define NMB_INIT	16
148 
149 /* ARGSUSED*/
150 static void
151 mbinit(dummy)
152 	void *dummy;
153 {
154 	int s;
155 
156 	mmbfree = NULL; mclfree = NULL;
157 	mbstat.m_msize = MSIZE;
158 	mbstat.m_mclbytes = MCLBYTES;
159 	mbstat.m_minclsize = MINCLSIZE;
160 	mbstat.m_mlen = MLEN;
161 	mbstat.m_mhlen = MHLEN;
162 
163 	s = splimp();
164 	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
165 		goto bad;
166 #if MCLBYTES <= PAGE_SIZE
167 	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
168 		goto bad;
169 #else
170 	/* It's OK to call contigmalloc in this context. */
171 	if (m_clalloc(16, M_WAIT) == 0)
172 		goto bad;
173 #endif
174 	splx(s);
175 	return;
176 bad:
177 	panic("mbinit");
178 }
179 
180 /*
181  * Allocate at least nmb mbufs and place on mbuf free list.
182  * Must be called at splimp.
183  */
184 /* ARGSUSED */
185 int
186 m_mballoc(nmb, how)
187 	register int nmb;
188 	int how;
189 {
190 	register caddr_t p;
191 	register int i;
192 	int nbytes;
193 
194 	/*
195 	 * If we've hit the mbuf limit, stop allocating from mb_map,
196 	 * (or trying to) in order to avoid dipping into the section of
197 	 * mb_map which we've "reserved" for clusters.
198 	 */
199 	if ((nmb + mbstat.m_mbufs) > nmbufs)
200 		return (0);
201 
202 	/*
203 	 * Once we run out of map space, it will be impossible to get
204 	 * any more (nothing is ever freed back to the map)
205 	 * -- however, the caller is not necessarily out of luck, as m_reclaim()
206 	 * might still be able to free a substantial amount of space.
207 	 *
208 	 * XXX Furthermore, we can also work with "recycled" mbufs (when
209 	 * we're calling with M_WAIT the sleep procedure will be woken
210 	 * up when an mbuf is freed. See m_mballoc_wait()).
211 	 */
212 	if (mb_map_full)
213 		return (0);
214 
215 	nbytes = round_page(nmb * MSIZE);
216 	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
217 	if (p == 0 && how == M_WAIT) {
218 		mbstat.m_wait++;
219 		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
220 	}
221 
222 	/*
223 	 * Either the map is now full, or `how' is M_NOWAIT and there
224 	 * are no pages left.
225 	 */
226 	if (p == NULL)
227 		return (0);
228 
229 	nmb = nbytes / MSIZE;
230 	for (i = 0; i < nmb; i++) {
231 		((struct mbuf *)p)->m_next = mmbfree;
232 		mmbfree = (struct mbuf *)p;
233 		p += MSIZE;
234 	}
235 	mbstat.m_mbufs += nmb;
236 	mbtypes[MT_FREE] += nmb;
237 	return (1);
238 }
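
/*
 * Illustrative sketch (not part of the original source): m_mballoc() must be
 * called at splimp, as mbinit() does above.  A hypothetical caller that wants
 * to grow the free list by 32 mbufs without sleeping might look like this:
 */
#if 0	/* example only */
static void
example_grow_mbuf_pool(void)
{
	int s;

	s = splimp();
	if (m_mballoc(32, M_DONTWAIT) == 0)
		printf("mbuf free list not grown\n");
	splx(s);
}
#endif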
239 
240 /*
241  * Once the mb_map has been exhausted and if the call to the allocation macros
242  * Once the mb_map has been exhausted and the call to the allocation macros
243  * (or, in some cases, functions) is made with M_WAIT, it is necessary to rely
244  * solely on reclaimed mbufs. Here we wait for an mbuf to be freed for a
245  */
246 struct mbuf *
247 m_mballoc_wait(int caller, int type)
248 {
249 	struct mbuf *p;
250 	int s;
251 
252 	s = splimp();
253 	m_mballoc_wid++;
254 	if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK)
255 		m_mballoc_wid--;
256 	splx(s);
257 
258 	/*
259 	 * Now that we think we've got something, we will redo an
260 	 * MGET, but avoid getting into another instance of m_mballoc_wait().
261 	 * XXX: We retry the fetch _even_ if the sleep timed out. This is left
262 	 *      this way, purposely, in the [unlikely] case that an mbuf was
263 	 *      freed but the sleep was not awakened in time.
264 	 */
265 	p = NULL;
266 	switch (caller) {
267 	case MGET_C:
268 		MGET(p, M_DONTWAIT, type);
269 		break;
270 	case MGETHDR_C:
271 		MGETHDR(p, M_DONTWAIT, type);
272 		break;
273 	default:
274 		panic("m_mballoc_wait: invalid caller (%d)", caller);
275 	}
276 
277 	s = splimp();
278 	if (p != NULL) {		/* We waited and got something... */
279 		mbstat.m_wait++;
280 		/* Wake up another if we have more free. */
281 		if (mmbfree != NULL)
282 			MMBWAKEUP();
283 	}
284 	splx(s);
285 	return (p);
286 }
287 
288 #if MCLBYTES > PAGE_SIZE
289 static int i_want_my_mcl;
290 
291 static void
292 kproc_mclalloc(void)
293 {
294 	int status;
295 
296 	while (1) {
297 		tsleep(&i_want_my_mcl, PVM, "mclalloc", 0);
298 
299 		for (; i_want_my_mcl; i_want_my_mcl--) {
300 			if (m_clalloc(1, M_WAIT) == 0)
301 				printf("m_clalloc failed even in process context!\n");
302 		}
303 	}
304 }
305 
306 static struct proc *mclallocproc;
307 static struct kproc_desc mclalloc_kp = {
308 	"mclalloc",
309 	kproc_mclalloc,
310 	&mclallocproc
311 };
312 SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start,
313 	   &mclalloc_kp);
314 #endif
315 
316 /*
317  * Allocate some number of mbuf clusters
318  * and place on cluster free list.
319  * Must be called at splimp.
320  */
321 /* ARGSUSED */
322 int
323 m_clalloc(ncl, how)
324 	register int ncl;
325 	int how;
326 {
327 	register caddr_t p;
328 	register int i;
329 	int npg;
330 
331 	/*
332 	 * If we've hit the mcluster number limit, stop allocating from
333 	 * mb_map, (or trying to) in order to avoid dipping into the section
334 	 * of mb_map which we've "reserved" for mbufs.
335 	 */
336 	if ((ncl + mbstat.m_clusters) > nmbclusters)
337 		goto m_clalloc_fail;
338 
339 	/*
340 	 * Once we run out of map space, it will be impossible
341 	 * to get any more (nothing is ever freed back to the
342 	 * map). From this point on, we solely rely on freed
343 	 * mclusters.
344 	 */
345 	if (mb_map_full)
346 		goto m_clalloc_fail;
347 
348 #if MCLBYTES > PAGE_SIZE
349 	if (how != M_WAIT) {
350 		i_want_my_mcl += ncl;
351 		wakeup(&i_want_my_mcl);
352 		mbstat.m_wait++;
353 		p = 0;
354 	} else {
355 		p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul,
356 				  ~0ul, PAGE_SIZE, 0, mb_map);
357 	}
358 #else
359 	npg = ncl;
360 	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
361 				 how != M_WAIT ? M_NOWAIT : M_WAITOK);
362 	ncl = ncl * PAGE_SIZE / MCLBYTES;
363 #endif
364 	/*
365 	 * Either the map is now full, or `how' is M_NOWAIT and there
366 	 * are no pages left.
367 	 */
368 	if (p == NULL) {
369 		static int last_report;	/* time of last report, in ticks */
370 m_clalloc_fail:
371 		mbstat.m_drops++;
372 		if (ticks < last_report || (ticks - last_report) >= hz) {
373 			last_report = ticks;
374 			printf("All mbuf clusters exhausted, please see tuning(7).\n");
375 		}
376 		return (0);
377 	}
378 
379 	for (i = 0; i < ncl; i++) {
380 		((union mcluster *)p)->mcl_next = mclfree;
381 		mclfree = (union mcluster *)p;
382 		p += MCLBYTES;
383 		mbstat.m_clfree++;
384 	}
385 	mbstat.m_clusters += ncl;
386 	return (1);
387 }
388 
389 /*
390  * Once the mb_map submap has been exhausted and the allocation is called with
391  * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will
392  * sleep for a designated amount of time (mbuf_wait) or until we're woken up
393  * due to sudden mcluster availability.
394  */
395 caddr_t
396 m_clalloc_wait(void)
397 {
398 	caddr_t p;
399 	int s;
400 
401 #ifdef __i386__
402 	/* If in interrupt context, and INVARIANTS, maintain sanity and die. */
403 	KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT"));
404 #endif
405 
406 	/* Sleep until something's available or until we expire. */
407 	m_clalloc_wid++;
408 	if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK)
409 		m_clalloc_wid--;
410 
411 	/*
412 	 * Now that we think we've got something, we will redo the
413 	 * MCLALLOC, but avoid getting into another instance of m_clalloc_wait().
414 	 */
415 	p = NULL;
416 	MCLALLOC(p, M_DONTWAIT);
417 
418 	s = splimp();
419 	if (p != NULL) {	/* We waited and got something... */
420 		mbstat.m_wait++;
421 		/* Wake up another if we have more free. */
422 		if (mclfree != NULL)
423 			MCLWAKEUP();
424 	}
425 
426 	splx(s);
427 	return (p);
428 }
429 
430 /*
431  * When MGET fails, ask protocols to free space when short of memory,
432  * then re-attempt to allocate an mbuf.
433  */
434 struct mbuf *
435 m_retry(i, t)
436 	int i, t;
437 {
438 	register struct mbuf *m;
439 
440 	/*
441 	 * Must only do the reclaim if not in an interrupt context.
442 	 */
443 	if (i == M_WAIT) {
444 #ifdef __i386__
445 		KASSERT(intr_nesting_level == 0,
446 		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
447 #endif
448 		m_reclaim();
449 	}
450 
451 	/*
452 	 * Both m_mballoc_wait and m_retry must be nulled because
453 	 * when the MGET macro is run from here, we definitely do _not_
454 	 * want to enter an instance of m_mballoc_wait() or m_retry() (again!)
455 	 */
456 #define m_mballoc_wait(caller,type)    (struct mbuf *)0
457 #define m_retry(i, t)	(struct mbuf *)0
458 	MGET(m, i, t);
459 #undef m_retry
460 #undef m_mballoc_wait
461 
462 	if (m != NULL)
463 		mbstat.m_wait++;
464 	else {
465 		static int last_report;	/* time of last report, in ticks */
466 		mbstat.m_drops++;
467 		if (ticks < last_report || (ticks - last_report) >= hz) {
468 			last_report = ticks;
469 			printf("All mbufs exhausted, please see tuning(7).\n");
470 		}
471 	}
472 
473 	return (m);
474 }
475 
476 /*
477  * As above; retry an MGETHDR.
478  */
479 struct mbuf *
480 m_retryhdr(i, t)
481 	int i, t;
482 {
483 	register struct mbuf *m;
484 
485 	/*
486 	 * Must only do the reclaim if not in an interrupt context.
487 	 */
488 	if (i == M_WAIT) {
489 #ifdef __i386__
490 		KASSERT(intr_nesting_level == 0,
491 		    ("MBALLOC: CANNOT WAIT IN INTERRUPT"));
492 #endif
493 		m_reclaim();
494 	}
495 
496 #define m_mballoc_wait(caller,type)    (struct mbuf *)0
497 #define m_retryhdr(i, t) (struct mbuf *)0
498 	MGETHDR(m, i, t);
499 #undef m_retryhdr
500 #undef m_mballoc_wait
501 
502 	if (m != NULL)
503 		mbstat.m_wait++;
504 	else {
505 		static int last_report;	/* time of last report, in ticks */
506 		mbstat.m_drops++;
507 		if (ticks < last_report || (ticks - last_report) >= hz) {
508 			last_report = ticks;
509 			printf("All mbufs exhausted, please see tuning(7).\n");
510 		}
511 	}
512 
513 	return (m);
514 }
515 
516 static void
517 m_reclaim()
518 {
519 	register struct domain *dp;
520 	register struct protosw *pr;
521 	int s = splimp();
522 
523 	for (dp = domains; dp; dp = dp->dom_next)
524 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
525 			if (pr->pr_drain)
526 				(*pr->pr_drain)();
527 	splx(s);
528 	mbstat.m_drain++;
529 }
530 
531 /*
532  * Space allocation routines.
533  * These are also available as macros
534  * for critical paths.
535  */
536 struct mbuf *
537 m_get(how, type)
538 	int how, type;
539 {
540 	register struct mbuf *m;
541 
542 	MGET(m, how, type);
543 	return (m);
544 }
545 
546 struct mbuf *
547 m_gethdr(how, type)
548 	int how, type;
549 {
550 	register struct mbuf *m;
551 
552 	MGETHDR(m, how, type);
553 	return (m);
554 }
555 
556 struct mbuf *
557 m_getclr(how, type)
558 	int how, type;
559 {
560 	register struct mbuf *m;
561 
562 	MGET(m, how, type);
563 	if (m == 0)
564 		return (0);
565 	bzero(mtod(m, caddr_t), MLEN);
566 	return (m);
567 }
568 
569 /*
570  * m_getcl() returns an mbuf with an attached cluster.
571  * Because many network drivers use this kind of buffer heavily, it is
572  * convenient to keep a small pool of free buffers of this kind.
573  * Even a small size such as 10 gives about 10% improvement in the
574  * forwarding rate in a bridge or router.
575  * The size of this free list is controlled by the sysctl variable
576  * mcl_pool_max. The list is populated on m_freem(), and used in
577  * m_getcl() if elements are available.
578  */
579 static struct mbuf *mcl_pool;
580 static int mcl_pool_now;
581 static int mcl_pool_max = 0;
582 
583 SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_max, CTLFLAG_RW, &mcl_pool_max, 0,
584 	   "Maximum number of mbuf+cluster pairs in the free list");
585 SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_now, CTLFLAG_RD, &mcl_pool_now, 0,
586 	   "Current number of mbuf+cluster pairs in the free list");
587 
588 struct mbuf *
589 m_getcl(int how, short type, int flags)
590 {
591 	int s = splimp();
592 	struct mbuf *mp;
593 
594 	if (flags & M_PKTHDR) {
595 		if (type == MT_DATA && mcl_pool) {
596 			mp = mcl_pool;
597 			mcl_pool = mp->m_nextpkt;
598 			mcl_pool_now--;
599 			splx(s);
600 			mp->m_nextpkt = NULL;
601 			mp->m_data = mp->m_ext.ext_buf;
602 			mp->m_flags = M_PKTHDR|M_EXT;
603 			mp->m_pkthdr.rcvif = NULL;
604 			mp->m_pkthdr.csum_flags = 0;
605 			return mp;
606 		} else
607 			MGETHDR(mp, how, type);
608 	} else
609 		MGET(mp, how, type);
610 	if (mp) {
611 		MCLGET(mp, how);
612 		if ((mp->m_flags & M_EXT) == 0) {
613 			m_free(mp);
614 			mp = NULL;
615 		}
616 	}
617 	splx(s);
618 	return mp;
619 }
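
/*
 * Illustrative sketch (not part of the original source): a typical consumer
 * asks m_getcl() for a packet header mbuf with a cluster already attached
 * and simply drops the frame when allocation fails.  The helper name below
 * is hypothetical.
 */
#if 0	/* example only */
static struct mbuf *
example_alloc_rx_buffer(void)
{
	struct mbuf *m;

	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (NULL);			/* caller drops the frame */
	m->m_len = m->m_pkthdr.len = MCLBYTES;	/* expose the whole cluster */
	return (m);
}
#endif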
620 
621 /*
622  * struct mbuf *
623  * m_getm(m, len, how, type)
624  *
625  * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits
626  * best) and return a pointer to the top of the allocated chain. If m is
627  * non-null, then we assume that it is a single mbuf or an mbuf chain to
628  * which we want len bytes worth of mbufs and/or clusters attached, and so
629  * if we succeed in allocating it, we will just return a pointer to m.
630  *
631  * If we happen to fail at any point during the allocation, we will free
632  * up everything we have already allocated and return NULL.
633  *
634  */
635 struct mbuf *
636 m_getm(struct mbuf *m, int len, int how, int type)
637 {
638 	struct mbuf *top, *tail, *mp, *mtail = NULL;
639 
640 	KASSERT(len >= 0, ("len is < 0 in m_getm"));
641 
642 	MGET(mp, how, type);
643 	if (mp == NULL)
644 		return (NULL);
645 	else if (len > MINCLSIZE) {
646 		MCLGET(mp, how);
647 		if ((mp->m_flags & M_EXT) == 0) {
648 			m_free(mp);
649 			return (NULL);
650 		}
651 	}
652 	mp->m_len = 0;
653 	len -= M_TRAILINGSPACE(mp);
654 
655 	if (m != NULL)
656 		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next);
657 	else
658 		m = mp;
659 
660 	top = tail = mp;
661 	while (len > 0) {
662 		MGET(mp, how, type);
663 		if (mp == NULL)
664 			goto failed;
665 
666 		tail->m_next = mp;
667 		tail = mp;
668 		if (len > MINCLSIZE) {
669 			MCLGET(mp, how);
670 			if ((mp->m_flags & M_EXT) == 0)
671 				goto failed;
672 		}
673 
674 		mp->m_len = 0;
675 		len -= M_TRAILINGSPACE(mp);
676 	}
677 
678 	if (mtail != NULL)
679 		mtail->m_next = top;
680 	return (m);
681 
682 failed:
683 	m_freem(top);
684 	return (NULL);
685 }
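
/*
 * Illustrative sketch (not part of the original source): extending an
 * existing chain by roughly 4 KB of buffer space with m_getm().  On
 * failure the passed-in chain itself is left untouched.
 */
#if 0	/* example only */
static int
example_extend_chain(struct mbuf *m)
{
	if (m_getm(m, 4096, M_WAIT, MT_DATA) == NULL)
		return (0);		/* allocation failed; "m" unchanged */
	return (1);			/* chain extended by ~4 KB of space */
}
#endif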
686 
687 /*
688  * MFREE(struct mbuf *m, struct mbuf *n)
689  * Free a single mbuf and associated external storage.
690  * Place the successor, if any, in n.
691  *
692  * We do need to check non-first mbufs for m_aux, since some existing
693  * code does not call M_PREPEND properly.
694  * (example: call to bpf_mtap from drivers)
695  */
696 #define	MFREE(m, n) MBUFLOCK(						\
697 	struct mbuf *_mm = (m);						\
698 									\
699 	KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf"));		\
700 	mbtypes[_mm->m_type]--;						\
701 	if ((_mm->m_flags & M_PKTHDR) != 0)				\
702 		m_tag_delete_chain(_mm, NULL);				\
703 	if (_mm->m_flags & M_EXT)					\
704 		MEXTFREE1(m);						\
705 	(n) = _mm->m_next;						\
706 	_mm->m_type = MT_FREE;						\
707 	mbtypes[MT_FREE]++;						\
708 	_mm->m_next = mmbfree;						\
709 	mmbfree = _mm;							\
710 	MMBWAKEUP();							\
711 )
712 
713 struct mbuf *
714 m_free(m)
715 	struct mbuf *m;
716 {
717 	register struct mbuf *n;
718 
719 	MFREE(m, n);
720 	return (n);
721 }
722 
723 void
724 m_freem(m)
725 	struct mbuf *m;
726 {
727 	int s = splimp();
728 
729 	/*
730 	 * Try to keep a small pool of mbuf+cluster for quick use in
731 	 * device drivers. A good candidate is an M_PKTHDR buffer with
732 	 * only one cluster attached. Other mbufs, or those exceeding
733 	 * the pool size, are just m_free'd in the usual way.
734 	 * The following code makes sure that m_next, m_type,
735 	 * m_pkthdr.aux and m_ext.* are properly initialized.
736 	 * Other fields in the mbuf are initialized in m_getcl()
737 	 * upon allocation.
738 	 */
739 	if (mcl_pool_now < mcl_pool_max && m && m->m_next == NULL &&
740 	    (m->m_flags & (M_PKTHDR|M_EXT)) == (M_PKTHDR|M_EXT) &&
741 	    m->m_type == MT_DATA && M_EXT_WRITABLE(m)) {
742 		m_tag_delete_chain(m, NULL);
743 		m->m_nextpkt = mcl_pool;
744 		mcl_pool = m;
745 		mcl_pool_now++;
746 	} else {
747 		while (m)
748 			m = m_free(m);
749 	}
750 	splx(s);
751 }
752 
753 /*
754  * Mbuffer utility routines.
755  */
756 
757 /*
758  * Lesser-used path for M_PREPEND:
759  * allocate new mbuf to prepend to chain,
760  * copy junk along.
761  */
762 struct mbuf *
763 m_prepend(m, len, how)
764 	register struct mbuf *m;
765 	int len, how;
766 {
767 	struct mbuf *mn;
768 
769 	MGET(mn, how, m->m_type);
770 	if (mn == (struct mbuf *)NULL) {
771 		m_freem(m);
772 		return ((struct mbuf *)NULL);
773 	}
774 	if (m->m_flags & M_PKTHDR)
775 		M_MOVE_PKTHDR(mn, m);
776 	mn->m_next = m;
777 	m = mn;
778 	if (len < MHLEN)
779 		MH_ALIGN(m, len);
780 	m->m_len = len;
781 	return (m);
782 }
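
/*
 * Illustrative sketch (not part of the original source): callers normally go
 * through the M_PREPEND() macro, which only falls back to m_prepend() when
 * the first mbuf has no leading space.  Prepending a hypothetical 8-byte
 * header might look like this:
 */
#if 0	/* example only */
static struct mbuf *
example_prepend_header(struct mbuf *m)
{
	M_PREPEND(m, 8, M_DONTWAIT);
	if (m == NULL)
		return (NULL);		/* chain was freed on failure */
	bzero(mtod(m, caddr_t), 8);	/* fill in the new header space */
	return (m);
}
#endif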
783 
784 /*
785  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
786  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
787  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
788  * Note that the copy is read-only, because clusters are not copied,
789  * only their reference counts are incremented.
790  */
791 #define MCFail (mbstat.m_mcfail)
792 
793 struct mbuf *
794 m_copym(m, off0, len, wait)
795 	register struct mbuf *m;
796 	int off0, wait;
797 	register int len;
798 {
799 	register struct mbuf *n, **np;
800 	register int off = off0;
801 	struct mbuf *top;
802 	int copyhdr = 0;
803 
804 	KASSERT(off >= 0, ("m_copym, negative off %d", off));
805 	KASSERT(len >= 0, ("m_copym, negative len %d", len));
806 	if (off == 0 && m->m_flags & M_PKTHDR)
807 		copyhdr = 1;
808 	while (off > 0) {
809 		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
810 		if (off < m->m_len)
811 			break;
812 		off -= m->m_len;
813 		m = m->m_next;
814 	}
815 	np = &top;
816 	top = 0;
817 	while (len > 0) {
818 		if (m == 0) {
819 			KASSERT(len == M_COPYALL,
820 			    ("m_copym, length > size of mbuf chain"));
821 			break;
822 		}
823 		MGET(n, wait, m->m_type);
824 		*np = n;
825 		if (n == 0)
826 			goto nospace;
827 		if (copyhdr) {
828 			if (!m_dup_pkthdr(n, m, wait))
829 				goto nospace;
830 			if (len == M_COPYALL)
831 				n->m_pkthdr.len -= off0;
832 			else
833 				n->m_pkthdr.len = len;
834 			copyhdr = 0;
835 		}
836 		n->m_len = min(len, m->m_len - off);
837 		if (m->m_flags & M_EXT) {
838 			n->m_data = m->m_data + off;
839 			if (m->m_ext.ext_ref == NULL) {
840 				atomic_add_char(
841 				    &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
842 			} else {
843 				int s = splimp();
844 
845 				(*m->m_ext.ext_ref)(m->m_ext.ext_buf,
846 				    m->m_ext.ext_size);
847 				splx(s);
848 			}
849 			n->m_ext = m->m_ext;
850 			n->m_flags |= M_EXT;
851 		} else
852 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
853 			    (unsigned)n->m_len);
854 		if (len != M_COPYALL)
855 			len -= n->m_len;
856 		off = 0;
857 		m = m->m_next;
858 		np = &n->m_next;
859 	}
860 	if (top == 0)
861 		MCFail++;
862 	return (top);
863 nospace:
864 	m_freem(top);
865 	MCFail++;
866 	return (0);
867 }
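
/*
 * Illustrative sketch (not part of the original source): taking a read-only,
 * reference-counted copy of an entire packet (e.g. for retransmission) while
 * the original remains queued.
 */
#if 0	/* example only */
static struct mbuf *
example_copy_for_retransmit(struct mbuf *m)
{
	struct mbuf *n;

	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
	if (n == NULL)
		return (NULL);		/* out of mbufs; try again later */
	/* "n" shares clusters with "m" and must be treated as read-only. */
	return (n);
}
#endif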
868 
869 /*
870  * Copy an entire packet, including header (which must be present).
871  * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
872  * Note that the copy is read-only, because clusters are not copied,
873  * only their reference counts are incremented.
874  * Preserve the alignment of the first mbuf so that, if the creator has left
875  * some room at the beginning (e.g. for inserting protocol headers),
876  * the copies also have that room available.
877  */
878 struct mbuf *
879 m_copypacket(m, how)
880 	struct mbuf *m;
881 	int how;
882 {
883 	struct mbuf *top, *n, *o;
884 
885 	MGET(n, how, m->m_type);
886 	top = n;
887 	if (!n)
888 		goto nospace;
889 
890 	if (!m_dup_pkthdr(n, m, how))
891 		goto nospace;
892 	n->m_len = m->m_len;
893 	if (m->m_flags & M_EXT) {
894 		n->m_data = m->m_data;
895 		if (m->m_ext.ext_ref == NULL)
896 			atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
897 		else {
898 			int s = splimp();
899 
900 			(*m->m_ext.ext_ref)(m->m_ext.ext_buf,
901 			    m->m_ext.ext_size);
902 			splx(s);
903 		}
904 		n->m_ext = m->m_ext;
905 		n->m_flags |= M_EXT;
906 	} else {
907 		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat);
908 		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
909 	}
910 
911 	m = m->m_next;
912 	while (m) {
913 		MGET(o, how, m->m_type);
914 		if (!o)
915 			goto nospace;
916 
917 		n->m_next = o;
918 		n = n->m_next;
919 
920 		n->m_len = m->m_len;
921 		if (m->m_flags & M_EXT) {
922 			n->m_data = m->m_data;
923 			if (m->m_ext.ext_ref == NULL) {
924 				atomic_add_char(
925 				    &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
926 			} else {
927 				int s = splimp();
928 
929 				(*m->m_ext.ext_ref)(m->m_ext.ext_buf,
930 				    m->m_ext.ext_size);
931 				splx(s);
932 			}
933 			n->m_ext = m->m_ext;
934 			n->m_flags |= M_EXT;
935 		} else {
936 			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
937 		}
938 
939 		m = m->m_next;
940 	}
941 	return top;
942 nospace:
943 	m_freem(top);
944 	MCFail++;
945 	return 0;
946 }
947 
948 /*
949  * Copy data from an mbuf chain starting "off" bytes from the beginning,
950  * continuing for "len" bytes, into the indicated buffer.
951  */
952 void
953 m_copydata(m, off, len, cp)
954 	register struct mbuf *m;
955 	register int off;
956 	register int len;
957 	caddr_t cp;
958 {
959 	register unsigned count;
960 
961 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
962 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
963 	while (off > 0) {
964 		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
965 		if (off < m->m_len)
966 			break;
967 		off -= m->m_len;
968 		m = m->m_next;
969 	}
970 	while (len > 0) {
971 		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
972 		count = min(m->m_len - off, len);
973 		bcopy(mtod(m, caddr_t) + off, cp, count);
974 		len -= count;
975 		cp += count;
976 		off = 0;
977 		m = m->m_next;
978 	}
979 }
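
/*
 * Illustrative sketch (not part of the original source): copying a fixed-size
 * header out of a chain into a local buffer, no matter how the chain is
 * fragmented.  The 20-byte size is arbitrary; "m" is assumed to carry a
 * packet header (M_PKTHDR).
 */
#if 0	/* example only */
static void
example_peek_header(struct mbuf *m)
{
	char hdr[20];

	if (m->m_pkthdr.len < sizeof(hdr))
		return;			/* packet too short */
	m_copydata(m, 0, sizeof(hdr), hdr);
	/* ... inspect hdr[] without modifying the chain ... */
}
#endif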
980 
981 /*
982  * Copy a packet header mbuf chain into a completely new chain, including
983  * copying any mbuf clusters.  Use this instead of m_copypacket() when
984  * you need a writable copy of an mbuf chain.
985  */
986 struct mbuf *
987 m_dup(m, how)
988 	struct mbuf *m;
989 	int how;
990 {
991 	struct mbuf **p, *top = NULL;
992 	int remain, moff, nsize;
993 
994 	/* Sanity check */
995 	if (m == NULL)
996 		return (0);
997 	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));
998 
999 	/* While there's more data, get a new mbuf, tack it on, and fill it */
1000 	remain = m->m_pkthdr.len;
1001 	moff = 0;
1002 	p = &top;
1003 	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
1004 		struct mbuf *n;
1005 
1006 		/* Get the next new mbuf */
1007 		MGET(n, how, m->m_type);
1008 		if (n == NULL)
1009 			goto nospace;
1010 		if (top == NULL) {		/* first one, must be PKTHDR */
1011 			if (!m_dup_pkthdr(n, m, how))
1012 				goto nospace;
1013 			nsize = MHLEN;
1014 		} else				/* not the first one */
1015 			nsize = MLEN;
1016 		if (remain >= MINCLSIZE) {
1017 			MCLGET(n, how);
1018 			if ((n->m_flags & M_EXT) == 0) {
1019 				(void)m_free(n);
1020 				goto nospace;
1021 			}
1022 			nsize = MCLBYTES;
1023 		}
1024 		n->m_len = 0;
1025 
1026 		/* Link it into the new chain */
1027 		*p = n;
1028 		p = &n->m_next;
1029 
1030 		/* Copy data from original mbuf(s) into new mbuf */
1031 		while (n->m_len < nsize && m != NULL) {
1032 			int chunk = min(nsize - n->m_len, m->m_len - moff);
1033 
1034 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
1035 			moff += chunk;
1036 			n->m_len += chunk;
1037 			remain -= chunk;
1038 			if (moff == m->m_len) {
1039 				m = m->m_next;
1040 				moff = 0;
1041 			}
1042 		}
1043 
1044 		/* Check correct total mbuf length */
1045 		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
1046 		    	("%s: bogus m_pkthdr.len", __FUNCTION__));
1047 	}
1048 	return (top);
1049 
1050 nospace:
1051 	m_freem(top);
1052 	MCFail++;
1053 	return (0);
1054 }
1055 
1056 /*
1057  * Concatenate mbuf chain n to m.
1058  * Both chains must be of the same type (e.g. MT_DATA).
1059  * Any m_pkthdr is not updated.
1060  */
1061 void
1062 m_cat(m, n)
1063 	register struct mbuf *m, *n;
1064 {
1065 	while (m->m_next)
1066 		m = m->m_next;
1067 	while (n) {
1068 		if (m->m_flags & M_EXT ||
1069 		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
1070 			/* just join the two chains */
1071 			m->m_next = n;
1072 			return;
1073 		}
1074 		/* splat the data from one into the other */
1075 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
1076 		    (u_int)n->m_len);
1077 		m->m_len += n->m_len;
1078 		n = m_free(n);
1079 	}
1080 }
1081 
1082 void
1083 m_adj(mp, req_len)
1084 	struct mbuf *mp;
1085 	int req_len;
1086 {
1087 	register int len = req_len;
1088 	register struct mbuf *m;
1089 	register int count;
1090 
1091 	if ((m = mp) == NULL)
1092 		return;
1093 	if (len >= 0) {
1094 		/*
1095 		 * Trim from head.
1096 		 */
1097 		while (m != NULL && len > 0) {
1098 			if (m->m_len <= len) {
1099 				len -= m->m_len;
1100 				m->m_len = 0;
1101 				m = m->m_next;
1102 			} else {
1103 				m->m_len -= len;
1104 				m->m_data += len;
1105 				len = 0;
1106 			}
1107 		}
1108 		m = mp;
1109 		if (mp->m_flags & M_PKTHDR)
1110 			m->m_pkthdr.len -= (req_len - len);
1111 	} else {
1112 		/*
1113 		 * Trim from tail.  Scan the mbuf chain,
1114 		 * calculating its length and finding the last mbuf.
1115 		 * If the adjustment only affects this mbuf, then just
1116 		 * adjust and return.  Otherwise, rescan and truncate
1117 		 * after the remaining size.
1118 		 */
1119 		len = -len;
1120 		count = 0;
1121 		for (;;) {
1122 			count += m->m_len;
1123 			if (m->m_next == (struct mbuf *)0)
1124 				break;
1125 			m = m->m_next;
1126 		}
1127 		if (m->m_len >= len) {
1128 			m->m_len -= len;
1129 			if (mp->m_flags & M_PKTHDR)
1130 				mp->m_pkthdr.len -= len;
1131 			return;
1132 		}
1133 		count -= len;
1134 		if (count < 0)
1135 			count = 0;
1136 		/*
1137 		 * Correct length for chain is "count".
1138 		 * Find the mbuf with last data, adjust its length,
1139 		 * and toss data from remaining mbufs on chain.
1140 		 */
1141 		m = mp;
1142 		if (m->m_flags & M_PKTHDR)
1143 			m->m_pkthdr.len = count;
1144 		for (; m; m = m->m_next) {
1145 			if (m->m_len >= count) {
1146 				m->m_len = count;
1147 				break;
1148 			}
1149 			count -= m->m_len;
1150 		}
1151 		while (m->m_next)
1152 			(m = m->m_next)->m_len = 0;
1153 	}
1154 }
1155 
1156 /*
1157  * Rearrange an mbuf chain so that len bytes are contiguous
1158  * and in the data area of an mbuf (so that mtod and dtom
1159  * will work for a structure of size len).  Returns the resulting
1160  * mbuf chain on success, frees it and returns null on failure.
1161  * If there is room, it will add up to max_protohdr-len extra bytes to the
1162  * contiguous region in an attempt to avoid being called next time.
1163  */
1164 #define MPFail (mbstat.m_mpfail)
1165 
1166 struct mbuf *
1167 m_pullup(n, len)
1168 	register struct mbuf *n;
1169 	int len;
1170 {
1171 	register struct mbuf *m;
1172 	register int count;
1173 	int space;
1174 
1175 	/*
1176 	 * If first mbuf has no cluster, and has room for len bytes
1177 	 * without shifting current data, pullup into it,
1178 	 * otherwise allocate a new mbuf to prepend to the chain.
1179 	 */
1180 	if ((n->m_flags & M_EXT) == 0 &&
1181 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
1182 		if (n->m_len >= len)
1183 			return (n);
1184 		m = n;
1185 		n = n->m_next;
1186 		len -= m->m_len;
1187 	} else {
1188 		if (len > MHLEN)
1189 			goto bad;
1190 		MGET(m, M_DONTWAIT, n->m_type);
1191 		if (m == 0)
1192 			goto bad;
1193 		m->m_len = 0;
1194 		if (n->m_flags & M_PKTHDR)
1195 			M_MOVE_PKTHDR(m, n);
1196 	}
1197 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
1198 	do {
1199 		count = min(min(max(len, max_protohdr), space), n->m_len);
1200 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
1201 		  (unsigned)count);
1202 		len -= count;
1203 		m->m_len += count;
1204 		n->m_len -= count;
1205 		space -= count;
1206 		if (n->m_len)
1207 			n->m_data += count;
1208 		else
1209 			n = m_free(n);
1210 	} while (len > 0 && n);
1211 	if (len > 0) {
1212 		(void) m_free(m);
1213 		goto bad;
1214 	}
1215 	m->m_next = n;
1216 	return (m);
1217 bad:
1218 	m_freem(n);
1219 	MPFail++;
1220 	return (0);
1221 }
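
/*
 * Illustrative sketch (not part of the original source): the classic
 * m_pullup() idiom used by protocol input routines before casting m_data to
 * a header structure.  "struct example_hdr" is hypothetical.
 */
#if 0	/* example only */
struct example_hdr {			/* hypothetical 20-byte header */
	u_int32_t	eh_field[5];
};

static struct example_hdr *
example_pullup(struct mbuf **mp)
{
	struct mbuf *m = *mp;

	if (m->m_len < sizeof(struct example_hdr) &&
	    (m = m_pullup(m, sizeof(struct example_hdr))) == NULL) {
		*mp = NULL;		/* m_pullup() freed the chain */
		return (NULL);
	}
	*mp = m;
	return (mtod(m, struct example_hdr *));
}
#endif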
1222 
1223 /*
1224  * Partition an mbuf chain in two pieces, returning the tail --
1225  * Partition an mbuf chain into two pieces, returning the tail --
1226  * attempts to restore the chain to its original state.
1227  *
1228  * Note that the resulting mbufs might be read-only, because the new
1229  * mbuf can end up sharing an mbuf cluster with the original mbuf if
1230  * the "breaking point" happens to lie within a cluster mbuf. Use the
1231  * M_WRITABLE() macro to check for this case.
1232  */
1233 struct mbuf *
1234 m_split(m0, len0, wait)
1235 	register struct mbuf *m0;
1236 	int len0, wait;
1237 {
1238 	register struct mbuf *m, *n;
1239 	unsigned len = len0, remain;
1240 
1241 	for (m = m0; m && len > m->m_len; m = m->m_next)
1242 		len -= m->m_len;
1243 	if (m == 0)
1244 		return (0);
1245 	remain = m->m_len - len;
1246 	if (m0->m_flags & M_PKTHDR) {
1247 		MGETHDR(n, wait, m0->m_type);
1248 		if (n == 0)
1249 			return (0);
1250 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
1251 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
1252 		m0->m_pkthdr.len = len0;
1253 		if (m->m_flags & M_EXT)
1254 			goto extpacket;
1255 		if (remain > MHLEN) {
1256 			/* m can't be the lead packet */
1257 			MH_ALIGN(n, 0);
1258 			n->m_next = m_split(m, len, wait);
1259 			if (n->m_next == 0) {
1260 				(void) m_free(n);
1261 				return (0);
1262 			} else {
1263 				n->m_len = 0;
1264 				return (n);
1265 			}
1266 		} else
1267 			MH_ALIGN(n, remain);
1268 	} else if (remain == 0) {
1269 		n = m->m_next;
1270 		m->m_next = 0;
1271 		return (n);
1272 	} else {
1273 		MGET(n, wait, m->m_type);
1274 		if (n == 0)
1275 			return (0);
1276 		M_ALIGN(n, remain);
1277 	}
1278 extpacket:
1279 	if (m->m_flags & M_EXT) {
1280 		n->m_flags |= M_EXT;
1281 		n->m_ext = m->m_ext;
1282 		if (m->m_ext.ext_ref == NULL)
1283 			atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1);
1284 		else {
1285 			int s = splimp();
1286 
1287 			(*m->m_ext.ext_ref)(m->m_ext.ext_buf,
1288 			    m->m_ext.ext_size);
1289 			splx(s);
1290 		}
1291 		n->m_data = m->m_data + len;
1292 	} else {
1293 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
1294 	}
1295 	n->m_len = remain;
1296 	m->m_len = len;
1297 	n->m_next = m->m_next;
1298 	m->m_next = 0;
1299 	return (n);
1300 }
1301 /*
1302  * Routine to copy from device local memory into mbufs.
1303  */
1304 struct mbuf *
1305 m_devget(buf, totlen, off0, ifp, copy)
1306 	char *buf;
1307 	int totlen, off0;
1308 	struct ifnet *ifp;
1309 	void (*copy) __P((char *from, caddr_t to, u_int len));
1310 {
1311 	register struct mbuf *m;
1312 	struct mbuf *top = 0, **mp = &top;
1313 	register int off = off0, len;
1314 	register char *cp;
1315 	char *epkt;
1316 
1317 	cp = buf;
1318 	epkt = cp + totlen;
1319 	if (off) {
1320 		cp += off + 2 * sizeof(u_short);
1321 		totlen -= 2 * sizeof(u_short);
1322 	}
1323 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1324 	if (m == 0)
1325 		return (0);
1326 	m->m_pkthdr.rcvif = ifp;
1327 	m->m_pkthdr.len = totlen;
1328 	m->m_len = MHLEN;
1329 
1330 	while (totlen > 0) {
1331 		if (top) {
1332 			MGET(m, M_DONTWAIT, MT_DATA);
1333 			if (m == 0) {
1334 				m_freem(top);
1335 				return (0);
1336 			}
1337 			m->m_len = MLEN;
1338 		}
1339 		len = min(totlen, epkt - cp);
1340 		if (len >= MINCLSIZE) {
1341 			MCLGET(m, M_DONTWAIT);
1342 			if (m->m_flags & M_EXT)
1343 				m->m_len = len = min(len, MCLBYTES);
1344 			else
1345 				len = m->m_len;
1346 		} else {
1347 			/*
1348 			 * Place initial small packet/header at end of mbuf.
1349 			 */
1350 			if (len < m->m_len) {
1351 				if (top == 0 && len + max_linkhdr <= m->m_len)
1352 					m->m_data += max_linkhdr;
1353 				m->m_len = len;
1354 			} else
1355 				len = m->m_len;
1356 		}
1357 		if (copy)
1358 			copy(cp, mtod(m, caddr_t), (unsigned)len);
1359 		else
1360 			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
1361 		cp += len;
1362 		*mp = m;
1363 		mp = &m->m_next;
1364 		totlen -= len;
1365 		if (cp == epkt)
1366 			cp = buf;
1367 	}
1368 	return (top);
1369 }
1370 
1371 /*
1372  * Copy data from a buffer back into the indicated mbuf chain,
1373  * starting "off" bytes from the beginning, extending the mbuf
1374  * chain if necessary.
1375  */
1376 void
1377 m_copyback(m0, off, len, cp)
1378 	struct	mbuf *m0;
1379 	register int off;
1380 	register int len;
1381 	caddr_t cp;
1382 {
1383 	register int mlen;
1384 	register struct mbuf *m = m0, *n;
1385 	int totlen = 0;
1386 
1387 	if (m0 == 0)
1388 		return;
1389 	while (off > (mlen = m->m_len)) {
1390 		off -= mlen;
1391 		totlen += mlen;
1392 		if (m->m_next == 0) {
1393 			n = m_getclr(M_DONTWAIT, m->m_type);
1394 			if (n == 0)
1395 				goto out;
1396 			n->m_len = min(MLEN, len + off);
1397 			m->m_next = n;
1398 		}
1399 		m = m->m_next;
1400 	}
1401 	while (len > 0) {
1402 		mlen = min(m->m_len - off, len);
1403 		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
1404 		cp += mlen;
1405 		len -= mlen;
1406 		mlen += off;
1407 		off = 0;
1408 		totlen += mlen;
1409 		if (len == 0)
1410 			break;
1411 		if (m->m_next == 0) {
1412 			n = m_get(M_DONTWAIT, m->m_type);
1413 			if (n == 0)
1414 				break;
1415 			n->m_len = min(MLEN, len);
1416 			m->m_next = n;
1417 		}
1418 		m = m->m_next;
1419 	}
1420 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
1421 		m->m_pkthdr.len = totlen;
1422 }
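
/*
 * Illustrative sketch (not part of the original source): overwriting a field
 * at a fixed offset in a chain, letting m_copyback() extend the chain with
 * fresh mbufs if it is too short.  Offset and size are arbitrary examples.
 */
#if 0	/* example only */
static void
example_patch_field(struct mbuf *m)
{
	u_int32_t value = 0;

	m_copyback(m, 16, sizeof(value), (caddr_t)&value);
}
#endif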
1423 
1424 void
1425 m_print(const struct mbuf *m)
1426 {
1427 	int len;
1428 	const struct mbuf *m2;
1429 
1430 	len = m->m_pkthdr.len;
1431 	m2 = m;
1432 	while (len) {
1433 		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
1434 		len -= m2->m_len;
1435 		m2 = m2->m_next;
1436 	}
1437 	return;
1438 }
1439 
1440 /*
1441  * "Move" mbuf pkthdr from "from" to "to".
1442  * "from" must have M_PKTHDR set, and "to" must be empty.
1443  */
1444 void
1445 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
1446 {
1447 	KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster"));
1448 
1449 	to->m_flags = from->m_flags & M_COPYFLAGS;
1450 	to->m_data = to->m_pktdat;
1451 	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
1452 	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
1453 	from->m_flags &= ~M_PKTHDR;
1454 }
1455 
1456 /*
1457  * Duplicate "from"'s mbuf pkthdr in "to".
1458  * "from" must have M_PKTHDR set, and "to" must be empty.
1459  * In particular, this does a deep copy of the packet tags.
1460  */
1461 int
1462 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
1463 {
1464 	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
1465 	if ((to->m_flags & M_EXT) == 0)
1466 		to->m_data = to->m_pktdat;
1467 	to->m_pkthdr = from->m_pkthdr;
1468 	SLIST_INIT(&to->m_pkthdr.tags);
1469 	return (m_tag_copy_chain(to, from, how));
1470 }
1471 
1472 /*
1473  * Defragment a mbuf chain, returning the shortest possible
1474  * chain of mbufs and clusters.  If allocation fails and
1475  * this cannot be completed, NULL will be returned, but
1476  * the passed in chain will be unchanged.  Upon success,
1477  * the original chain will be freed, and the new chain
1478  * will be returned.
1479  *
1480  * If an mbuf without a packet header is passed in, the original
1481  * mbuf chain will be returned unharmed.
1482  */
1483 struct mbuf *
1484 m_defrag(struct mbuf *m0, int how)
1485 {
1486 	struct mbuf	*m_new = NULL, *m_final = NULL;
1487 	int		progress = 0, length;
1488 
1489 	if (!(m0->m_flags & M_PKTHDR))
1490 		return (m0);
1491 
1492 #ifdef MBUF_STRESS_TEST
1493 	if (m_defragrandomfailures) {
1494 		int temp = arc4random() & 0xff;
1495 		if (temp == 0xba)
1496 			goto nospace;
1497 	}
1498 #endif
1499 
1500 	if (m0->m_pkthdr.len > MHLEN)
1501 		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
1502 	else
1503 		m_final = m_gethdr(how, MT_DATA);
1504 
1505 	if (m_final == NULL)
1506 		goto nospace;
1507 
1508 	if (m_dup_pkthdr(m_final, m0, how) == 0)
1509 		goto nospace;
1510 
1511 	m_new = m_final;
1512 
1513 	while (progress < m0->m_pkthdr.len) {
1514 		length = m0->m_pkthdr.len - progress;
1515 		if (length > MCLBYTES)
1516 			length = MCLBYTES;
1517 
1518 		if (m_new == NULL) {
1519 			if (length > MLEN)
1520 				m_new = m_getcl(how, MT_DATA, 0);
1521 			else
1522 				m_new = m_get(how, MT_DATA);
1523 			if (m_new == NULL)
1524 				goto nospace;
1525 		}
1526 
1527 		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
1528 		progress += length;
1529 		m_new->m_len = length;
1530 		if (m_new != m_final)
1531 			m_cat(m_final, m_new);
1532 		m_new = NULL;
1533 	}
1534 	if (m0->m_next == NULL)
1535 		m_defraguseless++;
1536 	m_freem(m0);
1537 	m0 = m_final;
1538 	m_defragpackets++;
1539 	m_defragbytes += m0->m_pkthdr.len;
1540 	return (m0);
1541 nospace:
1542 	m_defragfailure++;
1543 	if (m_new)
1544 		m_free(m_new);
1545 	if (m_final)
1546 		m_freem(m_final);
1547 	return (NULL);
1548 }
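
/*
 * Illustrative sketch (not part of the original source): a transmit path
 * might use m_defrag() when a chain has too many fragments for its DMA
 * engine.  On failure the original chain is still intact and is simply
 * dropped here.
 */
#if 0	/* example only */
static struct mbuf *
example_compact_for_dma(struct mbuf *m)
{
	struct mbuf *defragged;

	defragged = m_defrag(m, M_DONTWAIT);
	if (defragged == NULL) {
		m_freem(m);		/* original chain is unchanged; drop it */
		return (NULL);
	}
	return (defragged);		/* m_defrag() already freed the original */
}
#endif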
1549