1 /*	$OpenBSD: uipc_mbuf.c,v 1.275 2020/06/21 05:37:26 dlg Exp $	*/
2 /*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
33  */
34 
35 /*
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  *	This product includes software developed by the University of
50  *	California, Berkeley and its contributors.
51  *	This product includes software developed at the Information
52  *	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include "pf.h"
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/atomic.h>
80 #include <sys/malloc.h>
81 #include <sys/mbuf.h>
82 #include <sys/kernel.h>
83 #include <sys/syslog.h>
84 #include <sys/domain.h>
85 #include <sys/protosw.h>
86 #include <sys/pool.h>
87 #include <sys/percpu.h>
88 #include <sys/sysctl.h>
89 
90 #include <sys/socket.h>
91 #include <sys/socketvar.h>
92 #include <net/if.h>
93 
94 
95 #include <uvm/uvm_extern.h>
96 
97 #ifdef DDB
98 #include <machine/db_machdep.h>
99 #endif
100 
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif	/* NPF > 0 */
104 
105 /* mbuf stats */
106 COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
107 struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
108 /* mbuf pools */
109 struct	pool mbpool;
110 struct	pool mtagpool;
111 
112 /* mbuf cluster pools */
113 u_int	mclsizes[MCLPOOLS] = {
114 	MCLBYTES,	/* must be at slot 0 */
115 	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
116 	4 * 1024,
117 	8 * 1024,
118 	9 * 1024,
119 	12 * 1024,
120 	16 * 1024,
121 	64 * 1024
122 };
123 static	char mclnames[MCLPOOLS][8];
124 struct	pool mclpools[MCLPOOLS];
125 
126 struct pool *m_clpool(u_int);
127 
128 int max_linkhdr;		/* largest link-level header */
129 int max_protohdr;		/* largest protocol header */
130 int max_hdr;			/* largest link+protocol header */
131 
132 struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);
133 
134 void	m_extfree(struct mbuf *);
135 void	m_zero(struct mbuf *);
136 
137 unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
138 unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */
139 
140 void	*m_pool_alloc(struct pool *, int, int *);
141 void	m_pool_free(struct pool *, void *);
142 
143 struct pool_allocator m_pool_allocator = {
144 	m_pool_alloc,
145 	m_pool_free,
146 	0 /* will be copied from pool_allocator_multi */
147 };
148 
149 static void (*mextfree_fns[4])(caddr_t, u_int, void *);
150 static u_int num_extfree_fns;
151 
152 #define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
153 			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
154 #define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
155 			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)
156 
157 /*
158  * Initialize the mbuf allocator.
159  */
160 void
161 mbinit(void)
162 {
163 	int i, error;
164 	unsigned int lowbits;
165 
166 	CTASSERT(MSIZE == sizeof(struct mbuf));
167 
168 	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
169 
170 	mbuf_mem_alloc = 0;
171 
172 #ifdef DIAGNOSTIC
173 	if (mclsizes[0] != MCLBYTES)
174 		panic("mbinit: the smallest cluster size != MCLBYTES");
175 	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
176 		panic("mbinit: the largest cluster size != MAXMCLBYTES");
177 #endif
178 
179 	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
180 
181 	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
182 	    IPL_NET, 0, "mtagpl", NULL);
183 
184 	for (i = 0; i < nitems(mclsizes); i++) {
185 		lowbits = mclsizes[i] & ((1 << 10) - 1);
186 		if (lowbits) {
187 			snprintf(mclnames[i], sizeof(mclnames[0]),
188 			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
189 		} else {
190 			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
191 			    mclsizes[i] >> 10);
192 		}
193 
194 		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
195 	}
196 
197 	error = nmbclust_update(nmbclust);
198 	KASSERT(error == 0);
199 
200 	(void)mextfree_register(m_extfree_pool);
201 	KASSERT(num_extfree_fns == 1);
202 }
203 
204 void
205 mbcpuinit(void)
206 {
207 	int i;
208 
209 	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);
210 
211 	pool_cache_init(&mbpool);
212 	pool_cache_init(&mtagpool);
213 
214 	for (i = 0; i < nitems(mclsizes); i++)
215 		pool_cache_init(&mclpools[i]);
216 }
217 
218 int
219 nmbclust_update(long newval)
220 {
221 	int i;
222 
223 	if (newval < 0 || newval > LONG_MAX / MCLBYTES)
224 		return ERANGE;
225 	/* update the global mbuf memory limit */
226 	nmbclust = newval;
227 	mbuf_mem_limit = nmbclust * MCLBYTES;
228 
229 	pool_wakeup(&mbpool);
230 	for (i = 0; i < nitems(mclsizes); i++)
231 		pool_wakeup(&mclpools[i]);
232 
233 	return 0;
234 }
235 
236 /*
237  * Space allocation routines.
238  */
239 struct mbuf *
240 m_get(int nowait, int type)
241 {
242 	struct mbuf *m;
243 	struct counters_ref cr;
244 	uint64_t *counters;
245 	int s;
246 
247 	KDASSERT(type < MT_NTYPES);
248 
249 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
250 	if (m == NULL)
251 		return (NULL);
252 
253 	s = splnet();
254 	counters = counters_enter(&cr, mbstat);
255 	counters[type]++;
256 	counters_leave(&cr, mbstat);
257 	splx(s);
258 
259 	m->m_type = type;
260 	m->m_next = NULL;
261 	m->m_nextpkt = NULL;
262 	m->m_data = m->m_dat;
263 	m->m_flags = 0;
264 
265 	return (m);
266 }
267 
268 /*
269  * ATTN: When changing anything here check m_inithdr() and m_defrag() those
270  * may need to change as well.
271  */
272 struct mbuf *
273 m_gethdr(int nowait, int type)
274 {
275 	struct mbuf *m;
276 	struct counters_ref cr;
277 	uint64_t *counters;
278 	int s;
279 
280 	KDASSERT(type < MT_NTYPES);
281 
282 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
283 	if (m == NULL)
284 		return (NULL);
285 
286 	s = splnet();
287 	counters = counters_enter(&cr, mbstat);
288 	counters[type]++;
289 	counters_leave(&cr, mbstat);
290 	splx(s);
291 
292 	m->m_type = type;
293 
294 	return (m_inithdr(m));
295 }
296 
297 struct mbuf *
298 m_inithdr(struct mbuf *m)
299 {
300 	/* keep in sync with m_gethdr */
301 	m->m_next = NULL;
302 	m->m_nextpkt = NULL;
303 	m->m_data = m->m_pktdat;
304 	m->m_flags = M_PKTHDR;
305 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
306 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
307 
308 	return (m);
309 }
310 
311 static inline void
312 m_clearhdr(struct mbuf *m)
313 {
314 	/* delete all mbuf tags to reset the state */
315 	m_tag_delete_chain(m);
316 #if NPF > 0
317 	pf_mbuf_unlink_state_key(m);
318 	pf_mbuf_unlink_inpcb(m);
319 #endif	/* NPF > 0 */
320 
321 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
322 }
323 
324 void
325 m_removehdr(struct mbuf *m)
326 {
327 	KASSERT(m->m_flags & M_PKTHDR);
328 	m_clearhdr(m);
329 	m->m_flags &= ~M_PKTHDR;
330 }
331 
332 void
333 m_resethdr(struct mbuf *m)
334 {
335 	int len = m->m_pkthdr.len;
336 	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;
337 
338 	KASSERT(m->m_flags & M_PKTHDR);
339 	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
340 	m_clearhdr(m);
341 	/* like m_inithdr(), but keep any associated data and mbufs */
342 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
343 	m->m_pkthdr.len = len;
344 	m->m_pkthdr.ph_loopcnt = loopcnt;
345 }
346 
347 void
348 m_calchdrlen(struct mbuf *m)
349 {
350 	struct mbuf *n;
351 	int plen = 0;
352 
353 	KASSERT(m->m_flags & M_PKTHDR);
354 	for (n = m; n; n = n->m_next)
355 		plen += n->m_len;
356 	m->m_pkthdr.len = plen;
357 }
358 
359 struct mbuf *
360 m_getclr(int nowait, int type)
361 {
362 	struct mbuf *m;
363 
364 	MGET(m, nowait, type);
365 	if (m == NULL)
366 		return (NULL);
367 	memset(mtod(m, caddr_t), 0, MLEN);
368 	return (m);
369 }
370 
371 struct pool *
372 m_clpool(u_int pktlen)
373 {
374 	struct pool *pp;
375 	int pi;
376 
377 	for (pi = 0; pi < nitems(mclpools); pi++) {
378 		pp = &mclpools[pi];
379 		if (pktlen <= pp->pr_size)
380 			return (pp);
381 	}
382 
383 	return (NULL);
384 }
385 
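/*
 * Example (an illustrative sketch, not part of this file): m_clget()
 * uses m_clpool() to pick the smallest cluster pool that fits, so a
 * caller asking for a 9000-byte buffer gets one backed by the 9k pool:
 *
 *	struct mbuf *m;
 *
 *	m = m_clget(NULL, M_DONTWAIT, 9000);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...
 *	m_freem(m);
 */
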
386 struct mbuf *
387 m_clget(struct mbuf *m, int how, u_int pktlen)
388 {
389 	struct mbuf *m0 = NULL;
390 	struct pool *pp;
391 	caddr_t buf;
392 
393 	pp = m_clpool(pktlen);
394 #ifdef DIAGNOSTIC
395 	if (pp == NULL)
396 		panic("m_clget: request for %u byte cluster", pktlen);
397 #endif
398 
399 	if (m == NULL) {
400 		m0 = m_gethdr(how, MT_DATA);
401 		if (m0 == NULL)
402 			return (NULL);
403 
404 		m = m0;
405 	}
406 	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
407 	if (buf == NULL) {
408 		m_freem(m0);
409 		return (NULL);
410 	}
411 
412 	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
413 	return (m);
414 }
415 
416 void
417 m_extfree_pool(caddr_t buf, u_int size, void *pp)
418 {
419 	pool_put(pp, buf);
420 }
421 
422 struct mbuf *
423 m_free(struct mbuf *m)
424 {
425 	struct mbuf *n;
426 	struct counters_ref cr;
427 	uint64_t *counters;
428 	int s;
429 
430 	if (m == NULL)
431 		return (NULL);
432 
433 	s = splnet();
434 	counters = counters_enter(&cr, mbstat);
435 	counters[m->m_type]--;
436 	counters_leave(&cr, mbstat);
437 	splx(s);
438 
439 	n = m->m_next;
440 	if (m->m_flags & M_ZEROIZE) {
441 		m_zero(m);
442 		/* propagate M_ZEROIZE to the next mbuf in the chain */
443 		if (n)
444 			n->m_flags |= M_ZEROIZE;
445 	}
446 	if (m->m_flags & M_PKTHDR) {
447 		m_tag_delete_chain(m);
448 #if NPF > 0
449 		pf_mbuf_unlink_state_key(m);
450 		pf_mbuf_unlink_inpcb(m);
451 #endif	/* NPF > 0 */
452 	}
453 	if (m->m_flags & M_EXT)
454 		m_extfree(m);
455 
456 	pool_put(&mbpool, m);
457 
458 	return (n);
459 }
460 
461 void
462 m_extref(struct mbuf *o, struct mbuf *n)
463 {
464 	int refs = MCLISREFERENCED(o);
465 
466 	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);
467 
468 	if (refs)
469 		mtx_enter(&m_extref_mtx);
470 	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
471 	n->m_ext.ext_prevref = o;
472 	o->m_ext.ext_nextref = n;
473 	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
474 	if (refs)
475 		mtx_leave(&m_extref_mtx);
476 
477 	MCLREFDEBUGN((n), __FILE__, __LINE__);
478 }
479 
480 static inline u_int
481 m_extunref(struct mbuf *m)
482 {
483 	int refs = 0;
484 
485 	if (!MCLISREFERENCED(m))
486 		return (0);
487 
488 	mtx_enter(&m_extref_mtx);
489 	if (MCLISREFERENCED(m)) {
490 		m->m_ext.ext_nextref->m_ext.ext_prevref =
491 		    m->m_ext.ext_prevref;
492 		m->m_ext.ext_prevref->m_ext.ext_nextref =
493 		    m->m_ext.ext_nextref;
494 		refs = 1;
495 	}
496 	mtx_leave(&m_extref_mtx);
497 
498 	return (refs);
499 }
500 
501 /*
502  * Returns a number for use with MEXTADD.
503  * Should only be called once per external free function.
504  * Drivers can be assured that the returned index will be nonzero.
505  */
506 u_int
507 mextfree_register(void (*fn)(caddr_t, u_int, void *))
508 {
509 	KASSERT(num_extfree_fns < nitems(mextfree_fns));
510 	mextfree_fns[num_extfree_fns] = fn;
511 	return num_extfree_fns++;
512 }
513 
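/*
 * Example (sketch; "xxx_buf_free" and "sc" are hypothetical driver
 * names): register an external free routine once at attach time and
 * pass the returned index to MEXTADD():
 *
 *	static u_int xxx_extfree_idx;
 *
 *	xxx_extfree_idx = mextfree_register(xxx_buf_free);
 *	...
 *	MEXTADD(m, buf, size, 0, xxx_extfree_idx, sc);
 */
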
514 void
515 m_extfree(struct mbuf *m)
516 {
517 	if (m_extunref(m) == 0) {
518 		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
519 		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
520 		    m->m_ext.ext_size, m->m_ext.ext_arg);
521 	}
522 
523 	m->m_flags &= ~(M_EXT|M_EXTWR);
524 }
525 
526 struct mbuf *
527 m_freem(struct mbuf *m)
528 {
529 	struct mbuf *n;
530 
531 	if (m == NULL)
532 		return (NULL);
533 
534 	n = m->m_nextpkt;
535 
536 	do
537 		m = m_free(m);
538 	while (m != NULL);
539 
540 	return (n);
541 }
542 
543 void
544 m_purge(struct mbuf *m)
545 {
546 	while (m != NULL)
547 		m = m_freem(m);
548 }
549 
550 /*
551  * mbuf chain defragmenter. This function uses some evil tricks to defragment
552  * an mbuf chain into a single buffer without changing the mbuf pointer.
553  * This needs to know a lot of the mbuf internals to make this work.
554  */
555 int
556 m_defrag(struct mbuf *m, int how)
557 {
558 	struct mbuf *m0;
559 
560 	if (m->m_next == NULL)
561 		return (0);
562 
563 	KASSERT(m->m_flags & M_PKTHDR);
564 
565 	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
566 		return (ENOBUFS);
567 	if (m->m_pkthdr.len > MHLEN) {
568 		MCLGETI(m0, how, NULL, m->m_pkthdr.len);
569 		if (!(m0->m_flags & M_EXT)) {
570 			m_free(m0);
571 			return (ENOBUFS);
572 		}
573 	}
574 	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
575 	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;
576 
577 	/* free chain behind and possible ext buf on the first mbuf */
578 	m_freem(m->m_next);
579 	m->m_next = NULL;
580 	if (m->m_flags & M_EXT)
581 		m_extfree(m);
582 
583 	/*
584 	 * Bounce copy mbuf over to the original mbuf and set everything up.
585 	 * This needs to reset or clear all pointers that may go into the
586 	 * original mbuf chain.
587 	 */
588 	if (m0->m_flags & M_EXT) {
589 		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
590 		MCLINITREFERENCE(m);
591 		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
592 		m->m_data = m->m_ext.ext_buf;
593 	} else {
594 		m->m_data = m->m_pktdat;
595 		memcpy(m->m_data, m0->m_data, m0->m_len);
596 	}
597 	m->m_pkthdr.len = m->m_len = m0->m_len;
598 
599 	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
600 	m_free(m0);
601 
602 	return (0);
603 }
604 
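/*
 * Example (a sketch of a typical transmit path; "sc" and "map" are
 * hypothetical driver state): when a chain has too many fragments for
 * a DMA map, coalesce it and retry the load:
 *
 *	if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *	    BUS_DMA_NOWAIT) == EFBIG) {
 *		if (m_defrag(m, M_DONTWAIT) != 0 ||
 *		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
 *		    BUS_DMA_NOWAIT) != 0)
 *			goto drop;
 *	}
 */
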
605 /*
606  * Mbuffer utility routines.
607  */
608 
609 /*
610  * Ensure len bytes of contiguous space at the beginning of the mbuf chain
611  */
612 struct mbuf *
613 m_prepend(struct mbuf *m, int len, int how)
614 {
615 	struct mbuf *mn;
616 
617 	if (len > MHLEN)
618 		panic("mbuf prepend length too big");
619 
620 	if (m_leadingspace(m) >= len) {
621 		m->m_data -= len;
622 		m->m_len += len;
623 	} else {
624 		MGET(mn, how, m->m_type);
625 		if (mn == NULL) {
626 			m_freem(m);
627 			return (NULL);
628 		}
629 		if (m->m_flags & M_PKTHDR)
630 			M_MOVE_PKTHDR(mn, m);
631 		mn->m_next = m;
632 		m = mn;
633 		m_align(m, len);
634 		m->m_len = len;
635 	}
636 	if (m->m_flags & M_PKTHDR)
637 		m->m_pkthdr.len += len;
638 	return (m);
639 }
640 
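/*
 * Example (sketch): prepend an Ethernet header.  m_prepend() frees the
 * chain itself on failure, so only the NULL return needs handling:
 *
 *	struct ether_header *eh;
 *
 *	m = m_prepend(m, ETHER_HDR_LEN, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	eh = mtod(m, struct ether_header *);
 */
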
641 /*
642  * Make a copy of an mbuf chain starting "off" bytes from the beginning,
643  * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of
644  * the mbuf chain.  The wait parameter is M_WAIT or M_DONTWAIT from the caller.
645  */
646 struct mbuf *
647 m_copym(struct mbuf *m0, int off, int len, int wait)
648 {
649 	struct mbuf *m, *n, **np;
650 	struct mbuf *top;
651 	int copyhdr = 0;
652 
653 	if (off < 0 || len < 0)
654 		panic("m_copym: off %d, len %d", off, len);
655 	if (off == 0 && m0->m_flags & M_PKTHDR)
656 		copyhdr = 1;
657 	if ((m = m_getptr(m0, off, &off)) == NULL)
658 		panic("m_copym: short mbuf chain");
659 	np = &top;
660 	top = NULL;
661 	while (len > 0) {
662 		if (m == NULL) {
663 			if (len != M_COPYALL)
664 				panic("m_copym: m == NULL and not COPYALL");
665 			break;
666 		}
667 		MGET(n, wait, m->m_type);
668 		*np = n;
669 		if (n == NULL)
670 			goto nospace;
671 		if (copyhdr) {
672 			if (m_dup_pkthdr(n, m0, wait))
673 				goto nospace;
674 			if (len != M_COPYALL)
675 				n->m_pkthdr.len = len;
676 			copyhdr = 0;
677 		}
678 		n->m_len = min(len, m->m_len - off);
679 		if (m->m_flags & M_EXT) {
680 			n->m_data = m->m_data + off;
681 			n->m_ext = m->m_ext;
682 			MCLADDREFERENCE(m, n);
683 		} else {
684 			n->m_data += m->m_data -
685 			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
686 			n->m_data += off;
687 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
688 			    n->m_len);
689 		}
690 		if (len != M_COPYALL)
691 			len -= n->m_len;
692 		off += n->m_len;
693 #ifdef DIAGNOSTIC
694 		if (off > m->m_len)
695 			panic("m_copym overrun");
696 #endif
697 		if (off == m->m_len) {
698 			m = m->m_next;
699 			off = 0;
700 		}
701 		np = &n->m_next;
702 	}
703 	return (top);
704 nospace:
705 	m_freem(top);
706 	return (NULL);
707 }
708 
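/*
 * Example (sketch): take a reference copy of a whole packet, e.g. for
 * a monitoring tap.  Cluster data is shared via MCLADDREFERENCE()
 * rather than duplicated, so the copy must be treated as read-only:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n != NULL)
 *		...hand n to the consumer; m stays intact...
 */
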
709 /*
710  * Copy data from an mbuf chain starting "off" bytes from the beginning,
711  * continuing for "len" bytes, into the indicated buffer.
712  */
713 void
714 m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
715 {
716 	unsigned count;
717 
718 	if (off < 0)
719 		panic("m_copydata: off %d < 0", off);
720 	if (len < 0)
721 		panic("m_copydata: len %d < 0", len);
722 	if ((m = m_getptr(m, off, &off)) == NULL)
723 		panic("m_copydata: short mbuf chain");
724 	while (len > 0) {
725 		if (m == NULL)
726 			panic("m_copydata: null mbuf");
727 		count = min(m->m_len - off, len);
728 		memmove(cp, mtod(m, caddr_t) + off, count);
729 		len -= count;
730 		cp += count;
731 		off = 0;
732 		m = m->m_next;
733 	}
734 }
735 
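/*
 * Example (sketch; "iphlen" is an assumed variable holding the IP
 * header length): copy a TCP header that may span mbufs into a local
 * buffer.  The chain must cover off + len bytes or the routine panics:
 *
 *	struct tcphdr th;
 *
 *	m_copydata(m, iphlen, sizeof(th), (caddr_t)&th);
 */
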
736 /*
737  * Copy data from a buffer back into the indicated mbuf chain,
738  * starting "off" bytes from the beginning, extending the mbuf
739  * chain if necessary. The mbuf needs to be properly initialized
740  * including the setting of m_len.
741  */
742 int
743 m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
744 {
745 	int mlen, totlen = 0;
746 	struct mbuf *m = m0, *n;
747 	caddr_t cp = (caddr_t)_cp;
748 	int error = 0;
749 
750 	if (m0 == NULL)
751 		return (0);
752 	while (off > (mlen = m->m_len)) {
753 		off -= mlen;
754 		totlen += mlen;
755 		if (m->m_next == NULL) {
756 			if ((n = m_get(wait, m->m_type)) == NULL) {
757 				error = ENOBUFS;
758 				goto out;
759 			}
760 
761 			if (off + len > MLEN) {
762 				MCLGETI(n, wait, NULL, off + len);
763 				if (!(n->m_flags & M_EXT)) {
764 					m_free(n);
765 					error = ENOBUFS;
766 					goto out;
767 				}
768 			}
769 			memset(mtod(n, caddr_t), 0, off);
770 			n->m_len = len + off;
771 			m->m_next = n;
772 		}
773 		m = m->m_next;
774 	}
775 	while (len > 0) {
776 		/* extend last packet to be filled fully */
777 		if (m->m_next == NULL && (len > m->m_len - off))
778 			m->m_len += min(len - (m->m_len - off),
779 			    m_trailingspace(m));
780 		mlen = min(m->m_len - off, len);
781 		memmove(mtod(m, caddr_t) + off, cp, mlen);
782 		cp += mlen;
783 		len -= mlen;
784 		totlen += mlen + off;
785 		if (len == 0)
786 			break;
787 		off = 0;
788 
789 		if (m->m_next == NULL) {
790 			if ((n = m_get(wait, m->m_type)) == NULL) {
791 				error = ENOBUFS;
792 				goto out;
793 			}
794 
795 			if (len > MLEN) {
796 				MCLGETI(n, wait, NULL, len);
797 				if (!(n->m_flags & M_EXT)) {
798 					m_free(n);
799 					error = ENOBUFS;
800 					goto out;
801 				}
802 			}
803 			n->m_len = len;
804 			m->m_next = n;
805 		}
806 		m = m->m_next;
807 	}
808 out:
809 	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
810 		m->m_pkthdr.len = totlen;
811 
812 	return (error);
813 }
814 
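/*
 * Example (sketch; "iphlen" and "sum" are assumed variables): write a
 * recomputed TCP checksum back into the chain.  The chain is extended
 * with zero-filled mbufs if it is too short, so the call can fail:
 *
 *	if (m_copyback(m, iphlen + offsetof(struct tcphdr, th_sum),
 *	    sizeof(sum), &sum, M_DONTWAIT) != 0)
 *		goto drop;
 */
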
815 /*
816  * Concatenate mbuf chain n to m.
817  * The data of n may be copied into m (when n->m_len is small), so data
818  * from n can end up in an mbuf of a different mbuf type.  Both chains
819  * should therefore be of the same type (e.g. MT_DATA).
820  * The m_pkthdr, if any, is not updated.
821  */
822 void
823 m_cat(struct mbuf *m, struct mbuf *n)
824 {
825 	while (m->m_next)
826 		m = m->m_next;
827 	while (n) {
828 		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
829 			/* just join the two chains */
830 			m->m_next = n;
831 			return;
832 		}
833 		/* splat the data from one into the other */
834 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
835 		    n->m_len);
836 		m->m_len += n->m_len;
837 		n = m_free(n);
838 	}
839 }
840 
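/*
 * Example (sketch; "frag" and "hdrlen" are assumed): append a
 * fragment's payload to a reassembly chain.  m_cat() does not touch
 * m_pkthdr, so the caller recomputes the packet length afterwards:
 *
 *	m_adj(frag, hdrlen);
 *	m_cat(m, frag);
 *	m_calchdrlen(m);
 */
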
841 void
842 m_adj(struct mbuf *mp, int req_len)
843 {
844 	int len = req_len;
845 	struct mbuf *m;
846 	int count;
847 
848 	if (mp == NULL)
849 		return;
850 	if (len >= 0) {
851 		/*
852 		 * Trim from head.
853 		 */
854 		m = mp;
855 		while (m != NULL && len > 0) {
856 			if (m->m_len <= len) {
857 				len -= m->m_len;
858 				m->m_data += m->m_len;
859 				m->m_len = 0;
860 				m = m->m_next;
861 			} else {
862 				m->m_data += len;
863 				m->m_len -= len;
864 				len = 0;
865 			}
866 		}
867 		if (mp->m_flags & M_PKTHDR)
868 			mp->m_pkthdr.len -= (req_len - len);
869 	} else {
870 		/*
871 		 * Trim from tail.  Scan the mbuf chain,
872 		 * calculating its length and finding the last mbuf.
873 		 * If the adjustment only affects this mbuf, then just
874 		 * adjust and return.  Otherwise, rescan and truncate
875 		 * after the remaining size.
876 		 */
877 		len = -len;
878 		count = 0;
879 		m = mp;
880 		for (;;) {
881 			count += m->m_len;
882 			if (m->m_next == NULL)
883 				break;
884 			m = m->m_next;
885 		}
886 		if (m->m_len >= len) {
887 			m->m_len -= len;
888 			if (mp->m_flags & M_PKTHDR)
889 				mp->m_pkthdr.len -= len;
890 			return;
891 		}
892 		count -= len;
893 		if (count < 0)
894 			count = 0;
895 		/*
896 		 * Correct length for chain is "count".
897 		 * Find the mbuf with last data, adjust its length,
898 		 * and toss data from remaining mbufs on chain.
899 		 */
900 		if (mp->m_flags & M_PKTHDR)
901 			mp->m_pkthdr.len = count;
902 		m = mp;
903 		for (;;) {
904 			if (m->m_len >= count) {
905 				m->m_len = count;
906 				break;
907 			}
908 			count -= m->m_len;
909 			m = m->m_next;
910 		}
911 		while ((m = m->m_next) != NULL)
912 			m->m_len = 0;
913 	}
914 }
915 
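/*
 * Example (sketch): trim a received Ethernet frame.  A positive length
 * trims from the head of the chain, a negative one from the tail:
 *
 *	m_adj(m, ETHER_HDR_LEN);	(strip the header in front)
 *	m_adj(m, -ETHER_CRC_LEN);	(strip the trailing FCS)
 */
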
916 /*
917  * Rearrange an mbuf chain so that len bytes are contiguous
918  * and in the data area of an mbuf (so that mtod will work
919  * for a structure of size len).  Returns the resulting
920  * mbuf chain on success; frees it and returns NULL on failure.
921  */
922 struct mbuf *
923 m_pullup(struct mbuf *m0, int len)
924 {
925 	struct mbuf *m;
926 	unsigned int adj;
927 	caddr_t head, tail;
928 	unsigned int space;
929 
930 	/* if len is already contig in m0, then don't do any work */
931 	if (len <= m0->m_len)
932 		return (m0);
933 
934 	/* look for some data */
935 	m = m0->m_next;
936 	if (m == NULL)
937 		goto freem0;
938 
939 	head = M_DATABUF(m0);
940 	if (m0->m_len == 0) {
941 		m0->m_data = head;
942 
943 		while (m->m_len == 0) {
944 			m = m_free(m);
945 			if (m == NULL)
946 				goto freem0;
947 		}
948 
949 		adj = mtod(m, unsigned long) & ALIGNBYTES;
950 	} else
951 		adj = mtod(m0, unsigned long) & ALIGNBYTES;
952 
953 	tail = head + M_SIZE(m0);
954 	head += adj;
955 
956 	if (len <= tail - head) {
957 		/* there's enough space in the first mbuf */
958 
959 		if (len > tail - mtod(m0, caddr_t)) {
960 			/* need to memmove to make space at the end */
961 			memmove(head, mtod(m0, caddr_t), m0->m_len);
962 			m0->m_data = head;
963 		}
964 
965 		len -= m0->m_len;
966 	} else {
967 		/* the first mbuf is too small so make a new one */
968 		space = adj + len;
969 
970 		if (space > MAXMCLBYTES)
971 			goto bad;
972 
973 		m0->m_next = m;
974 		m = m0;
975 
976 		MGET(m0, M_DONTWAIT, m->m_type);
977 		if (m0 == NULL)
978 			goto bad;
979 
980 		if (space > MHLEN) {
981 			MCLGETI(m0, M_DONTWAIT, NULL, space);
982 			if ((m0->m_flags & M_EXT) == 0)
983 				goto bad;
984 		}
985 
986 		if (m->m_flags & M_PKTHDR)
987 			M_MOVE_PKTHDR(m0, m);
988 
989 		m0->m_len = 0;
990 		m0->m_data += adj;
991 	}
992 
993 	KDASSERT(m_trailingspace(m0) >= len);
994 
995 	for (;;) {
996 		space = min(len, m->m_len);
997 		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
998 		len -= space;
999 		m0->m_len += space;
1000 		m->m_len -= space;
1001 
1002 		if (m->m_len > 0)
1003 			m->m_data += space;
1004 		else
1005 			m = m_free(m);
1006 
1007 		if (len == 0)
1008 			break;
1009 
1010 		if (m == NULL)
1011 			goto bad;
1012 	}
1013 
1014 	m0->m_next = m; /* link the chain back up */
1015 
1016 	return (m0);
1017 
1018 bad:
1019 	m_freem(m);
1020 freem0:
1021 	m_free(m0);
1022 	return (NULL);
1023 }
1024 
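/*
 * Example (the canonical header-parsing pattern): make the IP header
 * contiguous before dereferencing it.  On failure the chain has
 * already been freed, so the caller must not touch it again:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(*ip) &&
 *	    (m = m_pullup(m, sizeof(*ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */
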
1025 /*
1026  * Return a pointer to mbuf/offset of location in mbuf chain.
1027  */
1028 struct mbuf *
1029 m_getptr(struct mbuf *m, int loc, int *off)
1030 {
1031 	while (loc >= 0) {
1032 		/* Normal end of search */
1033 		if (m->m_len > loc) {
1034 			*off = loc;
1035 			return (m);
1036 		} else {
1037 			loc -= m->m_len;
1038 
1039 			if (m->m_next == NULL) {
1040 				if (loc == 0) {
1041 					/* Point at the end of valid data */
1042 					*off = m->m_len;
1043 					return (m);
1044 				} else {
1045 					return (NULL);
1046 				}
1047 			} else {
1048 				m = m->m_next;
1049 			}
1050 		}
1051 	}
1052 
1053 	return (NULL);
1054 }
1055 
1056 /*
1057  * Partition an mbuf chain into two pieces, returning the tail --
1058  * all but the first len0 bytes.  In case of failure, it returns NULL and
1059  * attempts to restore the chain to its original state.
1060  */
1061 struct mbuf *
1062 m_split(struct mbuf *m0, int len0, int wait)
1063 {
1064 	struct mbuf *m, *n;
1065 	unsigned len = len0, remain, olen;
1066 
1067 	for (m = m0; m && len > m->m_len; m = m->m_next)
1068 		len -= m->m_len;
1069 	if (m == NULL)
1070 		return (NULL);
1071 	remain = m->m_len - len;
1072 	if (m0->m_flags & M_PKTHDR) {
1073 		MGETHDR(n, wait, m0->m_type);
1074 		if (n == NULL)
1075 			return (NULL);
1076 		if (m_dup_pkthdr(n, m0, wait)) {
1077 			m_freem(n);
1078 			return (NULL);
1079 		}
1080 		n->m_pkthdr.len -= len0;
1081 		olen = m0->m_pkthdr.len;
1082 		m0->m_pkthdr.len = len0;
1083 		if (remain == 0) {
1084 			n->m_next = m->m_next;
1085 			m->m_next = NULL;
1086 			n->m_len = 0;
1087 			return (n);
1088 		}
1089 		if (m->m_flags & M_EXT)
1090 			goto extpacket;
1091 		if (remain > MHLEN) {
1092 			/* m can't be the lead packet */
1093 			m_align(n, 0);
1094 			n->m_next = m_split(m, len, wait);
1095 			if (n->m_next == NULL) {
1096 				(void) m_free(n);
1097 				m0->m_pkthdr.len = olen;
1098 				return (NULL);
1099 			} else {
1100 				n->m_len = 0;
1101 				return (n);
1102 			}
1103 		} else
1104 			m_align(n, remain);
1105 	} else if (remain == 0) {
1106 		n = m->m_next;
1107 		m->m_next = NULL;
1108 		return (n);
1109 	} else {
1110 		MGET(n, wait, m->m_type);
1111 		if (n == NULL)
1112 			return (NULL);
1113 		m_align(n, remain);
1114 	}
1115 extpacket:
1116 	if (m->m_flags & M_EXT) {
1117 		n->m_ext = m->m_ext;
1118 		MCLADDREFERENCE(m, n);
1119 		n->m_data = m->m_data + len;
1120 	} else {
1121 		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
1122 	}
1123 	n->m_len = remain;
1124 	m->m_len = len;
1125 	n->m_next = m->m_next;
1126 	m->m_next = NULL;
1127 	return (n);
1128 }
1129 
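/*
 * Example (sketch; "mtu" is an assumed variable): split a packet in
 * preparation for fragmentation.  On success m0 keeps the first mtu
 * bytes and the returned chain carries the rest; on failure NULL is
 * returned and m0 is restored as described above:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m0, mtu, M_DONTWAIT);
 *	if (tail == NULL)
 *		goto drop;
 */
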
1130 /*
1131  * Make space for a new header of length hlen at skip bytes
1132  * into the packet.  When doing this we allocate new mbufs only
1133  * when absolutely necessary.  The mbuf where the new header
1134  * is to go is returned together with an offset into the mbuf.
1135  * If NULL is returned then the mbuf chain may have been modified;
1136  * the caller is assumed to always free the chain.
1137  */
1138 struct mbuf *
1139 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
1140 {
1141 	struct mbuf *m;
1142 	unsigned remain;
1143 
1144 	KASSERT(m0->m_flags & M_PKTHDR);
1145 	/*
1146 	 * Limit the size of the new header to MHLEN.  When skip is 0
1147 	 * and the first buffer is not a cluster, this is the maximum
1148 	 * space available in that mbuf.
1149 	 * In other words, this code never prepends an mbuf.
1150 	 */
1151 	KASSERT(hlen < MHLEN);
1152 
1153 	for (m = m0; m && skip > m->m_len; m = m->m_next)
1154 		skip -= m->m_len;
1155 	if (m == NULL)
1156 		return (NULL);
1157 	/*
1158 	 * At this point skip is the offset into the mbuf m
1159 	 * where the new header should be placed.  Figure out
1160 	 * if there's space to insert the new header.  If so,
1161 	 * and copying the remainder makes sense then do so.
1162 	 * Otherwise insert a new mbuf in the chain, splitting
1163 	 * the contents of m as needed.
1164 	 */
1165 	remain = m->m_len - skip;		/* data to move */
1166 	if (skip < remain && hlen <= m_leadingspace(m)) {
1167 		if (skip)
1168 			memmove(m->m_data-hlen, m->m_data, skip);
1169 		m->m_data -= hlen;
1170 		m->m_len += hlen;
1171 		*off = skip;
1172 	} else if (hlen > m_trailingspace(m)) {
1173 		struct mbuf *n;
1174 
1175 		if (remain > 0) {
1176 			MGET(n, M_DONTWAIT, m->m_type);
1177 			if (n && remain > MLEN) {
1178 				MCLGETI(n, M_DONTWAIT, NULL, remain);
1179 				if ((n->m_flags & M_EXT) == 0) {
1180 					m_free(n);
1181 					n = NULL;
1182 				}
1183 			}
1184 			if (n == NULL)
1185 				return (NULL);
1186 
1187 			memcpy(n->m_data, mtod(m, char *) + skip, remain);
1188 			n->m_len = remain;
1189 			m->m_len -= remain;
1190 
1191 			n->m_next = m->m_next;
1192 			m->m_next = n;
1193 		}
1194 
1195 		if (hlen <= m_trailingspace(m)) {
1196 			m->m_len += hlen;
1197 			*off = skip;
1198 		} else {
1199 			n = m_get(M_DONTWAIT, m->m_type);
1200 			if (n == NULL)
1201 				return NULL;
1202 
1203 			n->m_len = hlen;
1204 
1205 			n->m_next = m->m_next;
1206 			m->m_next = n;
1207 
1208 			*off = 0;	/* header is at front ... */
1209 			m = n;		/* ... of new mbuf */
1210 		}
1211 	} else {
1212 		/*
1213 		 * Copy the remainder to the back of the mbuf
1214 		 * so there's space to write the new header.
1215 		 */
1216 		if (remain > 0)
1217 			memmove(mtod(m, caddr_t) + skip + hlen,
1218 			      mtod(m, caddr_t) + skip, remain);
1219 		m->m_len += hlen;
1220 		*off = skip;
1221 	}
1222 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
1223 	return m;
1224 }
1225 
1226 
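/*
 * Example (a sketch modeled on IPsec-style usage; "skip", "hlen" and
 * "hdr" are assumptions): open a gap of hlen bytes at offset skip and
 * copy the new header into it:
 *
 *	struct mbuf *mi;
 *	int roff;
 *
 *	mi = m_makespace(m, skip, hlen, &roff);
 *	if (mi == NULL)
 *		goto drop;	(the caller frees the chain)
 *	memcpy(mtod(mi, caddr_t) + roff, hdr, hlen);
 */
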
1227 /*
1228  * Routine to copy from device local memory into mbufs.
1229  */
1230 struct mbuf *
1231 m_devget(char *buf, int totlen, int off)
1232 {
1233 	struct mbuf	*m;
1234 	struct mbuf	*top, **mp;
1235 	int		 len;
1236 
1237 	top = NULL;
1238 	mp = &top;
1239 
1240 	if (off < 0 || off > MHLEN)
1241 		return (NULL);
1242 
1243 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1244 	if (m == NULL)
1245 		return (NULL);
1246 
1247 	m->m_pkthdr.len = totlen;
1248 
1249 	len = MHLEN;
1250 
1251 	while (totlen > 0) {
1252 		if (top != NULL) {
1253 			MGET(m, M_DONTWAIT, MT_DATA);
1254 			if (m == NULL) {
1255 				/*
1256 				 * As we might get called by pfkey, make sure
1257 				 * we do not leak sensitive data.
1258 				 */
1259 				top->m_flags |= M_ZEROIZE;
1260 				m_freem(top);
1261 				return (NULL);
1262 			}
1263 			len = MLEN;
1264 		}
1265 
1266 		if (totlen + off >= MINCLSIZE) {
1267 			MCLGET(m, M_DONTWAIT);
1268 			if (m->m_flags & M_EXT)
1269 				len = MCLBYTES;
1270 		} else {
1271 			/* Place initial small packet/header at end of mbuf. */
1272 			if (top == NULL && totlen + off + max_linkhdr <= len) {
1273 				m->m_data += max_linkhdr;
1274 				len -= max_linkhdr;
1275 			}
1276 		}
1277 
1278 		if (off) {
1279 			m->m_data += off;
1280 			len -= off;
1281 			off = 0;
1282 		}
1283 
1284 		m->m_len = len = min(totlen, len);
1285 		memcpy(mtod(m, void *), buf, (size_t)len);
1286 
1287 		buf += len;
1288 		*mp = m;
1289 		mp = &m->m_next;
1290 		totlen -= len;
1291 	}
1292 	return (top);
1293 }
1294 
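/*
 * Example (sketch; "sram" and "framelen" are assumed driver
 * variables): copy a received frame out of device-local memory,
 * shifting the payload by ETHER_ALIGN so the IP header lands
 * longword aligned:
 *
 *	m = m_devget(sram, framelen, ETHER_ALIGN);
 *	if (m == NULL)
 *		ifp->if_iqdrops++;
 */
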
1295 void
1296 m_zero(struct mbuf *m)
1297 {
1298 	if (M_READONLY(m)) {
1299 		mtx_enter(&m_extref_mtx);
1300 		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
1301 			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
1302 			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
1303 		}
1304 		mtx_leave(&m_extref_mtx);
1305 		return;
1306 	}
1307 
1308 	explicit_bzero(M_DATABUF(m), M_SIZE(m));
1309 }
1310 
1311 /*
1312  * Apply function f to the data in an mbuf chain starting "off" bytes from the
1313  * beginning, continuing for "len" bytes.
1314  */
1315 int
1316 m_apply(struct mbuf *m, int off, int len,
1317     int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
1318 {
1319 	int rval;
1320 	unsigned int count;
1321 
1322 	if (len < 0)
1323 		panic("m_apply: len %d < 0", len);
1324 	if (off < 0)
1325 		panic("m_apply: off %d < 0", off);
1326 	while (off > 0) {
1327 		if (m == NULL)
1328 			panic("m_apply: null mbuf in skip");
1329 		if (off < m->m_len)
1330 			break;
1331 		off -= m->m_len;
1332 		m = m->m_next;
1333 	}
1334 	while (len > 0) {
1335 		if (m == NULL)
1336 			panic("m_apply: null mbuf");
1337 		count = min(m->m_len - off, len);
1338 
1339 		rval = f(fstate, mtod(m, caddr_t) + off, count);
1340 		if (rval)
1341 			return (rval);
1342 
1343 		len -= count;
1344 		off = 0;
1345 		m = m->m_next;
1346 	}
1347 
1348 	return (0);
1349 }
1350 
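/*
 * Example (sketch; "sum_cb" and "ctx" are hypothetical): visit the
 * payload without linearizing the chain, e.g. to feed each contiguous
 * region to a checksum routine with the signature
 * int sum_cb(caddr_t, caddr_t, unsigned int):
 *
 *	error = m_apply(m, off, len, sum_cb, (caddr_t)&ctx);
 *	if (error)
 *		goto drop;
 */
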
1351 /*
1352  * Compute the amount of space available before the current start of data
1353  * in an mbuf. Read-only clusters never have space available.
1354  */
1355 int
1356 m_leadingspace(struct mbuf *m)
1357 {
1358 	if (M_READONLY(m))
1359 		return 0;
1360 	KASSERT(m->m_data >= M_DATABUF(m));
1361 	return m->m_data - M_DATABUF(m);
1362 }
1363 
1364 /*
1365  * Compute the amount of space available after the end of data in an mbuf.
1366  * Read-only clusters never have space available.
1367  */
1368 int
1369 m_trailingspace(struct mbuf *m)
1370 {
1371 	if (M_READONLY(m))
1372 		return 0;
1373 	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
1374 	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
1375 }
1376 
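/*
 * Example (sketch; "src" and "len" are assumed, and m is a packet
 * consisting of a single writable mbuf): append data in place after
 * checking that there is room:
 *
 *	if (m_trailingspace(m) < len)
 *		goto slowpath;
 *	memcpy(mtod(m, caddr_t) + m->m_len, src, len);
 *	m->m_len += len;
 *	m->m_pkthdr.len += len;
 */
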
1377 /*
1378  * Set the m_data pointer of a newly-allocated mbuf to place an object of
1379  * the specified size at the end of the mbuf, longword aligned.
1380  */
1381 void
1382 m_align(struct mbuf *m, int len)
1383 {
1384 	KASSERT(len >= 0 && !M_READONLY(m));
1385 	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
1386 	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));
1387 
1388 	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
1389 }
1390 
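/*
 * Example (sketch; "struct reply" is a hypothetical payload structure
 * small enough to fit in MHLEN): place a reply at the aligned end of a
 * fresh mbuf before filling it in:
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m_align(m, sizeof(struct reply));
 *	m->m_len = m->m_pkthdr.len = sizeof(struct reply);
 */
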
1391 /*
1392  * Duplicate the mbuf pkthdr from "from" to "to".
1393  * "from" must have M_PKTHDR set, and "to" must be empty.
1394  */
1395 int
1396 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
1397 {
1398 	int error;
1399 
1400 	KASSERT(from->m_flags & M_PKTHDR);
1401 
1402 	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
1403 	to->m_flags |= (from->m_flags & M_COPYFLAGS);
1404 	to->m_pkthdr = from->m_pkthdr;
1405 
1406 #if NPF > 0
1407 	to->m_pkthdr.pf.statekey = NULL;
1408 	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
1409 	to->m_pkthdr.pf.inp = NULL;
1410 	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
1411 #endif	/* NPF > 0 */
1412 
1413 	SLIST_INIT(&to->m_pkthdr.ph_tags);
1414 
1415 	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
1416 		return (error);
1417 
1418 	if ((to->m_flags & M_EXT) == 0)
1419 		to->m_data = to->m_pktdat;
1420 
1421 	return (0);
1422 }
1423 
1424 struct mbuf *
1425 m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
1426 {
1427 	struct mbuf *m;
1428 	int len;
1429 
1430 	KASSERT(m0->m_flags & M_PKTHDR);
1431 
1432 	len = m0->m_pkthdr.len + adj;
1433 	if (len > MAXMCLBYTES) /* XXX */
1434 		return (NULL);
1435 
1436 	m = m_get(wait, m0->m_type);
1437 	if (m == NULL)
1438 		return (NULL);
1439 
1440 	if (m_dup_pkthdr(m, m0, wait) != 0)
1441 		goto fail;
1442 
1443 	if (len > MHLEN) {
1444 		MCLGETI(m, wait, NULL, len);
1445 		if (!ISSET(m->m_flags, M_EXT))
1446 			goto fail;
1447 	}
1448 
1449 	m->m_len = m->m_pkthdr.len = len;
1450 	m_adj(m, adj);
1451 	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));
1452 
1453 	return (m);
1454 
1455 fail:
1456 	m_freem(m);
1457 	return (NULL);
1458 }
1459 
1460 void
1461 m_microtime(const struct mbuf *m, struct timeval *tv)
1462 {
1463 	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
1464 		struct timeval btv, utv;
1465 
1466 		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
1467 		microboottime(&btv);
1468 		timeradd(&btv, &utv, tv);
1469 	} else
1470 		microtime(tv);
1471 }
1472 
1473 void *
1474 m_pool_alloc(struct pool *pp, int flags, int *slowdown)
1475 {
1476 	void *v;
1477 
1478 	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
1479 		goto fail;
1480 
1481 	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
1482 	if (v != NULL)
1483 		return (v);
1484 
1485  fail:
1486 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1487 	return (NULL);
1488 }
1489 
1490 void
1491 m_pool_free(struct pool *pp, void *v)
1492 {
1493 	(*pool_allocator_multi.pa_free)(pp, v);
1494 
1495 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1496 }
1497 
1498 void
1499 m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
1500 {
1501 	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
1502 	pool_set_constraints(pp, &kp_dma_contig);
1503 }
1504 
1505 #ifdef DDB
1506 void
1507 m_print(void *v,
1508     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1509 {
1510 	struct mbuf *m = v;
1511 
1512 	(*pr)("mbuf %p\n", m);
1513 	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
1514 	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
1515 	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
1516 	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
1517 	if (m->m_flags & M_PKTHDR) {
1518 		(*pr)("m_pkthdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
1519 		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
1520 		(*pr)("m_pkthdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
1521 		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
1522 		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
1523 		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
1524 		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
1525 		(*pr)("m_pkthdr.csum_flags: %b\n",
1526 		    m->m_pkthdr.csum_flags, MCS_BITS);
1527 		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
1528 		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
1529 		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
1530 		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
1531 		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
1532 		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
1533 		(*pr)("m_pkthdr.pf.flags: %b\n",
1534 		    m->m_pkthdr.pf.flags, MPF_BITS);
1535 		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
1536 		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
1537 	}
1538 	if (m->m_flags & M_EXT) {
1539 		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
1540 		    m->m_ext.ext_buf, m->m_ext.ext_size);
1541 		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
1542 		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
1543 		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
1544 		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
1545 
1546 	}
1547 }
1548 #endif
1549 
1550 /*
1551  * mbuf lists
1552  */
1553 
1554 void
1555 ml_init(struct mbuf_list *ml)
1556 {
1557 	ml->ml_head = ml->ml_tail = NULL;
1558 	ml->ml_len = 0;
1559 }
1560 
1561 void
1562 ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
1563 {
1564 	if (ml->ml_tail == NULL)
1565 		ml->ml_head = ml->ml_tail = m;
1566 	else {
1567 		ml->ml_tail->m_nextpkt = m;
1568 		ml->ml_tail = m;
1569 	}
1570 
1571 	m->m_nextpkt = NULL;
1572 	ml->ml_len++;
1573 }
1574 
1575 void
1576 ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
1577 {
1578 	if (!ml_empty(mlb)) {
1579 		if (ml_empty(mla))
1580 			mla->ml_head = mlb->ml_head;
1581 		else
1582 			mla->ml_tail->m_nextpkt = mlb->ml_head;
1583 		mla->ml_tail = mlb->ml_tail;
1584 		mla->ml_len += mlb->ml_len;
1585 
1586 		ml_init(mlb);
1587 	}
1588 }
1589 
1590 struct mbuf *
1591 ml_dequeue(struct mbuf_list *ml)
1592 {
1593 	struct mbuf *m;
1594 
1595 	m = ml->ml_head;
1596 	if (m != NULL) {
1597 		ml->ml_head = m->m_nextpkt;
1598 		if (ml->ml_head == NULL)
1599 			ml->ml_tail = NULL;
1600 
1601 		m->m_nextpkt = NULL;
1602 		ml->ml_len--;
1603 	}
1604 
1605 	return (m);
1606 }
1607 
1608 struct mbuf *
1609 ml_dechain(struct mbuf_list *ml)
1610 {
1611 	struct mbuf *m0;
1612 
1613 	m0 = ml->ml_head;
1614 
1615 	ml_init(ml);
1616 
1617 	return (m0);
1618 }
1619 
1620 unsigned int
1621 ml_purge(struct mbuf_list *ml)
1622 {
1623 	struct mbuf *m, *n;
1624 	unsigned int len;
1625 
1626 	for (m = ml->ml_head; m != NULL; m = n) {
1627 		n = m->m_nextpkt;
1628 		m_freem(m);
1629 	}
1630 
1631 	len = ml->ml_len;
1632 	ml_init(ml);
1633 
1634 	return (len);
1635 }
1636 
1637 unsigned int
1638 ml_hdatalen(struct mbuf_list *ml)
1639 {
1640 	struct mbuf *m;
1641 
1642 	m = ml->ml_head;
1643 	if (m == NULL)
1644 		return (0);
1645 
1646 	KASSERT(ISSET(m->m_flags, M_PKTHDR));
1647 	return (m->m_pkthdr.len);
1648 }
1649 
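/*
 * Example (sketch; "m0" and "m1" are assumed packets): mbuf_lists are
 * cheap unlocked batches.  MBUF_LIST_INITIALIZER() from mbuf.h is the
 * static counterpart of ml_init():
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *	struct mbuf *m;
 *
 *	ml_enqueue(&ml, m0);
 *	ml_enqueue(&ml, m1);
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		...process m...
 */
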
1650 /*
1651  * mbuf queues
1652  */
1653 
1654 void
1655 mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
1656 {
1657 	mtx_init(&mq->mq_mtx, ipl);
1658 	ml_init(&mq->mq_list);
1659 	mq->mq_maxlen = maxlen;
1660 }
1661 
1662 int
1663 mq_push(struct mbuf_queue *mq, struct mbuf *m)
1664 {
1665 	struct mbuf *dropped = NULL;
1666 
1667 	mtx_enter(&mq->mq_mtx);
1668 	if (mq_len(mq) >= mq->mq_maxlen) {
1669 		mq->mq_drops++;
1670 		dropped = ml_dequeue(&mq->mq_list);
1671 	}
1672 	ml_enqueue(&mq->mq_list, m);
1673 	mtx_leave(&mq->mq_mtx);
1674 
1675 	if (dropped)
1676 		m_freem(dropped);
1677 
1678 	return (dropped != NULL);
1679 }
1680 
1681 int
1682 mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
1683 {
1684 	int dropped = 0;
1685 
1686 	mtx_enter(&mq->mq_mtx);
1687 	if (mq_len(mq) < mq->mq_maxlen)
1688 		ml_enqueue(&mq->mq_list, m);
1689 	else {
1690 		mq->mq_drops++;
1691 		dropped = 1;
1692 	}
1693 	mtx_leave(&mq->mq_mtx);
1694 
1695 	if (dropped)
1696 		m_freem(m);
1697 
1698 	return (dropped);
1699 }
1700 
1701 struct mbuf *
1702 mq_dequeue(struct mbuf_queue *mq)
1703 {
1704 	struct mbuf *m;
1705 
1706 	mtx_enter(&mq->mq_mtx);
1707 	m = ml_dequeue(&mq->mq_list);
1708 	mtx_leave(&mq->mq_mtx);
1709 
1710 	return (m);
1711 }
1712 
1713 int
1714 mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
1715 {
1716 	struct mbuf *m;
1717 	int dropped = 0;
1718 
1719 	mtx_enter(&mq->mq_mtx);
1720 	if (mq_len(mq) < mq->mq_maxlen)
1721 		ml_enlist(&mq->mq_list, ml);
1722 	else {
1723 		dropped = ml_len(ml);
1724 		mq->mq_drops += dropped;
1725 	}
1726 	mtx_leave(&mq->mq_mtx);
1727 
1728 	if (dropped) {
1729 		while ((m = ml_dequeue(ml)) != NULL)
1730 			m_freem(m);
1731 	}
1732 
1733 	return (dropped);
1734 }
1735 
1736 void
1737 mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
1738 {
1739 	mtx_enter(&mq->mq_mtx);
1740 	*ml = mq->mq_list;
1741 	ml_init(&mq->mq_list);
1742 	mtx_leave(&mq->mq_mtx);
1743 }
1744 
1745 struct mbuf *
1746 mq_dechain(struct mbuf_queue *mq)
1747 {
1748 	struct mbuf *m0;
1749 
1750 	mtx_enter(&mq->mq_mtx);
1751 	m0 = ml_dechain(&mq->mq_list);
1752 	mtx_leave(&mq->mq_mtx);
1753 
1754 	return (m0);
1755 }
1756 
1757 unsigned int
1758 mq_purge(struct mbuf_queue *mq)
1759 {
1760 	struct mbuf_list ml;
1761 
1762 	mq_delist(mq, &ml);
1763 
1764 	return (ml_purge(&ml));
1765 }
1766 
1767 unsigned int
1768 mq_hdatalen(struct mbuf_queue *mq)
1769 {
1770 	unsigned int hdatalen;
1771 
1772 	mtx_enter(&mq->mq_mtx);
1773 	hdatalen = ml_hdatalen(&mq->mq_list);
1774 	mtx_leave(&mq->mq_mtx);
1775 
1776 	return (hdatalen);
1777 }
1778 
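/*
 * Example (a sketch of the usual interrupt-to-thread handoff; "sc_rxq"
 * is a hypothetical softc member): producers enqueue under the queue's
 * own mutex, and the consumer batches everything out at once:
 *
 *	struct mbuf_queue sc_rxq = MBUF_QUEUE_INITIALIZER(64, IPL_NET);
 *	struct mbuf_list ml;
 *	struct mbuf *m;
 *
 *	mq_enqueue(&sc_rxq, m);			(interrupt side)
 *
 *	mq_delist(&sc_rxq, &ml);		(thread side)
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		...process m...
 */
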
1779 int
1780 sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1781     void *newp, size_t newlen, struct mbuf_queue *mq)
1782 {
1783 	unsigned int maxlen;
1784 	int error;
1785 
1786 	/* All sysctl names at this level are terminal. */
1787 	if (namelen != 1)
1788 		return (ENOTDIR);
1789 
1790 	switch (name[0]) {
1791 	case IFQCTL_LEN:
1792 		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
1793 	case IFQCTL_MAXLEN:
1794 		maxlen = mq->mq_maxlen;
1795 		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
1796 		if (!error && maxlen != mq->mq_maxlen) {
1797 			mtx_enter(&mq->mq_mtx);
1798 			mq->mq_maxlen = maxlen;
1799 			mtx_leave(&mq->mq_mtx);
1800 		}
1801 		return (error);
1802 	case IFQCTL_DROPS:
1803 		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
1804 	default:
1805 		return (EOPNOTSUPP);
1806 	}
1807 	/* NOTREACHED */
1808 }
1809