xref: /openbsd/sys/kern/uipc_mbuf.c (revision 00ee6dc4)
1 /*	$OpenBSD: uipc_mbuf.c,v 1.289 2024/02/21 09:28:29 claudio Exp $	*/
2 /*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
33  */
34 
35 /*
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  *	This product includes software developed by the University of
50  *	California, Berkeley and its contributors.
51  *	This product includes software developed at the Information
52  *	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include "pf.h"
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/atomic.h>
80 #include <sys/mbuf.h>
81 #include <sys/pool.h>
82 #include <sys/percpu.h>
83 #include <sys/sysctl.h>
84 
85 #include <sys/socket.h>
86 #include <net/if.h>
87 
88 
89 #include <uvm/uvm_extern.h>
90 
91 #ifdef DDB
92 #include <machine/db_machdep.h>
93 #endif
94 
95 #if NPF > 0
96 #include <net/pfvar.h>
97 #endif	/* NPF > 0 */
98 
99 /* mbuf stats */
100 COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
101 struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
102 /* mbuf pools */
103 struct	pool mbpool;
104 struct	pool mtagpool;
105 
106 /* mbuf cluster pools */
107 u_int	mclsizes[MCLPOOLS] = {
108 	MCLBYTES,	/* must be at slot 0 */
109 	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
110 	4 * 1024,
111 	8 * 1024,
112 	9 * 1024,
113 	12 * 1024,
114 	16 * 1024,
115 	64 * 1024
116 };
117 static	char mclnames[MCLPOOLS][8];
118 struct	pool mclpools[MCLPOOLS];
119 
120 struct pool *m_clpool(u_int);
121 
122 int max_linkhdr;		/* largest link-level header */
123 int max_protohdr;		/* largest protocol header */
124 int max_hdr;			/* largest link+protocol header */
125 
126 struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);
127 
128 void	m_extfree(struct mbuf *);
129 void	m_zero(struct mbuf *);
130 
131 unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
132 unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */
133 
134 void	*m_pool_alloc(struct pool *, int, int *);
135 void	m_pool_free(struct pool *, void *);
136 
137 struct pool_allocator m_pool_allocator = {
138 	m_pool_alloc,
139 	m_pool_free,
140 	0 /* will be copied from pool_allocator_multi */
141 };
142 
143 static void (*mextfree_fns[4])(caddr_t, u_int, void *);
144 static u_int num_extfree_fns;
145 
146 #define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
147 			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
148 #define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
149 			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)
150 
151 /*
152  * Initialize the mbuf allocator.
153  */
154 void
155 mbinit(void)
156 {
157 	int i, error;
158 	unsigned int lowbits;
159 
160 	CTASSERT(MSIZE == sizeof(struct mbuf));
161 
162 	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
163 
164 	mbuf_mem_alloc = 0;
165 
166 #if DIAGNOSTIC
167 	if (mclsizes[0] != MCLBYTES)
168 		panic("mbinit: the smallest cluster size != MCLBYTES");
169 	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
170 		panic("mbinit: the largest cluster size != MAXMCLBYTES");
171 #endif
172 
173 	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
174 
175 	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
176 	    IPL_NET, 0, "mtagpl", NULL);
177 
178 	for (i = 0; i < nitems(mclsizes); i++) {
179 		lowbits = mclsizes[i] & ((1 << 10) - 1);
180 		if (lowbits) {
181 			snprintf(mclnames[i], sizeof(mclnames[0]),
182 			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
183 		} else {
184 			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
185 			    mclsizes[i] >> 10);
186 		}
187 
188 		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
189 	}
190 
191 	error = nmbclust_update(nmbclust);
192 	KASSERT(error == 0);
193 
194 	(void)mextfree_register(m_extfree_pool);
195 	KASSERT(num_extfree_fns == 1);
196 }
197 
198 void
199 mbcpuinit(void)
200 {
201 	int i;
202 
203 	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);
204 
205 	pool_cache_init(&mbpool);
206 	pool_cache_init(&mtagpool);
207 
208 	for (i = 0; i < nitems(mclsizes); i++)
209 		pool_cache_init(&mclpools[i]);
210 }
211 
212 int
213 nmbclust_update(long newval)
214 {
215 	int i;
216 
217 	if (newval <= 0 || newval > LONG_MAX / MCLBYTES)
218 		return ERANGE;
219 	/* update the global mbuf memory limit */
220 	nmbclust = newval;
221 	mbuf_mem_limit = nmbclust * MCLBYTES;
222 
223 	pool_wakeup(&mbpool);
224 	for (i = 0; i < nitems(mclsizes); i++)
225 		pool_wakeup(&mclpools[i]);
226 
227 	return 0;
228 }
229 
230 /*
231  * Space allocation routines.
232  */
233 struct mbuf *
234 m_get(int nowait, int type)
235 {
236 	struct mbuf *m;
237 	struct counters_ref cr;
238 	uint64_t *counters;
239 	int s;
240 
241 	KASSERT(type >= 0 && type < MT_NTYPES);
242 
243 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
244 	if (m == NULL)
245 		return (NULL);
246 
247 	s = splnet();
248 	counters = counters_enter(&cr, mbstat);
249 	counters[type]++;
250 	counters_leave(&cr, mbstat);
251 	splx(s);
252 
253 	m->m_type = type;
254 	m->m_next = NULL;
255 	m->m_nextpkt = NULL;
256 	m->m_data = m->m_dat;
257 	m->m_flags = 0;
258 
259 	return (m);
260 }
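
/*
 * Example (editor's sketch, not part of the original source): a
 * minimal allocate/fill/free cycle for a plain data mbuf.  A fresh
 * mbuf has m_data set but m_len unset, so the caller sets it:
 *
 *	struct mbuf *m;
 *
 *	m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_len = 4;
 *	memset(mtod(m, caddr_t), 0, m->m_len);
 *	m_freem(m);
 */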
261 
262 /*
263  * ATTN: When changing anything here, check m_inithdr() and m_defrag();
264  * they may need to change as well.
265  */
266 struct mbuf *
267 m_gethdr(int nowait, int type)
268 {
269 	struct mbuf *m;
270 	struct counters_ref cr;
271 	uint64_t *counters;
272 	int s;
273 
274 	KASSERT(type >= 0 && type < MT_NTYPES);
275 
276 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
277 	if (m == NULL)
278 		return (NULL);
279 
280 	s = splnet();
281 	counters = counters_enter(&cr, mbstat);
282 	counters[type]++;
283 	counters_leave(&cr, mbstat);
284 	splx(s);
285 
286 	m->m_type = type;
287 
288 	return (m_inithdr(m));
289 }
290 
291 struct mbuf *
292 m_inithdr(struct mbuf *m)
293 {
294 	/* keep in sync with m_gethdr */
295 	m->m_next = NULL;
296 	m->m_nextpkt = NULL;
297 	m->m_data = m->m_pktdat;
298 	m->m_flags = M_PKTHDR;
299 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
300 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
301 
302 	return (m);
303 }
304 
305 static inline void
306 m_clearhdr(struct mbuf *m)
307 {
308 	/* delete all mbuf tags to reset the state */
309 	m_tag_delete_chain(m);
310 #if NPF > 0
311 	pf_mbuf_unlink_state_key(m);
312 	pf_mbuf_unlink_inpcb(m);
313 #endif	/* NPF > 0 */
314 
315 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
316 }
317 
318 void
319 m_removehdr(struct mbuf *m)
320 {
321 	KASSERT(m->m_flags & M_PKTHDR);
322 	m_clearhdr(m);
323 	m->m_flags &= ~M_PKTHDR;
324 }
325 
326 void
327 m_resethdr(struct mbuf *m)
328 {
329 	int len = m->m_pkthdr.len;
330 	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;
331 
332 	KASSERT(m->m_flags & M_PKTHDR);
333 	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
334 	m_clearhdr(m);
335 	/* like m_inithdr(), but keep any associated data and mbufs */
336 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
337 	m->m_pkthdr.len = len;
338 	m->m_pkthdr.ph_loopcnt = loopcnt;
339 }
340 
341 void
342 m_calchdrlen(struct mbuf *m)
343 {
344 	struct mbuf *n;
345 	int plen = 0;
346 
347 	KASSERT(m->m_flags & M_PKTHDR);
348 	for (n = m; n; n = n->m_next)
349 		plen += n->m_len;
350 	m->m_pkthdr.len = plen;
351 }
352 
353 struct mbuf *
354 m_getclr(int nowait, int type)
355 {
356 	struct mbuf *m;
357 
358 	MGET(m, nowait, type);
359 	if (m == NULL)
360 		return (NULL);
361 	memset(mtod(m, caddr_t), 0, MLEN);
362 	return (m);
363 }
364 
365 struct pool *
366 m_clpool(u_int pktlen)
367 {
368 	struct pool *pp;
369 	int pi;
370 
371 	for (pi = 0; pi < nitems(mclpools); pi++) {
372 		pp = &mclpools[pi];
373 		if (pktlen <= pp->pr_size)
374 			return (pp);
375 	}
376 
377 	return (NULL);
378 }
379 
380 struct mbuf *
381 m_clget(struct mbuf *m, int how, u_int pktlen)
382 {
383 	struct mbuf *m0 = NULL;
384 	struct pool *pp;
385 	caddr_t buf;
386 
387 	pp = m_clpool(pktlen);
388 #ifdef DIAGNOSTIC
389 	if (pp == NULL)
390 		panic("m_clget: request for %u byte cluster", pktlen);
391 #endif
392 
393 	if (m == NULL) {
394 		m0 = m_gethdr(how, MT_DATA);
395 		if (m0 == NULL)
396 			return (NULL);
397 
398 		m = m0;
399 	}
400 	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
401 	if (buf == NULL) {
402 		m_freem(m0);
403 		return (NULL);
404 	}
405 
406 	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
407 	return (m);
408 }
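
/*
 * Example (editor's sketch): passing a NULL mbuf lets m_clget()
 * allocate both the packet header mbuf and a cluster big enough for
 * the given length; MCLGETL()/MCLGET() wrap this for an existing mbuf:
 *
 *	struct mbuf *m;
 *
 *	m = m_clget(NULL, M_DONTWAIT, 1500);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */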
409 
410 void
411 m_extfree_pool(caddr_t buf, u_int size, void *pp)
412 {
413 	pool_put(pp, buf);
414 }
415 
416 struct mbuf *
417 m_free(struct mbuf *m)
418 {
419 	struct mbuf *n;
420 	struct counters_ref cr;
421 	uint64_t *counters;
422 	int s;
423 
424 	if (m == NULL)
425 		return (NULL);
426 
427 	s = splnet();
428 	counters = counters_enter(&cr, mbstat);
429 	counters[m->m_type]--;
430 	counters_leave(&cr, mbstat);
431 	splx(s);
432 
433 	n = m->m_next;
434 	if (m->m_flags & M_ZEROIZE) {
435 		m_zero(m);
436 		/* propagate M_ZEROIZE to the next mbuf in the chain */
437 		if (n)
438 			n->m_flags |= M_ZEROIZE;
439 	}
440 	if (m->m_flags & M_PKTHDR) {
441 		m_tag_delete_chain(m);
442 #if NPF > 0
443 		pf_mbuf_unlink_state_key(m);
444 		pf_mbuf_unlink_inpcb(m);
445 #endif	/* NPF > 0 */
446 	}
447 	if (m->m_flags & M_EXT)
448 		m_extfree(m);
449 
450 	pool_put(&mbpool, m);
451 
452 	return (n);
453 }
454 
455 void
456 m_extref(struct mbuf *o, struct mbuf *n)
457 {
458 	int refs = MCLISREFERENCED(o);
459 
460 	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);
461 
462 	if (refs)
463 		mtx_enter(&m_extref_mtx);
464 	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
465 	n->m_ext.ext_prevref = o;
466 	o->m_ext.ext_nextref = n;
467 	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
468 	if (refs)
469 		mtx_leave(&m_extref_mtx);
470 
471 	MCLREFDEBUGN((n), __FILE__, __LINE__);
472 }
473 
474 static inline u_int
475 m_extunref(struct mbuf *m)
476 {
477 	int refs = 0;
478 
479 	if (!MCLISREFERENCED(m))
480 		return (0);
481 
482 	mtx_enter(&m_extref_mtx);
483 	if (MCLISREFERENCED(m)) {
484 		m->m_ext.ext_nextref->m_ext.ext_prevref =
485 		    m->m_ext.ext_prevref;
486 		m->m_ext.ext_prevref->m_ext.ext_nextref =
487 		    m->m_ext.ext_nextref;
488 		refs = 1;
489 	}
490 	mtx_leave(&m_extref_mtx);
491 
492 	return (refs);
493 }
494 
495 /*
496  * Returns an index for use with MEXTADD.
497  * It should be called only once per free function.
498  * Drivers can be assured that the index will be non-zero.
499  */
500 u_int
501 mextfree_register(void (*fn)(caddr_t, u_int, void *))
502 {
503 	KASSERT(num_extfree_fns < nitems(mextfree_fns));
504 	mextfree_fns[num_extfree_fns] = fn;
505 	return num_extfree_fns++;
506 }
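
/*
 * Example (editor's sketch): a hypothetical driver registering its
 * own free function at attach time and attaching external storage
 * with it later.  "mydrv_extfree", "buf", "size" and "sc" are
 * illustrative names; the free function must match
 * void (*)(caddr_t, u_int, void *):
 *
 *	static u_int mydrv_extfree_idx;
 *
 *	mydrv_extfree_idx = mextfree_register(mydrv_extfree);
 *	...
 *	MEXTADD(m, buf, size, M_EXTWR, mydrv_extfree_idx, sc);
 */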
507 
508 void
509 m_extfree(struct mbuf *m)
510 {
511 	if (m_extunref(m) == 0) {
512 		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
513 		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
514 		    m->m_ext.ext_size, m->m_ext.ext_arg);
515 	}
516 
517 	m->m_flags &= ~(M_EXT|M_EXTWR);
518 }
519 
520 struct mbuf *
521 m_freem(struct mbuf *m)
522 {
523 	struct mbuf *n;
524 
525 	if (m == NULL)
526 		return (NULL);
527 
528 	n = m->m_nextpkt;
529 
530 	do
531 		m = m_free(m);
532 	while (m != NULL);
533 
534 	return (n);
535 }
536 
537 void
538 m_purge(struct mbuf *m)
539 {
540 	while (m != NULL)
541 		m = m_freem(m);
542 }
543 
544 /*
545  * mbuf chain defragmenter. This function uses some evil tricks to defragment
546  * an mbuf chain into a single buffer without changing the mbuf pointer.
547  * It needs to know a lot about the mbuf internals to make this work.
548  */
549 int
550 m_defrag(struct mbuf *m, int how)
551 {
552 	struct mbuf *m0;
553 	unsigned int adj;
554 
555 	if (m->m_next == NULL)
556 		return (0);
557 
558 	KASSERT(m->m_flags & M_PKTHDR);
559 
560 	adj = mtod(m, unsigned long) & (sizeof(long) - 1);
561 	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
562 		return (ENOBUFS);
563 	if (m->m_pkthdr.len + adj > MHLEN) {
564 		MCLGETL(m0, how, m->m_pkthdr.len + adj);
565 		if (!(m0->m_flags & M_EXT)) {
566 			m_free(m0);
567 			return (ENOBUFS);
568 		}
569 	}
570 	m0->m_data += adj;
571 	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
572 	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;
573 
574 	/* free chain behind and possible ext buf on the first mbuf */
575 	m_freem(m->m_next);
576 	m->m_next = NULL;
577 	if (m->m_flags & M_EXT)
578 		m_extfree(m);
579 
580 	/*
581 	 * Bounce copy mbuf over to the original mbuf and set everything up.
582 	 * This needs to reset or clear all pointers that may go into the
583 	 * original mbuf chain.
584 	 */
585 	if (m0->m_flags & M_EXT) {
586 		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
587 		MCLINITREFERENCE(m);
588 		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
589 		m->m_data = m->m_ext.ext_buf + adj;
590 	} else {
591 		m->m_data = m->m_pktdat + adj;
592 		memcpy(m->m_data, m0->m_data, m0->m_len);
593 	}
594 	m->m_pkthdr.len = m->m_len = m0->m_len;
595 
596 	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
597 	m_free(m0);
598 
599 	return (0);
600 }
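
/*
 * Example (editor's sketch): a transmit path whose DMA engine only
 * takes a single segment.  On failure the chain is untouched, so the
 * caller decides whether to free it:
 *
 *	if (m_defrag(m, M_DONTWAIT) != 0) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *	(m now holds the whole packet in one buffer)
 */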
601 
602 /*
603  * Mbuffer utility routines.
604  */
605 
606 /*
607  * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
608  */
609 struct mbuf *
610 m_prepend(struct mbuf *m, int len, int how)
611 {
612 	struct mbuf *mn;
613 
614 	if (len > MHLEN)
615 		panic("mbuf prepend length too big");
616 
617 	if (m_leadingspace(m) >= len) {
618 		m->m_data -= len;
619 		m->m_len += len;
620 	} else {
621 		MGET(mn, how, m->m_type);
622 		if (mn == NULL) {
623 			m_freem(m);
624 			return (NULL);
625 		}
626 		if (m->m_flags & M_PKTHDR)
627 			M_MOVE_PKTHDR(mn, m);
628 		mn->m_next = m;
629 		m = mn;
630 		m_align(m, len);
631 		m->m_len = len;
632 	}
633 	if (m->m_flags & M_PKTHDR)
634 		m->m_pkthdr.len += len;
635 	return (m);
636 }
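
/*
 * Example (editor's sketch): prepending a hypothetical 4-byte shim
 * header.  Note that m_prepend() frees the chain itself on failure:
 *
 *	m = m_prepend(m, 4, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	memset(mtod(m, caddr_t), 0, 4);
 */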
637 
638 /*
639  * Make a copy of an mbuf chain starting "off" bytes from the beginning,
640  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
641  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from the caller.
642  */
643 struct mbuf *
644 m_copym(struct mbuf *m0, int off, int len, int wait)
645 {
646 	struct mbuf *m, *n, **np;
647 	struct mbuf *top;
648 	int copyhdr = 0;
649 
650 	if (off < 0 || len < 0)
651 		panic("m_copym0: off %d, len %d", off, len);
652 	if (off == 0 && m0->m_flags & M_PKTHDR)
653 		copyhdr = 1;
654 	if ((m = m_getptr(m0, off, &off)) == NULL)
655 		panic("m_copym0: short mbuf chain");
656 	np = &top;
657 	top = NULL;
658 	while (len > 0) {
659 		if (m == NULL) {
660 			if (len != M_COPYALL)
661 				panic("m_copym0: m == NULL and not COPYALL");
662 			break;
663 		}
664 		MGET(n, wait, m->m_type);
665 		*np = n;
666 		if (n == NULL)
667 			goto nospace;
668 		if (copyhdr) {
669 			if (m_dup_pkthdr(n, m0, wait))
670 				goto nospace;
671 			if (len != M_COPYALL)
672 				n->m_pkthdr.len = len;
673 			copyhdr = 0;
674 		}
675 		n->m_len = min(len, m->m_len - off);
676 		if (m->m_flags & M_EXT) {
677 			n->m_data = m->m_data + off;
678 			n->m_ext = m->m_ext;
679 			MCLADDREFERENCE(m, n);
680 		} else {
681 			n->m_data += m->m_data -
682 			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
683 			n->m_data += off;
684 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
685 			    n->m_len);
686 		}
687 		if (len != M_COPYALL)
688 			len -= n->m_len;
689 		off += n->m_len;
690 #ifdef DIAGNOSTIC
691 		if (off > m->m_len)
692 			panic("m_copym0 overrun");
693 #endif
694 		if (off == m->m_len) {
695 			m = m->m_next;
696 			off = 0;
697 		}
698 		np = &n->m_next;
699 	}
700 	return (top);
701 nospace:
702 	m_freem(top);
703 	return (NULL);
704 }
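
/*
 * Example (editor's sketch): taking a copy of a whole packet before
 * handing the original to a consumer that will free it.  Cluster data
 * is shared by reference, not duplicated:
 *
 *	struct mbuf *n;
 *
 *	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */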
705 
706 /*
707  * Copy data from an mbuf chain starting "off" bytes from the beginning,
708  * continuing for "len" bytes, into the indicated buffer.
709  */
710 void
711 m_copydata(struct mbuf *m, int off, int len, void *p)
712 {
713 	caddr_t cp = p;
714 	unsigned count;
715 
716 	if (off < 0)
717 		panic("m_copydata: off %d < 0", off);
718 	if (len < 0)
719 		panic("m_copydata: len %d < 0", len);
720 	if ((m = m_getptr(m, off, &off)) == NULL)
721 		panic("m_copydata: short mbuf chain");
722 	while (len > 0) {
723 		if (m == NULL)
724 			panic("m_copydata: null mbuf");
725 		count = min(m->m_len - off, len);
726 		memmove(cp, mtod(m, caddr_t) + off, count);
727 		len -= count;
728 		cp += count;
729 		off = 0;
730 		m = m->m_next;
731 	}
732 }
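
/*
 * Example (editor's sketch): extracting a fixed-size header into a
 * local structure no matter how the chain is fragmented.  "struct
 * myhdr" is an illustrative type; the length must be checked first
 * since a short chain panics:
 *
 *	struct myhdr hdr;
 *
 *	if (m->m_pkthdr.len < sizeof(hdr))
 *		return (EINVAL);
 *	m_copydata(m, 0, sizeof(hdr), &hdr);
 */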
733 
734 /*
735  * Copy data from a buffer back into the indicated mbuf chain,
736  * starting "off" bytes from the beginning, extending the mbuf
737  * chain if necessary.  The mbuf chain must be properly initialized,
738  * including the setting of m_len.
739  */
740 int
741 m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
742 {
743 	int mlen, totlen = 0;
744 	struct mbuf *m = m0, *n;
745 	caddr_t cp = (caddr_t)_cp;
746 	int error = 0;
747 
748 	if (m0 == NULL)
749 		return (0);
750 	while (off > (mlen = m->m_len)) {
751 		off -= mlen;
752 		totlen += mlen;
753 		if (m->m_next == NULL) {
754 			if ((n = m_get(wait, m->m_type)) == NULL) {
755 				error = ENOBUFS;
756 				goto out;
757 			}
758 
759 			if (off + len > MLEN) {
760 				MCLGETL(n, wait, off + len);
761 				if (!(n->m_flags & M_EXT)) {
762 					m_free(n);
763 					error = ENOBUFS;
764 					goto out;
765 				}
766 			}
767 			memset(mtod(n, caddr_t), 0, off);
768 			n->m_len = len + off;
769 			m->m_next = n;
770 		}
771 		m = m->m_next;
772 	}
773 	while (len > 0) {
774 		/* extend last packet to be filled fully */
775 		if (m->m_next == NULL && (len > m->m_len - off))
776 			m->m_len += min(len - (m->m_len - off),
777 			    m_trailingspace(m));
778 		mlen = min(m->m_len - off, len);
779 		memmove(mtod(m, caddr_t) + off, cp, mlen);
780 		cp += mlen;
781 		len -= mlen;
782 		totlen += mlen + off;
783 		if (len == 0)
784 			break;
785 		off = 0;
786 
787 		if (m->m_next == NULL) {
788 			if ((n = m_get(wait, m->m_type)) == NULL) {
789 				error = ENOBUFS;
790 				goto out;
791 			}
792 
793 			if (len > MLEN) {
794 				MCLGETL(n, wait, len);
795 				if (!(n->m_flags & M_EXT)) {
796 					m_free(n);
797 					error = ENOBUFS;
798 					goto out;
799 				}
800 			}
801 			n->m_len = len;
802 			m->m_next = n;
803 		}
804 		m = m->m_next;
805 	}
806 out:
807 	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
808 		m->m_pkthdr.len = totlen;
809 
810 	return (error);
811 }
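
/*
 * Example (editor's sketch): writing a two-byte checksum back into
 * the chain at a known offset, letting m_copyback() extend the chain
 * if needed.  "struct myhdr" is an illustrative type:
 *
 *	uint16_t csum = 0;
 *
 *	if (m_copyback(m, offsetof(struct myhdr, sum), sizeof(csum),
 *	    &csum, M_DONTWAIT) != 0)
 *		return (ENOBUFS);
 */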
812 
813 /*
814  * Concatenate mbuf chain n to m.
815  * n might be copied into m (when n->m_len is small), so the data portion
816  * of n could end up in an mbuf of a different mbuf type.
817  * Therefore both chains should be of the same type (e.g. MT_DATA).
818  * The m_pkthdr, if any, is not updated.
819  */
820 void
821 m_cat(struct mbuf *m, struct mbuf *n)
822 {
823 	while (m->m_next)
824 		m = m->m_next;
825 	while (n) {
826 		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
827 			/* just join the two chains */
828 			m->m_next = n;
829 			return;
830 		}
831 		/* splat the data from one into the other */
832 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
833 		    n->m_len);
834 		m->m_len += n->m_len;
835 		n = m_free(n);
836 	}
837 }
838 
839 void
840 m_adj(struct mbuf *mp, int req_len)
841 {
842 	int len = req_len;
843 	struct mbuf *m;
844 	int count;
845 
846 	if (mp == NULL)
847 		return;
848 	if (len >= 0) {
849 		/*
850 		 * Trim from head.
851 		 */
852 		m = mp;
853 		while (m != NULL && len > 0) {
854 			if (m->m_len <= len) {
855 				len -= m->m_len;
856 				m->m_data += m->m_len;
857 				m->m_len = 0;
858 				m = m->m_next;
859 			} else {
860 				m->m_data += len;
861 				m->m_len -= len;
862 				len = 0;
863 			}
864 		}
865 		if (mp->m_flags & M_PKTHDR)
866 			mp->m_pkthdr.len -= (req_len - len);
867 	} else {
868 		/*
869 		 * Trim from tail.  Scan the mbuf chain,
870 		 * calculating its length and finding the last mbuf.
871 		 * If the adjustment only affects this mbuf, then just
872 		 * adjust and return.  Otherwise, rescan and truncate
873 		 * after the remaining size.
874 		 */
875 		len = -len;
876 		count = 0;
877 		m = mp;
878 		for (;;) {
879 			count += m->m_len;
880 			if (m->m_next == NULL)
881 				break;
882 			m = m->m_next;
883 		}
884 		if (m->m_len >= len) {
885 			m->m_len -= len;
886 			if (mp->m_flags & M_PKTHDR)
887 				mp->m_pkthdr.len -= len;
888 			return;
889 		}
890 		count -= len;
891 		if (count < 0)
892 			count = 0;
893 		/*
894 		 * Correct length for chain is "count".
895 		 * Find the mbuf with last data, adjust its length,
896 		 * and toss data from remaining mbufs on chain.
897 		 */
898 		if (mp->m_flags & M_PKTHDR)
899 			mp->m_pkthdr.len = count;
900 		m = mp;
901 		for (;;) {
902 			if (m->m_len >= count) {
903 				m->m_len = count;
904 				break;
905 			}
906 			count -= m->m_len;
907 			m = m->m_next;
908 		}
909 		while ((m = m->m_next) != NULL)
910 			m->m_len = 0;
911 	}
912 }
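
/*
 * Example (editor's sketch): a positive length trims from the head,
 * a negative one from the tail, e.g. stripping a 14-byte link-level
 * header and a 4-byte trailer:
 *
 *	m_adj(m, 14);
 *	m_adj(m, -4);
 */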
913 
914 /*
915  * Rearrange an mbuf chain so that len bytes are contiguous
916  * and in the data area of an mbuf (so that mtod will work
917  * for a structure of size len).  Returns the resulting
918  * mbuf chain on success, frees it and returns null on failure.
919  */
920 struct mbuf *
921 m_pullup(struct mbuf *m0, int len)
922 {
923 	struct mbuf *m;
924 	unsigned int adj;
925 	caddr_t head, tail;
926 	unsigned int space;
927 
928 	/* if len is already contig in m0, then don't do any work */
929 	if (len <= m0->m_len)
930 		return (m0);
931 
932 	/* look for some data */
933 	m = m0->m_next;
934 	if (m == NULL)
935 		goto freem0;
936 
937 	head = M_DATABUF(m0);
938 	if (m0->m_len == 0) {
939 		while (m->m_len == 0) {
940 			m = m_free(m);
941 			if (m == NULL)
942 				goto freem0;
943 		}
944 
945 		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
946 	} else
947 		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);
948 
949 	tail = head + M_SIZE(m0);
950 	head += adj;
951 
952 	if (!M_READONLY(m0) && len <= tail - head) {
953 		/* we can copy everything into the first mbuf */
954 		if (m0->m_len == 0) {
955 			m0->m_data = head;
956 		} else if (len > tail - mtod(m0, caddr_t)) {
957 			/* need to memmove to make space at the end */
958 			memmove(head, mtod(m0, caddr_t), m0->m_len);
959 			m0->m_data = head;
960 		}
961 
962 		len -= m0->m_len;
963 	} else {
964 		/* the first mbuf is too small or read-only, make a new one */
965 		space = adj + len;
966 
967 		if (space > MAXMCLBYTES)
968 			goto bad;
969 
970 		m0->m_next = m;
971 		m = m0;
972 
973 		MGET(m0, M_DONTWAIT, m->m_type);
974 		if (m0 == NULL)
975 			goto bad;
976 
977 		if (space > MHLEN) {
978 			MCLGETL(m0, M_DONTWAIT, space);
979 			if ((m0->m_flags & M_EXT) == 0)
980 				goto bad;
981 		}
982 
983 		if (m->m_flags & M_PKTHDR)
984 			M_MOVE_PKTHDR(m0, m);
985 
986 		m0->m_len = 0;
987 		m0->m_data += adj;
988 	}
989 
990 	KDASSERT(m_trailingspace(m0) >= len);
991 
992 	for (;;) {
993 		space = min(len, m->m_len);
994 		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
995 		len -= space;
996 		m0->m_len += space;
997 		m->m_len -= space;
998 
999 		if (m->m_len > 0)
1000 			m->m_data += space;
1001 		else
1002 			m = m_free(m);
1003 
1004 		if (len == 0)
1005 			break;
1006 
1007 		if (m == NULL)
1008 			goto bad;
1009 	}
1010 
1011 	m0->m_next = m; /* link the chain back up */
1012 
1013 	return (m0);
1014 
1015 bad:
1016 	m_freem(m);
1017 freem0:
1018 	m_free(m0);
1019 	return (NULL);
1020 }
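
/*
 * Example (editor's sketch): the classic header-parsing idiom.  The
 * chain pointer must be reloaded because m_pullup() may replace the
 * first mbuf, and on failure the chain has already been freed.
 * "struct myhdr" is an illustrative type:
 *
 *	if ((m = m_pullup(m, sizeof(struct myhdr))) == NULL)
 *		return (ENOBUFS);
 *	mh = mtod(m, struct myhdr *);
 */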
1021 
1022 /*
1023  * Return the mbuf and in-mbuf offset of a location in an mbuf chain.
1024  */
1025 struct mbuf *
1026 m_getptr(struct mbuf *m, int loc, int *off)
1027 {
1028 	while (loc >= 0) {
1029 		/* Normal end of search */
1030 		if (m->m_len > loc) {
1031 			*off = loc;
1032 			return (m);
1033 		} else {
1034 			loc -= m->m_len;
1035 
1036 			if (m->m_next == NULL) {
1037 				if (loc == 0) {
1038 					/* Point at the end of valid data */
1039 					*off = m->m_len;
1040 					return (m);
1041 				} else {
1042 					return (NULL);
1043 				}
1044 			} else {
1045 				m = m->m_next;
1046 			}
1047 		}
1048 	}
1049 
1050 	return (NULL);
1051 }
1052 
1053 /*
1054  * Partition an mbuf chain into two pieces, returning the tail --
1055  * all but the first len0 bytes.  In case of failure, it returns NULL and
1056  * attempts to restore the chain to its original state.
1057  */
1058 struct mbuf *
1059 m_split(struct mbuf *m0, int len0, int wait)
1060 {
1061 	struct mbuf *m, *n;
1062 	unsigned len = len0, remain, olen;
1063 
1064 	for (m = m0; m && len > m->m_len; m = m->m_next)
1065 		len -= m->m_len;
1066 	if (m == NULL)
1067 		return (NULL);
1068 	remain = m->m_len - len;
1069 	if (m0->m_flags & M_PKTHDR) {
1070 		MGETHDR(n, wait, m0->m_type);
1071 		if (n == NULL)
1072 			return (NULL);
1073 		if (m_dup_pkthdr(n, m0, wait)) {
1074 			m_freem(n);
1075 			return (NULL);
1076 		}
1077 		n->m_pkthdr.len -= len0;
1078 		olen = m0->m_pkthdr.len;
1079 		m0->m_pkthdr.len = len0;
1080 		if (remain == 0) {
1081 			n->m_next = m->m_next;
1082 			m->m_next = NULL;
1083 			n->m_len = 0;
1084 			return (n);
1085 		}
1086 		if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) {
1087 			/* m can't be the lead packet */
1088 			m_align(n, 0);
1089 			n->m_next = m_split(m, len, wait);
1090 			if (n->m_next == NULL) {
1091 				(void) m_free(n);
1092 				m0->m_pkthdr.len = olen;
1093 				return (NULL);
1094 			} else {
1095 				n->m_len = 0;
1096 				return (n);
1097 			}
1098 		}
1099 	} else if (remain == 0) {
1100 		n = m->m_next;
1101 		m->m_next = NULL;
1102 		return (n);
1103 	} else {
1104 		MGET(n, wait, m->m_type);
1105 		if (n == NULL)
1106 			return (NULL);
1107 	}
1108 	if (m->m_flags & M_EXT) {
1109 		n->m_ext = m->m_ext;
1110 		MCLADDREFERENCE(m, n);
1111 		n->m_data = m->m_data + len;
1112 	} else {
1113 		m_align(n, remain);
1114 		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
1115 	}
1116 	n->m_len = remain;
1117 	m->m_len = len;
1118 	n->m_next = m->m_next;
1119 	m->m_next = NULL;
1120 	return (n);
1121 }
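
/*
 * Example (editor's sketch): splitting a packet after a hypothetical
 * 20-byte header, keeping the header in "m" and getting the payload
 * back as a new chain:
 *
 *	struct mbuf *n;
 *
 *	n = m_split(m, 20, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);	(m was left intact)
 */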
1122 
1123 /*
1124  * Make space for a new header of length hlen at skip bytes
1125  * into the packet.  When doing this we allocate new mbufs only
1126  * when absolutely necessary.  The mbuf where the new header
1127  * is to go is returned together with an offset into the mbuf.
1128  * If NULL is returned then the mbuf chain may have been modified;
1129  * the caller is assumed to always free the chain.
1130  */
1131 struct mbuf *
1132 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
1133 {
1134 	struct mbuf *m;
1135 	unsigned remain;
1136 
1137 	KASSERT(m0->m_flags & M_PKTHDR);
1138 	/*
1139 	 * Limit the size of the new header to MHLEN.  In case
1140 	 * skip = 0 and the first buffer is not a cluster, this
1141 	 * is the maximum space available in that mbuf.
1142 	 * In other words, this code never prepends an mbuf.
1143 	 */
1144 	KASSERT(hlen < MHLEN);
1145 
1146 	for (m = m0; m && skip > m->m_len; m = m->m_next)
1147 		skip -= m->m_len;
1148 	if (m == NULL)
1149 		return (NULL);
1150 	/*
1151 	 * At this point skip is the offset into the mbuf m
1152 	 * where the new header should be placed.  Figure out
1153 	 * if there's space to insert the new header.  If so,
1154 	 * and copying the remainder makes sense then do so.
1155 	 * Otherwise insert a new mbuf in the chain, splitting
1156 	 * the contents of m as needed.
1157 	 */
1158 	remain = m->m_len - skip;		/* data to move */
1159 	if (skip < remain && hlen <= m_leadingspace(m)) {
1160 		if (skip)
1161 			memmove(m->m_data-hlen, m->m_data, skip);
1162 		m->m_data -= hlen;
1163 		m->m_len += hlen;
1164 		*off = skip;
1165 	} else if (hlen > m_trailingspace(m)) {
1166 		struct mbuf *n;
1167 
1168 		if (remain > 0) {
1169 			MGET(n, M_DONTWAIT, m->m_type);
1170 			if (n && remain > MLEN) {
1171 				MCLGETL(n, M_DONTWAIT, remain);
1172 				if ((n->m_flags & M_EXT) == 0) {
1173 					m_free(n);
1174 					n = NULL;
1175 				}
1176 			}
1177 			if (n == NULL)
1178 				return (NULL);
1179 
1180 			memcpy(n->m_data, mtod(m, char *) + skip, remain);
1181 			n->m_len = remain;
1182 			m->m_len -= remain;
1183 
1184 			n->m_next = m->m_next;
1185 			m->m_next = n;
1186 		}
1187 
1188 		if (hlen <= m_trailingspace(m)) {
1189 			m->m_len += hlen;
1190 			*off = skip;
1191 		} else {
1192 			n = m_get(M_DONTWAIT, m->m_type);
1193 			if (n == NULL)
1194 				return NULL;
1195 
1196 			n->m_len = hlen;
1197 
1198 			n->m_next = m->m_next;
1199 			m->m_next = n;
1200 
1201 			*off = 0;	/* header is at front ... */
1202 			m = n;		/* ... of new mbuf */
1203 		}
1204 	} else {
1205 		/*
1206 		 * Copy the remainder to the back of the mbuf
1207 		 * so there's space to write the new header.
1208 		 */
1209 		if (remain > 0)
1210 			memmove(mtod(m, caddr_t) + skip + hlen,
1211 			      mtod(m, caddr_t) + skip, remain);
1212 		m->m_len += hlen;
1213 		*off = skip;
1214 	}
1215 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
1216 	return m;
1217 }
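
/*
 * Example (editor's sketch): opening an 8-byte hole for a new header
 * 20 bytes into the packet, as an IPsec-style transform might.  Per
 * the contract above, the caller frees the chain on failure:
 *
 *	int off;
 *	struct mbuf *n;
 *
 *	n = m_makespace(m, 20, 8, &off);
 *	if (n == NULL) {
 *		m_freem(m);
 *		return (ENOBUFS);
 *	}
 *	memset(mtod(n, caddr_t) + off, 0, 8);
 */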
1218 
1219 
1220 /*
1221  * Routine to copy from device local memory into mbufs.
1222  */
1223 struct mbuf *
1224 m_devget(char *buf, int totlen, int off)
1225 {
1226 	struct mbuf	*m;
1227 	struct mbuf	*top, **mp;
1228 	int		 len;
1229 
1230 	top = NULL;
1231 	mp = &top;
1232 
1233 	if (off < 0 || off > MHLEN)
1234 		return (NULL);
1235 
1236 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1237 	if (m == NULL)
1238 		return (NULL);
1239 
1240 	m->m_pkthdr.len = totlen;
1241 
1242 	len = MHLEN;
1243 
1244 	while (totlen > 0) {
1245 		if (top != NULL) {
1246 			MGET(m, M_DONTWAIT, MT_DATA);
1247 			if (m == NULL) {
1248 				/*
1249 				 * As we might get called by pfkey, make sure
1250 				 * we do not leak sensitive data.
1251 				 */
1252 				top->m_flags |= M_ZEROIZE;
1253 				m_freem(top);
1254 				return (NULL);
1255 			}
1256 			len = MLEN;
1257 		}
1258 
1259 		if (totlen + off >= MINCLSIZE) {
1260 			MCLGET(m, M_DONTWAIT);
1261 			if (m->m_flags & M_EXT)
1262 				len = MCLBYTES;
1263 		} else {
1264 			/* Place initial small packet/header at end of mbuf. */
1265 			if (top == NULL && totlen + off + max_linkhdr <= len) {
1266 				m->m_data += max_linkhdr;
1267 				len -= max_linkhdr;
1268 			}
1269 		}
1270 
1271 		if (off) {
1272 			m->m_data += off;
1273 			len -= off;
1274 			off = 0;
1275 		}
1276 
1277 		m->m_len = len = min(totlen, len);
1278 		memcpy(mtod(m, void *), buf, (size_t)len);
1279 
1280 		buf += len;
1281 		*mp = m;
1282 		mp = &m->m_next;
1283 		totlen -= len;
1284 	}
1285 	return (top);
1286 }
1287 
1288 void
1289 m_zero(struct mbuf *m)
1290 {
1291 	if (M_READONLY(m)) {
1292 		mtx_enter(&m_extref_mtx);
1293 		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
1294 			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
1295 			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
1296 		}
1297 		mtx_leave(&m_extref_mtx);
1298 		return;
1299 	}
1300 
1301 	explicit_bzero(M_DATABUF(m), M_SIZE(m));
1302 }
1303 
1304 /*
1305  * Apply function f to the data in an mbuf chain starting "off" bytes from the
1306  * beginning, continuing for "len" bytes.
1307  */
1308 int
1309 m_apply(struct mbuf *m, int off, int len,
1310     int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
1311 {
1312 	int rval;
1313 	unsigned int count;
1314 
1315 	if (len < 0)
1316 		panic("m_apply: len %d < 0", len);
1317 	if (off < 0)
1318 		panic("m_apply: off %d < 0", off);
1319 	while (off > 0) {
1320 		if (m == NULL)
1321 			panic("m_apply: null mbuf in skip");
1322 		if (off < m->m_len)
1323 			break;
1324 		off -= m->m_len;
1325 		m = m->m_next;
1326 	}
1327 	while (len > 0) {
1328 		if (m == NULL)
1329 			panic("m_apply: null mbuf");
1330 		count = min(m->m_len - off, len);
1331 
1332 		rval = f(fstate, mtod(m, caddr_t) + off, count);
1333 		if (rval)
1334 			return (rval);
1335 
1336 		len -= count;
1337 		off = 0;
1338 		m = m->m_next;
1339 	}
1340 
1341 	return (0);
1342 }
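
/*
 * Example (editor's sketch): summing bytes across a region of the
 * chain without linearizing it.  "sumbytes" is an illustrative
 * callback matching the f() signature above:
 *
 *	int
 *	sumbytes(caddr_t arg, caddr_t data, unsigned int len)
 *	{
 *		unsigned int *total = (unsigned int *)arg;
 *
 *		while (len-- > 0)
 *			*total += (unsigned char)*data++;
 *		return (0);
 *	}
 *
 *	unsigned int total = 0;
 *	m_apply(m, 0, m->m_pkthdr.len, sumbytes, (caddr_t)&total);
 */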
1343 
1344 /*
1345  * Compute the amount of space available before the current start of data
1346  * in an mbuf. Read-only clusters never have space available.
1347  */
1348 int
1349 m_leadingspace(struct mbuf *m)
1350 {
1351 	if (M_READONLY(m))
1352 		return 0;
1353 	KASSERT(m->m_data >= M_DATABUF(m));
1354 	return m->m_data - M_DATABUF(m);
1355 }
1356 
1357 /*
1358  * Compute the amount of space available after the end of data in an mbuf.
1359  * Read-only clusters never have space available.
1360  */
1361 int
1362 m_trailingspace(struct mbuf *m)
1363 {
1364 	if (M_READONLY(m))
1365 		return 0;
1366 	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
1367 	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
1368 }
1369 
1370 /*
1371  * Set the m_data pointer of a newly-allocated mbuf to place an object of
1372  * the specified size at the end of the mbuf, longword aligned.
1373  */
1374 void
1375 m_align(struct mbuf *m, int len)
1376 {
1377 	KASSERT(len >= 0 && !M_READONLY(m));
1378 	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
1379 	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));
1380 
1381 	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
1382 }
1383 
1384 /*
1385  * Duplicate the mbuf pkthdr from "from" to "to".
1386  * "from" must have M_PKTHDR set, and "to" must be empty.
1387  */
1388 int
1389 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
1390 {
1391 	int error;
1392 
1393 	KASSERT(from->m_flags & M_PKTHDR);
1394 
1395 	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
1396 	to->m_flags |= (from->m_flags & M_COPYFLAGS);
1397 	to->m_pkthdr = from->m_pkthdr;
1398 
1399 #if NPF > 0
1400 	to->m_pkthdr.pf.statekey = NULL;
1401 	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
1402 	to->m_pkthdr.pf.inp = NULL;
1403 	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
1404 #endif	/* NPF > 0 */
1405 
1406 	SLIST_INIT(&to->m_pkthdr.ph_tags);
1407 
1408 	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
1409 		return (error);
1410 
1411 	if ((to->m_flags & M_EXT) == 0)
1412 		to->m_data = to->m_pktdat;
1413 
1414 	return (0);
1415 }
1416 
1417 struct mbuf *
1418 m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
1419 {
1420 	struct mbuf *m;
1421 	int len;
1422 
1423 	KASSERT(m0->m_flags & M_PKTHDR);
1424 
1425 	len = m0->m_pkthdr.len + adj;
1426 	if (len > MAXMCLBYTES) /* XXX */
1427 		return (NULL);
1428 
1429 	m = m_get(wait, m0->m_type);
1430 	if (m == NULL)
1431 		return (NULL);
1432 
1433 	if (m_dup_pkthdr(m, m0, wait) != 0)
1434 		goto fail;
1435 
1436 	if (len > MHLEN) {
1437 		MCLGETL(m, wait, len);
1438 		if (!ISSET(m->m_flags, M_EXT))
1439 			goto fail;
1440 	}
1441 
1442 	m->m_len = m->m_pkthdr.len = len;
1443 	m_adj(m, adj);
1444 	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));
1445 
1446 	return (m);
1447 
1448 fail:
1449 	m_freem(m);
1450 	return (NULL);
1451 }
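
/*
 * Example (editor's sketch): making a fully independent (deep) copy
 * of a packet into a single mbuf, unlike m_copym() which shares
 * cluster storage with the original:
 *
 *	struct mbuf *n;
 *
 *	n = m_dup_pkt(m, 0, M_DONTWAIT);
 *	if (n == NULL)
 *		return (ENOBUFS);
 */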
1452 
1453 void
1454 m_microtime(const struct mbuf *m, struct timeval *tv)
1455 {
1456 	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
1457 		struct timeval btv, utv;
1458 
1459 		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
1460 		microboottime(&btv);
1461 		timeradd(&btv, &utv, tv);
1462 	} else
1463 		microtime(tv);
1464 }
1465 
1466 void *
1467 m_pool_alloc(struct pool *pp, int flags, int *slowdown)
1468 {
1469 	void *v;
1470 
1471 	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
1472 		goto fail;
1473 
1474 	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
1475 	if (v != NULL)
1476 		return (v);
1477 
1478  fail:
1479 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1480 	return (NULL);
1481 }
1482 
1483 void
1484 m_pool_free(struct pool *pp, void *v)
1485 {
1486 	(*pool_allocator_multi.pa_free)(pp, v);
1487 
1488 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1489 }
1490 
1491 void
1492 m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
1493 {
1494 	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
1495 	pool_set_constraints(pp, &kp_dma_contig);
1496 }
1497 
1498 u_int
1499 m_pool_used(void)
1500 {
1501 	return ((mbuf_mem_alloc * 100) / mbuf_mem_limit);
1502 }
1503 
1504 #ifdef DDB
1505 void
1506 m_print(void *v,
1507     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1508 {
1509 	struct mbuf *m = v;
1510 
1511 	(*pr)("mbuf %p\n", m);
1512 	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
1513 	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
1514 	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
1515 	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
1516 	if (m->m_flags & M_PKTHDR) {
1517 		(*pr)("m_ptkhdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
1518 		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
1519 		(*pr)("m_ptkhdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
1520 		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
1521 		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
1522 		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
1523 		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
1524 		(*pr)("m_pkthdr.csum_flags: %b\n",
1525 		    m->m_pkthdr.csum_flags, MCS_BITS);
1526 		(*pr)("m_pkthdr.ether_vtag: %u\tm_ptkhdr.ph_rtableid: %u\n",
1527 		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
1528 		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
1529 		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
1530 		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
1531 		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
1532 		(*pr)("m_pkthdr.pf.flags: %b\n",
1533 		    m->m_pkthdr.pf.flags, MPF_BITS);
1534 		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
1535 		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
1536 	}
1537 	if (m->m_flags & M_EXT) {
1538 		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
1539 		    m->m_ext.ext_buf, m->m_ext.ext_size);
1540 		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
1541 		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
1542 		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
1543 		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
1544 
1545 	}
1546 }
1547 #endif
1548 
1549 /*
1550  * mbuf lists
1551  */
1552 
1553 void
1554 ml_init(struct mbuf_list *ml)
1555 {
1556 	ml->ml_head = ml->ml_tail = NULL;
1557 	ml->ml_len = 0;
1558 }
1559 
1560 void
1561 ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
1562 {
1563 	if (ml->ml_tail == NULL)
1564 		ml->ml_head = ml->ml_tail = m;
1565 	else {
1566 		ml->ml_tail->m_nextpkt = m;
1567 		ml->ml_tail = m;
1568 	}
1569 
1570 	m->m_nextpkt = NULL;
1571 	ml->ml_len++;
1572 }
1573 
1574 void
1575 ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
1576 {
1577 	if (!ml_empty(mlb)) {
1578 		if (ml_empty(mla))
1579 			mla->ml_head = mlb->ml_head;
1580 		else
1581 			mla->ml_tail->m_nextpkt = mlb->ml_head;
1582 		mla->ml_tail = mlb->ml_tail;
1583 		mla->ml_len += mlb->ml_len;
1584 
1585 		ml_init(mlb);
1586 	}
1587 }
1588 
1589 struct mbuf *
1590 ml_dequeue(struct mbuf_list *ml)
1591 {
1592 	struct mbuf *m;
1593 
1594 	m = ml->ml_head;
1595 	if (m != NULL) {
1596 		ml->ml_head = m->m_nextpkt;
1597 		if (ml->ml_head == NULL)
1598 			ml->ml_tail = NULL;
1599 
1600 		m->m_nextpkt = NULL;
1601 		ml->ml_len--;
1602 	}
1603 
1604 	return (m);
1605 }
1606 
1607 struct mbuf *
1608 ml_dechain(struct mbuf_list *ml)
1609 {
1610 	struct mbuf *m0;
1611 
1612 	m0 = ml->ml_head;
1613 
1614 	ml_init(ml);
1615 
1616 	return (m0);
1617 }
1618 
1619 unsigned int
1620 ml_purge(struct mbuf_list *ml)
1621 {
1622 	struct mbuf *m, *n;
1623 	unsigned int len;
1624 
1625 	for (m = ml->ml_head; m != NULL; m = n) {
1626 		n = m->m_nextpkt;
1627 		m_freem(m);
1628 	}
1629 
1630 	len = ml->ml_len;
1631 	ml_init(ml);
1632 
1633 	return (len);
1634 }
1635 
1636 unsigned int
1637 ml_hdatalen(struct mbuf_list *ml)
1638 {
1639 	struct mbuf *m;
1640 
1641 	m = ml->ml_head;
1642 	if (m == NULL)
1643 		return (0);
1644 
1645 	KASSERT(ISSET(m->m_flags, M_PKTHDR));
1646 	return (m->m_pkthdr.len);
1647 }
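
/*
 * Example (editor's sketch): batching packets on a caller-owned,
 * unlocked list and draining it ("m0" and "m1" are illustrative
 * packets):
 *
 *	struct mbuf_list ml;
 *	struct mbuf *m;
 *
 *	ml_init(&ml);
 *	ml_enqueue(&ml, m0);
 *	ml_enqueue(&ml, m1);
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		(process m)
 */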
1648 
1649 /*
1650  * mbuf queues
1651  */
1652 
1653 void
1654 mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
1655 {
1656 	mtx_init(&mq->mq_mtx, ipl);
1657 	ml_init(&mq->mq_list);
1658 	mq->mq_maxlen = maxlen;
1659 }
1660 
1661 int
1662 mq_push(struct mbuf_queue *mq, struct mbuf *m)
1663 {
1664 	struct mbuf *dropped = NULL;
1665 
1666 	mtx_enter(&mq->mq_mtx);
1667 	if (mq_len(mq) >= mq->mq_maxlen) {
1668 		mq->mq_drops++;
1669 		dropped = ml_dequeue(&mq->mq_list);
1670 	}
1671 	ml_enqueue(&mq->mq_list, m);
1672 	mtx_leave(&mq->mq_mtx);
1673 
1674 	if (dropped)
1675 		m_freem(dropped);
1676 
1677 	return (dropped != NULL);
1678 }
1679 
1680 int
1681 mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
1682 {
1683 	int dropped = 0;
1684 
1685 	mtx_enter(&mq->mq_mtx);
1686 	if (mq_len(mq) < mq->mq_maxlen)
1687 		ml_enqueue(&mq->mq_list, m);
1688 	else {
1689 		mq->mq_drops++;
1690 		dropped = 1;
1691 	}
1692 	mtx_leave(&mq->mq_mtx);
1693 
1694 	if (dropped)
1695 		m_freem(m);
1696 
1697 	return (dropped);
1698 }
1699 
1700 struct mbuf *
1701 mq_dequeue(struct mbuf_queue *mq)
1702 {
1703 	struct mbuf *m;
1704 
1705 	mtx_enter(&mq->mq_mtx);
1706 	m = ml_dequeue(&mq->mq_list);
1707 	mtx_leave(&mq->mq_mtx);
1708 
1709 	return (m);
1710 }
1711 
1712 int
1713 mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
1714 {
1715 	struct mbuf *m;
1716 	int dropped = 0;
1717 
1718 	mtx_enter(&mq->mq_mtx);
1719 	if (mq_len(mq) < mq->mq_maxlen)
1720 		ml_enlist(&mq->mq_list, ml);
1721 	else {
1722 		dropped = ml_len(ml);
1723 		mq->mq_drops += dropped;
1724 	}
1725 	mtx_leave(&mq->mq_mtx);
1726 
1727 	if (dropped) {
1728 		while ((m = ml_dequeue(ml)) != NULL)
1729 			m_freem(m);
1730 	}
1731 
1732 	return (dropped);
1733 }
1734 
1735 void
1736 mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
1737 {
1738 	mtx_enter(&mq->mq_mtx);
1739 	*ml = mq->mq_list;
1740 	ml_init(&mq->mq_list);
1741 	mtx_leave(&mq->mq_mtx);
1742 }
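
/*
 * Example (editor's sketch): the usual producer/consumer split with a
 * mutex-protected queue.  "sc" is an illustrative softc whose sc_rxq
 * was set up with mq_init().  An interrupt handler enqueues single
 * mbufs; a task later grabs the whole batch in one operation:
 *
 *	(producer, e.g. interrupt context)
 *	mq_enqueue(&sc->sc_rxq, m);
 *
 *	(consumer)
 *	struct mbuf_list ml;
 *	struct mbuf *m;
 *
 *	mq_delist(&sc->sc_rxq, &ml);
 *	while ((m = ml_dequeue(&ml)) != NULL)
 *		(process m)
 */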
1743 
1744 struct mbuf *
1745 mq_dechain(struct mbuf_queue *mq)
1746 {
1747 	struct mbuf *m0;
1748 
1749 	mtx_enter(&mq->mq_mtx);
1750 	m0 = ml_dechain(&mq->mq_list);
1751 	mtx_leave(&mq->mq_mtx);
1752 
1753 	return (m0);
1754 }
1755 
1756 unsigned int
1757 mq_purge(struct mbuf_queue *mq)
1758 {
1759 	struct mbuf_list ml;
1760 
1761 	mq_delist(mq, &ml);
1762 
1763 	return (ml_purge(&ml));
1764 }
1765 
1766 unsigned int
1767 mq_hdatalen(struct mbuf_queue *mq)
1768 {
1769 	unsigned int hdatalen;
1770 
1771 	mtx_enter(&mq->mq_mtx);
1772 	hdatalen = ml_hdatalen(&mq->mq_list);
1773 	mtx_leave(&mq->mq_mtx);
1774 
1775 	return (hdatalen);
1776 }
1777 
1778 void
1779 mq_set_maxlen(struct mbuf_queue *mq, u_int maxlen)
1780 {
1781 	mtx_enter(&mq->mq_mtx);
1782 	mq->mq_maxlen = maxlen;
1783 	mtx_leave(&mq->mq_mtx);
1784 }
1785 
1786 int
1787 sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1788     void *newp, size_t newlen, struct mbuf_queue *mq)
1789 {
1790 	unsigned int maxlen;
1791 	int error;
1792 
1793 	/* All sysctl names at this level are terminal. */
1794 	if (namelen != 1)
1795 		return (ENOTDIR);
1796 
1797 	switch (name[0]) {
1798 	case IFQCTL_LEN:
1799 		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
1800 	case IFQCTL_MAXLEN:
1801 		maxlen = mq->mq_maxlen;
1802 		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
1803 		if (error == 0)
1804 			mq_set_maxlen(mq, maxlen);
1805 		return (error);
1806 	case IFQCTL_DROPS:
1807 		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
1808 	default:
1809 		return (EOPNOTSUPP);
1810 	}
1811 	/* NOTREACHED */
1812 }
1813