/*	$OpenBSD: uipc_mbuf.c,v 1.294 2024/09/10 14:52:42 bluhm Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <sys/socket.h>
#include <net/if.h>


#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif	/* NPF > 0 */

/* mbuf stats */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
/* mbuf pools */
struct	pool mbpool;
struct	pool mtagpool;

/* mbuf cluster pools */
u_int	mclsizes[MCLPOOLS] = {
	MCLBYTES,	/* must be at slot 0 */
	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static	char mclnames[MCLPOOLS][8];
struct	pool mclpools[MCLPOOLS];

struct pool *m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */

struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);

void	m_extfree(struct mbuf *);
void	m_zero(struct mbuf *);

unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */

void	*m_pool_alloc(struct pool *, int, int *);
void	m_pool_free(struct pool *, void *);

struct pool_allocator m_pool_allocator = {
	m_pool_alloc,
	m_pool_free,
	0 /* will be copied from pool_allocator_multi */
};

static void (*mextfree_fns[4])(caddr_t, u_int, void *);
static u_int num_extfree_fns;

#define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
#define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i, error;
	unsigned int lowbits;

	CTASSERT(MSIZE == sizeof(struct mbuf));

	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;

	mbuf_mem_alloc = 0;

#if DIAGNOSTIC
	if (mclsizes[0] != MCLBYTES)
		panic("mbinit: the smallest cluster size != MCLBYTES");
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");

	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
	    IPL_NET, 0, "mtagpl", NULL);

	for (i = 0; i < nitems(mclsizes); i++) {
		lowbits = mclsizes[i] & ((1 << 10) - 1);
		if (lowbits) {
			snprintf(mclnames[i], sizeof(mclnames[0]),
			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
		} else {
			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
			    mclsizes[i] >> 10);
		}

		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
	}

	error = nmbclust_update(nmbclust);
	KASSERT(error == 0);

	(void)mextfree_register(m_extfree_pool);
	KASSERT(num_extfree_fns == 1);
}

void
mbcpuinit(void)
{
	int i;

	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);

	pool_cache_init(&mbpool);
	pool_cache_init(&mtagpool);

	for (i = 0; i < nitems(mclsizes); i++)
		pool_cache_init(&mclpools[i]);
}

int
nmbclust_update(long newval)
{
	int i;

	if (newval <= 0 || newval > LONG_MAX / MCLBYTES)
		return ERANGE;
	/* update the global mbuf memory limit */
	nmbclust = newval;
	mbuf_mem_limit = nmbclust * MCLBYTES;

	pool_wakeup(&mbpool);
	for (i = 0; i < nitems(mclsizes); i++)
		pool_wakeup(&mclpools[i]);

	return 0;
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_inc(mbstat, type);
	splx(s);

	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * they may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_inc(mbstat, type);
	splx(s);

	m->m_type = type;

	return (m_inithdr(m));
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

static inline void
m_clearhdr(struct mbuf *m)
{
	/* delete all mbuf tags to reset the state */
	m_tag_delete_chain(m);
#if NPF > 0
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */

	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_removehdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m_clearhdr(m);
	m->m_flags &= ~M_PKTHDR;
}

void
m_resethdr(struct mbuf *m)
{
	int len = m->m_pkthdr.len;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;

	KASSERT(m->m_flags & M_PKTHDR);
	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
	m_clearhdr(m);
	/* like m_inithdr(), but keep any associated data and mbufs */
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	m->m_pkthdr.len = len;
	m->m_pkthdr.ph_loopcnt = loopcnt;
}

void
m_calchdrlen(struct mbuf *m)
{
	struct mbuf *n;
	int plen = 0;

	KASSERT(m->m_flags & M_PKTHDR);
	for (n = m; n; n = n->m_next)
		plen += n->m_len;
	m->m_pkthdr.len = plen;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct pool *
m_clpool(u_int pktlen)
{
	struct pool *pp;
	int pi;

	for (pi = 0; pi < nitems(mclpools); pi++) {
		pp = &mclpools[pi];
		if (pktlen <= pp->pr_size)
			return (pp);
	}

	return (NULL);
}

struct mbuf *
m_clget(struct mbuf *m, int how, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	struct pool *pp;
	caddr_t buf;

	pp = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pp == NULL)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (m == NULL) {
		m0 = m_gethdr(how, MT_DATA);
		if (m0 == NULL)
			return (NULL);

		m = m0;
	}
	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (buf == NULL) {
		m_freem(m0);
		return (NULL);
	}

	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
	return (m);
}

void
m_extfree_pool(caddr_t buf, u_int size, void *pp)
{
	pool_put(pp, buf);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_dec(mbstat, m->m_type);
	splx(s);

	n = m->m_next;
	if (m->m_flags & M_ZEROIZE) {
		m_zero(m);
		/* propagate M_ZEROIZE to the next mbuf in the chain */
		if (n)
			n->m_flags |= M_ZEROIZE;
	}
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);
#if NPF > 0
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */
	}
	if (m->m_flags & M_EXT)
		m_extfree(m);

	pool_put(&mbpool, m);

	return (n);
}

void
m_extref(struct mbuf *o, struct mbuf *n)
{
	int refs = MCLISREFERENCED(o);

	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);

	if (refs)
		mtx_enter(&m_extref_mtx);
	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
	n->m_ext.ext_prevref = o;
	o->m_ext.ext_nextref = n;
	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
	if (refs)
		mtx_leave(&m_extref_mtx);

	MCLREFDEBUGN((n), __FILE__, __LINE__);
}

static inline u_int
m_extunref(struct mbuf *m)
{
	int refs = 0;

	if (!MCLISREFERENCED(m))
		return (0);

	mtx_enter(&m_extref_mtx);
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
		refs = 1;
	}
	mtx_leave(&m_extref_mtx);

	return (refs);
}

/*
 * Returns a number for use with MEXTADD.
 * Should only be called once per free function.
 * Drivers can be assured that the index will be non-zero.
 */
u_int
mextfree_register(void (*fn)(caddr_t, u_int, void *))
{
	KASSERT(num_extfree_fns < nitems(mextfree_fns));
	mextfree_fns[num_extfree_fns] = fn;
	return num_extfree_fns++;
}
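
/*
 * Usage sketch (editor's illustration, compiled out): a driver may
 * register its own external-free function once at attach time and then
 * hang driver-owned buffers off mbufs with MEXTADD.  Everything with an
 * "xx" prefix below is hypothetical.
 */
#if 0
struct xx_softc;
void xx_buf_put(struct xx_softc *, caddr_t, u_int);	/* hypothetical */

u_int xx_extfree_type;

void
xx_extfree(caddr_t buf, u_int size, void *arg)
{
	struct xx_softc *sc = arg;

	xx_buf_put(sc, buf, size);	/* recycle the buffer */
}

void
xx_attach_extfree(void)
{
	xx_extfree_type = mextfree_register(xx_extfree);
}

struct mbuf *
xx_buf_to_mbuf(struct xx_softc *sc, caddr_t buf, u_int size)
{
	struct mbuf *m;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	/* freed via xx_extfree() once the last reference goes away */
	MEXTADD(m, buf, size, M_EXTWR, xx_extfree_type, sc);
	return (m);
}
#endif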

void
m_extfree(struct mbuf *m)
{
	if (m_extunref(m) == 0) {
		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	}

	m->m_flags &= ~(M_EXT|M_EXTWR);
}

struct mbuf *
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return (NULL);

	n = m->m_nextpkt;

	do
		m = m_free(m);
	while (m != NULL);

	return (n);
}

void
m_purge(struct mbuf *m)
{
	while (m != NULL)
		m = m_freem(m);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to
 * defragment an mbuf chain into a single buffer without changing the
 * mbuf pointer.  It needs to know a lot about the mbuf internals to
 * make this work.  The resulting mbuf is not aligned to the IP header,
 * to assist DMA transfers.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return (0);

	KASSERT(m->m_flags & M_PKTHDR);

	counters_inc(mbstat, MBSTAT_DEFRAG_ALLOC);
	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return (ENOBUFS);
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETL(m0, how, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return (ENOBUFS);
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;
	if (m->m_flags & M_EXT)
		m_extfree(m);

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		memcpy(m->m_data, m0->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;

	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
	m_free(m0);

	return (0);
}
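
/*
 * Usage sketch (editor's illustration, compiled out): transmit paths
 * commonly fall back to m_defrag() when bus_dmamap_load_mbuf() returns
 * EFBIG because the chain has more segments than the dmamap allows.
 * "struct xx_softc" and its sc_dmat member are hypothetical; bus_dma
 * types come from <machine/bus.h>.
 */
#if 0
struct xx_softc {
	bus_dma_tag_t	sc_dmat;
};

int
xx_load_mbuf(struct xx_softc *sc, bus_dmamap_t map, struct mbuf *m)
{
	switch (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT)) {
	case 0:
		return (0);
	case EFBIG:
		/* too many segments: flatten the chain and retry once */
		if (m_defrag(m, M_DONTWAIT) == 0 &&
		    bus_dmamap_load_mbuf(sc->sc_dmat, map, m,
		    BUS_DMA_NOWAIT) == 0)
			return (0);
		/* FALLTHROUGH */
	default:
		return (ENOBUFS);
	}
}
#endif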

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (m_leadingspace(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		counters_inc(mbstat, MBSTAT_PREPEND_ALLOC);
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		m_align(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}
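
/*
 * Usage sketch (editor's illustration, compiled out): prepending an
 * encapsulation header via the M_PREPEND() wrapper around m_prepend().
 * On failure the chain has already been freed.  "struct xx_hdr" is
 * hypothetical.
 */
#if 0
struct xx_hdr {
	uint32_t	xh_tag;
};

struct mbuf *
xx_encap(struct mbuf *m, uint32_t tag)
{
	struct xx_hdr *xh;

	M_PREPEND(m, sizeof(*xh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);		/* chain was freed */
	xh = mtod(m, struct xx_hdr *);
	xh->xh_tag = htonl(tag);
	return (m);
}
#endif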

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
struct mbuf *
m_copym(struct mbuf *m0, int off, int len, int wait)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym0: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym0: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym0: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			n->m_data += m->m_data -
			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
			n->m_data += off;
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
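
/*
 * Usage sketch (editor's illustration, compiled out): m_copym() with
 * M_COPYALL hands a reference copy of a whole packet to a second
 * consumer without modifying the original chain; cluster data is
 * shared, not duplicated.  xx_monitor_input() is hypothetical and
 * would free n.
 */
#if 0
void xx_monitor_input(struct mbuf *);	/* hypothetical consumer */

void
xx_tap(struct mbuf *m)
{
	struct mbuf *n;

	n = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
	if (n == NULL)
		return;			/* best effort, skip the copy */
	xx_monitor_input(n);
}
#endif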

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *p)
{
	caddr_t cp = p;
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		memmove(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
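
/*
 * Usage sketch (editor's illustration, compiled out): extracting a
 * header that may span several mbufs into flat storage.  The caller
 * must bounds-check against m_pkthdr.len first, since a short chain
 * panics.  "struct xx_hdr" is hypothetical.
 */
#if 0
struct xx_hdr {
	uint32_t	xh_field[2];
};

int
xx_peek_hdr(struct mbuf *m, int off, struct xx_hdr *xh)
{
	if (m->m_pkthdr.len < off + (int)sizeof(*xh))
		return (EMSGSIZE);
	m_copydata(m, off, sizeof(*xh), xh);
	return (0);
}
#endif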

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary. The mbuf needs to be properly initialized
 * including the setting of m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETL(n, wait, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			memset(mtod(n, caddr_t), 0, off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    m_trailingspace(m));
		mlen = min(m->m_len - off, len);
		memmove(mtod(m, caddr_t) + off, cp, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETL(n, wait, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}
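
/*
 * Usage sketch (editor's illustration, compiled out): m_copyback() is
 * the write-side counterpart of m_copydata(); here a 16-bit checksum
 * field is patched at a given offset, letting m_copyback() allocate
 * mbufs if the chain is too short.
 */
#if 0
int
xx_patch_cksum(struct mbuf *m, int off, uint16_t sum)
{
	/* returns ENOBUFS if the chain could not be extended */
	return (m_copyback(m, off, sizeof(sum), &sum, M_DONTWAIT));
}
#endif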

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data portion
 * of n could end up in an mbuf of a different mbuf type.
 * Therefore both chains should be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if (mp == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		m = mp;
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_data += m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_data += len;
				m->m_len -= len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		m = mp;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len = count;
		m = mp;
		for (;;) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
			m = m->m_next;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 */
struct mbuf *
m_pullup(struct mbuf *m0, int len)
{
	struct mbuf *m;
	unsigned int adj;
	caddr_t head, tail;
	unsigned int space;

	/* if len is already contig in m0, then don't do any work */
	if (len <= m0->m_len)
		return (m0);

	/* look for some data */
	m = m0->m_next;
	if (m == NULL)
		goto freem0;

	head = M_DATABUF(m0);
	if (m0->m_len == 0) {
		while (m->m_len == 0) {
			m = m_free(m);
			if (m == NULL)
				goto freem0;
		}

		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
	} else
		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);

	tail = head + M_SIZE(m0);
	head += adj;

	if (!M_READONLY(m0) && len <= tail - head) {
		/* we can copy everything into the first mbuf */
		if (m0->m_len == 0) {
			m0->m_data = head;
		} else if (len > tail - mtod(m0, caddr_t)) {
			/* need to memmove to make space at the end */
			memmove(head, mtod(m0, caddr_t), m0->m_len);
			m0->m_data = head;
		}
		len -= m0->m_len;
		counters_inc(mbstat, MBSTAT_PULLUP_COPY);
	} else {
		/* the first mbuf is too small or read-only, make a new one */
		space = adj + len;

		if (space > MAXMCLBYTES)
			goto bad;

		m0->m_next = m;
		m = m0;

		counters_inc(mbstat, MBSTAT_PULLUP_ALLOC);
		MGET(m0, M_DONTWAIT, m->m_type);
		if (m0 == NULL)
			goto bad;

		if (space > MHLEN) {
			MCLGETL(m0, M_DONTWAIT, space);
			if ((m0->m_flags & M_EXT) == 0)
				goto bad;
		}

		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m0, m);

		m0->m_len = 0;
		m0->m_data += adj;
	}

	KDASSERT(m_trailingspace(m0) >= len);

	for (;;) {
		space = min(len, m->m_len);
		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
		len -= space;
		m0->m_len += space;
		m->m_len -= space;

		if (m->m_len > 0)
			m->m_data += space;
		else
			m = m_free(m);

		if (len == 0)
			break;

		if (m == NULL)
			goto bad;
	}

	m0->m_next = m; /* link the chain back up */

	return (m0);

bad:
	m_freem(m);
freem0:
	m_free(m0);
	return (NULL);
}
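
/*
 * Usage sketch (editor's illustration, compiled out): the canonical
 * m_pullup() pattern from protocol input, making the first hlen bytes
 * contiguous before casting with mtod().  On failure the chain has
 * already been freed.
 */
#if 0
struct mbuf *
xx_check_hdr(struct mbuf *m, int hlen)
{
	if (m->m_len < hlen && (m = m_pullup(m, hlen)) == NULL)
		return (NULL);	/* chain was freed by m_pullup() */
	/* mtod(m, caddr_t) is now valid for hlen contiguous bytes */
	return (m);
}
#endif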

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (remain == 0) {
			n->m_next = m->m_next;
			m->m_next = NULL;
			n->m_len = 0;
			return (n);
		}
		if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		}
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
	}
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		m_align(n, remain);
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
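
/*
 * Usage sketch (editor's illustration, compiled out): splitting a
 * packet for fragmentation-style processing.  On success m keeps the
 * first len0 bytes and the returned tail carries the rest, including a
 * duplicated packet header.
 */
#if 0
int
xx_fragment(struct mbuf *m, int len0, struct mbuf **tailp)
{
	struct mbuf *n;

	n = m_split(m, len0, M_DONTWAIT);
	if (n == NULL)
		return (ENOBUFS);	/* m was restored, caller keeps it */
	*tailp = n;
	return (0);
}
#endif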

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0->m_flags & M_PKTHDR);
	/*
	 * Limit the size of the new header to MHLEN.  In case
	 * skip = 0 and the first buffer is not a cluster this
	 * is the maximum space available in that mbuf.
	 * In other words this code never prepends an mbuf.
	 */
	KASSERT(hlen < MHLEN);

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (skip < remain && hlen <= m_leadingspace(m)) {
		if (skip)
			memmove(m->m_data-hlen, m->m_data, skip);
		m->m_data -= hlen;
		m->m_len += hlen;
		*off = skip;
	} else if (hlen > m_trailingspace(m)) {
		struct mbuf *n;

		if (remain > 0) {
			MGET(n, M_DONTWAIT, m->m_type);
			if (n && remain > MLEN) {
				MCLGETL(n, M_DONTWAIT, remain);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL)
				return (NULL);

			memcpy(n->m_data, mtod(m, char *) + skip, remain);
			n->m_len = remain;
			m->m_len -= remain;

			n->m_next = m->m_next;
			m->m_next = n;
		}

		if (hlen <= m_trailingspace(m)) {
			m->m_len += hlen;
			*off = skip;
		} else {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				return NULL;

			n->m_len = hlen;

			n->m_next = m->m_next;
			m->m_next = n;

			*off = 0;	/* header is at front ... */
			m = n;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		if (remain > 0)
			memmove(mtod(m, caddr_t) + skip + hlen,
			      mtod(m, caddr_t) + skip, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}
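
/*
 * Usage sketch (editor's illustration, compiled out): IPsec-style code
 * uses m_makespace() to open a gap of hlen bytes at offset skip and
 * then writes the new header at mtod(mp, caddr_t) + off.  If NULL comes
 * back the caller must still free the chain.
 */
#if 0
int
xx_insert_hdr(struct mbuf *m0, int skip, caddr_t hdr, int hlen)
{
	struct mbuf *mp;
	int off;

	mp = m_makespace(m0, skip, hlen, &off);
	if (mp == NULL)
		return (ENOBUFS);
	memcpy(mtod(mp, caddr_t) + off, hdr, hlen);
	return (0);
}
#endif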

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off)
{
	struct mbuf	*m;
	struct mbuf	*top, **mp;
	int		 len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				/*
				 * As we might get called by pfkey, make sure
				 * we do not leak sensitive data.
				 */
				top->m_flags |= M_ZEROIZE;
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);
		memcpy(mtod(m, void *), buf, (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}
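
/*
 * Usage sketch (editor's illustration, compiled out): a driver whose
 * receive buffers live in device-local memory copies a frame into a
 * fresh chain with m_devget().  An offset of 2 keeps the IP header
 * longword aligned behind the 14-byte Ethernet header.
 */
#if 0
struct mbuf *
xx_rxeof(char *devbuf, int totlen)
{
	/* returns NULL if allocation fails; devbuf is left untouched */
	return (m_devget(devbuf, totlen, 2));
}
#endif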

void
m_zero(struct mbuf *m)
{
	if (M_READONLY(m)) {
		mtx_enter(&m_extref_mtx);
		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
		}
		mtx_leave(&m_extref_mtx);
		return;
	}

	explicit_bzero(M_DATABUF(m), M_SIZE(m));
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
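
/*
 * Usage sketch (editor's illustration, compiled out): m_apply() walks
 * the chain without flattening it; here a trivial byte sum is
 * accumulated through the fstate pointer.  A nonzero return from the
 * callback would abort the walk.
 */
#if 0
int
xx_sum_cb(caddr_t fstate, caddr_t data, unsigned int len)
{
	uint32_t *sum = (uint32_t *)fstate;

	while (len-- > 0)
		*sum += (unsigned char)*data++;
	return (0);
}

uint32_t
xx_sum(struct mbuf *m, int off, int len)
{
	uint32_t sum = 0;

	m_apply(m, off, len, xx_sum_cb, (caddr_t)&sum);
	return (sum);
}
#endif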

/*
 * Compute the amount of space available before the current start of data
 * in an mbuf. Read-only clusters never have space available.
 */
int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(m->m_data >= M_DATABUF(m));
	return m->m_data - M_DATABUF(m);
}

/*
 * Compute the amount of space available after the end of data in an mbuf.
 * Read-only clusters never have space available.
 */
int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of
 * the specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	KASSERT(len >= 0 && !M_READONLY(m));
	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));

	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
}

/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

#if NPF > 0
	to->m_pkthdr.pf.statekey = NULL;
	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
	to->m_pkthdr.pf.inp = NULL;
	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif	/* NPF > 0 */

	SLIST_INIT(&to->m_pkthdr.ph_tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}

struct mbuf *
m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
{
	struct mbuf *m;
	int len;

	KASSERT(m0->m_flags & M_PKTHDR);

	len = m0->m_pkthdr.len + adj;
	if (len > MAXMCLBYTES) /* XXX */
		return (NULL);

	m = m_get(wait, m0->m_type);
	if (m == NULL)
		return (NULL);

	if (m_dup_pkthdr(m, m0, wait) != 0)
		goto fail;

	if (len > MHLEN) {
		MCLGETL(m, wait, len);
		if (!ISSET(m->m_flags, M_EXT))
			goto fail;
	}

	m->m_len = m->m_pkthdr.len = len;
	m_adj(m, adj);
	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));

	return (m);

fail:
	m_freem(m);
	return (NULL);
}

void
m_microtime(const struct mbuf *m, struct timeval *tv)
{
	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
		struct timeval btv, utv;

		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
		microboottime(&btv);
		timeradd(&btv, &utv, tv);
	} else
		microtime(tv);
}

void *
m_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
		goto fail;

	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
	if (v != NULL)
		return (v);

 fail:
	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
	return (NULL);
}

void
m_pool_free(struct pool *pp, void *v)
{
	(*pool_allocator_multi.pa_free)(pp, v);

	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
}

void
m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
{
	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
	pool_set_constraints(pp, &kp_dma_contig);
}

u_int
m_pool_used(void)
{
	return ((mbuf_mem_alloc * 100) / mbuf_mem_limit);
}

#ifdef DDB
void
m_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_ptkhdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
		(*pr)("m_ptkhdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
		(*pr)("m_pkthdr.csum_flags: %b\n",
		    m->m_pkthdr.csum_flags, MCS_BITS);
		(*pr)("m_pkthdr.ether_vtag: %u\tm_ptkhdr.ph_rtableid: %u\n",
		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, MPF_BITS);
		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}

const char *m_types[MT_NTYPES] = {
	"fre",
	"dat",
	"hdr",
	"nam",
	"opt",
	"ftb",
	"ctl",
	"oob",
};

void
m_print_chain(void *v, int deep,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m;
	const char *indent = deep ? "++-" : "-+-";
	size_t chain = 0, len = 0, size = 0;

	for (m = v; m != NULL; m = m->m_next) {
		const char *type;

		chain++;
		len += m->m_len;
		size += M_SIZE(m);
		type = (m->m_type >= 0 && m->m_type < MT_NTYPES) ?
		    m_types[m->m_type] : "???";
		(*pr)("%s mbuf %p, %s, off %zd, len %u", indent, m, type,
		    m->m_data - M_DATABUF(m), m->m_len);
		if (m->m_flags & M_PKTHDR)
			(*pr)(", pktlen %d", m->m_pkthdr.len);
		if (m->m_flags & M_EXT)
			(*pr)(", clsize %u", m->m_ext.ext_size);
		else
			(*pr)(", size %zu",
			    m->m_flags & M_PKTHDR ? MHLEN : MLEN);
		(*pr)("\n");
		indent = deep ? "|+-" : " +-";
	}
	indent = deep ? "|\\-" : " \\-";
	if (v != NULL) {
		(*pr)("%s total chain %zu, len %zu, size %zu\n",
		    indent, chain, len, size);
	}
}

void
m_print_packet(void *v, int deep,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m, *n;
	const char *indent = "+--";
	size_t pkts = 0;

	for (m = v; m != NULL; m = m->m_nextpkt) {
		size_t chain = 0, len = 0, size = 0;

		pkts++;
		if (deep) {
			m_print_chain(m, deep, pr);
			continue;
		}
		for (n = m; n != NULL; n = n->m_next) {
			chain++;
			len += n->m_len;
			size += M_SIZE(n);
		}
		(*pr)("%s mbuf %p, chain %zu", indent, m, chain);
		if (m->m_flags & M_PKTHDR)
			(*pr)(", pktlen %d", m->m_pkthdr.len);
		(*pr)(", len %zu, size %zu\n", len, size);
	}
	indent = "\\--";
	if (v != NULL)
		(*pr)("%s total packets %zu\n", indent, pkts);
}
#endif

/*
 * mbuf lists
 */

void
ml_init(struct mbuf_list *ml)
{
	ml->ml_head = ml->ml_tail = NULL;
	ml->ml_len = 0;
}

void
ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
{
	if (ml->ml_tail == NULL)
		ml->ml_head = ml->ml_tail = m;
	else {
		ml->ml_tail->m_nextpkt = m;
		ml->ml_tail = m;
	}

	m->m_nextpkt = NULL;
	ml->ml_len++;
}

void
ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
{
	if (!ml_empty(mlb)) {
		if (ml_empty(mla))
			mla->ml_head = mlb->ml_head;
		else
			mla->ml_tail->m_nextpkt = mlb->ml_head;
		mla->ml_tail = mlb->ml_tail;
		mla->ml_len += mlb->ml_len;

		ml_init(mlb);
	}
}

struct mbuf *
ml_dequeue(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m != NULL) {
		ml->ml_head = m->m_nextpkt;
		if (ml->ml_head == NULL)
			ml->ml_tail = NULL;

		m->m_nextpkt = NULL;
		ml->ml_len--;
	}

	return (m);
}

struct mbuf *
ml_dechain(struct mbuf_list *ml)
{
	struct mbuf *m0;

	m0 = ml->ml_head;

	ml_init(ml);

	return (m0);
}

unsigned int
ml_purge(struct mbuf_list *ml)
{
	struct mbuf *m, *n;
	unsigned int len;

	for (m = ml->ml_head; m != NULL; m = n) {
		n = m->m_nextpkt;
		m_freem(m);
	}

	len = ml->ml_len;
	ml_init(ml);

	return (len);
}

unsigned int
ml_hdatalen(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m == NULL)
		return (0);

	KASSERT(ISSET(m->m_flags, M_PKTHDR));
	return (m->m_pkthdr.len);
}
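
/*
 * Usage sketch (editor's illustration, compiled out): mbuf_list is the
 * unlocked building block; a typical receive path collects packets on
 * a stack-local list and hands the whole batch to the network stack.
 * "struct xx_softc" and xx_rx_next() are hypothetical.
 */
#if 0
struct xx_softc;
struct mbuf *xx_rx_next(struct xx_softc *);	/* hypothetical */

void
xx_rx(struct xx_softc *sc, struct ifnet *ifp)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;

	while ((m = xx_rx_next(sc)) != NULL)
		ml_enqueue(&ml, m);

	if_input(ifp, &ml);	/* consumes the list */
}
#endif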

/*
 * mbuf queues
 */

void
mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
{
	mtx_init(&mq->mq_mtx, ipl);
	ml_init(&mq->mq_list);
	mq->mq_maxlen = maxlen;
}

int
mq_push(struct mbuf_queue *mq, struct mbuf *m)
{
	struct mbuf *dropped = NULL;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) >= mq->mq_maxlen) {
		mq->mq_drops++;
		dropped = ml_dequeue(&mq->mq_list);
	}
	ml_enqueue(&mq->mq_list, m);
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(dropped);

	return (dropped != NULL);
}

int
mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
{
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enqueue(&mq->mq_list, m);
	else {
		mq->mq_drops++;
		dropped = 1;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(m);

	return (dropped);
}

struct mbuf *
mq_dequeue(struct mbuf_queue *mq)
{
	struct mbuf *m;

	mtx_enter(&mq->mq_mtx);
	m = ml_dequeue(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m);
}
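
/*
 * Usage sketch (editor's illustration, compiled out): mbuf_queue wraps
 * a list in a mutex, so an interrupt handler can hand packets to a
 * task.  mq_init()/MBUF_QUEUE_INITIALIZER() take the IPL of the
 * producer side; xx_process() is hypothetical and <sys/task.h> is
 * assumed for task_add()/systq.
 */
#if 0
void xx_process(struct mbuf *);	/* hypothetical, frees its argument */
void xx_task_fn(void *);

struct mbuf_queue xx_inq = MBUF_QUEUE_INITIALIZER(256, IPL_NET);
struct task xx_task = TASK_INITIALIZER(xx_task_fn, NULL);

void
xx_intr(struct mbuf *m)
{
	/* a nonzero return means the queue was full and m was freed */
	if (mq_enqueue(&xx_inq, m) == 0)
		task_add(systq, &xx_task);
}

void
xx_task_fn(void *null)
{
	struct mbuf_list ml;
	struct mbuf *m;

	mq_delist(&xx_inq, &ml);
	while ((m = ml_dequeue(&ml)) != NULL)
		xx_process(m);
}
#endif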

int
mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	struct mbuf *m;
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enlist(&mq->mq_list, ml);
	else {
		dropped = ml_len(ml);
		mq->mq_drops += dropped;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped) {
		while ((m = ml_dequeue(ml)) != NULL)
			m_freem(m);
	}

	return (dropped);
}

void
mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	mtx_enter(&mq->mq_mtx);
	*ml = mq->mq_list;
	ml_init(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);
}

struct mbuf *
mq_dechain(struct mbuf_queue *mq)
{
	struct mbuf *m0;

	mtx_enter(&mq->mq_mtx);
	m0 = ml_dechain(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m0);
}

unsigned int
mq_purge(struct mbuf_queue *mq)
{
	struct mbuf_list ml;

	mq_delist(mq, &ml);

	return (ml_purge(&ml));
}

unsigned int
mq_hdatalen(struct mbuf_queue *mq)
{
	unsigned int hdatalen;

	mtx_enter(&mq->mq_mtx);
	hdatalen = ml_hdatalen(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (hdatalen);
}

void
mq_set_maxlen(struct mbuf_queue *mq, u_int maxlen)
{
	mtx_enter(&mq->mq_mtx);
	mq->mq_maxlen = maxlen;
	mtx_leave(&mq->mq_mtx);
}

int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	unsigned int maxlen;
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		maxlen = mq->mq_maxlen;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
		if (error == 0)
			mq_set_maxlen(mq, maxlen);
		return (error);
	case IFQCTL_DROPS:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
1880