xref: /openbsd/sys/kern/uipc_mbuf.c (revision 0bf85d46)
1 /*	$OpenBSD: uipc_mbuf.c,v 1.290 2024/03/05 18:52:41 bluhm Exp $	*/
2 /*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
33  */
34 
35 /*
36  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
37  *
38  * NRL grants permission for redistribution and use in source and binary
39  * forms, with or without modification, of the software and documentation
40  * created at NRL provided that the following conditions are met:
41  *
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgements:
49  *	This product includes software developed by the University of
50  *	California, Berkeley and its contributors.
51  *	This product includes software developed at the Information
52  *	Technology Division, US Naval Research Laboratory.
53  * 4. Neither the name of the NRL nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
58  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
59  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
60  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
61  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
62  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
64  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
65  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
66  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
67  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68  *
69  * The views and conclusions contained in the software and documentation
70  * are those of the authors and should not be interpreted as representing
71  * official policies, either expressed or implied, of the US Naval
72  * Research Laboratory (NRL).
73  */
74 
75 #include "pf.h"
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/atomic.h>
80 #include <sys/mbuf.h>
81 #include <sys/pool.h>
82 #include <sys/percpu.h>
83 #include <sys/sysctl.h>
84 
85 #include <sys/socket.h>
86 #include <net/if.h>
87 
88 
89 #include <uvm/uvm_extern.h>
90 
91 #ifdef DDB
92 #include <machine/db_machdep.h>
93 #endif
94 
95 #if NPF > 0
96 #include <net/pfvar.h>
97 #endif	/* NPF > 0 */
98 
99 /* mbuf stats */
100 COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
101 struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
102 /* mbuf pools */
103 struct	pool mbpool;
104 struct	pool mtagpool;
105 
106 /* mbuf cluster pools */
107 u_int	mclsizes[MCLPOOLS] = {
108 	MCLBYTES,	/* must be at slot 0 */
109 	MCLBYTES + 2,	/* ETHER_ALIGNED 2k mbufs */
110 	4 * 1024,
111 	8 * 1024,
112 	9 * 1024,
113 	12 * 1024,
114 	16 * 1024,
115 	64 * 1024
116 };
117 static	char mclnames[MCLPOOLS][8];
118 struct	pool mclpools[MCLPOOLS];
119 
120 struct pool *m_clpool(u_int);
121 
122 int max_linkhdr;		/* largest link-level header */
123 int max_protohdr;		/* largest protocol header */
124 int max_hdr;			/* largest link+protocol header */
125 
126 struct	mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);
127 
128 void	m_extfree(struct mbuf *);
129 void	m_zero(struct mbuf *);
130 
131 unsigned long mbuf_mem_limit;	/* how much memory can be allocated */
132 unsigned long mbuf_mem_alloc;	/* how much memory has been allocated */
133 
134 void	*m_pool_alloc(struct pool *, int, int *);
135 void	m_pool_free(struct pool *, void *);
136 
137 struct pool_allocator m_pool_allocator = {
138 	m_pool_alloc,
139 	m_pool_free,
140 	0 /* will be copied from pool_allocator_multi */
141 };
142 
143 static void (*mextfree_fns[4])(caddr_t, u_int, void *);
144 static u_int num_extfree_fns;
145 
146 #define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
147 			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
148 #define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
149 			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)
150 
151 /*
152  * Initialize the mbuf allocator.
153  */
154 void
mbinit(void)155 mbinit(void)
156 {
157 	int i, error;
158 	unsigned int lowbits;
159 
160 	CTASSERT(MSIZE == sizeof(struct mbuf));
161 
162 	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
163 
164 	mbuf_mem_alloc = 0;
165 
166 #if DIAGNOSTIC
167 	if (mclsizes[0] != MCLBYTES)
168 		panic("mbinit: the smallest cluster size != MCLBYTES");
169 	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
170 		panic("mbinit: the largest cluster size != MAXMCLBYTES");
171 #endif
172 
173 	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
174 
175 	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
176 	    IPL_NET, 0, "mtagpl", NULL);
177 
178 	for (i = 0; i < nitems(mclsizes); i++) {
179 		lowbits = mclsizes[i] & ((1 << 10) - 1);
180 		if (lowbits) {
181 			snprintf(mclnames[i], sizeof(mclnames[0]),
182 			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
183 		} else {
184 			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
185 			    mclsizes[i] >> 10);
186 		}
187 
188 		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
189 	}
190 
191 	error = nmbclust_update(nmbclust);
192 	KASSERT(error == 0);
193 
194 	(void)mextfree_register(m_extfree_pool);
195 	KASSERT(num_extfree_fns == 1);
196 }
197 
198 void
mbcpuinit(void)199 mbcpuinit(void)
200 {
201 	int i;
202 
203 	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);
204 
205 	pool_cache_init(&mbpool);
206 	pool_cache_init(&mtagpool);
207 
208 	for (i = 0; i < nitems(mclsizes); i++)
209 		pool_cache_init(&mclpools[i]);
210 }
211 
212 int
nmbclust_update(long newval)213 nmbclust_update(long newval)
214 {
215 	int i;
216 
217 	if (newval <= 0 || newval > LONG_MAX / MCLBYTES)
218 		return ERANGE;
219 	/* update the global mbuf memory limit */
220 	nmbclust = newval;
221 	mbuf_mem_limit = nmbclust * MCLBYTES;
222 
223 	pool_wakeup(&mbpool);
224 	for (i = 0; i < nitems(mclsizes); i++)
225 		pool_wakeup(&mclpools[i]);
226 
227 	return 0;
228 }
229 
230 /*
231  * Space allocation routines.
232  */
233 struct mbuf *
m_get(int nowait,int type)234 m_get(int nowait, int type)
235 {
236 	struct mbuf *m;
237 	struct counters_ref cr;
238 	uint64_t *counters;
239 	int s;
240 
241 	KASSERT(type >= 0 && type < MT_NTYPES);
242 
243 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
244 	if (m == NULL)
245 		return (NULL);
246 
247 	s = splnet();
248 	counters = counters_enter(&cr, mbstat);
249 	counters[type]++;
250 	counters_leave(&cr, mbstat);
251 	splx(s);
252 
253 	m->m_type = type;
254 	m->m_next = NULL;
255 	m->m_nextpkt = NULL;
256 	m->m_data = m->m_dat;
257 	m->m_flags = 0;
258 
259 	return (m);
260 }
261 
262 /*
263  * ATTN: When changing anything here check m_inithdr() and m_defrag() those
264  * may need to change as well.
265  */
266 struct mbuf *
m_gethdr(int nowait,int type)267 m_gethdr(int nowait, int type)
268 {
269 	struct mbuf *m;
270 	struct counters_ref cr;
271 	uint64_t *counters;
272 	int s;
273 
274 	KASSERT(type >= 0 && type < MT_NTYPES);
275 
276 	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
277 	if (m == NULL)
278 		return (NULL);
279 
280 	s = splnet();
281 	counters = counters_enter(&cr, mbstat);
282 	counters[type]++;
283 	counters_leave(&cr, mbstat);
284 	splx(s);
285 
286 	m->m_type = type;
287 
288 	return (m_inithdr(m));
289 }
290 
291 struct mbuf *
m_inithdr(struct mbuf * m)292 m_inithdr(struct mbuf *m)
293 {
294 	/* keep in sync with m_gethdr */
295 	m->m_next = NULL;
296 	m->m_nextpkt = NULL;
297 	m->m_data = m->m_pktdat;
298 	m->m_flags = M_PKTHDR;
299 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
300 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
301 
302 	return (m);
303 }
304 
305 static inline void
m_clearhdr(struct mbuf * m)306 m_clearhdr(struct mbuf *m)
307 {
308 	/* delete all mbuf tags to reset the state */
309 	m_tag_delete_chain(m);
310 #if NPF > 0
311 	pf_mbuf_unlink_state_key(m);
312 	pf_mbuf_unlink_inpcb(m);
313 #endif	/* NPF > 0 */
314 
315 	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
316 }
317 
318 void
m_removehdr(struct mbuf * m)319 m_removehdr(struct mbuf *m)
320 {
321 	KASSERT(m->m_flags & M_PKTHDR);
322 	m_clearhdr(m);
323 	m->m_flags &= ~M_PKTHDR;
324 }
325 
326 void
m_resethdr(struct mbuf * m)327 m_resethdr(struct mbuf *m)
328 {
329 	int len = m->m_pkthdr.len;
330 	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;
331 
332 	KASSERT(m->m_flags & M_PKTHDR);
333 	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
334 	m_clearhdr(m);
335 	/* like m_inithdr(), but keep any associated data and mbufs */
336 	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
337 	m->m_pkthdr.len = len;
338 	m->m_pkthdr.ph_loopcnt = loopcnt;
339 }
340 
341 void
m_calchdrlen(struct mbuf * m)342 m_calchdrlen(struct mbuf *m)
343 {
344 	struct mbuf *n;
345 	int plen = 0;
346 
347 	KASSERT(m->m_flags & M_PKTHDR);
348 	for (n = m; n; n = n->m_next)
349 		plen += n->m_len;
350 	m->m_pkthdr.len = plen;
351 }
352 
353 struct mbuf *
m_getclr(int nowait,int type)354 m_getclr(int nowait, int type)
355 {
356 	struct mbuf *m;
357 
358 	MGET(m, nowait, type);
359 	if (m == NULL)
360 		return (NULL);
361 	memset(mtod(m, caddr_t), 0, MLEN);
362 	return (m);
363 }
364 
365 struct pool *
m_clpool(u_int pktlen)366 m_clpool(u_int pktlen)
367 {
368 	struct pool *pp;
369 	int pi;
370 
371 	for (pi = 0; pi < nitems(mclpools); pi++) {
372 		pp = &mclpools[pi];
373 		if (pktlen <= pp->pr_size)
374 			return (pp);
375 	}
376 
377 	return (NULL);
378 }
379 
380 struct mbuf *
m_clget(struct mbuf * m,int how,u_int pktlen)381 m_clget(struct mbuf *m, int how, u_int pktlen)
382 {
383 	struct mbuf *m0 = NULL;
384 	struct pool *pp;
385 	caddr_t buf;
386 
387 	pp = m_clpool(pktlen);
388 #ifdef DIAGNOSTIC
389 	if (pp == NULL)
390 		panic("m_clget: request for %u byte cluster", pktlen);
391 #endif
392 
393 	if (m == NULL) {
394 		m0 = m_gethdr(how, MT_DATA);
395 		if (m0 == NULL)
396 			return (NULL);
397 
398 		m = m0;
399 	}
400 	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
401 	if (buf == NULL) {
402 		m_freem(m0);
403 		return (NULL);
404 	}
405 
406 	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
407 	return (m);
408 }
409 
410 void
m_extfree_pool(caddr_t buf,u_int size,void * pp)411 m_extfree_pool(caddr_t buf, u_int size, void *pp)
412 {
413 	pool_put(pp, buf);
414 }
415 
416 struct mbuf *
m_free(struct mbuf * m)417 m_free(struct mbuf *m)
418 {
419 	struct mbuf *n;
420 	struct counters_ref cr;
421 	uint64_t *counters;
422 	int s;
423 
424 	if (m == NULL)
425 		return (NULL);
426 
427 	s = splnet();
428 	counters = counters_enter(&cr, mbstat);
429 	counters[m->m_type]--;
430 	counters_leave(&cr, mbstat);
431 	splx(s);
432 
433 	n = m->m_next;
434 	if (m->m_flags & M_ZEROIZE) {
435 		m_zero(m);
436 		/* propagate M_ZEROIZE to the next mbuf in the chain */
437 		if (n)
438 			n->m_flags |= M_ZEROIZE;
439 	}
440 	if (m->m_flags & M_PKTHDR) {
441 		m_tag_delete_chain(m);
442 #if NPF > 0
443 		pf_mbuf_unlink_state_key(m);
444 		pf_mbuf_unlink_inpcb(m);
445 #endif	/* NPF > 0 */
446 	}
447 	if (m->m_flags & M_EXT)
448 		m_extfree(m);
449 
450 	pool_put(&mbpool, m);
451 
452 	return (n);
453 }
454 
455 void
m_extref(struct mbuf * o,struct mbuf * n)456 m_extref(struct mbuf *o, struct mbuf *n)
457 {
458 	int refs = MCLISREFERENCED(o);
459 
460 	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);
461 
462 	if (refs)
463 		mtx_enter(&m_extref_mtx);
464 	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
465 	n->m_ext.ext_prevref = o;
466 	o->m_ext.ext_nextref = n;
467 	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
468 	if (refs)
469 		mtx_leave(&m_extref_mtx);
470 
471 	MCLREFDEBUGN((n), __FILE__, __LINE__);
472 }
473 
474 static inline u_int
m_extunref(struct mbuf * m)475 m_extunref(struct mbuf *m)
476 {
477 	int refs = 0;
478 
479 	if (!MCLISREFERENCED(m))
480 		return (0);
481 
482 	mtx_enter(&m_extref_mtx);
483 	if (MCLISREFERENCED(m)) {
484 		m->m_ext.ext_nextref->m_ext.ext_prevref =
485 		    m->m_ext.ext_prevref;
486 		m->m_ext.ext_prevref->m_ext.ext_nextref =
487 		    m->m_ext.ext_nextref;
488 		refs = 1;
489 	}
490 	mtx_leave(&m_extref_mtx);
491 
492 	return (refs);
493 }
494 
495 /*
496  * Returns a number for use with MEXTADD.
497  * Should only be called once per function.
498  * Drivers can be assured that the index will be non zero.
499  */
500 u_int
mextfree_register(void (* fn)(caddr_t,u_int,void *))501 mextfree_register(void (*fn)(caddr_t, u_int, void *))
502 {
503 	KASSERT(num_extfree_fns < nitems(mextfree_fns));
504 	mextfree_fns[num_extfree_fns] = fn;
505 	return num_extfree_fns++;
506 }
507 
508 void
m_extfree(struct mbuf * m)509 m_extfree(struct mbuf *m)
510 {
511 	if (m_extunref(m) == 0) {
512 		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
513 		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
514 		    m->m_ext.ext_size, m->m_ext.ext_arg);
515 	}
516 
517 	m->m_flags &= ~(M_EXT|M_EXTWR);
518 }
519 
520 struct mbuf *
m_freem(struct mbuf * m)521 m_freem(struct mbuf *m)
522 {
523 	struct mbuf *n;
524 
525 	if (m == NULL)
526 		return (NULL);
527 
528 	n = m->m_nextpkt;
529 
530 	do
531 		m = m_free(m);
532 	while (m != NULL);
533 
534 	return (n);
535 }
536 
537 void
m_purge(struct mbuf * m)538 m_purge(struct mbuf *m)
539 {
540 	while (m != NULL)
541 		m = m_freem(m);
542 }
543 
544 /*
545  * mbuf chain defragmenter. This function uses some evil tricks to defragment
546  * an mbuf chain into a single buffer without changing the mbuf pointer.
547  * This needs to know a lot of the mbuf internals to make this work.
548  * The resulting mbuf is not aligned to IP header to assist DMA transfers.
549  */
550 int
m_defrag(struct mbuf * m,int how)551 m_defrag(struct mbuf *m, int how)
552 {
553 	struct mbuf *m0;
554 
555 	if (m->m_next == NULL)
556 		return (0);
557 
558 	KASSERT(m->m_flags & M_PKTHDR);
559 
560 	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
561 		return (ENOBUFS);
562 	if (m->m_pkthdr.len > MHLEN) {
563 		MCLGETL(m0, how, m->m_pkthdr.len);
564 		if (!(m0->m_flags & M_EXT)) {
565 			m_free(m0);
566 			return (ENOBUFS);
567 		}
568 	}
569 	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
570 	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;
571 
572 	/* free chain behind and possible ext buf on the first mbuf */
573 	m_freem(m->m_next);
574 	m->m_next = NULL;
575 	if (m->m_flags & M_EXT)
576 		m_extfree(m);
577 
578 	/*
579 	 * Bounce copy mbuf over to the original mbuf and set everything up.
580 	 * This needs to reset or clear all pointers that may go into the
581 	 * original mbuf chain.
582 	 */
583 	if (m0->m_flags & M_EXT) {
584 		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
585 		MCLINITREFERENCE(m);
586 		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
587 		m->m_data = m->m_ext.ext_buf;
588 	} else {
589 		m->m_data = m->m_pktdat;
590 		memcpy(m->m_data, m0->m_data, m0->m_len);
591 	}
592 	m->m_pkthdr.len = m->m_len = m0->m_len;
593 
594 	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
595 	m_free(m0);
596 
597 	return (0);
598 }
599 
600 /*
601  * Mbuffer utility routines.
602  */
603 
604 /*
605  * Ensure len bytes of contiguous space at the beginning of the mbuf chain
606  */
607 struct mbuf *
m_prepend(struct mbuf * m,int len,int how)608 m_prepend(struct mbuf *m, int len, int how)
609 {
610 	struct mbuf *mn;
611 
612 	if (len > MHLEN)
613 		panic("mbuf prepend length too big");
614 
615 	if (m_leadingspace(m) >= len) {
616 		m->m_data -= len;
617 		m->m_len += len;
618 	} else {
619 		MGET(mn, how, m->m_type);
620 		if (mn == NULL) {
621 			m_freem(m);
622 			return (NULL);
623 		}
624 		if (m->m_flags & M_PKTHDR)
625 			M_MOVE_PKTHDR(mn, m);
626 		mn->m_next = m;
627 		m = mn;
628 		m_align(m, len);
629 		m->m_len = len;
630 	}
631 	if (m->m_flags & M_PKTHDR)
632 		m->m_pkthdr.len += len;
633 	return (m);
634 }
635 
636 /*
637  * Make a copy of an mbuf chain starting "off" bytes from the beginning,
638  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
639  * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
640  */
641 struct mbuf *
m_copym(struct mbuf * m0,int off,int len,int wait)642 m_copym(struct mbuf *m0, int off, int len, int wait)
643 {
644 	struct mbuf *m, *n, **np;
645 	struct mbuf *top;
646 	int copyhdr = 0;
647 
648 	if (off < 0 || len < 0)
649 		panic("m_copym0: off %d, len %d", off, len);
650 	if (off == 0 && m0->m_flags & M_PKTHDR)
651 		copyhdr = 1;
652 	if ((m = m_getptr(m0, off, &off)) == NULL)
653 		panic("m_copym0: short mbuf chain");
654 	np = &top;
655 	top = NULL;
656 	while (len > 0) {
657 		if (m == NULL) {
658 			if (len != M_COPYALL)
659 				panic("m_copym0: m == NULL and not COPYALL");
660 			break;
661 		}
662 		MGET(n, wait, m->m_type);
663 		*np = n;
664 		if (n == NULL)
665 			goto nospace;
666 		if (copyhdr) {
667 			if (m_dup_pkthdr(n, m0, wait))
668 				goto nospace;
669 			if (len != M_COPYALL)
670 				n->m_pkthdr.len = len;
671 			copyhdr = 0;
672 		}
673 		n->m_len = min(len, m->m_len - off);
674 		if (m->m_flags & M_EXT) {
675 			n->m_data = m->m_data + off;
676 			n->m_ext = m->m_ext;
677 			MCLADDREFERENCE(m, n);
678 		} else {
679 			n->m_data += m->m_data -
680 			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
681 			n->m_data += off;
682 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
683 			    n->m_len);
684 		}
685 		if (len != M_COPYALL)
686 			len -= n->m_len;
687 		off += n->m_len;
688 #ifdef DIAGNOSTIC
689 		if (off > m->m_len)
690 			panic("m_copym0 overrun");
691 #endif
692 		if (off == m->m_len) {
693 			m = m->m_next;
694 			off = 0;
695 		}
696 		np = &n->m_next;
697 	}
698 	return (top);
699 nospace:
700 	m_freem(top);
701 	return (NULL);
702 }
703 
704 /*
705  * Copy data from an mbuf chain starting "off" bytes from the beginning,
706  * continuing for "len" bytes, into the indicated buffer.
707  */
708 void
m_copydata(struct mbuf * m,int off,int len,void * p)709 m_copydata(struct mbuf *m, int off, int len, void *p)
710 {
711 	caddr_t cp = p;
712 	unsigned count;
713 
714 	if (off < 0)
715 		panic("m_copydata: off %d < 0", off);
716 	if (len < 0)
717 		panic("m_copydata: len %d < 0", len);
718 	if ((m = m_getptr(m, off, &off)) == NULL)
719 		panic("m_copydata: short mbuf chain");
720 	while (len > 0) {
721 		if (m == NULL)
722 			panic("m_copydata: null mbuf");
723 		count = min(m->m_len - off, len);
724 		memmove(cp, mtod(m, caddr_t) + off, count);
725 		len -= count;
726 		cp += count;
727 		off = 0;
728 		m = m->m_next;
729 	}
730 }
731 
732 /*
733  * Copy data from a buffer back into the indicated mbuf chain,
734  * starting "off" bytes from the beginning, extending the mbuf
735  * chain if necessary. The mbuf needs to be properly initialized
736  * including the setting of m_len.
737  */
738 int
m_copyback(struct mbuf * m0,int off,int len,const void * _cp,int wait)739 m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
740 {
741 	int mlen, totlen = 0;
742 	struct mbuf *m = m0, *n;
743 	caddr_t cp = (caddr_t)_cp;
744 	int error = 0;
745 
746 	if (m0 == NULL)
747 		return (0);
748 	while (off > (mlen = m->m_len)) {
749 		off -= mlen;
750 		totlen += mlen;
751 		if (m->m_next == NULL) {
752 			if ((n = m_get(wait, m->m_type)) == NULL) {
753 				error = ENOBUFS;
754 				goto out;
755 			}
756 
757 			if (off + len > MLEN) {
758 				MCLGETL(n, wait, off + len);
759 				if (!(n->m_flags & M_EXT)) {
760 					m_free(n);
761 					error = ENOBUFS;
762 					goto out;
763 				}
764 			}
765 			memset(mtod(n, caddr_t), 0, off);
766 			n->m_len = len + off;
767 			m->m_next = n;
768 		}
769 		m = m->m_next;
770 	}
771 	while (len > 0) {
772 		/* extend last packet to be filled fully */
773 		if (m->m_next == NULL && (len > m->m_len - off))
774 			m->m_len += min(len - (m->m_len - off),
775 			    m_trailingspace(m));
776 		mlen = min(m->m_len - off, len);
777 		memmove(mtod(m, caddr_t) + off, cp, mlen);
778 		cp += mlen;
779 		len -= mlen;
780 		totlen += mlen + off;
781 		if (len == 0)
782 			break;
783 		off = 0;
784 
785 		if (m->m_next == NULL) {
786 			if ((n = m_get(wait, m->m_type)) == NULL) {
787 				error = ENOBUFS;
788 				goto out;
789 			}
790 
791 			if (len > MLEN) {
792 				MCLGETL(n, wait, len);
793 				if (!(n->m_flags & M_EXT)) {
794 					m_free(n);
795 					error = ENOBUFS;
796 					goto out;
797 				}
798 			}
799 			n->m_len = len;
800 			m->m_next = n;
801 		}
802 		m = m->m_next;
803 	}
804 out:
805 	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
806 		m->m_pkthdr.len = totlen;
807 
808 	return (error);
809 }
810 
811 /*
812  * Concatenate mbuf chain n to m.
813  * n might be copied into m (when n->m_len is small), therefore data portion of
814  * n could be copied into an mbuf of different mbuf type.
815  * Therefore both chains should be of the same type (e.g. MT_DATA).
816  * Any m_pkthdr is not updated.
817  */
818 void
m_cat(struct mbuf * m,struct mbuf * n)819 m_cat(struct mbuf *m, struct mbuf *n)
820 {
821 	while (m->m_next)
822 		m = m->m_next;
823 	while (n) {
824 		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
825 			/* just join the two chains */
826 			m->m_next = n;
827 			return;
828 		}
829 		/* splat the data from one into the other */
830 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
831 		    n->m_len);
832 		m->m_len += n->m_len;
833 		n = m_free(n);
834 	}
835 }
836 
837 void
m_adj(struct mbuf * mp,int req_len)838 m_adj(struct mbuf *mp, int req_len)
839 {
840 	int len = req_len;
841 	struct mbuf *m;
842 	int count;
843 
844 	if (mp == NULL)
845 		return;
846 	if (len >= 0) {
847 		/*
848 		 * Trim from head.
849 		 */
850 		m = mp;
851 		while (m != NULL && len > 0) {
852 			if (m->m_len <= len) {
853 				len -= m->m_len;
854 				m->m_data += m->m_len;
855 				m->m_len = 0;
856 				m = m->m_next;
857 			} else {
858 				m->m_data += len;
859 				m->m_len -= len;
860 				len = 0;
861 			}
862 		}
863 		if (mp->m_flags & M_PKTHDR)
864 			mp->m_pkthdr.len -= (req_len - len);
865 	} else {
866 		/*
867 		 * Trim from tail.  Scan the mbuf chain,
868 		 * calculating its length and finding the last mbuf.
869 		 * If the adjustment only affects this mbuf, then just
870 		 * adjust and return.  Otherwise, rescan and truncate
871 		 * after the remaining size.
872 		 */
873 		len = -len;
874 		count = 0;
875 		m = mp;
876 		for (;;) {
877 			count += m->m_len;
878 			if (m->m_next == NULL)
879 				break;
880 			m = m->m_next;
881 		}
882 		if (m->m_len >= len) {
883 			m->m_len -= len;
884 			if (mp->m_flags & M_PKTHDR)
885 				mp->m_pkthdr.len -= len;
886 			return;
887 		}
888 		count -= len;
889 		if (count < 0)
890 			count = 0;
891 		/*
892 		 * Correct length for chain is "count".
893 		 * Find the mbuf with last data, adjust its length,
894 		 * and toss data from remaining mbufs on chain.
895 		 */
896 		if (mp->m_flags & M_PKTHDR)
897 			mp->m_pkthdr.len = count;
898 		m = mp;
899 		for (;;) {
900 			if (m->m_len >= count) {
901 				m->m_len = count;
902 				break;
903 			}
904 			count -= m->m_len;
905 			m = m->m_next;
906 		}
907 		while ((m = m->m_next) != NULL)
908 			m->m_len = 0;
909 	}
910 }
911 
912 /*
913  * Rearrange an mbuf chain so that len bytes are contiguous
914  * and in the data area of an mbuf (so that mtod will work
915  * for a structure of size len).  Returns the resulting
916  * mbuf chain on success, frees it and returns null on failure.
917  */
918 struct mbuf *
m_pullup(struct mbuf * m0,int len)919 m_pullup(struct mbuf *m0, int len)
920 {
921 	struct mbuf *m;
922 	unsigned int adj;
923 	caddr_t head, tail;
924 	unsigned int space;
925 
926 	/* if len is already contig in m0, then don't do any work */
927 	if (len <= m0->m_len)
928 		return (m0);
929 
930 	/* look for some data */
931 	m = m0->m_next;
932 	if (m == NULL)
933 		goto freem0;
934 
935 	head = M_DATABUF(m0);
936 	if (m0->m_len == 0) {
937 		while (m->m_len == 0) {
938 			m = m_free(m);
939 			if (m == NULL)
940 				goto freem0;
941 		}
942 
943 		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
944 	} else
945 		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);
946 
947 	tail = head + M_SIZE(m0);
948 	head += adj;
949 
950 	if (!M_READONLY(m0) && len <= tail - head) {
951 		/* we can copy everything into the first mbuf */
952 		if (m0->m_len == 0) {
953 			m0->m_data = head;
954 		} else if (len > tail - mtod(m0, caddr_t)) {
955 			/* need to memmove to make space at the end */
956 			memmove(head, mtod(m0, caddr_t), m0->m_len);
957 			m0->m_data = head;
958 		}
959 
960 		len -= m0->m_len;
961 	} else {
962 		/* the first mbuf is too small or read-only, make a new one */
963 		space = adj + len;
964 
965 		if (space > MAXMCLBYTES)
966 			goto bad;
967 
968 		m0->m_next = m;
969 		m = m0;
970 
971 		MGET(m0, M_DONTWAIT, m->m_type);
972 		if (m0 == NULL)
973 			goto bad;
974 
975 		if (space > MHLEN) {
976 			MCLGETL(m0, M_DONTWAIT, space);
977 			if ((m0->m_flags & M_EXT) == 0)
978 				goto bad;
979 		}
980 
981 		if (m->m_flags & M_PKTHDR)
982 			M_MOVE_PKTHDR(m0, m);
983 
984 		m0->m_len = 0;
985 		m0->m_data += adj;
986 	}
987 
988 	KDASSERT(m_trailingspace(m0) >= len);
989 
990 	for (;;) {
991 		space = min(len, m->m_len);
992 		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
993 		len -= space;
994 		m0->m_len += space;
995 		m->m_len -= space;
996 
997 		if (m->m_len > 0)
998 			m->m_data += space;
999 		else
1000 			m = m_free(m);
1001 
1002 		if (len == 0)
1003 			break;
1004 
1005 		if (m == NULL)
1006 			goto bad;
1007 	}
1008 
1009 	m0->m_next = m; /* link the chain back up */
1010 
1011 	return (m0);
1012 
1013 bad:
1014 	m_freem(m);
1015 freem0:
1016 	m_free(m0);
1017 	return (NULL);
1018 }
1019 
1020 /*
1021  * Return a pointer to mbuf/offset of location in mbuf chain.
1022  */
1023 struct mbuf *
m_getptr(struct mbuf * m,int loc,int * off)1024 m_getptr(struct mbuf *m, int loc, int *off)
1025 {
1026 	while (loc >= 0) {
1027 		/* Normal end of search */
1028 		if (m->m_len > loc) {
1029 			*off = loc;
1030 			return (m);
1031 		} else {
1032 			loc -= m->m_len;
1033 
1034 			if (m->m_next == NULL) {
1035 				if (loc == 0) {
1036 					/* Point at the end of valid data */
1037 					*off = m->m_len;
1038 					return (m);
1039 				} else {
1040 					return (NULL);
1041 				}
1042 			} else {
1043 				m = m->m_next;
1044 			}
1045 		}
1046 	}
1047 
1048 	return (NULL);
1049 }
1050 
1051 /*
1052  * Partition an mbuf chain in two pieces, returning the tail --
1053  * all but the first len0 bytes.  In case of failure, it returns NULL and
1054  * attempts to restore the chain to its original state.
1055  */
1056 struct mbuf *
m_split(struct mbuf * m0,int len0,int wait)1057 m_split(struct mbuf *m0, int len0, int wait)
1058 {
1059 	struct mbuf *m, *n;
1060 	unsigned len = len0, remain, olen;
1061 
1062 	for (m = m0; m && len > m->m_len; m = m->m_next)
1063 		len -= m->m_len;
1064 	if (m == NULL)
1065 		return (NULL);
1066 	remain = m->m_len - len;
1067 	if (m0->m_flags & M_PKTHDR) {
1068 		MGETHDR(n, wait, m0->m_type);
1069 		if (n == NULL)
1070 			return (NULL);
1071 		if (m_dup_pkthdr(n, m0, wait)) {
1072 			m_freem(n);
1073 			return (NULL);
1074 		}
1075 		n->m_pkthdr.len -= len0;
1076 		olen = m0->m_pkthdr.len;
1077 		m0->m_pkthdr.len = len0;
1078 		if (remain == 0) {
1079 			n->m_next = m->m_next;
1080 			m->m_next = NULL;
1081 			n->m_len = 0;
1082 			return (n);
1083 		}
1084 		if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) {
1085 			/* m can't be the lead packet */
1086 			m_align(n, 0);
1087 			n->m_next = m_split(m, len, wait);
1088 			if (n->m_next == NULL) {
1089 				(void) m_free(n);
1090 				m0->m_pkthdr.len = olen;
1091 				return (NULL);
1092 			} else {
1093 				n->m_len = 0;
1094 				return (n);
1095 			}
1096 		}
1097 	} else if (remain == 0) {
1098 		n = m->m_next;
1099 		m->m_next = NULL;
1100 		return (n);
1101 	} else {
1102 		MGET(n, wait, m->m_type);
1103 		if (n == NULL)
1104 			return (NULL);
1105 	}
1106 	if (m->m_flags & M_EXT) {
1107 		n->m_ext = m->m_ext;
1108 		MCLADDREFERENCE(m, n);
1109 		n->m_data = m->m_data + len;
1110 	} else {
1111 		m_align(n, remain);
1112 		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
1113 	}
1114 	n->m_len = remain;
1115 	m->m_len = len;
1116 	n->m_next = m->m_next;
1117 	m->m_next = NULL;
1118 	return (n);
1119 }
1120 
1121 /*
1122  * Make space for a new header of length hlen at skip bytes
1123  * into the packet.  When doing this we allocate new mbufs only
1124  * when absolutely necessary.  The mbuf where the new header
1125  * is to go is returned together with an offset into the mbuf.
1126  * If NULL is returned then the mbuf chain may have been modified;
1127  * the caller is assumed to always free the chain.
1128  */
1129 struct mbuf *
m_makespace(struct mbuf * m0,int skip,int hlen,int * off)1130 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
1131 {
1132 	struct mbuf *m;
1133 	unsigned remain;
1134 
1135 	KASSERT(m0->m_flags & M_PKTHDR);
1136 	/*
1137 	 * Limit the size of the new header to MHLEN. In case
1138 	 * skip = 0 and the first buffer is not a cluster this
1139 	 * is the maximum space available in that mbuf.
1140 	 * In other words this code never prepends a mbuf.
1141 	 */
1142 	KASSERT(hlen < MHLEN);
1143 
1144 	for (m = m0; m && skip > m->m_len; m = m->m_next)
1145 		skip -= m->m_len;
1146 	if (m == NULL)
1147 		return (NULL);
1148 	/*
1149 	 * At this point skip is the offset into the mbuf m
1150 	 * where the new header should be placed.  Figure out
1151 	 * if there's space to insert the new header.  If so,
1152 	 * and copying the remainder makes sense then do so.
1153 	 * Otherwise insert a new mbuf in the chain, splitting
1154 	 * the contents of m as needed.
1155 	 */
1156 	remain = m->m_len - skip;		/* data to move */
1157 	if (skip < remain && hlen <= m_leadingspace(m)) {
1158 		if (skip)
1159 			memmove(m->m_data-hlen, m->m_data, skip);
1160 		m->m_data -= hlen;
1161 		m->m_len += hlen;
1162 		*off = skip;
1163 	} else if (hlen > m_trailingspace(m)) {
1164 		struct mbuf *n;
1165 
1166 		if (remain > 0) {
1167 			MGET(n, M_DONTWAIT, m->m_type);
1168 			if (n && remain > MLEN) {
1169 				MCLGETL(n, M_DONTWAIT, remain);
1170 				if ((n->m_flags & M_EXT) == 0) {
1171 					m_free(n);
1172 					n = NULL;
1173 				}
1174 			}
1175 			if (n == NULL)
1176 				return (NULL);
1177 
1178 			memcpy(n->m_data, mtod(m, char *) + skip, remain);
1179 			n->m_len = remain;
1180 			m->m_len -= remain;
1181 
1182 			n->m_next = m->m_next;
1183 			m->m_next = n;
1184 		}
1185 
1186 		if (hlen <= m_trailingspace(m)) {
1187 			m->m_len += hlen;
1188 			*off = skip;
1189 		} else {
1190 			n = m_get(M_DONTWAIT, m->m_type);
1191 			if (n == NULL)
1192 				return NULL;
1193 
1194 			n->m_len = hlen;
1195 
1196 			n->m_next = m->m_next;
1197 			m->m_next = n;
1198 
1199 			*off = 0;	/* header is at front ... */
1200 			m = n;		/* ... of new mbuf */
1201 		}
1202 	} else {
1203 		/*
1204 		 * Copy the remainder to the back of the mbuf
1205 		 * so there's space to write the new header.
1206 		 */
1207 		if (remain > 0)
1208 			memmove(mtod(m, caddr_t) + skip + hlen,
1209 			      mtod(m, caddr_t) + skip, remain);
1210 		m->m_len += hlen;
1211 		*off = skip;
1212 	}
1213 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
1214 	return m;
1215 }
1216 
1217 
1218 /*
1219  * Routine to copy from device local memory into mbufs.
1220  */
1221 struct mbuf *
m_devget(char * buf,int totlen,int off)1222 m_devget(char *buf, int totlen, int off)
1223 {
1224 	struct mbuf	*m;
1225 	struct mbuf	*top, **mp;
1226 	int		 len;
1227 
1228 	top = NULL;
1229 	mp = &top;
1230 
1231 	if (off < 0 || off > MHLEN)
1232 		return (NULL);
1233 
1234 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1235 	if (m == NULL)
1236 		return (NULL);
1237 
1238 	m->m_pkthdr.len = totlen;
1239 
1240 	len = MHLEN;
1241 
1242 	while (totlen > 0) {
1243 		if (top != NULL) {
1244 			MGET(m, M_DONTWAIT, MT_DATA);
1245 			if (m == NULL) {
1246 				/*
1247 				 * As we might get called by pfkey, make sure
1248 				 * we do not leak sensitive data.
1249 				 */
1250 				top->m_flags |= M_ZEROIZE;
1251 				m_freem(top);
1252 				return (NULL);
1253 			}
1254 			len = MLEN;
1255 		}
1256 
1257 		if (totlen + off >= MINCLSIZE) {
1258 			MCLGET(m, M_DONTWAIT);
1259 			if (m->m_flags & M_EXT)
1260 				len = MCLBYTES;
1261 		} else {
1262 			/* Place initial small packet/header at end of mbuf. */
1263 			if (top == NULL && totlen + off + max_linkhdr <= len) {
1264 				m->m_data += max_linkhdr;
1265 				len -= max_linkhdr;
1266 			}
1267 		}
1268 
1269 		if (off) {
1270 			m->m_data += off;
1271 			len -= off;
1272 			off = 0;
1273 		}
1274 
1275 		m->m_len = len = min(totlen, len);
1276 		memcpy(mtod(m, void *), buf, (size_t)len);
1277 
1278 		buf += len;
1279 		*mp = m;
1280 		mp = &m->m_next;
1281 		totlen -= len;
1282 	}
1283 	return (top);
1284 }
1285 
1286 void
m_zero(struct mbuf * m)1287 m_zero(struct mbuf *m)
1288 {
1289 	if (M_READONLY(m)) {
1290 		mtx_enter(&m_extref_mtx);
1291 		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
1292 			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
1293 			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
1294 		}
1295 		mtx_leave(&m_extref_mtx);
1296 		return;
1297 	}
1298 
1299 	explicit_bzero(M_DATABUF(m), M_SIZE(m));
1300 }
1301 
1302 /*
1303  * Apply function f to the data in an mbuf chain starting "off" bytes from the
1304  * beginning, continuing for "len" bytes.
1305  */
1306 int
m_apply(struct mbuf * m,int off,int len,int (* f)(caddr_t,caddr_t,unsigned int),caddr_t fstate)1307 m_apply(struct mbuf *m, int off, int len,
1308     int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
1309 {
1310 	int rval;
1311 	unsigned int count;
1312 
1313 	if (len < 0)
1314 		panic("m_apply: len %d < 0", len);
1315 	if (off < 0)
1316 		panic("m_apply: off %d < 0", off);
1317 	while (off > 0) {
1318 		if (m == NULL)
1319 			panic("m_apply: null mbuf in skip");
1320 		if (off < m->m_len)
1321 			break;
1322 		off -= m->m_len;
1323 		m = m->m_next;
1324 	}
1325 	while (len > 0) {
1326 		if (m == NULL)
1327 			panic("m_apply: null mbuf");
1328 		count = min(m->m_len - off, len);
1329 
1330 		rval = f(fstate, mtod(m, caddr_t) + off, count);
1331 		if (rval)
1332 			return (rval);
1333 
1334 		len -= count;
1335 		off = 0;
1336 		m = m->m_next;
1337 	}
1338 
1339 	return (0);
1340 }
1341 
1342 /*
1343  * Compute the amount of space available before the current start of data
1344  * in an mbuf. Read-only clusters never have space available.
1345  */
1346 int
m_leadingspace(struct mbuf * m)1347 m_leadingspace(struct mbuf *m)
1348 {
1349 	if (M_READONLY(m))
1350 		return 0;
1351 	KASSERT(m->m_data >= M_DATABUF(m));
1352 	return m->m_data - M_DATABUF(m);
1353 }
1354 
1355 /*
1356  * Compute the amount of space available after the end of data in an mbuf.
1357  * Read-only clusters never have space available.
1358  */
1359 int
m_trailingspace(struct mbuf * m)1360 m_trailingspace(struct mbuf *m)
1361 {
1362 	if (M_READONLY(m))
1363 		return 0;
1364 	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
1365 	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
1366 }
1367 
1368 /*
1369  * Set the m_data pointer of a newly-allocated mbuf to place an object of
1370  * the specified size at the end of the mbuf, longword aligned.
1371  */
1372 void
m_align(struct mbuf * m,int len)1373 m_align(struct mbuf *m, int len)
1374 {
1375 	KASSERT(len >= 0 && !M_READONLY(m));
1376 	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
1377 	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));
1378 
1379 	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
1380 }
1381 
1382 /*
1383  * Duplicate mbuf pkthdr from from to to.
1384  * from must have M_PKTHDR set, and to must be empty.
1385  */
1386 int
m_dup_pkthdr(struct mbuf * to,struct mbuf * from,int wait)1387 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
1388 {
1389 	int error;
1390 
1391 	KASSERT(from->m_flags & M_PKTHDR);
1392 
1393 	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
1394 	to->m_flags |= (from->m_flags & M_COPYFLAGS);
1395 	to->m_pkthdr = from->m_pkthdr;
1396 
1397 #if NPF > 0
1398 	to->m_pkthdr.pf.statekey = NULL;
1399 	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
1400 	to->m_pkthdr.pf.inp = NULL;
1401 	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
1402 #endif	/* NPF > 0 */
1403 
1404 	SLIST_INIT(&to->m_pkthdr.ph_tags);
1405 
1406 	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
1407 		return (error);
1408 
1409 	if ((to->m_flags & M_EXT) == 0)
1410 		to->m_data = to->m_pktdat;
1411 
1412 	return (0);
1413 }
1414 
1415 struct mbuf *
m_dup_pkt(struct mbuf * m0,unsigned int adj,int wait)1416 m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
1417 {
1418 	struct mbuf *m;
1419 	int len;
1420 
1421 	KASSERT(m0->m_flags & M_PKTHDR);
1422 
1423 	len = m0->m_pkthdr.len + adj;
1424 	if (len > MAXMCLBYTES) /* XXX */
1425 		return (NULL);
1426 
1427 	m = m_get(wait, m0->m_type);
1428 	if (m == NULL)
1429 		return (NULL);
1430 
1431 	if (m_dup_pkthdr(m, m0, wait) != 0)
1432 		goto fail;
1433 
1434 	if (len > MHLEN) {
1435 		MCLGETL(m, wait, len);
1436 		if (!ISSET(m->m_flags, M_EXT))
1437 			goto fail;
1438 	}
1439 
1440 	m->m_len = m->m_pkthdr.len = len;
1441 	m_adj(m, adj);
1442 	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));
1443 
1444 	return (m);
1445 
1446 fail:
1447 	m_freem(m);
1448 	return (NULL);
1449 }
1450 
1451 void
m_microtime(const struct mbuf * m,struct timeval * tv)1452 m_microtime(const struct mbuf *m, struct timeval *tv)
1453 {
1454 	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
1455 		struct timeval btv, utv;
1456 
1457 		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
1458 		microboottime(&btv);
1459 		timeradd(&btv, &utv, tv);
1460 	} else
1461 		microtime(tv);
1462 }
1463 
1464 void *
m_pool_alloc(struct pool * pp,int flags,int * slowdown)1465 m_pool_alloc(struct pool *pp, int flags, int *slowdown)
1466 {
1467 	void *v;
1468 
1469 	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) > mbuf_mem_limit)
1470 		goto fail;
1471 
1472 	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
1473 	if (v != NULL)
1474 		return (v);
1475 
1476  fail:
1477 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1478 	return (NULL);
1479 }
1480 
1481 void
m_pool_free(struct pool * pp,void * v)1482 m_pool_free(struct pool *pp, void *v)
1483 {
1484 	(*pool_allocator_multi.pa_free)(pp, v);
1485 
1486 	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
1487 }
1488 
1489 void
m_pool_init(struct pool * pp,u_int size,u_int align,const char * wmesg)1490 m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
1491 {
1492 	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
1493 	pool_set_constraints(pp, &kp_dma_contig);
1494 }
1495 
1496 u_int
m_pool_used(void)1497 m_pool_used(void)
1498 {
1499 	return ((mbuf_mem_alloc * 100) / mbuf_mem_limit);
1500 }
1501 
1502 #ifdef DDB
1503 void
m_print(void * v,int (* pr)(const char *,...))1504 m_print(void *v,
1505     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1506 {
1507 	struct mbuf *m = v;
1508 
1509 	(*pr)("mbuf %p\n", m);
1510 	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
1511 	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
1512 	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
1513 	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
1514 	if (m->m_flags & M_PKTHDR) {
1515 		(*pr)("m_ptkhdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
1516 		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
1517 		(*pr)("m_ptkhdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
1518 		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
1519 		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
1520 		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
1521 		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
1522 		(*pr)("m_pkthdr.csum_flags: %b\n",
1523 		    m->m_pkthdr.csum_flags, MCS_BITS);
1524 		(*pr)("m_pkthdr.ether_vtag: %u\tm_ptkhdr.ph_rtableid: %u\n",
1525 		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
1526 		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
1527 		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
1528 		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
1529 		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
1530 		(*pr)("m_pkthdr.pf.flags: %b\n",
1531 		    m->m_pkthdr.pf.flags, MPF_BITS);
1532 		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
1533 		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
1534 	}
1535 	if (m->m_flags & M_EXT) {
1536 		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
1537 		    m->m_ext.ext_buf, m->m_ext.ext_size);
1538 		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
1539 		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
1540 		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
1541 		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
1542 
1543 	}
1544 }
1545 #endif
1546 
1547 /*
1548  * mbuf lists
1549  */
1550 
1551 void
ml_init(struct mbuf_list * ml)1552 ml_init(struct mbuf_list *ml)
1553 {
1554 	ml->ml_head = ml->ml_tail = NULL;
1555 	ml->ml_len = 0;
1556 }
1557 
1558 void
ml_enqueue(struct mbuf_list * ml,struct mbuf * m)1559 ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
1560 {
1561 	if (ml->ml_tail == NULL)
1562 		ml->ml_head = ml->ml_tail = m;
1563 	else {
1564 		ml->ml_tail->m_nextpkt = m;
1565 		ml->ml_tail = m;
1566 	}
1567 
1568 	m->m_nextpkt = NULL;
1569 	ml->ml_len++;
1570 }
1571 
1572 void
ml_enlist(struct mbuf_list * mla,struct mbuf_list * mlb)1573 ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
1574 {
1575 	if (!ml_empty(mlb)) {
1576 		if (ml_empty(mla))
1577 			mla->ml_head = mlb->ml_head;
1578 		else
1579 			mla->ml_tail->m_nextpkt = mlb->ml_head;
1580 		mla->ml_tail = mlb->ml_tail;
1581 		mla->ml_len += mlb->ml_len;
1582 
1583 		ml_init(mlb);
1584 	}
1585 }
1586 
1587 struct mbuf *
ml_dequeue(struct mbuf_list * ml)1588 ml_dequeue(struct mbuf_list *ml)
1589 {
1590 	struct mbuf *m;
1591 
1592 	m = ml->ml_head;
1593 	if (m != NULL) {
1594 		ml->ml_head = m->m_nextpkt;
1595 		if (ml->ml_head == NULL)
1596 			ml->ml_tail = NULL;
1597 
1598 		m->m_nextpkt = NULL;
1599 		ml->ml_len--;
1600 	}
1601 
1602 	return (m);
1603 }
1604 
1605 struct mbuf *
ml_dechain(struct mbuf_list * ml)1606 ml_dechain(struct mbuf_list *ml)
1607 {
1608 	struct mbuf *m0;
1609 
1610 	m0 = ml->ml_head;
1611 
1612 	ml_init(ml);
1613 
1614 	return (m0);
1615 }
1616 
1617 unsigned int
ml_purge(struct mbuf_list * ml)1618 ml_purge(struct mbuf_list *ml)
1619 {
1620 	struct mbuf *m, *n;
1621 	unsigned int len;
1622 
1623 	for (m = ml->ml_head; m != NULL; m = n) {
1624 		n = m->m_nextpkt;
1625 		m_freem(m);
1626 	}
1627 
1628 	len = ml->ml_len;
1629 	ml_init(ml);
1630 
1631 	return (len);
1632 }
1633 
1634 unsigned int
ml_hdatalen(struct mbuf_list * ml)1635 ml_hdatalen(struct mbuf_list *ml)
1636 {
1637 	struct mbuf *m;
1638 
1639 	m = ml->ml_head;
1640 	if (m == NULL)
1641 		return (0);
1642 
1643 	KASSERT(ISSET(m->m_flags, M_PKTHDR));
1644 	return (m->m_pkthdr.len);
1645 }
1646 
1647 /*
1648  * mbuf queues
1649  */
1650 
1651 void
mq_init(struct mbuf_queue * mq,u_int maxlen,int ipl)1652 mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
1653 {
1654 	mtx_init(&mq->mq_mtx, ipl);
1655 	ml_init(&mq->mq_list);
1656 	mq->mq_maxlen = maxlen;
1657 }
1658 
1659 int
mq_push(struct mbuf_queue * mq,struct mbuf * m)1660 mq_push(struct mbuf_queue *mq, struct mbuf *m)
1661 {
1662 	struct mbuf *dropped = NULL;
1663 
1664 	mtx_enter(&mq->mq_mtx);
1665 	if (mq_len(mq) >= mq->mq_maxlen) {
1666 		mq->mq_drops++;
1667 		dropped = ml_dequeue(&mq->mq_list);
1668 	}
1669 	ml_enqueue(&mq->mq_list, m);
1670 	mtx_leave(&mq->mq_mtx);
1671 
1672 	if (dropped)
1673 		m_freem(dropped);
1674 
1675 	return (dropped != NULL);
1676 }
1677 
1678 int
mq_enqueue(struct mbuf_queue * mq,struct mbuf * m)1679 mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
1680 {
1681 	int dropped = 0;
1682 
1683 	mtx_enter(&mq->mq_mtx);
1684 	if (mq_len(mq) < mq->mq_maxlen)
1685 		ml_enqueue(&mq->mq_list, m);
1686 	else {
1687 		mq->mq_drops++;
1688 		dropped = 1;
1689 	}
1690 	mtx_leave(&mq->mq_mtx);
1691 
1692 	if (dropped)
1693 		m_freem(m);
1694 
1695 	return (dropped);
1696 }
1697 
1698 struct mbuf *
mq_dequeue(struct mbuf_queue * mq)1699 mq_dequeue(struct mbuf_queue *mq)
1700 {
1701 	struct mbuf *m;
1702 
1703 	mtx_enter(&mq->mq_mtx);
1704 	m = ml_dequeue(&mq->mq_list);
1705 	mtx_leave(&mq->mq_mtx);
1706 
1707 	return (m);
1708 }
1709 
1710 int
mq_enlist(struct mbuf_queue * mq,struct mbuf_list * ml)1711 mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
1712 {
1713 	struct mbuf *m;
1714 	int dropped = 0;
1715 
1716 	mtx_enter(&mq->mq_mtx);
1717 	if (mq_len(mq) < mq->mq_maxlen)
1718 		ml_enlist(&mq->mq_list, ml);
1719 	else {
1720 		dropped = ml_len(ml);
1721 		mq->mq_drops += dropped;
1722 	}
1723 	mtx_leave(&mq->mq_mtx);
1724 
1725 	if (dropped) {
1726 		while ((m = ml_dequeue(ml)) != NULL)
1727 			m_freem(m);
1728 	}
1729 
1730 	return (dropped);
1731 }
1732 
1733 void
mq_delist(struct mbuf_queue * mq,struct mbuf_list * ml)1734 mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
1735 {
1736 	mtx_enter(&mq->mq_mtx);
1737 	*ml = mq->mq_list;
1738 	ml_init(&mq->mq_list);
1739 	mtx_leave(&mq->mq_mtx);
1740 }
1741 
1742 struct mbuf *
mq_dechain(struct mbuf_queue * mq)1743 mq_dechain(struct mbuf_queue *mq)
1744 {
1745 	struct mbuf *m0;
1746 
1747 	mtx_enter(&mq->mq_mtx);
1748 	m0 = ml_dechain(&mq->mq_list);
1749 	mtx_leave(&mq->mq_mtx);
1750 
1751 	return (m0);
1752 }
1753 
1754 unsigned int
mq_purge(struct mbuf_queue * mq)1755 mq_purge(struct mbuf_queue *mq)
1756 {
1757 	struct mbuf_list ml;
1758 
1759 	mq_delist(mq, &ml);
1760 
1761 	return (ml_purge(&ml));
1762 }
1763 
1764 unsigned int
mq_hdatalen(struct mbuf_queue * mq)1765 mq_hdatalen(struct mbuf_queue *mq)
1766 {
1767 	unsigned int hdatalen;
1768 
1769 	mtx_enter(&mq->mq_mtx);
1770 	hdatalen = ml_hdatalen(&mq->mq_list);
1771 	mtx_leave(&mq->mq_mtx);
1772 
1773 	return (hdatalen);
1774 }
1775 
1776 void
mq_set_maxlen(struct mbuf_queue * mq,u_int maxlen)1777 mq_set_maxlen(struct mbuf_queue *mq, u_int maxlen)
1778 {
1779 	mtx_enter(&mq->mq_mtx);
1780 	mq->mq_maxlen = maxlen;
1781 	mtx_leave(&mq->mq_mtx);
1782 }
1783 
1784 int
sysctl_mq(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen,struct mbuf_queue * mq)1785 sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1786     void *newp, size_t newlen, struct mbuf_queue *mq)
1787 {
1788 	unsigned int maxlen;
1789 	int error;
1790 
1791 	/* All sysctl names at this level are terminal. */
1792 	if (namelen != 1)
1793 		return (ENOTDIR);
1794 
1795 	switch (name[0]) {
1796 	case IFQCTL_LEN:
1797 		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
1798 	case IFQCTL_MAXLEN:
1799 		maxlen = mq->mq_maxlen;
1800 		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
1801 		if (error == 0)
1802 			mq_set_maxlen(mq, maxlen);
1803 		return (error);
1804 	case IFQCTL_DROPS:
1805 		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
1806 	default:
1807 		return (EOPNOTSUPP);
1808 	}
1809 	/* NOTREACHED */
1810 }
1811