/*	$OpenBSD: uipc_mbuf.c,v 1.295 2024/11/06 14:37:45 bluhm Exp $	*/
/*	$NetBSD: uipc_mbuf.c,v 1.15.4.1 1996/06/13 17:11:44 cgd Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 */

/*
 *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <sys/socket.h>
#include <net/if.h>

#include <uvm/uvm_extern.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

#if NPF > 0
#include <net/pfvar.h>
#endif	/* NPF > 0 */

/* mbuf stats */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);
/* mbuf pools */
struct pool mbpool;
struct pool mtagpool;

/* mbuf cluster pools */
u_int mclsizes[MCLPOOLS] = {
	MCLBYTES,		/* must be at slot 0 */
	MCLBYTES + 2,		/* ETHER_ALIGNED 2k mbufs */
	4 * 1024,
	8 * 1024,
	9 * 1024,
	12 * 1024,
	16 * 1024,
	64 * 1024
};
static char mclnames[MCLPOOLS][8];
struct pool mclpools[MCLPOOLS];

struct pool *m_clpool(u_int);

int max_linkhdr;		/* largest link-level header */
int max_protohdr;		/* largest protocol header */
int max_hdr;			/* largest link+protocol header */

struct mutex m_extref_mtx = MUTEX_INITIALIZER(IPL_NET);

void	m_extfree(struct mbuf *);
void	m_zero(struct mbuf *);

unsigned long mbuf_mem_limit;	/* [a] how much memory can be allocated */
unsigned long mbuf_mem_alloc;	/* [a] how much memory has been allocated */

void	*m_pool_alloc(struct pool *, int, int *);
void	m_pool_free(struct pool *, void *);

struct pool_allocator m_pool_allocator = {
	m_pool_alloc,
	m_pool_free,
	0 /* will be copied from pool_allocator_multi */
};

static void (*mextfree_fns[4])(caddr_t, u_int, void *);
static u_int num_extfree_fns;

#define M_DATABUF(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf : \
			(m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat)
#define M_SIZE(m)	((m)->m_flags & M_EXT ? (m)->m_ext.ext_size : \
			(m)->m_flags & M_PKTHDR ? MHLEN : MLEN)

/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{
	int i, error;
	unsigned int lowbits;

	CTASSERT(MSIZE == sizeof(struct mbuf));

	m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;

	mbuf_mem_alloc = 0;

#if DIAGNOSTIC
	if (mclsizes[0] != MCLBYTES)
		panic("mbinit: the smallest cluster size != MCLBYTES");
	if (mclsizes[nitems(mclsizes) - 1] != MAXMCLBYTES)
		panic("mbinit: the largest cluster size != MAXMCLBYTES");
#endif

	m_pool_init(&mbpool, MSIZE, 64, "mbufpl");

	pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
	    IPL_NET, 0, "mtagpl", NULL);

	for (i = 0; i < nitems(mclsizes); i++) {
		lowbits = mclsizes[i] & ((1 << 10) - 1);
		if (lowbits) {
			snprintf(mclnames[i], sizeof(mclnames[0]),
			    "mcl%dk%u", mclsizes[i] >> 10, lowbits);
		} else {
			snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
			    mclsizes[i] >> 10);
		}

		m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
	}

	error = nmbclust_update(nmbclust);
	KASSERT(error == 0);

	(void)mextfree_register(m_extfree_pool);
	KASSERT(num_extfree_fns == 1);
}

void
mbcpuinit(void)
{
	int i;

	mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT);

	pool_cache_init(&mbpool);
	pool_cache_init(&mtagpool);

	for (i = 0; i < nitems(mclsizes); i++)
		pool_cache_init(&mclpools[i]);
}

int
nmbclust_update(long newval)
{
	int i;

	if (newval <= 0 || newval > LONG_MAX / MCLBYTES)
		return ERANGE;
	/* update the global mbuf memory limit */
	nmbclust = newval;
	atomic_store_long(&mbuf_mem_limit, nmbclust * MCLBYTES);

	pool_wakeup(&mbpool);
	for (i = 0; i < nitems(mclsizes); i++)
		pool_wakeup(&mclpools[i]);

	return 0;
}

/*
 * Space allocation routines.
 */
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_inc(mbstat, type);
	splx(s);

	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return (m);
}

/*
 * ATTN: When changing anything here, check m_inithdr() and m_defrag();
 * those may need to change as well.
 */
struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;
	int s;

	KASSERT(type >= 0 && type < MT_NTYPES);

	m = pool_get(&mbpool, nowait == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_inc(mbstat, type);
	splx(s);

	m->m_type = type;

	return (m_inithdr(m));
}

struct mbuf *
m_inithdr(struct mbuf *m)
{
	/* keep in sync with m_gethdr */
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;

	return (m);
}

static inline void
m_clearhdr(struct mbuf *m)
{
	/* delete all mbuf tags to reset the state */
	m_tag_delete_chain(m);
#if NPF > 0
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */

	memset(&m->m_pkthdr, 0, sizeof(m->m_pkthdr));
}

void
m_removehdr(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m_clearhdr(m);
	m->m_flags &= ~M_PKTHDR;
}

void
m_resethdr(struct mbuf *m)
{
	int len = m->m_pkthdr.len;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;

	KASSERT(m->m_flags & M_PKTHDR);
	m->m_flags &= (M_EXT|M_PKTHDR|M_EOR|M_EXTWR|M_ZEROIZE);
	m_clearhdr(m);
	/* like m_inithdr(), but keep any associated data and mbufs */
	m->m_pkthdr.pf.prio = IFQ_DEFPRIO;
	m->m_pkthdr.len = len;
	m->m_pkthdr.ph_loopcnt = loopcnt;
}

void
m_calchdrlen(struct mbuf *m)
{
	struct mbuf *n;
	int plen = 0;

	KASSERT(m->m_flags & M_PKTHDR);
	for (n = m; n; n = n->m_next)
		plen += n->m_len;
	m->m_pkthdr.len = plen;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == NULL)
		return (NULL);
	memset(mtod(m, caddr_t), 0, MLEN);
	return (m);
}

struct pool *
m_clpool(u_int pktlen)
{
	struct pool *pp;
	int pi;

	for (pi = 0; pi < nitems(mclpools); pi++) {
		pp = &mclpools[pi];
		if (pktlen <= pp->pr_size)
			return (pp);
	}

	return (NULL);
}

struct mbuf *
m_clget(struct mbuf *m, int how, u_int pktlen)
{
	struct mbuf *m0 = NULL;
	struct pool *pp;
	caddr_t buf;

	pp = m_clpool(pktlen);
#ifdef DIAGNOSTIC
	if (pp == NULL)
		panic("m_clget: request for %u byte cluster", pktlen);
#endif

	if (m == NULL) {
		m0 = m_gethdr(how, MT_DATA);
		if (m0 == NULL)
			return (NULL);

		m = m0;
	}
	buf = pool_get(pp, how == M_WAIT ? PR_WAITOK : PR_NOWAIT);
	if (buf == NULL) {
		m_freem(m0);
		return (NULL);
	}

	MEXTADD(m, buf, pp->pr_size, M_EXTWR, MEXTFREE_POOL, pp);
	return (m);
}
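
/*
 * Illustrative sketch (not compiled): a typical receive-path allocation
 * using m_clget().  "buflen" is a hypothetical frame length; the cluster
 * comes from the smallest pool whose size covers the request.
 */
#if 0
	struct mbuf *m;

	/* passing NULL asks m_clget() to allocate the header mbuf too */
	m = m_clget(NULL, M_DONTWAIT, buflen);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = m->m_pkthdr.len = buflen;
#endif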

void
m_extfree_pool(caddr_t buf, u_int size, void *pp)
{
	pool_put(pp, buf);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;
	int s;

	if (m == NULL)
		return (NULL);

	s = splnet();
	counters_dec(mbstat, m->m_type);
	splx(s);

	n = m->m_next;
	if (m->m_flags & M_ZEROIZE) {
		m_zero(m);
		/* propagate M_ZEROIZE to the next mbuf in the chain */
		if (n)
			n->m_flags |= M_ZEROIZE;
	}
	if (m->m_flags & M_PKTHDR) {
		m_tag_delete_chain(m);
#if NPF > 0
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
#endif	/* NPF > 0 */
	}
	if (m->m_flags & M_EXT)
		m_extfree(m);

	pool_put(&mbpool, m);

	return (n);
}

void
m_extref(struct mbuf *o, struct mbuf *n)
{
	int refs = MCLISREFERENCED(o);

	n->m_flags |= o->m_flags & (M_EXT|M_EXTWR);

	if (refs)
		mtx_enter(&m_extref_mtx);
	n->m_ext.ext_nextref = o->m_ext.ext_nextref;
	n->m_ext.ext_prevref = o;
	o->m_ext.ext_nextref = n;
	n->m_ext.ext_nextref->m_ext.ext_prevref = n;
	if (refs)
		mtx_leave(&m_extref_mtx);

	MCLREFDEBUGN((n), __FILE__, __LINE__);
}

static inline u_int
m_extunref(struct mbuf *m)
{
	int refs = 0;

	if (!MCLISREFERENCED(m))
		return (0);

	mtx_enter(&m_extref_mtx);
	if (MCLISREFERENCED(m)) {
		m->m_ext.ext_nextref->m_ext.ext_prevref =
		    m->m_ext.ext_prevref;
		m->m_ext.ext_prevref->m_ext.ext_nextref =
		    m->m_ext.ext_nextref;
		refs = 1;
	}
	mtx_leave(&m_extref_mtx);

	return (refs);
}

/*
 * Returns an index for use with MEXTADD.
 * Each free function should only be registered once.
 * Drivers can be assured that the index will be non-zero.
 */
u_int
mextfree_register(void (*fn)(caddr_t, u_int, void *))
{
	KASSERT(num_extfree_fns < nitems(mextfree_fns));
	mextfree_fns[num_extfree_fns] = fn;
	return num_extfree_fns++;
}
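
/*
 * Illustrative sketch (not compiled): how a hypothetical driver would
 * register its external free function once at attach time and attach
 * driver-owned storage with MEXTADD.  All "mydrv_*" names are made up.
 */
#if 0
static u_int mydrv_exttype;

static void
mydrv_extfree(caddr_t buf, u_int size, void *arg)
{
	/* hand the buffer back to the driver's own storage */
}

void
mydrv_attach(void)
{
	mydrv_exttype = mextfree_register(mydrv_extfree);
}

void
mydrv_fill(struct mbuf *m, caddr_t buf, u_int len, void *sc)
{
	MEXTADD(m, buf, len, 0, mydrv_exttype, sc);
}
#endif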

void
m_extfree(struct mbuf *m)
{
	if (m_extunref(m) == 0) {
		KASSERT(m->m_ext.ext_free_fn < num_extfree_fns);
		mextfree_fns[m->m_ext.ext_free_fn](m->m_ext.ext_buf,
		    m->m_ext.ext_size, m->m_ext.ext_arg);
	}

	m->m_flags &= ~(M_EXT|M_EXTWR);
}

struct mbuf *
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return (NULL);

	n = m->m_nextpkt;

	do
		m = m_free(m);
	while (m != NULL);

	return (n);
}

void
m_purge(struct mbuf *m)
{
	while (m != NULL)
		m = m_freem(m);
}

/*
 * mbuf chain defragmenter.  This function uses some evil tricks to
 * defragment an mbuf chain into a single buffer without changing the mbuf
 * pointer.  This needs to know a lot of the mbuf internals to make it
 * work.  The resulting mbuf is not aligned to the IP header, to assist
 * DMA transfers.
 */
int
m_defrag(struct mbuf *m, int how)
{
	struct mbuf *m0;

	if (m->m_next == NULL)
		return (0);

	KASSERT(m->m_flags & M_PKTHDR);

	counters_inc(mbstat, MBSTAT_DEFRAG_ALLOC);
	if ((m0 = m_gethdr(how, m->m_type)) == NULL)
		return (ENOBUFS);
	if (m->m_pkthdr.len > MHLEN) {
		MCLGETL(m0, how, m->m_pkthdr.len);
		if (!(m0->m_flags & M_EXT)) {
			m_free(m0);
			return (ENOBUFS);
		}
	}
	m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t));
	m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len;

	/* free chain behind and possible ext buf on the first mbuf */
	m_freem(m->m_next);
	m->m_next = NULL;
	if (m->m_flags & M_EXT)
		m_extfree(m);

	/*
	 * Bounce copy mbuf over to the original mbuf and set everything up.
	 * This needs to reset or clear all pointers that may go into the
	 * original mbuf chain.
	 */
	if (m0->m_flags & M_EXT) {
		memcpy(&m->m_ext, &m0->m_ext, sizeof(struct mbuf_ext));
		MCLINITREFERENCE(m);
		m->m_flags |= m0->m_flags & (M_EXT|M_EXTWR);
		m->m_data = m->m_ext.ext_buf;
	} else {
		m->m_data = m->m_pktdat;
		memcpy(m->m_data, m0->m_data, m0->m_len);
	}
	m->m_pkthdr.len = m->m_len = m0->m_len;

	m0->m_flags &= ~(M_EXT|M_EXTWR);	/* cluster is gone */
	m_free(m0);

	return (0);
}
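
/*
 * Illustrative sketch (not compiled): a driver whose DMA engine needs one
 * contiguous buffer would call m_defrag() before mapping the packet.
 */
#if 0
	if (m_defrag(m, M_DONTWAIT) != 0) {
		m_freem(m);		/* chain was left intact; drop it */
		return (ENOBUFS);
	}
	/* the whole packet now lives in the single mbuf m */
#endif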

/*
 * Mbuffer utility routines.
 */

/*
 * Ensure len bytes of contiguous space at the beginning of the mbuf chain.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	if (len > MHLEN)
		panic("mbuf prepend length too big");

	if (m_leadingspace(m) >= len) {
		m->m_data -= len;
		m->m_len += len;
	} else {
		counters_inc(mbstat, MBSTAT_PREPEND_ALLOC);
		MGET(mn, how, m->m_type);
		if (mn == NULL) {
			m_freem(m);
			return (NULL);
		}
		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(mn, m);
		mn->m_next = m;
		m = mn;
		m_align(m, len);
		m->m_len = len;
	}
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len += len;
	return (m);
}
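
/*
 * Illustrative sketch (not compiled): prepending room for a link-level
 * header.  "eh" is a hypothetical struct ether_header; on failure
 * m_prepend() has already freed the chain.
 */
#if 0
	m = m_prepend(m, ETHER_HDR_LEN, M_DONTWAIT);
	if (m == NULL)
		return (ENOBUFS);
	memcpy(mtod(m, caddr_t), &eh, ETHER_HDR_LEN);
#endif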

/*
 * Make a copy of an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to the end of the
 * mbuf chain.  The wait parameter is a choice of M_WAIT/M_DONTWAIT from
 * the caller.
 */
struct mbuf *
m_copym(struct mbuf *m0, int off, int len, int wait)
{
	struct mbuf *m, *n, **np;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym0: off %d, len %d", off, len);
	if (off == 0 && m0->m_flags & M_PKTHDR)
		copyhdr = 1;
	if ((m = m_getptr(m0, off, &off)) == NULL)
		panic("m_copym0: short mbuf chain");
	np = &top;
	top = NULL;
	while (len > 0) {
		if (m == NULL) {
			if (len != M_COPYALL)
				panic("m_copym0: m == NULL and not COPYALL");
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (m_dup_pkthdr(n, m0, wait))
				goto nospace;
			if (len != M_COPYALL)
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			MCLADDREFERENCE(m, n);
		} else {
			n->m_data += m->m_data -
			    (m->m_flags & M_PKTHDR ? m->m_pktdat : m->m_dat);
			n->m_data += off;
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off,
			    n->m_len);
		}
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(struct mbuf *m, int off, int len, void *p)
{
	caddr_t cp = p;
	unsigned count;

	if (off < 0)
		panic("m_copydata: off %d < 0", off);
	if (len < 0)
		panic("m_copydata: len %d < 0", len);
	if ((m = m_getptr(m, off, &off)) == NULL)
		panic("m_copydata: short mbuf chain");
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: null mbuf");
		count = min(m->m_len - off, len);
		memmove(cp, mtod(m, caddr_t) + off, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}
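
/*
 * Illustrative sketch (not compiled): m_copydata() is the safe way to
 * read a header that may be split across mbufs, without touching the
 * chain itself.
 */
#if 0
	struct ip iph;

	if (m->m_pkthdr.len < sizeof(iph))
		return (EINVAL);
	m_copydata(m, 0, sizeof(iph), &iph);
#endif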

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.  The mbuf chain must be properly initialized,
 * including setting m_len.
 */
int
m_copyback(struct mbuf *m0, int off, int len, const void *_cp, int wait)
{
	int mlen, totlen = 0;
	struct mbuf *m = m0, *n;
	caddr_t cp = (caddr_t)_cp;
	int error = 0;

	if (m0 == NULL)
		return (0);
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (off + len > MLEN) {
				MCLGETL(n, wait, off + len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			memset(mtod(n, caddr_t), 0, off);
			n->m_len = len + off;
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		/* extend last packet to be filled fully */
		if (m->m_next == NULL && (len > m->m_len - off))
			m->m_len += min(len - (m->m_len - off),
			    m_trailingspace(m));
		mlen = min(m->m_len - off, len);
		memmove(mtod(m, caddr_t) + off, cp, mlen);
		cp += mlen;
		len -= mlen;
		totlen += mlen + off;
		if (len == 0)
			break;
		off = 0;

		if (m->m_next == NULL) {
			if ((n = m_get(wait, m->m_type)) == NULL) {
				error = ENOBUFS;
				goto out;
			}

			if (len > MLEN) {
				MCLGETL(n, wait, len);
				if (!(n->m_flags & M_EXT)) {
					m_free(n);
					error = ENOBUFS;
					goto out;
				}
			}
			n->m_len = len;
			m->m_next = n;
		}
		m = m->m_next;
	}
out:
	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;

	return (error);
}
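
/*
 * Illustrative sketch (not compiled): overwriting a field at a
 * hypothetical offset "off" inside a packet; m_copyback() grows the
 * chain if it is too short.
 */
#if 0
	uint16_t cksum = 0;

	if (m_copyback(m, off, sizeof(cksum), &cksum, M_DONTWAIT) != 0)
		return (ENOBUFS);
#endif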

/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), so the data portion
 * of n could end up in an mbuf of a different mbuf type.  Therefore both
 * chains should be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(struct mbuf *m, struct mbuf *n)
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > m_trailingspace(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if (mp == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		m = mp;
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_data += m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_data += len;
				m->m_len -= len;
				len = 0;
			}
		}
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		m = mp;
		for (;;) {
			count += m->m_len;
			if (m->m_next == NULL)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		if (mp->m_flags & M_PKTHDR)
			mp->m_pkthdr.len = count;
		m = mp;
		for (;;) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
			m = m->m_next;
		}
		while ((m = m->m_next) != NULL)
			m->m_len = 0;
	}
}
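
/*
 * Illustrative sketch (not compiled): m_adj() trims from the head with a
 * positive count and from the tail with a negative one.
 */
#if 0
	m_adj(m, ETHER_HDR_LEN);	/* strip the link-level header */
	m_adj(m, -ETHER_CRC_LEN);	/* drop the trailing FCS */
#endif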

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod will work
 * for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns NULL on failure.
 */
struct mbuf *
m_pullup(struct mbuf *m0, int len)
{
	struct mbuf *m;
	unsigned int adj;
	caddr_t head, tail;
	unsigned int space;

	/* if len is already contig in m0, then don't do any work */
	if (len <= m0->m_len)
		return (m0);

	/* look for some data */
	m = m0->m_next;
	if (m == NULL)
		goto freem0;

	head = M_DATABUF(m0);
	if (m0->m_len == 0) {
		while (m->m_len == 0) {
			m = m_free(m);
			if (m == NULL)
				goto freem0;
		}

		adj = mtod(m, unsigned long) & (sizeof(long) - 1);
	} else
		adj = mtod(m0, unsigned long) & (sizeof(long) - 1);

	tail = head + M_SIZE(m0);
	head += adj;

	if (!M_READONLY(m0) && len <= tail - head) {
		/* we can copy everything into the first mbuf */
		if (m0->m_len == 0) {
			m0->m_data = head;
		} else if (len > tail - mtod(m0, caddr_t)) {
			/* need to memmove to make space at the end */
			memmove(head, mtod(m0, caddr_t), m0->m_len);
			m0->m_data = head;
		}
		len -= m0->m_len;
		counters_inc(mbstat, MBSTAT_PULLUP_COPY);
	} else {
		/* the first mbuf is too small or read-only, make a new one */
		space = adj + len;

		if (space > MAXMCLBYTES)
			goto bad;

		m0->m_next = m;
		m = m0;

		counters_inc(mbstat, MBSTAT_PULLUP_ALLOC);
		MGET(m0, M_DONTWAIT, m->m_type);
		if (m0 == NULL)
			goto bad;

		if (space > MHLEN) {
			MCLGETL(m0, M_DONTWAIT, space);
			if ((m0->m_flags & M_EXT) == 0)
				goto bad;
		}

		if (m->m_flags & M_PKTHDR)
			M_MOVE_PKTHDR(m0, m);

		m0->m_len = 0;
		m0->m_data += adj;
	}

	KDASSERT(m_trailingspace(m0) >= len);

	for (;;) {
		space = min(len, m->m_len);
		memcpy(mtod(m0, caddr_t) + m0->m_len, mtod(m, caddr_t), space);
		len -= space;
		m0->m_len += space;
		m->m_len -= space;

		if (m->m_len > 0)
			m->m_data += space;
		else
			m = m_free(m);

		if (len == 0)
			break;

		if (m == NULL)
			goto bad;
	}

	m0->m_next = m;		/* link the chain back up */

	return (m0);

bad:
	m_freem(m);
freem0:
	m_free(m0);
	return (NULL);
}
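
/*
 * Illustrative sketch (not compiled): the classic protocol-input pattern,
 * making the IP header contiguous before dereferencing it.
 */
#if 0
	struct ip *ip;

	if (m->m_len < sizeof(*ip) &&
	    (m = m_pullup(m, sizeof(*ip))) == NULL)
		return;			/* m_pullup() freed the chain */
	ip = mtod(m, struct ip *);
#endif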

/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{
	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else {
					return (NULL);
				}
			} else {
				m = m->m_next;
			}
		}
	}

	return (NULL);
}

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, olen;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		if (m_dup_pkthdr(n, m0, wait)) {
			m_freem(n);
			return (NULL);
		}
		n->m_pkthdr.len -= len0;
		olen = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (remain == 0) {
			n->m_next = m->m_next;
			m->m_next = NULL;
			n->m_len = 0;
			return (n);
		}
		if ((m->m_flags & M_EXT) == 0 && remain > MHLEN) {
			/* m can't be the lead packet */
			m_align(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				m0->m_pkthdr.len = olen;
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		}
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == NULL)
			return (NULL);
	}
	if (m->m_flags & M_EXT) {
		n->m_ext = m->m_ext;
		MCLADDREFERENCE(m, n);
		n->m_data = m->m_data + len;
	} else {
		m_align(n, remain);
		memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
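
/*
 * Illustrative sketch (not compiled): splitting a packet after a
 * hypothetical "fraglen" bytes; on failure the original chain is kept.
 */
#if 0
	struct mbuf *tail;

	if ((tail = m_split(m, fraglen, M_DONTWAIT)) == NULL)
		return (ENOBUFS);	/* m is still intact */
	/* m holds the first fraglen bytes, tail the remainder */
#endif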

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0->m_flags & M_PKTHDR);
	/*
	 * Limit the size of the new header to MHLEN.  In case
	 * skip = 0 and the first buffer is not a cluster this
	 * is the maximum space available in that mbuf.
	 * In other words this code never prepends a mbuf.
	 */
	KASSERT(hlen < MHLEN);

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;	/* data to move */
	if (skip < remain && hlen <= m_leadingspace(m)) {
		if (skip)
			memmove(m->m_data-hlen, m->m_data, skip);
		m->m_data -= hlen;
		m->m_len += hlen;
		*off = skip;
	} else if (hlen > m_trailingspace(m)) {
		struct mbuf *n;

		if (remain > 0) {
			MGET(n, M_DONTWAIT, m->m_type);
			if (n && remain > MLEN) {
				MCLGETL(n, M_DONTWAIT, remain);
				if ((n->m_flags & M_EXT) == 0) {
					m_free(n);
					n = NULL;
				}
			}
			if (n == NULL)
				return (NULL);

			memcpy(n->m_data, mtod(m, char *) + skip, remain);
			n->m_len = remain;
			m->m_len -= remain;

			n->m_next = m->m_next;
			m->m_next = n;
		}

		if (hlen <= m_trailingspace(m)) {
			m->m_len += hlen;
			*off = skip;
		} else {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == NULL)
				return NULL;

			n->m_len = hlen;

			n->m_next = m->m_next;
			m->m_next = n;

			*off = 0;	/* header is at front ... */
			m = n;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		if (remain > 0)
			memmove(mtod(m, caddr_t) + skip + hlen,
			    mtod(m, caddr_t) + skip, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;	/* adjust packet length */
	return m;
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off)
{
	struct mbuf *m;
	struct mbuf *top, **mp;
	int len;

	top = NULL;
	mp = &top;

	if (off < 0 || off > MHLEN)
		return (NULL);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);

	m->m_pkthdr.len = totlen;

	len = MHLEN;

	while (totlen > 0) {
		if (top != NULL) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				/*
				 * As we might get called by pfkey, make sure
				 * we do not leak sensitive data.
				 */
				top->m_flags |= M_ZEROIZE;
				m_freem(top);
				return (NULL);
			}
			len = MLEN;
		}

		if (totlen + off >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				len = MCLBYTES;
		} else {
			/* Place initial small packet/header at end of mbuf. */
			if (top == NULL && totlen + off + max_linkhdr <= len) {
				m->m_data += max_linkhdr;
				len -= max_linkhdr;
			}
		}

		if (off) {
			m->m_data += off;
			len -= off;
			off = 0;
		}

		m->m_len = len = min(totlen, len);
		memcpy(mtod(m, void *), buf, (size_t)len);

		buf += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
	}
	return (top);
}

void
m_zero(struct mbuf *m)
{
	if (M_READONLY(m)) {
		mtx_enter(&m_extref_mtx);
		if ((m->m_flags & M_EXT) && MCLISREFERENCED(m)) {
			m->m_ext.ext_nextref->m_flags |= M_ZEROIZE;
			m->m_ext.ext_prevref->m_flags |= M_ZEROIZE;
		}
		mtx_leave(&m_extref_mtx);
		return;
	}

	explicit_bzero(M_DATABUF(m), M_SIZE(m));
}

/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from
 * the beginning, continuing for "len" bytes.
 */
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(caddr_t, caddr_t, unsigned int), caddr_t fstate)
{
	int rval;
	unsigned int count;

	if (len < 0)
		panic("m_apply: len %d < 0", len);
	if (off < 0)
		panic("m_apply: off %d < 0", off);
	while (off > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf in skip");
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_apply: null mbuf");
		count = min(m->m_len - off, len);

		rval = f(fstate, mtod(m, caddr_t) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
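
/*
 * Illustrative sketch (not compiled): using m_apply() to walk the data
 * without linearizing it.  "sum_cb" and "chain_bytes" are made-up names;
 * a nonzero return from the callback aborts the walk.
 */
#if 0
static int
sum_cb(caddr_t state, caddr_t data, unsigned int len)
{
	*(size_t *)state += len;	/* e.g. accumulate a byte count */
	return (0);
}

static size_t
chain_bytes(struct mbuf *m)
{
	size_t total = 0;

	m_apply(m, 0, m->m_pkthdr.len, sum_cb, (caddr_t)&total);
	return (total);
}
#endif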

/*
 * Compute the amount of space available before the current start of data
 * in an mbuf.  Read-only clusters never have space available.
 */
int
m_leadingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(m->m_data >= M_DATABUF(m));
	return m->m_data - M_DATABUF(m);
}

/*
 * Compute the amount of space available after the end of data in an mbuf.
 * Read-only clusters never have space available.
 */
int
m_trailingspace(struct mbuf *m)
{
	if (M_READONLY(m))
		return 0;
	KASSERT(M_DATABUF(m) + M_SIZE(m) >= (m->m_data + m->m_len));
	return M_DATABUF(m) + M_SIZE(m) - (m->m_data + m->m_len);
}

/*
 * Set the m_data pointer of a newly-allocated mbuf to place an object of
 * the specified size at the end of the mbuf, longword aligned.
 */
void
m_align(struct mbuf *m, int len)
{
	KASSERT(len >= 0 && !M_READONLY(m));
	KASSERT(m->m_data == M_DATABUF(m));	/* newly-allocated check */
	KASSERT(((len + sizeof(long) - 1) &~ (sizeof(long) - 1)) <= M_SIZE(m));

	m->m_data = M_DATABUF(m) + ((M_SIZE(m) - (len)) &~ (sizeof(long) - 1));
}

/*
 * Duplicate the mbuf pkthdr from "from" to "to".
 * "from" must have M_PKTHDR set, and "to" must be empty.
 */
int
m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int wait)
{
	int error;

	KASSERT(from->m_flags & M_PKTHDR);

	to->m_flags = (to->m_flags & (M_EXT | M_EXTWR));
	to->m_flags |= (from->m_flags & M_COPYFLAGS);
	to->m_pkthdr = from->m_pkthdr;

#if NPF > 0
	to->m_pkthdr.pf.statekey = NULL;
	pf_mbuf_link_state_key(to, from->m_pkthdr.pf.statekey);
	to->m_pkthdr.pf.inp = NULL;
	pf_mbuf_link_inpcb(to, from->m_pkthdr.pf.inp);
#endif	/* NPF > 0 */

	SLIST_INIT(&to->m_pkthdr.ph_tags);

	if ((error = m_tag_copy_chain(to, from, wait)) != 0)
		return (error);

	if ((to->m_flags & M_EXT) == 0)
		to->m_data = to->m_pktdat;

	return (0);
}

struct mbuf *
m_dup_pkt(struct mbuf *m0, unsigned int adj, int wait)
{
	struct mbuf *m;
	int len;

	KASSERT(m0->m_flags & M_PKTHDR);

	len = m0->m_pkthdr.len + adj;
	if (len > MAXMCLBYTES) /* XXX */
		return (NULL);

	m = m_get(wait, m0->m_type);
	if (m == NULL)
		return (NULL);

	if (m_dup_pkthdr(m, m0, wait) != 0)
		goto fail;

	if (len > MHLEN) {
		MCLGETL(m, wait, len);
		if (!ISSET(m->m_flags, M_EXT))
			goto fail;
	}

	m->m_len = m->m_pkthdr.len = len;
	m_adj(m, adj);
	m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, caddr_t));

	return (m);

fail:
	m_freem(m);
	return (NULL);
}

void
m_microtime(const struct mbuf *m, struct timeval *tv)
{
	if (ISSET(m->m_pkthdr.csum_flags, M_TIMESTAMP)) {
		struct timeval btv, utv;

		NSEC_TO_TIMEVAL(m->m_pkthdr.ph_timestamp, &utv);
		microboottime(&btv);
		timeradd(&btv, &utv, tv);
	} else
		microtime(tv);
}

void *
m_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	if (atomic_add_long_nv(&mbuf_mem_alloc, pp->pr_pgsize) >
	    atomic_load_long(&mbuf_mem_limit))
		goto fail;

	v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
	if (v != NULL)
		return (v);

fail:
	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
	return (NULL);
}

void
m_pool_free(struct pool *pp, void *v)
{
	(*pool_allocator_multi.pa_free)(pp, v);

	atomic_sub_long(&mbuf_mem_alloc, pp->pr_pgsize);
}

void
m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
{
	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
	pool_set_constraints(pp, &kp_dma_contig);
}

u_int
m_pool_used(void)
{
	return ((atomic_load_long(&mbuf_mem_alloc) * 100) /
	    atomic_load_long(&mbuf_mem_limit));
}

#ifdef DDB
void
m_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m = v;

	(*pr)("mbuf %p\n", m);
	(*pr)("m_type: %i\tm_flags: %b\n", m->m_type, m->m_flags, M_BITS);
	(*pr)("m_next: %p\tm_nextpkt: %p\n", m->m_next, m->m_nextpkt);
	(*pr)("m_data: %p\tm_len: %u\n", m->m_data, m->m_len);
	(*pr)("m_dat: %p\tm_pktdat: %p\n", m->m_dat, m->m_pktdat);
	if (m->m_flags & M_PKTHDR) {
		(*pr)("m_pkthdr.ph_ifidx: %u\tm_pkthdr.len: %i\n",
		    m->m_pkthdr.ph_ifidx, m->m_pkthdr.len);
		(*pr)("m_pkthdr.ph_tags: %p\tm_pkthdr.ph_tagsset: %b\n",
		    SLIST_FIRST(&m->m_pkthdr.ph_tags),
		    m->m_pkthdr.ph_tagsset, MTAG_BITS);
		(*pr)("m_pkthdr.ph_flowid: %u\tm_pkthdr.ph_loopcnt: %u\n",
		    m->m_pkthdr.ph_flowid, m->m_pkthdr.ph_loopcnt);
		(*pr)("m_pkthdr.csum_flags: %b\n",
		    m->m_pkthdr.csum_flags, MCS_BITS);
		(*pr)("m_pkthdr.ether_vtag: %u\tm_pkthdr.ph_rtableid: %u\n",
		    m->m_pkthdr.ether_vtag, m->m_pkthdr.ph_rtableid);
		(*pr)("m_pkthdr.pf.statekey: %p\tm_pkthdr.pf.inp %p\n",
		    m->m_pkthdr.pf.statekey, m->m_pkthdr.pf.inp);
		(*pr)("m_pkthdr.pf.qid: %u\tm_pkthdr.pf.tag: %u\n",
		    m->m_pkthdr.pf.qid, m->m_pkthdr.pf.tag);
		(*pr)("m_pkthdr.pf.flags: %b\n",
		    m->m_pkthdr.pf.flags, MPF_BITS);
		(*pr)("m_pkthdr.pf.routed: %u\tm_pkthdr.pf.prio: %u\n",
		    m->m_pkthdr.pf.routed, m->m_pkthdr.pf.prio);
	}
	if (m->m_flags & M_EXT) {
		(*pr)("m_ext.ext_buf: %p\tm_ext.ext_size: %u\n",
		    m->m_ext.ext_buf, m->m_ext.ext_size);
		(*pr)("m_ext.ext_free_fn: %u\tm_ext.ext_arg: %p\n",
		    m->m_ext.ext_free_fn, m->m_ext.ext_arg);
		(*pr)("m_ext.ext_nextref: %p\tm_ext.ext_prevref: %p\n",
		    m->m_ext.ext_nextref, m->m_ext.ext_prevref);
	}
}

const char *m_types[MT_NTYPES] = {
	"fre",
	"dat",
	"hdr",
	"nam",
	"opt",
	"ftb",
	"ctl",
	"oob",
};

void
m_print_chain(void *v, int deep,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m;
	const char *indent = deep ? "++-" : "-+-";
	size_t chain = 0, len = 0, size = 0;

	for (m = v; m != NULL; m = m->m_next) {
		const char *type;

		chain++;
		len += m->m_len;
		size += M_SIZE(m);
		type = (m->m_type >= 0 && m->m_type < MT_NTYPES) ?
		    m_types[m->m_type] : "???";
		(*pr)("%s mbuf %p, %s, off %zd, len %u", indent, m, type,
		    m->m_data - M_DATABUF(m), m->m_len);
		if (m->m_flags & M_PKTHDR)
			(*pr)(", pktlen %d", m->m_pkthdr.len);
		if (m->m_flags & M_EXT)
			(*pr)(", clsize %u", m->m_ext.ext_size);
		else
			(*pr)(", size %zu",
			    m->m_flags & M_PKTHDR ? MHLEN : MLEN);
		(*pr)("\n");
		indent = deep ? "|+-" : " +-";
	}
	indent = deep ? "|\\-" : " \\-";
	if (v != NULL) {
		(*pr)("%s total chain %zu, len %zu, size %zu\n",
		    indent, chain, len, size);
	}
}

void
m_print_packet(void *v, int deep,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct mbuf *m, *n;
	const char *indent = "+--";
	size_t pkts = 0;

	for (m = v; m != NULL; m = m->m_nextpkt) {
		size_t chain = 0, len = 0, size = 0;

		pkts++;
		if (deep) {
			m_print_chain(m, deep, pr);
			continue;
		}
		for (n = m; n != NULL; n = n->m_next) {
			chain++;
			len += n->m_len;
			size += M_SIZE(n);
		}
		(*pr)("%s mbuf %p, chain %zu", indent, m, chain);
		if (m->m_flags & M_PKTHDR)
			(*pr)(", pktlen %d", m->m_pkthdr.len);
		(*pr)(", len %zu, size %zu\n", len, size);
	}
	indent = "\\--";
	if (v != NULL)
		(*pr)("%s total packets %zu\n", indent, pkts);
}
#endif

/*
 * mbuf lists
 */

void
ml_init(struct mbuf_list *ml)
{
	ml->ml_head = ml->ml_tail = NULL;
	ml->ml_len = 0;
}

void
ml_enqueue(struct mbuf_list *ml, struct mbuf *m)
{
	if (ml->ml_tail == NULL)
		ml->ml_head = ml->ml_tail = m;
	else {
		ml->ml_tail->m_nextpkt = m;
		ml->ml_tail = m;
	}

	m->m_nextpkt = NULL;
	ml->ml_len++;
}

void
ml_enlist(struct mbuf_list *mla, struct mbuf_list *mlb)
{
	if (!ml_empty(mlb)) {
		if (ml_empty(mla))
			mla->ml_head = mlb->ml_head;
		else
			mla->ml_tail->m_nextpkt = mlb->ml_head;
		mla->ml_tail = mlb->ml_tail;
		mla->ml_len += mlb->ml_len;

		ml_init(mlb);
	}
}

struct mbuf *
ml_dequeue(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m != NULL) {
		ml->ml_head = m->m_nextpkt;
		if (ml->ml_head == NULL)
			ml->ml_tail = NULL;

		m->m_nextpkt = NULL;
		ml->ml_len--;
	}

	return (m);
}

struct mbuf *
ml_dechain(struct mbuf_list *ml)
{
	struct mbuf *m0;

	m0 = ml->ml_head;

	ml_init(ml);

	return (m0);
}

unsigned int
ml_purge(struct mbuf_list *ml)
{
	struct mbuf *m, *n;
	unsigned int len;

	for (m = ml->ml_head; m != NULL; m = n) {
		n = m->m_nextpkt;
		m_freem(m);
	}

	len = ml->ml_len;
	ml_init(ml);

	return (len);
}

unsigned int
ml_hdatalen(struct mbuf_list *ml)
{
	struct mbuf *m;

	m = ml->ml_head;
	if (m == NULL)
		return (0);

	KASSERT(ISSET(m->m_flags, M_PKTHDR));
	return (m->m_pkthdr.len);
}
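
/*
 * Illustrative sketch (not compiled): mbuf_lists are plain, unlocked
 * FIFOs, typically built on the stack and drained with ml_dequeue().
 * "m0" and "m1" stand in for packets the caller already owns.
 */
#if 0
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;

	ml_enqueue(&ml, m0);
	ml_enqueue(&ml, m1);
	while ((m = ml_dequeue(&ml)) != NULL)
		m_freem(m);		/* consume each packet */
#endif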

/*
 * mbuf queues
 */

void
mq_init(struct mbuf_queue *mq, u_int maxlen, int ipl)
{
	mtx_init(&mq->mq_mtx, ipl);
	ml_init(&mq->mq_list);
	mq->mq_maxlen = maxlen;
}

int
mq_push(struct mbuf_queue *mq, struct mbuf *m)
{
	struct mbuf *dropped = NULL;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) >= mq->mq_maxlen) {
		mq->mq_drops++;
		dropped = ml_dequeue(&mq->mq_list);
	}
	ml_enqueue(&mq->mq_list, m);
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(dropped);

	return (dropped != NULL);
}

int
mq_enqueue(struct mbuf_queue *mq, struct mbuf *m)
{
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enqueue(&mq->mq_list, m);
	else {
		mq->mq_drops++;
		dropped = 1;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped)
		m_freem(m);

	return (dropped);
}

struct mbuf *
mq_dequeue(struct mbuf_queue *mq)
{
	struct mbuf *m;

	mtx_enter(&mq->mq_mtx);
	m = ml_dequeue(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m);
}

int
mq_enlist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	struct mbuf *m;
	int dropped = 0;

	mtx_enter(&mq->mq_mtx);
	if (mq_len(mq) < mq->mq_maxlen)
		ml_enlist(&mq->mq_list, ml);
	else {
		dropped = ml_len(ml);
		mq->mq_drops += dropped;
	}
	mtx_leave(&mq->mq_mtx);

	if (dropped) {
		while ((m = ml_dequeue(ml)) != NULL)
			m_freem(m);
	}

	return (dropped);
}

void
mq_delist(struct mbuf_queue *mq, struct mbuf_list *ml)
{
	mtx_enter(&mq->mq_mtx);
	*ml = mq->mq_list;
	ml_init(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);
}

struct mbuf *
mq_dechain(struct mbuf_queue *mq)
{
	struct mbuf *m0;

	mtx_enter(&mq->mq_mtx);
	m0 = ml_dechain(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (m0);
}

unsigned int
mq_purge(struct mbuf_queue *mq)
{
	struct mbuf_list ml;

	mq_delist(mq, &ml);

	return (ml_purge(&ml));
}

unsigned int
mq_hdatalen(struct mbuf_queue *mq)
{
	unsigned int hdatalen;

	mtx_enter(&mq->mq_mtx);
	hdatalen = ml_hdatalen(&mq->mq_list);
	mtx_leave(&mq->mq_mtx);

	return (hdatalen);
}
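
/*
 * Illustrative sketch (not compiled): an mbuf_queue adds a mutex and a
 * drop counter on top of an mbuf_list, so it can be filled from interrupt
 * context and drained elsewhere.  Note that mq_enqueue() frees the mbuf
 * itself when the queue is full.
 */
#if 0
	struct mbuf_queue mq;
	struct mbuf_list ml;
	struct mbuf *m;

	mq_init(&mq, 256, IPL_NET);

	/* producer: on overflow the packet is freed and counted */
	(void)mq_enqueue(&mq, m);

	/* consumer: take everything with one mutex acquisition */
	mq_delist(&mq, &ml);
	while ((m = ml_dequeue(&ml)) != NULL)
		m_freem(m);
#endif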

void
mq_set_maxlen(struct mbuf_queue *mq, u_int maxlen)
{
	mtx_enter(&mq->mq_mtx);
	mq->mq_maxlen = maxlen;
	mtx_leave(&mq->mq_mtx);
}

int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	unsigned int maxlen;
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		maxlen = mq->mq_maxlen;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &maxlen);
		if (error == 0)
			mq_set_maxlen(mq, maxlen);
		return (error);
	case IFQCTL_DROPS:
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}