1 /* $NetBSD: mbuf.h,v 1.238 2023/04/12 06:48:08 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)mbuf.h 8.5 (Berkeley) 2/19/95
62 */
63
64 #ifndef _SYS_MBUF_H_
65 #define _SYS_MBUF_H_
66
67 #ifdef _KERNEL_OPT
68 #include "opt_mbuftrace.h"
69 #endif
70
71 #ifndef M_WAITOK
72 #include <sys/malloc.h>
73 #endif
74 #include <sys/pool.h>
75 #include <sys/queue.h>
76 #if defined(_KERNEL)
77 #include <sys/percpu_types.h>
78 #include <sys/socket.h> /* for AF_UNSPEC */
79 #include <sys/psref.h>
80 #endif /* defined(_KERNEL) */
81
82 /* For offsetof() */
83 #if defined(_KERNEL) || defined(_STANDALONE)
84 #include <sys/systm.h>
85 #else
86 #include <stddef.h>
87 #endif
88
89 #include <uvm/uvm_param.h> /* for MIN_PAGE_SIZE */
90
91 #include <net/if.h>
92
93 /*
94 * Mbufs are of a single size, MSIZE (machine/param.h), which
95 * includes overhead. An mbuf may add a single "mbuf cluster" of size
96 * MCLBYTES (also in machine/param.h), which has no additional overhead
97 * and is used instead of the internal data area; this is done when
98 * at least MINCLSIZE of data must be stored.
99 */
100
/*
 * Packet tags structure.  The tag payload (m_tag_len bytes) is stored
 * immediately after this header in the same allocation.
 */
struct m_tag {
	SLIST_ENTRY(m_tag) m_tag_link;	/* List of packet tags */
	uint16_t m_tag_id;		/* Tag ID (PACKET_TAG_* below) */
	uint16_t m_tag_len;		/* Length of data */
};
107
/* mbuf ownership structure (used by the MBUFTRACE facility) */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* linkage on the global owner list */
	struct percpu *mo_counters;	/* per-CPU claim/release counters */
};

/* Static initializer: name x, description y; other fields zeroed. */
#define MOWNER_INIT(x, y)	{ .mo_name = x, .mo_descr = y }
117
/* Indices into a mowner's per-CPU counter array. */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS,	/* number of counters, keep last */
};
128
#if defined(_KERNEL)
/* One per-CPU block of mbuf-ownership counters (see mo_counters). */
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif
134
/* userland-exported version of struct mowner (counters pre-summed) */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};
142
143 /*
144 * Macros for type conversion
145 * mtod(m,t) - convert mbuf pointer to data pointer of correct type
146 */
147 #define mtod(m, t) ((t)((m)->m_data))
148
/*
 * header at beginning of each mbuf; fields are normally accessed via
 * the m_* convenience macros defined below (m_next, m_len, ...).
 */
struct m_hdr {
	struct mbuf *mh_next;		/* next buffer in chain */
	struct mbuf *mh_nextpkt;	/* next chain in queue/record */
	char *mh_data;			/* location of data */
	struct mowner *mh_owner;	/* mbuf owner */
	int mh_len;			/* amount of data in this mbuf */
	int mh_flags;			/* flags; see below */
	paddr_t mh_paddr;		/* physical address of mbuf */
	short mh_type;			/* type of data in this mbuf */
};
160
161 /*
162 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163 *
164 * A note about csum_data:
165 *
166 * o For the out-bound direction, the low 16 bits indicates the offset after
167 * the L4 header where the final L4 checksum value is to be stored and the
168 * high 16 bits is the length of the L3 header (the start of the data to
169 * be checksummed).
170 *
171 * o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
172 * set. In this case, an L4 checksum has been calculated by hardware and
173 * is stored in csum_data, but it is up to software to perform final
174 * verification.
175 *
176 * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
177 * be bit-wise inverted (the final step in the calculation of an IP
178 * checksum) -- this is so we can accumulate the checksum for fragmented
179 * packets during reassembly.
180 *
181 * Size ILP32: 40
182 * LP64: 56
183 */
184 struct pkthdr {
185 union {
186 void *ctx; /* for M_GETCTX/M_SETCTX */
187 if_index_t index; /* rcv interface index */
188 } _rcvif;
189 #define rcvif_index _rcvif.index
190 SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
191 int len; /* total packet length */
192 int csum_flags; /* checksum flags */
193 uint32_t csum_data; /* checksum data */
194 u_int segsz; /* segment size */
195 uint16_t ether_vtag; /* ethernet 802.1p+q vlan tag */
196 uint16_t pkthdr_flags; /* flags for pkthdr, see blow */
197 #define PKTHDR_FLAG_IPSEC_SKIP_PFIL 0x0001 /* skip pfil_run_hooks() after ipsec decrypt */
198
199 /*
200 * Following three fields are open-coded struct altq_pktattr
201 * to rearrange struct pkthdr fields flexibly.
202 */
203 int pattr_af; /* ALTQ: address family */
204 void *pattr_class; /* ALTQ: sched class set by classifier */
205 void *pattr_hdr; /* ALTQ: saved header position in mbuf */
206 };
207
/* Checksumming flags (csum_flags). */
#define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define M_CSUM_DATA		0x00000008	/* consult csum_data */
#define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define M_CSUM_BLANK		0x40000000	/* csum is missing */
#define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */

/*
 * Bitmask-format string for csum_flags.  Bit positions are 1-based and
 * written in octal: M_CSUM_BLANK (0x40000000) is bit 31 = \37 and
 * M_CSUM_NO_PSEUDOHDR (0x80000000) is bit 32 = \40.  ("\39" would not
 * be a valid octal escape -- it parses as '\3' followed by '9'.)
 */
#define M_CSUM_BITS \
    "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
    "\11TSOv4\12TSOv6\37BLANK\40NO_PSEUDOHDR"
230
231 /*
232 * Macros for manipulating csum_data on outgoing packets. These are
233 * used to pass information down from the L4/L3 to the L2.
234 *
235 * _IPHL: Length of the IPv{4/6} header, plus the options; in other
236 * words the offset of the UDP/TCP header in the packet.
237 * _OFFSET: Offset of the checksum field in the UDP/TCP header.
238 */
239 #define M_CSUM_DATA_IPv4_IPHL(x) ((x) >> 16)
240 #define M_CSUM_DATA_IPv4_OFFSET(x) ((x) & 0xffff)
241 #define M_CSUM_DATA_IPv6_IPHL(x) ((x) >> 16)
242 #define M_CSUM_DATA_IPv6_OFFSET(x) ((x) & 0xffff)
243 #define M_CSUM_DATA_IPv6_SET(x, v) (x) = ((x) & 0xffff) | ((v) << 16)
244
245 /*
246 * Max # of pages we can attach to m_ext. This is carefully chosen
247 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
248 */
249 #ifdef MIN_PAGE_SIZE
250 #define M_EXT_MAXPAGES ((65536 / MIN_PAGE_SIZE) + 1)
251 #endif
252
253 /*
254 * Description of external storage mapped into mbuf, valid if M_EXT set.
255 */
256 struct _m_ext_storage {
257 unsigned int ext_refcnt;
258 char *ext_buf; /* start of buffer */
259 void (*ext_free) /* free routine if not the usual */
260 (struct mbuf *, void *, size_t, void *);
261 void *ext_arg; /* argument for ext_free */
262 size_t ext_size; /* size of buffer, for ext_free */
263
264 union {
265 /* M_EXT_CLUSTER: physical address */
266 paddr_t extun_paddr;
267 #ifdef M_EXT_MAXPAGES
268 /* M_EXT_PAGES: pages */
269 struct vm_page *extun_pgs[M_EXT_MAXPAGES];
270 #endif
271 } ext_un;
272 #define ext_paddr ext_un.extun_paddr
273 #define ext_pgs ext_un.extun_pgs
274 };
275
276 struct _m_ext {
277 struct mbuf *ext_ref;
278 struct _m_ext_storage ext_storage;
279 };
280
281 #define M_PADDR_INVALID POOL_PADDR_INVALID
282
283 /*
284 * Definition of "struct mbuf".
285 * Don't change this without understanding how MHLEN/MLEN are defined.
286 */
287 #define MBUF_DEFINE(name, mhlen, mlen) \
288 struct name { \
289 struct m_hdr m_hdr; \
290 union { \
291 struct { \
292 struct pkthdr MH_pkthdr; \
293 union { \
294 struct _m_ext MH_ext; \
295 char MH_databuf[(mhlen)]; \
296 } MH_dat; \
297 } MH; \
298 char M_databuf[(mlen)]; \
299 } M_dat; \
300 }
301 #define m_next m_hdr.mh_next
302 #define m_len m_hdr.mh_len
303 #define m_data m_hdr.mh_data
304 #define m_owner m_hdr.mh_owner
305 #define m_type m_hdr.mh_type
306 #define m_flags m_hdr.mh_flags
307 #define m_nextpkt m_hdr.mh_nextpkt
308 #define m_paddr m_hdr.mh_paddr
309 #define m_pkthdr M_dat.MH.MH_pkthdr
310 #define m_ext_storage M_dat.MH.MH_dat.MH_ext.ext_storage
311 #define m_ext_ref M_dat.MH.MH_dat.MH_ext.ext_ref
312 #define m_ext m_ext_ref->m_ext_storage
313 #define m_pktdat M_dat.MH.MH_dat.MH_databuf
314 #define m_dat M_dat.M_databuf
315
316 /*
317 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
318 * into account inter-structure padding.
319 */
320 MBUF_DEFINE(_mbuf_dummy, 1, 1);
321
322 /* normal data len */
323 #define MLEN ((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
324 /* data len w/pkthdr */
325 #define MHLEN ((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))
326
327 #define MINCLSIZE (MHLEN+MLEN+1) /* smallest amount to put in cluster */
328
329 /*
330 * The *real* struct mbuf
331 */
332 MBUF_DEFINE(mbuf, MHLEN, MLEN);
333
/* mbuf flags */
#define M_EXT		0x00000001	/* has associated external storage */
#define M_PKTHDR	0x00000002	/* start of record */
#define M_EOR		0x00000004	/* end of record */
#define M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
#define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
#define M_LOOP		0x00000040	/* received on loopback */
/* note: 0x00000080 is currently unused (printed as NONE below) */
#define M_BCAST		0x00000100	/* send/received as L2 broadcast */
#define M_MCAST		0x00000200	/* send/received as L2 multicast */
#define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
#define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define M_LINK0		0x00001000	/* link layer specific flag */
#define M_LINK1		0x00002000	/* link layer specific flag */
#define M_LINK2		0x00004000	/* link layer specific flag */
#define M_LINK3		0x00008000	/* link layer specific flag */
#define M_LINK4		0x00010000	/* link layer specific flag */
#define M_LINK5		0x00020000	/* link layer specific flag */
#define M_LINK6		0x00040000	/* link layer specific flag */
#define M_LINK7		0x00080000	/* link layer specific flag */

#define M_VLANTAG	0x00100000	/* ether_vtag is valid */

/* additional flags for M_EXT mbufs */
#define M_EXT_FLAGS	0xff000000
#define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define M_NOTIFICATION	M_PROTO1

/* Bitmask-format string for m_flags (1-based octal bit positions). */
#define M_FLAGS_BITS \
    "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
    "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
    "\21LINK4\22LINK5\23LINK6\24LINK7" \
    "\25VLANTAG" \
    "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
    M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
    M_VLANTAG)

/* flag copied when shallow-copying external storage */
#define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)
384
/* mbuf types (m_type); also index the mbuftypes[] name table below */
#define MT_FREE		0	/* should be on free list */
#define MT_DATA		1	/* dynamic (data) allocation */
#define MT_HEADER	2	/* packet header */
#define MT_SONAME	3	/* socket name */
#define MT_SOOPTS	4	/* socket options */
#define MT_FTABLE	5	/* fragment reassembly header */
#define MT_CONTROL	6	/* extra-data protocol message */
#define MT_OOBDATA	7	/* expedited data */

/*
 * Human-readable names for the MT_* constants, in the same order.
 * Exactly one translation unit defines MBUFTYPES to emit the table.
 */
#ifdef MBUFTYPES
const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif

/* flags to m_get/MGET */
#define M_DONTWAIT	M_NOWAIT
#define M_WAIT		M_WAITOK
413
#ifdef MBUFTRACE
/* Mbuf allocation tracing. */
void mowner_init_owner(struct mowner *, const char *, const char *);
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
/* Without MBUFTRACE all tracing hooks compile away to nothing. */
#define mowner_init_owner(mo, n, d)	__nothing
#define mowner_init(m, type)		__nothing
#define mowner_ref(m, flags)		__nothing
#define mowner_revoke(m, all, flags)	__nothing
#define m_claim(m, mowner)		__nothing
#define mowner_attach(mo)		__nothing
#define mowner_detach(mo)		__nothing
#define m_claimm(m, mo)			__nothing
#endif

/* Historical upper-case wrappers for the tracing hooks. */
#define MCLAIM(m, mo)		m_claim((m), (mo))
#define MOWNER_ATTACH(mo)	mowner_attach(mo)
#define MOWNER_DETACH(mo)	mowner_detach(mo)
438
439 /*
440 * mbuf allocation/deallocation macros:
441 *
442 * MGET(struct mbuf *m, int how, int type)
443 * allocates an mbuf and initializes it to contain internal data.
444 *
445 * MGETHDR(struct mbuf *m, int how, int type)
446 * allocates an mbuf and initializes it to contain a packet header
447 * and internal data.
448 *
449 * If 'how' is M_WAIT, these macros (and the corresponding functions)
450 * are guaranteed to return successfully.
451 */
452 #define MGET(m, how, type) m = m_get((how), (type))
453 #define MGETHDR(m, how, type) m = m_gethdr((how), (type))
454
455 #if defined(_KERNEL)
456
/* Make m the sole owner of its (about-to-be-attached) external storage. */
#define MCLINITREFERENCE(m)						\
do {									\
	KASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
} while (/* CONSTCOND */ 0)
463
464 /*
465 * Macros for mbuf external storage.
466 *
467 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
468 * the flag M_EXT is set upon success.
469 *
470 * MEXTMALLOC allocates external storage and adds it to
471 * a normal mbuf; the flag M_EXT is set upon success.
472 *
473 * MEXTADD adds pre-allocated external storage to
474 * a normal mbuf; the flag M_EXT is set upon success.
475 */
476
477 #define MCLGET(m, how) m_clget((m), (how))
478
479 #define MEXTMALLOC(m, size, how) \
480 do { \
481 (m)->m_ext_storage.ext_buf = malloc((size), 0, (how)); \
482 if ((m)->m_ext_storage.ext_buf != NULL) { \
483 MCLINITREFERENCE(m); \
484 (m)->m_data = (m)->m_ext.ext_buf; \
485 (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
486 M_EXT|M_EXT_RW; \
487 (m)->m_ext.ext_size = (size); \
488 (m)->m_ext.ext_free = NULL; \
489 (m)->m_ext.ext_arg = NULL; \
490 mowner_ref((m), M_EXT); \
491 } \
492 } while (/* CONSTCOND */ 0)
493
494 #define MEXTADD(m, buf, size, type, free, arg) \
495 do { \
496 MCLINITREFERENCE(m); \
497 (m)->m_data = (m)->m_ext.ext_buf = (char *)(buf); \
498 (m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT; \
499 (m)->m_ext.ext_size = (size); \
500 (m)->m_ext.ext_free = (free); \
501 (m)->m_ext.ext_arg = (arg); \
502 mowner_ref((m), M_EXT); \
503 } while (/* CONSTCOND */ 0)
504
/* Start address of m's data buffer: ext storage, pkthdr area, or plain. */
#define M_BUFADDR(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)

/* Total capacity of m's data buffer, matching M_BUFADDR above. */
#define M_BUFSIZE(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)

/* Rewind m_data to the start of the buffer (discards any leading gap). */
#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)

/*
 * Compute the offset of the beginning of the data buffer of a non-ext
 * mbuf.
 */
#define M_BUFOFFSET(m)							\
	(((m)->m_flags & M_PKTHDR) ?					\
	 offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))
522
523 /*
524 * Determine if an mbuf's data area is read-only. This is true
525 * if external storage is read-only mapped, or not marked as R/W,
526 * or referenced by more than one mbuf.
527 */
528 #define M_READONLY(m) \
529 (((m)->m_flags & M_EXT) != 0 && \
530 (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW || \
531 (m)->m_ext.ext_refcnt > 1))
532
533 #define M_UNWRITABLE(__m, __len) \
534 ((__m)->m_len < (__len) || M_READONLY((__m)))
535
536 /*
537 * Determine if an mbuf's data area is read-only at the MMU.
538 */
539 #define M_ROMAP(m) \
540 (((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
541
542 /*
543 * Compute the amount of space available before the current start of
544 * data in an mbuf.
545 */
546 #define M_LEADINGSPACE(m) \
547 (M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))
548
549 /*
550 * Compute the amount of space available
551 * after the end of data in an mbuf.
552 */
553 #define _M_TRAILINGSPACE(m) \
554 ((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
555 ((m)->m_data + (m)->m_len) : \
556 &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
557
558 #define M_TRAILINGSPACE(m) \
559 (M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
560
561 /*
562 * Arrange to prepend space of size plen to mbuf m.
563 * If a new mbuf must be allocated, how specifies whether to wait.
564 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
565 * is freed and m is set to NULL.
566 */
567 #define M_PREPEND(m, plen, how) \
568 do { \
569 if (M_LEADINGSPACE(m) >= (plen)) { \
570 (m)->m_data -= (plen); \
571 (m)->m_len += (plen); \
572 } else \
573 (m) = m_prepend((m), (plen), (how)); \
574 if ((m) && (m)->m_flags & M_PKTHDR) \
575 (m)->m_pkthdr.len += (plen); \
576 } while (/* CONSTCOND */ 0)
577
578 /* change mbuf to new type */
579 #define MCHTYPE(m, t) \
580 do { \
581 KASSERT((t) != MT_FREE); \
582 mbstat_type_add((m)->m_type, -1); \
583 mbstat_type_add(t, 1); \
584 (m)->m_type = t; \
585 } while (/* CONSTCOND */ 0)
586
587 #ifdef DIAGNOSTIC
588 #define M_VERIFY_PACKET(m) m_verify_packet(m)
589 #else
590 #define M_VERIFY_PACKET(m) __nothing
591 #endif
592
593 /* The "copy all" special length. */
594 #define M_COPYALL -1
595
596 /*
597 * Allow drivers and/or protocols to store private context information.
598 */
599 #define M_GETCTX(m, t) ((t)(m)->m_pkthdr._rcvif.ctx)
600 #define M_SETCTX(m, c) ((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
601 #define M_CLEARCTX(m) M_SETCTX((m), NULL)
602
603 /*
604 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
605 * from "off" within "m" is located in a single mbuf, contiguously.
606 *
607 * The pointer to the region will be returned to pointer variable "val".
608 */
609 #define M_REGION_GET(val, typ, m, off, len) \
610 do { \
611 struct mbuf *_t; \
612 int _tmp; \
613 if ((m)->m_len >= (off) + (len)) \
614 (val) = (typ)(mtod((m), char *) + (off)); \
615 else { \
616 _t = m_pulldown((m), (off), (len), &_tmp); \
617 if (_t) { \
618 if (_t->m_len < _tmp + (len)) \
619 panic("m_pulldown malfunction"); \
620 (val) = (typ)(mtod(_t, char *) + _tmp); \
621 } else { \
622 (val) = (typ)NULL; \
623 (m) = NULL; \
624 } \
625 } \
626 } while (/*CONSTCOND*/ 0)
627
628 #endif /* defined(_KERNEL) */
629
630 /*
631 * Simple mbuf queueing system
632 *
633 * this is basically a SIMPLEQ adapted to mbuf use (ie using
634 * m_nextpkt instead of field.sqe_next).
635 *
636 * m_next is ignored, so queueing chains of mbufs is possible
637 */
638 #define MBUFQ_HEAD(name) \
639 struct name { \
640 struct mbuf *mq_first; \
641 struct mbuf **mq_last; \
642 }
643
644 #define MBUFQ_INIT(q) do { \
645 (q)->mq_first = NULL; \
646 (q)->mq_last = &(q)->mq_first; \
647 } while (/*CONSTCOND*/0)
648
649 #define MBUFQ_ENQUEUE(q, m) do { \
650 (m)->m_nextpkt = NULL; \
651 *(q)->mq_last = (m); \
652 (q)->mq_last = &(m)->m_nextpkt; \
653 } while (/*CONSTCOND*/0)
654
655 #define MBUFQ_PREPEND(q, m) do { \
656 if (((m)->m_nextpkt = (q)->mq_first) == NULL) \
657 (q)->mq_last = &(m)->m_nextpkt; \
658 (q)->mq_first = (m); \
659 } while (/*CONSTCOND*/0)
660
661 #define MBUFQ_DEQUEUE(q, m) do { \
662 if (((m) = (q)->mq_first) != NULL) { \
663 if (((q)->mq_first = (m)->m_nextpkt) == NULL) \
664 (q)->mq_last = &(q)->mq_first; \
665 else \
666 (m)->m_nextpkt = NULL; \
667 } \
668 } while (/*CONSTCOND*/0)
669
670 #define MBUFQ_DRAIN(q) do { \
671 struct mbuf *__m0; \
672 while ((__m0 = (q)->mq_first) != NULL) { \
673 (q)->mq_first = __m0->m_nextpkt; \
674 m_freem(__m0); \
675 } \
676 (q)->mq_last = &(q)->mq_first; \
677 } while (/*CONSTCOND*/0)
678
679 #define MBUFQ_FIRST(q) ((q)->mq_first)
680 #define MBUFQ_NEXT(m) ((m)->m_nextpkt)
681 #define MBUFQ_LAST(q) (*(q)->mq_last)
682
683 /*
684 * Mbuf statistics.
685 * For statistics related to mbuf and cluster allocations, see also the
686 * pool headers (mb_cache and mcl_cache).
687 */
688 struct mbstat {
689 u_long _m_spare; /* formerly m_mbufs */
690 u_long _m_spare1; /* formerly m_clusters */
691 u_long _m_spare2; /* spare field */
692 u_long _m_spare3; /* formely m_clfree - free clusters */
693 u_long m_drops; /* times failed to find space */
694 u_long m_wait; /* times waited for space */
695 u_long m_drain; /* times drained protocols for space */
696 u_short m_mtypes[256]; /* type specific mbuf allocations */
697 };
698
699 struct mbstat_cpu {
700 u_int m_mtypes[256]; /* type specific mbuf allocations */
701 };
702
703 /*
704 * Mbuf sysctl variables.
705 */
706 #define MBUF_MSIZE 1 /* int: mbuf base size */
707 #define MBUF_MCLBYTES 2 /* int: mbuf cluster size */
708 #define MBUF_NMBCLUSTERS 3 /* int: limit on the # of clusters */
709 #define MBUF_MBLOWAT 4 /* int: mbuf low water mark */
710 #define MBUF_MCLLOWAT 5 /* int: mbuf cluster low water mark */
711 #define MBUF_STATS 6 /* struct: mbstat */
712 #define MBUF_MOWNERS 7 /* struct: m_owner[] */
713 #define MBUF_NMBCLUSTERS_LIMIT 8 /* int: limit of nmbclusters */
714
715 #ifdef _KERNEL
716 extern struct mbstat mbstat;
717 extern int nmbclusters; /* limit on the # of clusters */
718 extern int mblowat; /* mbuf low water mark */
719 extern int mcllowat; /* mbuf cluster low water mark */
720 extern int max_linkhdr; /* largest link-level header */
721 extern int max_protohdr; /* largest protocol header */
722 extern int max_hdr; /* largest link+protocol header */
723 extern int max_datalen; /* MHLEN - max_hdr */
724 extern const int msize; /* mbuf base size */
725 extern const int mclbytes; /* mbuf cluster size */
726 extern pool_cache_t mb_cache;
727 #ifdef MBUFTRACE
728 LIST_HEAD(mownerhead, mowner);
729 extern struct mownerhead mowners;
730 extern struct mowner unknown_mowners[];
731 extern struct mowner revoked_mowner;
732 #endif
733
734 MALLOC_DECLARE(M_MBUF);
735 MALLOC_DECLARE(M_SONAME);
736
737 struct mbuf *m_copym(struct mbuf *, int, int, int);
738 struct mbuf *m_copypacket(struct mbuf *, int);
739 struct mbuf *m_devget(char *, int, int, struct ifnet *);
740 struct mbuf *m_dup(struct mbuf *, int, int, int);
741 struct mbuf *m_get(int, int);
742 struct mbuf *m_gethdr(int, int);
743 struct mbuf *m_get_n(int, int, size_t, size_t);
744 struct mbuf *m_gethdr_n(int, int, size_t, size_t);
745 struct mbuf *m_prepend(struct mbuf *,int, int);
746 struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
747 struct mbuf *m_pullup(struct mbuf *, int);
748 struct mbuf *m_copyup(struct mbuf *, int, int);
749 struct mbuf *m_split(struct mbuf *,int, int);
750 struct mbuf *m_getptr(struct mbuf *, int, int *);
751 void m_adj(struct mbuf *, int);
752 struct mbuf *m_defrag(struct mbuf *, int);
753 int m_apply(struct mbuf *, int, int,
754 int (*)(void *, void *, unsigned int), void *);
755 void m_cat(struct mbuf *,struct mbuf *);
756 void m_clget(struct mbuf *, int);
757 void m_copyback(struct mbuf *, int, int, const void *);
758 struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
759 int m_makewritable(struct mbuf **, int, int, int);
760 struct mbuf *m_getcl(int, int, int);
761 void m_copydata(struct mbuf *, int, int, void *);
762 void m_verify_packet(struct mbuf *);
763 struct mbuf *m_free(struct mbuf *);
764 void m_freem(struct mbuf *);
765 void mbinit(void);
766 void m_remove_pkthdr(struct mbuf *);
767 void m_copy_pkthdr(struct mbuf *, struct mbuf *);
768 void m_move_pkthdr(struct mbuf *, struct mbuf *);
769 void m_align(struct mbuf *, int);
770
771 bool m_ensure_contig(struct mbuf **, int);
772 struct mbuf *m_add(struct mbuf *, struct mbuf *);
773
774 /* Inline routines. */
775 static __inline u_int m_length(const struct mbuf *) __unused;
776
777 /* Statistics */
778 void mbstat_type_add(int, int);
779
780 /* Packet tag routines */
781 struct m_tag *m_tag_get(int, int, int);
782 void m_tag_free(struct m_tag *);
783 void m_tag_prepend(struct mbuf *, struct m_tag *);
784 void m_tag_unlink(struct mbuf *, struct m_tag *);
785 void m_tag_delete(struct mbuf *, struct m_tag *);
786 void m_tag_delete_chain(struct mbuf *);
787 struct m_tag *m_tag_find(const struct mbuf *, int);
788 struct m_tag *m_tag_copy(struct m_tag *);
789 int m_tag_copy_chain(struct mbuf *, struct mbuf *);
790
/* Packet tag types (values for m_tag_id) */
#define PACKET_TAG_NONE			0  /* Nothing */
#define PACKET_TAG_SO			4  /* sending socket pointer */
#define PACKET_TAG_NPF			10 /* packet filter */
#define PACKET_TAG_PF			11 /* packet filter */
#define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
#define PACKET_TAG_IPSEC_OUT_DONE	18 /* IPsec output already applied */
#define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
#define PACKET_TAG_INET6		26 /* IPv6 info */
#define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
					    * protocol callback, for loop
					    * detection/recovery
					    */
#define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
#define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
#define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */
807
808 /*
809 * Return the number of bytes in the mbuf chain, m.
810 */
811 static __inline u_int
m_length(const struct mbuf * m)812 m_length(const struct mbuf *m)
813 {
814 const struct mbuf *m0;
815 u_int pktlen;
816
817 if ((m->m_flags & M_PKTHDR) != 0)
818 return m->m_pkthdr.len;
819
820 pktlen = 0;
821 for (m0 = m; m0 != NULL; m0 = m0->m_next)
822 pktlen += m0->m_len;
823 return pktlen;
824 }
825
826 static __inline void
m_set_rcvif(struct mbuf * m,const struct ifnet * ifp)827 m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
828 {
829 KASSERT(m->m_flags & M_PKTHDR);
830 m->m_pkthdr.rcvif_index = ifp->if_index;
831 }
832
833 static __inline void
m_reset_rcvif(struct mbuf * m)834 m_reset_rcvif(struct mbuf *m)
835 {
836 KASSERT(m->m_flags & M_PKTHDR);
837 /* A caller may expect whole _rcvif union is zeroed */
838 /* m->m_pkthdr.rcvif_index = 0; */
839 m->m_pkthdr._rcvif.ctx = NULL;
840 }
841
842 static __inline void
m_copy_rcvif(struct mbuf * m,const struct mbuf * n)843 m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
844 {
845 KASSERT(m->m_flags & M_PKTHDR);
846 KASSERT(n->m_flags & M_PKTHDR);
847 m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
848 }
849
850 #define M_GET_ALIGNED_HDR(m, type, linkhdr) \
851 m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))
852
853 static __inline int
m_get_aligned_hdr(struct mbuf ** m,int mask,size_t hlen,bool linkhdr)854 m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
855 {
856 #ifndef __NO_STRICT_ALIGNMENT
857 if (((uintptr_t)mtod(*m, void *) & mask) != 0)
858 *m = m_copyup(*m, hlen,
859 linkhdr ? (max_linkhdr + mask) & ~mask : 0);
860 else
861 #endif
862 if (__predict_false((size_t)(*m)->m_len < hlen))
863 *m = m_pullup(*m, hlen);
864
865 return *m == NULL;
866 }
867
868 void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
869 __printflike(1, 2));
870
871 /* from uipc_mbufdebug.c */
872 void m_examine(const struct mbuf *, int, const char *,
873 void (*)(const char *, ...) __printflike(1, 2));
874
875 /* parsers for m_examine() */
876 void m_examine_ether(const struct mbuf *, int, const char *,
877 void (*)(const char *, ...) __printflike(1, 2));
878 void m_examine_pppoe(const struct mbuf *, int, const char *,
879 void (*)(const char *, ...) __printflike(1, 2));
880 void m_examine_ppp(const struct mbuf *, int, const char *,
881 void (*)(const char *, ...) __printflike(1, 2));
882 void m_examine_arp(const struct mbuf *, int, const char *,
883 void (*)(const char *, ...) __printflike(1, 2));
884 void m_examine_ip(const struct mbuf *, int, const char *,
885 void (*)(const char *, ...) __printflike(1, 2));
886 void m_examine_icmp(const struct mbuf *, int, const char *,
887 void (*)(const char *, ...) __printflike(1, 2));
888 void m_examine_ip6(const struct mbuf *, int, const char *,
889 void (*)(const char *, ...) __printflike(1, 2));
890 void m_examine_icmp6(const struct mbuf *, int, const char *,
891 void (*)(const char *, ...) __printflike(1, 2));
892 void m_examine_tcp(const struct mbuf *, int, const char *,
893 void (*)(const char *, ...) __printflike(1, 2));
894 void m_examine_udp(const struct mbuf *, int, const char *,
895 void (*)(const char *, ...) __printflike(1, 2));
896 void m_examine_hex(const struct mbuf *, int, const char *,
897 void (*)(const char *, ...) __printflike(1, 2));
898
899 /*
900 * Get rcvif of a mbuf.
901 *
902 * The caller must call m_put_rcvif after using rcvif if the returned rcvif
903 * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
904 * m_put_rcvif (although calling it is safe).
905 *
906 * The caller must not block or sleep while using rcvif. The API ensures a
907 * returned rcvif isn't freed until m_put_rcvif is called.
908 */
909 static __inline struct ifnet *
m_get_rcvif(const struct mbuf * m,int * s)910 m_get_rcvif(const struct mbuf *m, int *s)
911 {
912 struct ifnet *ifp;
913
914 KASSERT(m->m_flags & M_PKTHDR);
915 *s = pserialize_read_enter();
916 ifp = if_byindex(m->m_pkthdr.rcvif_index);
917 if (__predict_false(ifp == NULL))
918 pserialize_read_exit(*s);
919
920 return ifp;
921 }
922
923 static __inline void
m_put_rcvif(struct ifnet * ifp,int * s)924 m_put_rcvif(struct ifnet *ifp, int *s)
925 {
926
927 if (ifp == NULL)
928 return;
929 pserialize_read_exit(*s);
930 }
931
932 /*
933 * Get rcvif of a mbuf.
934 *
935 * The caller must call m_put_rcvif_psref after using rcvif. The API ensures
936 * a got rcvif isn't be freed until m_put_rcvif_psref is called.
937 */
938 static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf * m,struct psref * psref)939 m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
940 {
941 KASSERT(m->m_flags & M_PKTHDR);
942 return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
943 }
944
945 static __inline void
m_put_rcvif_psref(struct ifnet * ifp,struct psref * psref)946 m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
947 {
948
949 if (ifp == NULL)
950 return;
951 if_put(ifp, psref);
952 }
953
954 /*
955 * Get rcvif of a mbuf.
956 *
957 * This is NOT an MP-safe API and shouldn't be used at where we want MP-safe.
958 */
959 static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf * m)960 m_get_rcvif_NOMPSAFE(const struct mbuf *m)
961 {
962 KASSERT(m->m_flags & M_PKTHDR);
963 return if_byindex(m->m_pkthdr.rcvif_index);
964 }
965
966 #endif /* _KERNEL */
967 #endif /* !_SYS_MBUF_H_ */
968