xref: /netbsd/sys/sys/mbuf.h (revision f677aab4)
1 /*	$NetBSD: mbuf.h,v 1.238 2023/04/12 06:48:08 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
62  */
63 
64 #ifndef _SYS_MBUF_H_
65 #define _SYS_MBUF_H_
66 
67 #ifdef _KERNEL_OPT
68 #include "opt_mbuftrace.h"
69 #endif
70 
71 #ifndef M_WAITOK
72 #include <sys/malloc.h>
73 #endif
74 #include <sys/pool.h>
75 #include <sys/queue.h>
76 #if defined(_KERNEL)
77 #include <sys/percpu_types.h>
78 #include <sys/socket.h>	/* for AF_UNSPEC */
79 #include <sys/psref.h>
80 #endif /* defined(_KERNEL) */
81 
82 /* For offsetof() */
83 #if defined(_KERNEL) || defined(_STANDALONE)
84 #include <sys/systm.h>
85 #else
86 #include <stddef.h>
87 #endif
88 
89 #include <uvm/uvm_param.h>	/* for MIN_PAGE_SIZE */
90 
91 #include <net/if.h>
92 
93 /*
94  * Mbufs are of a single size, MSIZE (machine/param.h), which
95  * includes overhead.  An mbuf may add a single "mbuf cluster" of size
96  * MCLBYTES (also in machine/param.h), which has no additional overhead
97  * and is used instead of the internal data area; this is done when
98  * at least MINCLSIZE of data must be stored.
99  */
100 
/*
 * Packet tag structure: metadata attached to a packet via the pkthdr
 * tag list (see struct pkthdr below).  The tag payload, if any,
 * immediately follows this header.
 */
struct m_tag {
	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
	uint16_t		m_tag_id;	/* Tag ID (PACKET_TAG_*) */
	uint16_t		m_tag_len;	/* Length of data */
};

/* mbuf ownership structure, for MBUFTRACE allocation accounting */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* entry on the global mowners list */
	struct percpu *mo_counters;	/* per-CPU counters (struct mowner_counter) */
};

/* Static initializer for a struct mowner with the given name/description. */
#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }

/* Indices into a mowner's counter array. */
enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbuf claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbuf released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of cluster mbuf claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of cluster mbuf released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbuf claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbuf released */

	MOWNER_COUNTER_NCOUNTERS,	/* number of counters (array size) */
};

#if defined(_KERNEL)
/* One block of mowner counters, referenced via mo_counters. */
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif

/* userland-exported version of struct mowner */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};
142 
143 /*
144  * Macros for type conversion
145  * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
146  */
147 #define mtod(m, t)	((t)((m)->m_data))
148 
/* header at beginning of each mbuf */
struct m_hdr {
	struct	mbuf *mh_next;		/* next buffer in chain */
	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
	char	*mh_data;		/* location of data */
	struct	mowner *mh_owner;	/* mbuf owner (MBUFTRACE) */
	int	mh_len;			/* amount of data in this mbuf */
	int	mh_flags;		/* flags; see below */
	paddr_t	mh_paddr;		/* physical address of mbuf */
	short	mh_type;		/* type of data in this mbuf (MT_*) */
};
160 
161 /*
162  * record/packet header in first mbuf of chain; valid if M_PKTHDR set
163  *
164  * A note about csum_data:
165  *
166  *  o For the out-bound direction, the low 16 bits indicates the offset after
167  *    the L4 header where the final L4 checksum value is to be stored and the
168  *    high 16 bits is the length of the L3 header (the start of the data to
169  *    be checksummed).
170  *
171  *  o For the in-bound direction, it is only valid if the M_CSUM_DATA flag is
172  *    set. In this case, an L4 checksum has been calculated by hardware and
173  *    is stored in csum_data, but it is up to software to perform final
174  *    verification.
175  *
176  * Note for in-bound TCP/UDP checksums: we expect the csum_data to NOT
177  * be bit-wise inverted (the final step in the calculation of an IP
178  * checksum) -- this is so we can accumulate the checksum for fragmented
179  * packets during reassembly.
180  *
181  * Size ILP32: 40
182  *       LP64: 56
183  */
struct pkthdr {
	union {
		void		*ctx;		/* for M_GETCTX/M_SETCTX */
		if_index_t	index;		/* rcv interface index */
	} _rcvif;
#define rcvif_index		_rcvif.index
	SLIST_HEAD(packet_tags, m_tag) tags;	/* list of packet tags */
	int		len;			/* total packet length */
	int		csum_flags;		/* checksum flags (M_CSUM_*) */
	uint32_t	csum_data;		/* checksum data; see above */
	u_int		segsz;			/* segment size */
	uint16_t	ether_vtag;		/* ethernet 802.1p+q vlan tag */
	uint16_t	pkthdr_flags;		/* flags for pkthdr, see below */
#define PKTHDR_FLAG_IPSEC_SKIP_PFIL	0x0001	/* skip pfil_run_hooks() after ipsec decrypt */

	/*
	 * Following three fields are open-coded struct altq_pktattr
	 * to rearrange struct pkthdr fields flexibly.
	 */
	int	pattr_af;		/* ALTQ: address family */
	void	*pattr_class;		/* ALTQ: sched class set by classifier */
	void	*pattr_hdr;		/* ALTQ: saved header position in mbuf */
};
207 
208 /* Checksumming flags (csum_flags). */
209 #define M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
210 #define M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
211 #define M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
212 #define M_CSUM_DATA		0x00000008	/* consult csum_data */
213 #define M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
214 #define M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
215 #define M_CSUM_IPv4		0x00000040	/* IPv4 header */
216 #define M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
217 #define M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
218 #define M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */
219 
220 /* Checksum-assist quirks: keep separate from jump-table bits. */
221 #define M_CSUM_BLANK		0x40000000	/* csum is missing */
222 #define M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
223 						 * the UDP/TCP pseudo-hdr, and
224 						 * is not yet 1s-complemented.
225 						 */
226 
/*
 * Bit names for csum_flags, in snprintb(3) new-style format.
 * M_CSUM_BLANK (0x40000000) is bit 31 = octal \37 and
 * M_CSUM_NO_PSEUDOHDR (0x80000000) is bit 32 = octal \40.
 * (The previous "\39" was not a valid octal escape: C parsed it as
 * "\3" followed by '9', corrupting the bit-name string.)
 */
#define M_CSUM_BITS \
    "\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
    "\11TSOv4\12TSOv6\37BLANK\40NO_PSEUDOHDR"
230 
231 /*
232  * Macros for manipulating csum_data on outgoing packets. These are
233  * used to pass information down from the L4/L3 to the L2.
234  *
235  *   _IPHL:   Length of the IPv{4/6} header, plus the options; in other
236  *            words the offset of the UDP/TCP header in the packet.
237  *   _OFFSET: Offset of the checksum field in the UDP/TCP header.
238  */
239 #define M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
240 #define M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
241 #define M_CSUM_DATA_IPv6_IPHL(x)	((x) >> 16)
242 #define M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
243 #define M_CSUM_DATA_IPv6_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)
244 
245 /*
246  * Max # of pages we can attach to m_ext.  This is carefully chosen
247  * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
248  */
249 #ifdef MIN_PAGE_SIZE
250 #define M_EXT_MAXPAGES		((65536 / MIN_PAGE_SIZE) + 1)
251 #endif
252 
/*
 * Description of external storage mapped into mbuf, valid if M_EXT set.
 */
struct _m_ext_storage {
	unsigned int ext_refcnt;	/* # of mbufs referencing this storage */
	char *ext_buf;			/* start of buffer */
	void (*ext_free)		/* free routine if not the usual */
		(struct mbuf *, void *, size_t, void *);
	void *ext_arg;			/* argument for ext_free */
	size_t ext_size;		/* size of buffer, for ext_free */

	union {
		/* M_EXT_CLUSTER: physical address */
		paddr_t extun_paddr;
#ifdef M_EXT_MAXPAGES
		/* M_EXT_PAGES: pages */
		struct vm_page *extun_pgs[M_EXT_MAXPAGES];
#endif
	} ext_un;
#define ext_paddr	ext_un.extun_paddr
#define ext_pgs		ext_un.extun_pgs
};

/*
 * External storage record embedded in each mbuf; the active storage is
 * reached through ext_ref (see the m_ext accessor macro below), which
 * allows several mbufs to share one underlying buffer.
 */
struct _m_ext {
	struct mbuf *ext_ref;		/* mbuf owning the active storage */
	struct _m_ext_storage ext_storage;
};
280 
281 #define M_PADDR_INVALID		POOL_PADDR_INVALID
282 
/*
 * Definition of "struct mbuf".
 * Don't change this without understanding how MHLEN/MLEN are defined.
 *
 * The data area is a union: a plain mbuf uses M_databuf (mlen bytes);
 * an M_PKTHDR mbuf carries struct pkthdr followed by either an internal
 * buffer (mhlen bytes) or an external-storage record (M_EXT).
 */
#define MBUF_DEFINE(name, mhlen, mlen)					\
	struct name {							\
		struct m_hdr m_hdr;					\
		union {							\
			struct {					\
				struct pkthdr MH_pkthdr;		\
				union {					\
					struct _m_ext MH_ext;		\
					char MH_databuf[(mhlen)];	\
				} MH_dat;				\
			} MH;						\
			char M_databuf[(mlen)];				\
		} M_dat;						\
	}
/* Convenience accessors for the m_hdr fields. */
#define m_next		m_hdr.mh_next
#define m_len		m_hdr.mh_len
#define m_data		m_hdr.mh_data
#define m_owner		m_hdr.mh_owner
#define m_type		m_hdr.mh_type
#define m_flags		m_hdr.mh_flags
#define m_nextpkt	m_hdr.mh_nextpkt
#define m_paddr		m_hdr.mh_paddr
/* Accessors for the packet header and the data/ext-storage areas. */
#define m_pkthdr	M_dat.MH.MH_pkthdr
#define m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
/* The active external storage, possibly shared through another mbuf. */
#define m_ext		m_ext_ref->m_ext_storage
#define m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define m_dat		M_dat.M_databuf
315 
316 /*
317  * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
318  * into account inter-structure padding.
319  */
320 MBUF_DEFINE(_mbuf_dummy, 1, 1);
321 
322 /* normal data len */
323 #define MLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_dat)))
324 /* data len w/pkthdr */
325 #define MHLEN		((int)(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat)))
326 
327 #define MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */
328 
329 /*
330  * The *real* struct mbuf
331  */
332 MBUF_DEFINE(mbuf, MHLEN, MLEN);
333 
334 /* mbuf flags */
335 #define M_EXT		0x00000001	/* has associated external storage */
336 #define M_PKTHDR	0x00000002	/* start of record */
337 #define M_EOR		0x00000004	/* end of record */
338 #define M_PROTO1	0x00000008	/* protocol-specific */
339 
340 /* mbuf pkthdr flags, also in m_flags */
341 #define M_AUTHIPHDR	0x00000010	/* authenticated (IPsec) */
342 #define M_DECRYPTED	0x00000020	/* decrypted (IPsec) */
343 #define M_LOOP		0x00000040	/* received on loopback */
344 #define M_BCAST		0x00000100	/* send/received as L2 broadcast */
345 #define M_MCAST		0x00000200	/* send/received as L2 multicast */
346 #define M_CANFASTFWD	0x00000400	/* packet can be fast-forwarded */
347 #define M_ANYCAST6	0x00000800	/* received as IPv6 anycast */
348 
349 #define M_LINK0		0x00001000	/* link layer specific flag */
350 #define M_LINK1		0x00002000	/* link layer specific flag */
351 #define M_LINK2		0x00004000	/* link layer specific flag */
352 #define M_LINK3		0x00008000	/* link layer specific flag */
353 #define M_LINK4		0x00010000	/* link layer specific flag */
354 #define M_LINK5		0x00020000	/* link layer specific flag */
355 #define M_LINK6		0x00040000	/* link layer specific flag */
356 #define M_LINK7		0x00080000	/* link layer specific flag */
357 
358 #define M_VLANTAG	0x00100000	/* ether_vtag is valid */
359 
360 /* additional flags for M_EXT mbufs */
361 #define M_EXT_FLAGS	0xff000000
362 #define M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
363 #define M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
364 #define M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
365 #define M_EXT_RW	0x08000000	/* ext storage is writable */
366 
367 /* for source-level compatibility */
368 #define M_NOTIFICATION	M_PROTO1
369 
370 #define M_FLAGS_BITS \
371     "\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10NONE" \
372     "\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
373     "\21LINK4\22LINK5\23LINK6\24LINK7" \
374     "\25VLANTAG" \
375     "\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"
376 
377 /* flags copied when copying m_pkthdr */
378 #define M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
379     M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR|M_DECRYPTED|M_LOOP| \
380     M_VLANTAG)
381 
382 /* flag copied when shallow-copying external storage */
383 #define M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)
384 
385 /* mbuf types */
386 #define MT_FREE		0	/* should be on free list */
387 #define MT_DATA		1	/* dynamic (data) allocation */
388 #define MT_HEADER	2	/* packet header */
389 #define MT_SONAME	3	/* socket name */
390 #define MT_SOOPTS	4	/* socket options */
391 #define MT_FTABLE	5	/* fragment reassembly header */
392 #define MT_CONTROL	6	/* extra-data protocol message */
393 #define MT_OOBDATA	7	/* expedited data  */
394 
#ifdef MBUFTYPES
/* Human-readable names for the MT_* mbuf types, indexed by type. */
const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#else
extern const char * const mbuftypes[];
#endif
409 
410 /* flags to m_get/MGET */
411 #define M_DONTWAIT	M_NOWAIT
412 #define M_WAIT		M_WAITOK
413 
414 #ifdef MBUFTRACE
415 /* Mbuf allocation tracing. */
416 void mowner_init_owner(struct mowner *, const char *, const char *);
417 void mowner_init(struct mbuf *, int);
418 void mowner_ref(struct mbuf *, int);
419 void m_claim(struct mbuf *, struct mowner *);
420 void mowner_revoke(struct mbuf *, bool, int);
421 void mowner_attach(struct mowner *);
422 void mowner_detach(struct mowner *);
423 void m_claimm(struct mbuf *, struct mowner *);
424 #else
425 #define mowner_init_owner(mo, n, d)	__nothing
426 #define mowner_init(m, type)		__nothing
427 #define mowner_ref(m, flags)		__nothing
428 #define mowner_revoke(m, all, flags)	__nothing
429 #define m_claim(m, mowner)		__nothing
430 #define mowner_attach(mo)		__nothing
431 #define mowner_detach(mo)		__nothing
432 #define m_claimm(m, mo)			__nothing
433 #endif
434 
435 #define MCLAIM(m, mo)		m_claim((m), (mo))
436 #define MOWNER_ATTACH(mo)	mowner_attach(mo)
437 #define MOWNER_DETACH(mo)	mowner_detach(mo)
438 
439 /*
440  * mbuf allocation/deallocation macros:
441  *
442  *	MGET(struct mbuf *m, int how, int type)
443  * allocates an mbuf and initializes it to contain internal data.
444  *
445  *	MGETHDR(struct mbuf *m, int how, int type)
446  * allocates an mbuf and initializes it to contain a packet header
447  * and internal data.
448  *
449  * If 'how' is M_WAIT, these macros (and the corresponding functions)
450  * are guaranteed to return successfully.
451  */
452 #define MGET(m, how, type)	m = m_get((how), (type))
453 #define MGETHDR(m, how, type)	m = m_gethdr((how), (type))
454 
455 #if defined(_KERNEL)
456 
/*
 * Initialize the external-storage reference of mbuf m: m becomes its
 * own storage owner with a reference count of one.  m must not already
 * have external storage attached.
 */
#define MCLINITREFERENCE(m)						\
do {									\
	KASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
} while (/* CONSTCOND */ 0)
463 
464 /*
465  * Macros for mbuf external storage.
466  *
467  * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
468  * the flag M_EXT is set upon success.
469  *
470  * MEXTMALLOC allocates external storage and adds it to
471  * a normal mbuf; the flag M_EXT is set upon success.
472  *
473  * MEXTADD adds pre-allocated external storage to
474  * a normal mbuf; the flag M_EXT is set upon success.
475  */
476 
#define MCLGET(m, how)	m_clget((m), (how))

/*
 * Allocate "size" bytes with malloc(9) and attach them to m as
 * writable external storage.  On allocation failure M_EXT is simply
 * not set; callers must check m_flags afterwards.
 */
#define MEXTMALLOC(m, size, how)					\
do {									\
	(m)->m_ext_storage.ext_buf = malloc((size), 0, (how));		\
	if ((m)->m_ext_storage.ext_buf != NULL) {			\
		MCLINITREFERENCE(m);					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
				M_EXT|M_EXT_RW;				\
		(m)->m_ext.ext_size = (size);				\
		(m)->m_ext.ext_free = NULL;				\
		(m)->m_ext.ext_arg = NULL;				\
		mowner_ref((m), M_EXT);					\
	}								\
} while (/* CONSTCOND */ 0)
493 
/*
 * Attach caller-provided storage (buf, size) to m; "free" and "arg"
 * record how the storage is eventually released.  The storage is not
 * marked M_EXT_RW, and the "type" argument is unused.
 */
#define MEXTADD(m, buf, size, type, free, arg)				\
do {									\
	MCLINITREFERENCE(m);						\
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);		\
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT;	\
	(m)->m_ext.ext_size = (size);					\
	(m)->m_ext.ext_free = (free);					\
	(m)->m_ext.ext_arg = (arg);					\
	mowner_ref((m), M_EXT);						\
} while (/* CONSTCOND */ 0)
504 
/* Start of m's data buffer: external, pkthdr-internal, or plain internal. */
#define M_BUFADDR(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
	    ((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)

/* Total size of m's data buffer, matching the M_BUFADDR() selection. */
#define M_BUFSIZE(m)							\
	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
	    ((m)->m_flags & M_PKTHDR) ? MHLEN : MLEN)

/* Rewind m_data to the very beginning of the data buffer. */
#define MRESETDATA(m)	(m)->m_data = M_BUFADDR(m)
514 
/*
 * Compute the offset of the beginning of the data buffer of a non-ext
 * mbuf.
 */
#define M_BUFOFFSET(m)							\
	(((m)->m_flags & M_PKTHDR) ?					\
	 offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))

/*
 * Determine if an mbuf's data area is read-only.  This is true
 * if external storage is read-only mapped, or not marked as R/W,
 * or referenced by more than one mbuf.
 */
#define M_READONLY(m)							\
	(((m)->m_flags & M_EXT) != 0 &&					\
	  (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW ||	\
	  (m)->m_ext.ext_refcnt > 1))

/* True if the first __len bytes of __m cannot be modified in place. */
#define M_UNWRITABLE(__m, __len)					\
	((__m)->m_len < (__len) || M_READONLY((__m)))

/*
 * Determine if an mbuf's data area is read-only at the MMU.
 */
#define M_ROMAP(m)							\
	(((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))
541 
/*
 * Compute the amount of space available before the current start of
 * data in an mbuf.  Read-only storage yields zero leading space.
 */
#define M_LEADINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : ((m)->m_data - M_BUFADDR(m)))

/*
 * Compute the amount of space available
 * after the end of data in an mbuf.
 */
#define _M_TRAILINGSPACE(m)						\
	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size - \
	 ((m)->m_data + (m)->m_len) :					\
	 &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))

/* As _M_TRAILINGSPACE(), but zero when the data area is read-only. */
#define M_TRAILINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))
560 
/*
 * Arrange to prepend space of size plen to mbuf m.
 * If a new mbuf must be allocated, how specifies whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 * The cached packet-header length, if any, is updated to match.
 */
#define M_PREPEND(m, plen, how)						\
do {									\
	if (M_LEADINGSPACE(m) >= (plen)) {				\
		(m)->m_data -= (plen);					\
		(m)->m_len += (plen);					\
	} else								\
		(m) = m_prepend((m), (plen), (how));			\
	if ((m) && (m)->m_flags & M_PKTHDR)				\
		(m)->m_pkthdr.len += (plen);				\
} while (/* CONSTCOND */ 0)
577 
/* change mbuf to new type, keeping the per-type statistics consistent */
#define MCHTYPE(m, t)							\
do {									\
	KASSERT((t) != MT_FREE);					\
	mbstat_type_add((m)->m_type, -1);				\
	mbstat_type_add(t, 1);						\
	(m)->m_type = t;						\
} while (/* CONSTCOND */ 0)
586 
587 #ifdef DIAGNOSTIC
588 #define M_VERIFY_PACKET(m)	m_verify_packet(m)
589 #else
590 #define M_VERIFY_PACKET(m)	__nothing
591 #endif
592 
593 /* The "copy all" special length. */
594 #define M_COPYALL	-1
595 
596 /*
597  * Allow drivers and/or protocols to store private context information.
598  */
599 #define M_GETCTX(m, t)		((t)(m)->m_pkthdr._rcvif.ctx)
600 #define M_SETCTX(m, c)		((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
601 #define M_CLEARCTX(m)		M_SETCTX((m), NULL)
602 
/*
 * M_REGION_GET ensures that the "len"-sized region of type "typ" starting
 * from "off" within "m" is located in a single mbuf, contiguously.
 *
 * The pointer to the region will be returned to pointer variable "val".
 * On failure, both "val" and "m" are set to NULL.
 */
#define M_REGION_GET(val, typ, m, off, len) \
do {									\
	struct mbuf *_t;						\
	int _tmp;							\
	if ((m)->m_len >= (off) + (len))				\
		(val) = (typ)(mtod((m), char *) + (off));		\
	else {								\
		/* Region spans mbufs: make it contiguous. */		\
		_t = m_pulldown((m), (off), (len), &_tmp);		\
		if (_t) {						\
			if (_t->m_len < _tmp + (len))			\
				panic("m_pulldown malfunction");	\
			(val) = (typ)(mtod(_t, char *) + _tmp);		\
		} else {						\
			(val) = (typ)NULL;				\
			(m) = NULL;					\
		}							\
	}								\
} while (/*CONSTCOND*/ 0)
627 
628 #endif /* defined(_KERNEL) */
629 
/*
 * Simple mbuf queueing system
 *
 * this is basically a SIMPLEQ adapted to mbuf use (ie using
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible
 */
#define MBUFQ_HEAD(name)					\
struct name {							\
	struct mbuf *mq_first;					\
	struct mbuf **mq_last;					\
}

/* Set up an empty queue: mq_last points at mq_first. */
#define MBUFQ_INIT(q)		do {				\
	(q)->mq_first = NULL;					\
	(q)->mq_last = &(q)->mq_first;				\
} while (/*CONSTCOND*/0)

/* Append packet m at the tail of the queue. */
#define MBUFQ_ENQUEUE(q, m)	do {				\
	(m)->m_nextpkt = NULL;					\
	*(q)->mq_last = (m);					\
	(q)->mq_last = &(m)->m_nextpkt;				\
} while (/*CONSTCOND*/0)

/* Insert packet m at the head of the queue. */
#define MBUFQ_PREPEND(q, m)	do {				\
	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
		(q)->mq_last = &(m)->m_nextpkt;			\
	(q)->mq_first = (m);					\
} while (/*CONSTCOND*/0)

/* Remove the head packet into m; m is NULL when the queue is empty. */
#define MBUFQ_DEQUEUE(q, m)	do {				\
	if (((m) = (q)->mq_first) != NULL) {			\
		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
			(q)->mq_last = &(q)->mq_first;		\
		else						\
			(m)->m_nextpkt = NULL;			\
	}							\
} while (/*CONSTCOND*/0)

/* Free every packet on the queue and reset it to empty. */
#define MBUFQ_DRAIN(q)		do {				\
	struct mbuf *__m0;					\
	while ((__m0 = (q)->mq_first) != NULL) {		\
		(q)->mq_first = __m0->m_nextpkt;		\
		m_freem(__m0);					\
	}							\
	(q)->mq_last = &(q)->mq_first;				\
} while (/*CONSTCOND*/0)

/* Accessors: head of queue, successor of m, and tail of queue. */
#define MBUFQ_FIRST(q)		((q)->mq_first)
#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
#define MBUFQ_LAST(q)		(*(q)->mq_last)
682 
683 /*
684  * Mbuf statistics.
685  * For statistics related to mbuf and cluster allocations, see also the
686  * pool headers (mb_cache and mcl_cache).
687  */
struct mbstat {
	u_long	_m_spare;	/* formerly m_mbufs */
	u_long	_m_spare1;	/* formerly m_clusters */
	u_long	_m_spare2;	/* spare field */
	u_long	_m_spare3;	/* formerly m_clfree - free clusters */
	u_long	m_drops;	/* times failed to find space */
	u_long	m_wait;		/* times waited for space */
	u_long	m_drain;	/* times drained protocols for space */
	u_short	m_mtypes[256];	/* type specific mbuf allocations */
};

/* Per-CPU view of the per-type allocation counters. */
struct mbstat_cpu {
	u_int	m_mtypes[256];	/* type specific mbuf allocations */
};
702 
703 /*
704  * Mbuf sysctl variables.
705  */
706 #define MBUF_MSIZE		1	/* int: mbuf base size */
707 #define MBUF_MCLBYTES		2	/* int: mbuf cluster size */
708 #define MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
709 #define MBUF_MBLOWAT		4	/* int: mbuf low water mark */
710 #define MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
711 #define MBUF_STATS		6	/* struct: mbstat */
712 #define MBUF_MOWNERS		7	/* struct: m_owner[] */
713 #define MBUF_NMBCLUSTERS_LIMIT	8	/* int: limit of nmbclusters */
714 
715 #ifdef _KERNEL
716 extern struct mbstat mbstat;
717 extern int nmbclusters;		/* limit on the # of clusters */
718 extern int mblowat;		/* mbuf low water mark */
719 extern int mcllowat;		/* mbuf cluster low water mark */
720 extern int max_linkhdr;		/* largest link-level header */
721 extern int max_protohdr;		/* largest protocol header */
722 extern int max_hdr;		/* largest link+protocol header */
723 extern int max_datalen;		/* MHLEN - max_hdr */
724 extern const int msize;			/* mbuf base size */
725 extern const int mclbytes;		/* mbuf cluster size */
726 extern pool_cache_t mb_cache;
727 #ifdef MBUFTRACE
728 LIST_HEAD(mownerhead, mowner);
729 extern struct mownerhead mowners;
730 extern struct mowner unknown_mowners[];
731 extern struct mowner revoked_mowner;
732 #endif
733 
734 MALLOC_DECLARE(M_MBUF);
735 MALLOC_DECLARE(M_SONAME);
736 
737 struct	mbuf *m_copym(struct mbuf *, int, int, int);
738 struct	mbuf *m_copypacket(struct mbuf *, int);
739 struct	mbuf *m_devget(char *, int, int, struct ifnet *);
740 struct	mbuf *m_dup(struct mbuf *, int, int, int);
741 struct	mbuf *m_get(int, int);
742 struct	mbuf *m_gethdr(int, int);
743 struct	mbuf *m_get_n(int, int, size_t, size_t);
744 struct	mbuf *m_gethdr_n(int, int, size_t, size_t);
745 struct	mbuf *m_prepend(struct mbuf *,int, int);
746 struct	mbuf *m_pulldown(struct mbuf *, int, int, int *);
747 struct	mbuf *m_pullup(struct mbuf *, int);
748 struct	mbuf *m_copyup(struct mbuf *, int, int);
749 struct	mbuf *m_split(struct mbuf *,int, int);
750 struct	mbuf *m_getptr(struct mbuf *, int, int *);
751 void	m_adj(struct mbuf *, int);
752 struct	mbuf *m_defrag(struct mbuf *, int);
753 int	m_apply(struct mbuf *, int, int,
754     int (*)(void *, void *, unsigned int), void *);
755 void	m_cat(struct mbuf *,struct mbuf *);
756 void	m_clget(struct mbuf *, int);
757 void	m_copyback(struct mbuf *, int, int, const void *);
758 struct	mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
759 int	m_makewritable(struct mbuf **, int, int, int);
760 struct	mbuf *m_getcl(int, int, int);
761 void	m_copydata(struct mbuf *, int, int, void *);
762 void	m_verify_packet(struct mbuf *);
763 struct	mbuf *m_free(struct mbuf *);
764 void	m_freem(struct mbuf *);
765 void	mbinit(void);
766 void	m_remove_pkthdr(struct mbuf *);
767 void	m_copy_pkthdr(struct mbuf *, struct mbuf *);
768 void	m_move_pkthdr(struct mbuf *, struct mbuf *);
769 void	m_align(struct mbuf *, int);
770 
771 bool	m_ensure_contig(struct mbuf **, int);
772 struct mbuf *m_add(struct mbuf *, struct mbuf *);
773 
774 /* Inline routines. */
775 static __inline u_int m_length(const struct mbuf *) __unused;
776 
777 /* Statistics */
778 void mbstat_type_add(int, int);
779 
780 /* Packet tag routines */
781 struct	m_tag *m_tag_get(int, int, int);
782 void	m_tag_free(struct m_tag *);
783 void	m_tag_prepend(struct mbuf *, struct m_tag *);
784 void	m_tag_unlink(struct mbuf *, struct m_tag *);
785 void	m_tag_delete(struct mbuf *, struct m_tag *);
786 void	m_tag_delete_chain(struct mbuf *);
787 struct	m_tag *m_tag_find(const struct mbuf *, int);
788 struct	m_tag *m_tag_copy(struct m_tag *);
789 int	m_tag_copy_chain(struct mbuf *, struct mbuf *);
790 
791 /* Packet tag types */
792 #define PACKET_TAG_NONE			0  /* Nothing */
793 #define PACKET_TAG_SO			4  /* sending socket pointer */
794 #define PACKET_TAG_NPF			10 /* packet filter */
795 #define PACKET_TAG_PF			11 /* packet filter */
796 #define PACKET_TAG_ALTQ_QID		12 /* ALTQ queue id */
797 #define PACKET_TAG_IPSEC_OUT_DONE	18
798 #define PACKET_TAG_IPSEC_NAT_T_PORTS	25 /* two uint16_t */
799 #define PACKET_TAG_INET6		26 /* IPv6 info */
800 #define PACKET_TAG_TUNNEL_INFO		28 /* tunnel identification and
801 					    * protocol callback, for loop
802 					    * detection/recovery
803 					    */
804 #define PACKET_TAG_MPLS			29 /* Indicate it's for MPLS */
805 #define PACKET_TAG_SRCROUTE		30 /* IPv4 source routing */
806 #define PACKET_TAG_ETHERNET_SRC		31 /* Ethernet source address */
807 
808 /*
809  * Return the number of bytes in the mbuf chain, m.
810  */
811 static __inline u_int
m_length(const struct mbuf * m)812 m_length(const struct mbuf *m)
813 {
814 	const struct mbuf *m0;
815 	u_int pktlen;
816 
817 	if ((m->m_flags & M_PKTHDR) != 0)
818 		return m->m_pkthdr.len;
819 
820 	pktlen = 0;
821 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
822 		pktlen += m0->m_len;
823 	return pktlen;
824 }
825 
/*
 * Record the receiving interface of packet m by interface index.
 * m must have a packet header.
 */
static __inline void
m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
{
	KASSERT(m->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = ifp->if_index;
}
832 
/*
 * Clear the receiving interface of packet m.
 */
static __inline void
m_reset_rcvif(struct mbuf *m)
{
	KASSERT(m->m_flags & M_PKTHDR);
	/* A caller may expect whole _rcvif union is zeroed */
	/* m->m_pkthdr.rcvif_index = 0; */
	m->m_pkthdr._rcvif.ctx = NULL;
}
841 
/*
 * Copy the receiving interface index from packet n to packet m.
 * Both must have packet headers.
 */
static __inline void
m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
{
	KASSERT(m->m_flags & M_PKTHDR);
	KASSERT(n->m_flags & M_PKTHDR);
	m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
}
849 
/* Wrapper computing the alignment mask and length for a header type. */
#define M_GET_ALIGNED_HDR(m, type, linkhdr) \
    m_get_aligned_hdr((m), __alignof(type) - 1, sizeof(type), (linkhdr))

/*
 * Ensure the first hlen bytes of *m are contiguous and, on strict
 * alignment platforms, aligned per "mask" (alignment minus one).
 * If linkhdr is true, leave aligned leading space for a link-level
 * header.  Returns nonzero if *m ended up NULL.
 */
static __inline int
m_get_aligned_hdr(struct mbuf **m, int mask, size_t hlen, bool linkhdr)
{
#ifndef __NO_STRICT_ALIGNMENT
	/* Misaligned data: copy up into aligned storage. */
	if (((uintptr_t)mtod(*m, void *) & mask) != 0)
		*m = m_copyup(*m, hlen,
		      linkhdr ? (max_linkhdr + mask) & ~mask : 0);
	else
#endif
	if (__predict_false((size_t)(*m)->m_len < hlen))
		*m = m_pullup(*m, hlen);

	return *m == NULL;
}
867 
/* Debug helper: dump an mbuf through the given printf-like function. */
void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
    __printflike(1, 2));

/* from uipc_mbufdebug.c */
void	m_examine(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));

/*
 * Parsers for m_examine(): each interprets the mbuf contents as the
 * named protocol, printing through the printf-like callback.
 */
void m_examine_ether(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_pppoe(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ppp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_arp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_ip6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_icmp6(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_tcp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_udp(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void m_examine_hex(const struct mbuf *, int, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
898 
899 /*
900  * Get rcvif of a mbuf.
901  *
902  * The caller must call m_put_rcvif after using rcvif if the returned rcvif
903  * isn't NULL. If the returned rcvif is NULL, the caller doesn't need to call
904  * m_put_rcvif (although calling it is safe).
905  *
906  * The caller must not block or sleep while using rcvif. The API ensures a
907  * returned rcvif isn't freed until m_put_rcvif is called.
908  */
909 static __inline struct ifnet *
m_get_rcvif(const struct mbuf * m,int * s)910 m_get_rcvif(const struct mbuf *m, int *s)
911 {
912 	struct ifnet *ifp;
913 
914 	KASSERT(m->m_flags & M_PKTHDR);
915 	*s = pserialize_read_enter();
916 	ifp = if_byindex(m->m_pkthdr.rcvif_index);
917 	if (__predict_false(ifp == NULL))
918 		pserialize_read_exit(*s);
919 
920 	return ifp;
921 }
922 
923 static __inline void
m_put_rcvif(struct ifnet * ifp,int * s)924 m_put_rcvif(struct ifnet *ifp, int *s)
925 {
926 
927 	if (ifp == NULL)
928 		return;
929 	pserialize_read_exit(*s);
930 }
931 
932 /*
933  * Get rcvif of a mbuf.
934  *
 * The caller must call m_put_rcvif_psref after using the rcvif. The API
 * ensures the obtained rcvif won't be freed until m_put_rcvif_psref is
 * called.
937  */
938 static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf * m,struct psref * psref)939 m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
940 {
941 	KASSERT(m->m_flags & M_PKTHDR);
942 	return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
943 }
944 
945 static __inline void
m_put_rcvif_psref(struct ifnet * ifp,struct psref * psref)946 m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
947 {
948 
949 	if (ifp == NULL)
950 		return;
951 	if_put(ifp, psref);
952 }
953 
954 /*
955  * Get rcvif of a mbuf.
956  *
 * This is NOT an MP-safe API and must not be used where MP-safety is
 * required.
958  */
959 static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf * m)960 m_get_rcvif_NOMPSAFE(const struct mbuf *m)
961 {
962 	KASSERT(m->m_flags & M_PKTHDR);
963 	return if_byindex(m->m_pkthdr.rcvif_index);
964 }
965 
966 #endif /* _KERNEL */
967 #endif /* !_SYS_MBUF_H_ */
968