/*	$NetBSD: mbuf.h,v 1.166 2016/06/21 03:07:54 ozaki-r Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center and Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
 */

#ifndef _SYS_MBUF_H_
#define _SYS_MBUF_H_

#ifdef _KERNEL_OPT
#include "opt_mbuftrace.h"
#endif

#ifndef M_WAITOK
#include <sys/malloc.h>
#endif
#include <sys/pool.h>
#include <sys/queue.h>
#if defined(_KERNEL)
#include <sys/percpu_types.h>
#include <sys/socket.h>	/* for AF_UNSPEC */
#include <sys/psref.h>
#endif /* defined(_KERNEL) */

/* For offsetof() */
#if defined(_KERNEL) || defined(_STANDALONE)
#include <sys/systm.h>
#else
#include <stddef.h>
#endif

#include <uvm/uvm_param.h>	/* for MIN_PAGE_SIZE */

#include <net/if.h>

/*
 * Mbufs are of a single size, MSIZE (machine/param.h), which
 * includes overhead.  An mbuf may add a single "mbuf cluster" of size
 * MCLBYTES (also in machine/param.h), which has no additional overhead
 * and is used instead of the internal data area; this is done when
 * at least MINCLSIZE of data must be stored.
 */

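/*
 * Illustrative example (not part of the original header; exact values
 * vary by port): with the common MSIZE = 512, MLEN and MHLEN work out
 * to MSIZE minus the m_hdr (and pkthdr) overhead, and MINCLSIZE =
 * MHLEN + MLEN + 1 is the smallest payload that will not fit in a
 * packet-header mbuf plus one chained mbuf, and so is stored in a
 * cluster instead:
 *
 *	if (len >= MINCLSIZE)
 *		(attach an MCLBYTES cluster; see MCLGET below)
 *	else
 *		(store the data inline in m_dat/m_pktdat)
 */
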
/* Packet tags structure */
struct m_tag {
	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
	uint16_t		m_tag_id;	/* Tag ID */
	uint16_t		m_tag_len;	/* Length of data */
};

/* mbuf ownership structure */
struct mowner {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* link on the list of mowners */
	struct percpu *mo_counters;
};

#define MOWNER_INIT(x, y)	{ .mo_name = x, .mo_descr = y }

enum mowner_counter_index {
	MOWNER_COUNTER_CLAIMS,		/* # of small mbufs claimed */
	MOWNER_COUNTER_RELEASES,	/* # of small mbufs released */
	MOWNER_COUNTER_CLUSTER_CLAIMS,	/* # of M_CLUSTER mbufs claimed */
	MOWNER_COUNTER_CLUSTER_RELEASES,/* # of M_CLUSTER mbufs released */
	MOWNER_COUNTER_EXT_CLAIMS,	/* # of M_EXT mbufs claimed */
	MOWNER_COUNTER_EXT_RELEASES,	/* # of M_EXT mbufs released */

	MOWNER_COUNTER_NCOUNTERS,
};

#if defined(_KERNEL)
struct mowner_counter {
	u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
};
#endif /* defined(_KERNEL) */

/* userland-exported version of struct mowner */
struct mowner_user {
	char mo_name[16];		/* owner name (fxp0) */
	char mo_descr[16];		/* owner description (input) */
	LIST_ENTRY(mowner) mo_link;	/* unused padding; for compatibility */
	u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
};

/*
 * Macros for type conversion
 * mtod(m,t) -	convert mbuf pointer to data pointer of correct type
 */
#define	mtod(m, t)	((t)((m)->m_data))
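
/*
 * Example (illustrative, not part of the original header): make the
 * first bytes of a chain contiguous with m_pullup and then view them
 * as an IP header; "struct ip" comes from <netinet/ip.h>.
 *
 *	struct ip *ip;
 *
 *	if ((m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 */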

/* header at beginning of each mbuf: */
struct m_hdr {
	struct	mbuf *mh_next;		/* next buffer in chain */
	struct	mbuf *mh_nextpkt;	/* next chain in queue/record */
	char	*mh_data;		/* location of data */
	struct	mowner *mh_owner;	/* mbuf owner */
	int	mh_len;			/* amount of data in this mbuf */
	int	mh_flags;		/* flags; see below */
	paddr_t	mh_paddr;		/* physical address of mbuf */
	short	mh_type;		/* type of data in this mbuf */
};
/*
 * record/packet header in first mbuf of chain; valid if M_PKTHDR set
 *
 * A note about csum_data: For the out-bound direction, the low 16 bits
 * hold the offset of the checksum field within the L4 header, and the
 * high 16 bits hold the length of the L3 header (that is, where the
 * data to be checksummed starts).  For the in-bound direction,
 * csum_data is only valid if the M_CSUM_DATA flag is set.  In that
 * case, an L4 checksum has been calculated by hardware, but it is up
 * to software to perform the final verification.
 *
 * Note that for in-bound TCP/UDP checksums, we expect csum_data NOT to
 * be bit-wise inverted (the final step in the calculation of an IP
 * checksum) -- this is so we can accumulate the checksum for
 * fragmented packets during reassembly.
 *
 * Size ILP32: 36
 *	 LP64: 56
 */
struct pkthdr {
	union {
		void		*ctx;		/* for M_GETCTX/M_SETCTX */
		if_index_t	index;		/* rcv interface index */
	} _rcvif;
#define rcvif_index		_rcvif.index
	SLIST_HEAD(packet_tags, m_tag) tags;	/* list of packet tags */
	int		len;			/* total packet length */
	int		csum_flags;		/* checksum flags */
	uint32_t	csum_data;		/* checksum data */
	u_int		segsz;			/* segment size */

	/*
	 * The following three fields are an open-coded struct
	 * altq_pktattr, kept inline so that struct pkthdr's fields
	 * can be rearranged flexibly.
	 */
	void	*pattr_class;	/* ALTQ: sched class set by classifier */
	void	*pattr_hdr;	/* ALTQ: saved header position in mbuf */
	int	pattr_af;	/* ALTQ: address family */
};
/*
 * Note: These bits are carefully arranged so that the compiler can
 * have a prayer of generating a jump table.
 */
#define	M_CSUM_TCPv4		0x00000001	/* TCP header/payload */
#define	M_CSUM_UDPv4		0x00000002	/* UDP header/payload */
#define	M_CSUM_TCP_UDP_BAD	0x00000004	/* TCP/UDP checksum bad */
#define	M_CSUM_DATA		0x00000008	/* consult csum_data */
#define	M_CSUM_TCPv6		0x00000010	/* IPv6 TCP header/payload */
#define	M_CSUM_UDPv6		0x00000020	/* IPv6 UDP header/payload */
#define	M_CSUM_IPv4		0x00000040	/* IPv4 header */
#define	M_CSUM_IPv4_BAD		0x00000080	/* IPv4 header checksum bad */
#define	M_CSUM_TSOv4		0x00000100	/* TCPv4 segmentation offload */
#define	M_CSUM_TSOv6		0x00000200	/* TCPv6 segmentation offload */

/* Checksum-assist quirks: keep separate from jump-table bits. */
#define	M_CSUM_NO_PSEUDOHDR	0x80000000	/* Rx csum_data does not include
						 * the UDP/TCP pseudo-hdr, and
						 * is not yet 1s-complemented.
						 */

#define M_CSUM_BITS \
	"\20\1TCPv4\2UDPv4\3TCP_UDP_BAD\4DATA\5TCPv6\6UDPv6\7IPv4\10IPv4_BAD" \
	"\11TSOv4\12TSOv6\40NO_PSEUDOHDR"

/*
 * Macros for manipulating csum_data on outgoing packets.  These are
 * used to pass information down from the L4/L3 to the L2.
 */
#define	M_CSUM_DATA_IPv4_IPHL(x)	((x) >> 16)
#define	M_CSUM_DATA_IPv4_OFFSET(x)	((x) & 0xffff)
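
/*
 * Illustrative sketch (not part of the original header): how an
 * outbound TCPv4 path might encode csum_data, and how a driver
 * decodes it.  "iphl" (the IPv4 header length) and "off" are
 * hypothetical local variables.
 *
 *	m->m_pkthdr.csum_flags |= M_CSUM_TCPv4;
 *	m->m_pkthdr.csum_data = (iphl << 16) |
 *	    offsetof(struct tcphdr, th_sum);
 *
 *	iphl = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data);
 *	off = M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
 */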

/*
 * Macros for M_CSUM_TCPv6 and M_CSUM_UDPv6
 *
 * M_CSUM_DATA_IPv6_HL: length of the ip6_hdr plus extension headers,
 * i.e. the offset of the UDP/TCP header in the packet.
 *
 * M_CSUM_DATA_IPv6_OFFSET: offset of the checksum field in the UDP/TCP
 * header.
 */

#define	M_CSUM_DATA_IPv6_HL(x)		((x) >> 16)
#define	M_CSUM_DATA_IPv6_HL_SET(x, v)	(x) = ((x) & 0xffff) | ((v) << 16)
#define	M_CSUM_DATA_IPv6_OFFSET(x)	((x) & 0xffff)
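
/*
 * Illustrative sketch (not part of the original header): recording
 * both halves of csum_data for an outbound TCPv6 packet; "hlen" (the
 * length of the IPv6 header plus extension headers) is hypothetical.
 *
 *	m->m_pkthdr.csum_flags |= M_CSUM_TCPv6;
 *	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 *	M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, hlen);
 */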

/*
 * Max # of pages we can attach to m_ext.  This is carefully chosen
 * to be able to handle SOSEND_LOAN_CHUNK with our minimum sized page.
 */
#ifdef MIN_PAGE_SIZE
#define	M_EXT_MAXPAGES	((65536 / MIN_PAGE_SIZE) + 1)
#endif

/* description of external storage mapped into mbuf, valid if M_EXT set */
struct _m_ext_storage {
	unsigned int ext_refcnt;
	int ext_flags;
	char *ext_buf;			/* start of buffer */
	void (*ext_free)		/* free routine if not the usual */
		(struct mbuf *, void *, size_t, void *);
	void *ext_arg;			/* argument for ext_free */
	size_t ext_size;		/* size of buffer, for ext_free */
	union {
		paddr_t extun_paddr;	/* physical address (M_EXT_CLUSTER) */
					/* pages (M_EXT_PAGES) */
		/*
		 * XXX This is gross, but it doesn't really matter; this is
		 * XXX overlaid on top of the mbuf data area.
		 */
#ifdef M_EXT_MAXPAGES
		struct vm_page *extun_pgs[M_EXT_MAXPAGES];
#endif
	} ext_un;
#define	ext_paddr	ext_un.extun_paddr
#define	ext_pgs		ext_un.extun_pgs
#ifdef DEBUG
	const char *ext_ofile;
	const char *ext_nfile;
	int ext_oline;
	int ext_nline;
#endif
};

struct _m_ext {
	struct mbuf *ext_ref;
	struct _m_ext_storage ext_storage;
};

#define	M_PADDR_INVALID		POOL_PADDR_INVALID

/*
 * Definition of "struct mbuf".
 * Don't change this without understanding how MHLEN/MLEN are defined.
 */
#define	MBUF_DEFINE(name, mhlen, mlen)					\
	struct name {							\
		struct	m_hdr m_hdr;					\
		union {							\
			struct {					\
				struct	pkthdr MH_pkthdr;		\
				union {					\
					struct	_m_ext MH_ext;		\
					char MH_databuf[(mhlen)];	\
				} MH_dat;				\
			} MH;						\
			char M_databuf[(mlen)];				\
		} M_dat;						\
	}
#define	m_next		m_hdr.mh_next
#define	m_len		m_hdr.mh_len
#define	m_data		m_hdr.mh_data
#define	m_owner		m_hdr.mh_owner
#define	m_type		m_hdr.mh_type
#define	m_flags		m_hdr.mh_flags
#define	m_nextpkt	m_hdr.mh_nextpkt
#define	m_paddr		m_hdr.mh_paddr
#define	m_pkthdr	M_dat.MH.MH_pkthdr
#define	m_ext_storage	M_dat.MH.MH_dat.MH_ext.ext_storage
#define	m_ext_ref	M_dat.MH.MH_dat.MH_ext.ext_ref
#define	m_ext		m_ext_ref->m_ext_storage
#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
#define	m_dat		M_dat.M_databuf

/*
 * Dummy mbuf structure to calculate the right values for MLEN/MHLEN, taking
 * into account inter-structure padding.
 */
MBUF_DEFINE(_mbuf_dummy, 1, 1);

/* normal data len */
#define	MLEN	(MSIZE - offsetof(struct _mbuf_dummy, m_dat))
/* data len w/pkthdr */
#define	MHLEN	(MSIZE - offsetof(struct _mbuf_dummy, m_pktdat))

#define	MINCLSIZE	(MHLEN+MLEN+1)	/* smallest amount to put in cluster */
#define	M_MAXCOMPRESS	(MHLEN / 2)	/* max amount to copy for compression */

/*
 * The *real* struct mbuf
 */
MBUF_DEFINE(mbuf, MHLEN, MLEN);

/* mbuf flags */
#define	M_EXT		0x00000001	/* has associated external storage */
#define	M_PKTHDR	0x00000002	/* start of record */
#define	M_EOR		0x00000004	/* end of record */
#define	M_PROTO1	0x00000008	/* protocol-specific */

/* mbuf pkthdr flags, also in m_flags */
#define	M_AUTHIPHDR	0x00000010	/* data origin authentication for
					 * IP header */
#define	M_DECRYPTED	0x00000020	/* confidentiality */
#define	M_LOOP		0x00000040	/* for Mbuf statistics */
#define	M_AUTHIPDGM	0x00000080	/* data origin authentication */
#define	M_BCAST		0x00000100	/* send/received as link-level
					 * broadcast */
#define	M_MCAST		0x00000200	/* send/received as link-level
					 * multicast */
#define	M_CANFASTFWD	0x00000400	/* used by filters to indicate
					 * packet can be fast-forwarded */
#define	M_ANYCAST6	0x00000800	/* received as IPv6 anycast */

#define	M_LINK0		0x00001000	/* link layer specific flag */
#define	M_LINK1		0x00002000	/* link layer specific flag */
#define	M_LINK2		0x00004000	/* link layer specific flag */

#define	M_LINK3		0x00008000	/* link layer specific flag */
#define	M_LINK4		0x00010000	/* link layer specific flag */
#define	M_LINK5		0x00020000	/* link layer specific flag */
#define	M_LINK6		0x00040000	/* link layer specific flag */
#define	M_LINK7		0x00080000	/* link layer specific flag */

/* additional flags for M_EXT mbufs */
#define	M_EXT_FLAGS	0xff000000
#define	M_EXT_CLUSTER	0x01000000	/* ext is a cluster */
#define	M_EXT_PAGES	0x02000000	/* ext_pgs is valid */
#define	M_EXT_ROMAP	0x04000000	/* ext mapping is r-o at MMU */
#define	M_EXT_RW	0x08000000	/* ext storage is writable */

/* for source-level compatibility */
#define	M_CLUSTER	M_EXT_CLUSTER
#define	M_NOTIFICATION	M_PROTO1

#define M_FLAGS_BITS \
	"\20\1EXT\2PKTHDR\3EOR\4PROTO1\5AUTHIPHDR\6DECRYPTED\7LOOP\10AUTHIPDGM" \
	"\11BCAST\12MCAST\13CANFASTFWD\14ANYCAST6\15LINK0\16LINK1\17LINK2\20LINK3" \
	"\21LINK4\22LINK5\23LINK6\24LINK7" \
	"\31EXT_CLUSTER\32EXT_PAGES\33EXT_ROMAP\34EXT_RW"

/* flags copied when copying m_pkthdr */
#define	M_COPYFLAGS	(M_PKTHDR|M_EOR|M_BCAST|M_MCAST|M_CANFASTFWD| \
			 M_ANYCAST6|M_LINK0|M_LINK1|M_LINK2|M_AUTHIPHDR| \
			 M_DECRYPTED|M_LOOP|M_AUTHIPDGM)

/* flags copied when shallow-copying external storage */
#define	M_EXTCOPYFLAGS	(M_EXT|M_EXT_FLAGS)

/* mbuf types */
#define	MT_FREE		0	/* should be on free list */
#define	MT_DATA		1	/* dynamic (data) allocation */
#define	MT_HEADER	2	/* packet header */
#define	MT_SONAME	3	/* socket name */
#define	MT_SOOPTS	4	/* socket options */
#define	MT_FTABLE	5	/* fragment reassembly header */
#define	MT_CONTROL	6	/* extra-data protocol message */
#define	MT_OOBDATA	7	/* expedited data */

#ifdef MBUFTYPES
static const char * const mbuftypes[] = {
	"mbfree",
	"mbdata",
	"mbheader",
	"mbsoname",
	"mbsopts",
	"mbftable",
	"mbcontrol",
	"mboobdata",
};
#endif

/* flags to m_get/MGET */
#define	M_DONTWAIT	M_NOWAIT
#define	M_WAIT		M_WAITOK

#ifdef MBUFTRACE
/*
 * mbuf allocation tracing
 */
void mowner_init(struct mbuf *, int);
void mowner_ref(struct mbuf *, int);
void m_claim(struct mbuf *, struct mowner *);
void mowner_revoke(struct mbuf *, bool, int);
void mowner_attach(struct mowner *);
void mowner_detach(struct mowner *);
void m_claimm(struct mbuf *, struct mowner *);
#else
#define	mowner_init(m, type)		do { } while (/* CONSTCOND */ 0)
#define	mowner_ref(m, flags)		do { } while (/* CONSTCOND */ 0)
#define	mowner_revoke(m, all, flags)	do { } while (/* CONSTCOND */ 0)
#define	m_claim(m, mowner)		do { } while (/* CONSTCOND */ 0)
#define	mowner_attach(mo)		do { } while (/* CONSTCOND */ 0)
#define	mowner_detach(mo)		do { } while (/* CONSTCOND */ 0)
#define	m_claimm(m, mo)			do { } while (/* CONSTCOND */ 0)
#endif

#define	MCLAIM(m, mo)		m_claim((m), (mo))
#define	MOWNER_ATTACH(mo)	mowner_attach(mo)
#define	MOWNER_DETACH(mo)	mowner_detach(mo)

/*
 * mbuf allocation/deallocation macros:
 *
 *	MGET(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain internal data.
 *
 *	MGETHDR(struct mbuf *m, int how, int type)
 * allocates an mbuf and initializes it to contain a packet header
 * and internal data.
 *
 * If 'how' is M_WAIT, these macros (and the corresponding functions)
 * are guaranteed to return successfully.
 */
#define	MGET(m, how, type)	m = m_get((how), (type))
#define	MGETHDR(m, how, type)	m = m_gethdr((how), (type))
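
/*
 * Example (illustrative, not part of the original header): allocate a
 * packet-header mbuf without waiting, failing politely when memory is
 * short.
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 */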

#if defined(_KERNEL)
#define	_M_
/*
 * Macros for tracking external storage associated with an mbuf.
 */
#ifdef DEBUG
#define MCLREFDEBUGN(m, file, line)					\
do {									\
	(m)->m_ext.ext_nfile = (file);					\
	(m)->m_ext.ext_nline = (line);					\
} while (/* CONSTCOND */ 0)

#define MCLREFDEBUGO(m, file, line)					\
do {									\
	(m)->m_ext.ext_ofile = (file);					\
	(m)->m_ext.ext_oline = (line);					\
} while (/* CONSTCOND */ 0)
#else
#define MCLREFDEBUGN(m, file, line)
#define MCLREFDEBUGO(m, file, line)
#endif

#define	MCLINITREFERENCE(m)						\
do {									\
	KDASSERT(((m)->m_flags & M_EXT) == 0);				\
	(m)->m_ext_ref = (m);						\
	(m)->m_ext.ext_refcnt = 1;					\
	MCLREFDEBUGO((m), __FILE__, __LINE__);				\
	MCLREFDEBUGN((m), NULL, 0);					\
} while (/* CONSTCOND */ 0)

/*
 * Macros for mbuf external storage.
 *
 * MCLGET allocates and adds an mbuf cluster to a normal mbuf;
 * the flag M_EXT is set upon success.
 *
 * MEXTMALLOC allocates external storage and adds it to
 * a normal mbuf; the flag M_EXT is set upon success.
 *
 * MEXTADD adds pre-allocated external storage to
 * a normal mbuf; the flag M_EXT is set upon success.
 */

#define	_MCLGET(m, pool_cache, size, how)				\
do {									\
	(m)->m_ext_storage.ext_buf = (char *)				\
	    pool_cache_get_paddr((pool_cache),				\
		(how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0,		\
		&(m)->m_ext_storage.ext_paddr);				\
	if ((m)->m_ext_storage.ext_buf != NULL) {			\
		MCLINITREFERENCE(m);					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
				M_EXT|M_CLUSTER|M_EXT_RW;		\
		(m)->m_ext.ext_flags = 0;				\
		(m)->m_ext.ext_size = (size);				\
		(m)->m_ext.ext_free = NULL;				\
		(m)->m_ext.ext_arg = (pool_cache);			\
		/* ext_paddr initialized above */			\
		mowner_ref((m), M_EXT|M_CLUSTER);			\
	}								\
} while (/* CONSTCOND */ 0)

/*
 * The standard mbuf cluster pool.
 */
#define	MCLGET(m, how)	_MCLGET((m), mcl_cache, MCLBYTES, (how))
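
/*
 * Illustrative sketch (not part of the original header) of the
 * conventional MCLGET error check: the macro does not return a value,
 * so callers test M_EXT afterwards.
 *
 *	MCLGET(m, M_DONTWAIT);
 *	if ((m->m_flags & M_EXT) == 0) {
 *		m_freem(m);
 *		return ENOBUFS;
 *	}
 */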

#define	MEXTMALLOC(m, size, how)					\
do {									\
	(m)->m_ext_storage.ext_buf = (char *)				\
	    malloc((size), mbtypes[(m)->m_type], (how));		\
	if ((m)->m_ext_storage.ext_buf != NULL) {			\
		MCLINITREFERENCE(m);					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
		(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) |	\
				M_EXT|M_EXT_RW;				\
		(m)->m_ext.ext_flags = 0;				\
		(m)->m_ext.ext_size = (size);				\
		(m)->m_ext.ext_free = NULL;				\
		(m)->m_ext.ext_arg = NULL;				\
		mowner_ref((m), M_EXT);					\
	}								\
} while (/* CONSTCOND */ 0)

#define	MEXTADD(m, buf, size, type, free, arg)				\
do {									\
	MCLINITREFERENCE(m);						\
	(m)->m_data = (m)->m_ext.ext_buf = (char *)(buf);		\
	(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | M_EXT;	\
	(m)->m_ext.ext_flags = 0;					\
	(m)->m_ext.ext_size = (size);					\
	(m)->m_ext.ext_free = (free);					\
	(m)->m_ext.ext_arg = (arg);					\
	mowner_ref((m), M_EXT);						\
} while (/* CONSTCOND */ 0)
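
/*
 * Illustrative sketch (not part of the original header; all "xx_"
 * names are hypothetical): attaching a driver-owned receive buffer
 * with MEXTADD.  When the last reference is dropped, m_ext_free
 * invokes the callback, which is expected to release the mbuf itself
 * as well as the buffer.
 *
 *	static void
 *	xx_rxbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
 *	{
 *		struct xx_softc *sc = arg;
 *
 *		xx_rxbuf_recycle(sc, buf);
 *		pool_cache_put(mb_cache, m);
 *	}
 *
 *	MEXTADD(m, buf, XX_RXBUF_SIZE, MT_DATA, xx_rxbuf_free, sc);
 */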

/*
 * Reset the data pointer on an mbuf.
 */
#define	MRESETDATA(m)							\
do {									\
	if ((m)->m_flags & M_EXT)					\
		(m)->m_data = (m)->m_ext.ext_buf;			\
	else if ((m)->m_flags & M_PKTHDR)				\
		(m)->m_data = (m)->m_pktdat;				\
	else								\
		(m)->m_data = (m)->m_dat;				\
} while (/* CONSTCOND */ 0)

/*
 * MFREE(struct mbuf *m, struct mbuf *n)
 * Free a single mbuf and associated external storage.
 * Place the successor, if any, in n.
 */
#define	MFREE(m, n)							\
	mowner_revoke((m), 1, (m)->m_flags);				\
	mbstat_type_add((m)->m_type, -1);				\
	if ((m)->m_flags & M_PKTHDR)					\
		m_tag_delete_chain((m), NULL);				\
	(n) = (m)->m_next;						\
	if ((m)->m_flags & M_EXT) {					\
		m_ext_free((m));					\
	} else {							\
		KASSERT((m)->m_type != MT_FREE);			\
		(m)->m_type = MT_FREE;					\
		pool_cache_put(mb_cache, (m));				\
	}								\

/*
 * Copy mbuf pkthdr from `from' to `to'.
 * `from' must have M_PKTHDR set, and `to' must be empty.
 */
#define	M_COPY_PKTHDR(to, from)						\
do {									\
	(to)->m_pkthdr = (from)->m_pkthdr;				\
	(to)->m_flags = (from)->m_flags & M_COPYFLAGS;			\
	SLIST_INIT(&(to)->m_pkthdr.tags);				\
	m_tag_copy_chain((to), (from));					\
	(to)->m_data = (to)->m_pktdat;					\
} while (/* CONSTCOND */ 0)

/*
 * Move mbuf pkthdr from `from' to `to'.
 * `from' must have M_PKTHDR set, and `to' must be empty.
 */
#define	M_MOVE_PKTHDR(to, from)	m_move_pkthdr(to, from)

/*
 * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place
 * an object of the specified size at the end of the mbuf, longword aligned.
 */
#define	M_ALIGN(m, len)							\
do {									\
	(m)->m_data += (MLEN - (len)) &~ (sizeof(long) - 1);		\
} while (/* CONSTCOND */ 0)

/*
 * As above, for mbufs allocated with m_gethdr/MGETHDR
 * or initialized by M_COPY_PKTHDR.
 */
#define	MH_ALIGN(m, len)						\
do {									\
	(m)->m_data += (MHLEN - (len)) &~ (sizeof(long) - 1);		\
} while (/* CONSTCOND */ 0)
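
/*
 * Example (illustrative, not part of the original header): place a
 * to-be-built header at the end of a fresh packet-header mbuf so
 * that link-level headers can later be prepended without another
 * allocation; "hdrlen" is a hypothetical header size.
 *
 *	MGETHDR(m, M_DONTWAIT, MT_HEADER);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	MH_ALIGN(m, hdrlen);
 *	m->m_len = hdrlen;
 */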

/*
 * Determine if an mbuf's data area is read-only.  This is true
 * if external storage is read-only mapped, or not marked as R/W,
 * or referenced by more than one mbuf.
 */
#define	M_READONLY(m)							\
	(((m)->m_flags & M_EXT) != 0 &&					\
	  (((m)->m_flags & (M_EXT_ROMAP|M_EXT_RW)) != M_EXT_RW ||	\
	  (m)->m_ext.ext_refcnt > 1))

#define	M_UNWRITABLE(__m, __len)					\
	((__m)->m_len < (__len) || M_READONLY((__m)))

/*
 * Determine if an mbuf's data area is read-only at the MMU.
 */
#define	M_ROMAP(m)							\
	(((m)->m_flags & (M_EXT|M_EXT_ROMAP)) == (M_EXT|M_EXT_ROMAP))

/*
 * Compute the amount of space available
 * before the current start of data in an mbuf.
 */
#define	_M_LEADINGSPACE(m)						\
	((m)->m_flags & M_EXT ? (m)->m_data - (m)->m_ext.ext_buf :	\
	 (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat :	\
	 (m)->m_data - (m)->m_dat)

#define	M_LEADINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : _M_LEADINGSPACE((m)))

/*
 * Compute the amount of space available
 * after the end of data in an mbuf.
 */
#define	_M_TRAILINGSPACE(m)						\
	((m)->m_flags & M_EXT ? (m)->m_ext.ext_buf +			\
	 (m)->m_ext.ext_size - ((m)->m_data + (m)->m_len) :		\
	 &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))

#define	M_TRAILINGSPACE(m)						\
	(M_READONLY((m)) ? 0 : _M_TRAILINGSPACE((m)))

/*
 * Compute the address of an mbuf's data area.
 */
#define	M_BUFADDR(m)							\
	(((m)->m_flags & M_PKTHDR) ? (m)->m_pktdat : (m)->m_dat)

/*
 * Compute the offset of the beginning of the data buffer of a non-ext
 * mbuf.
 */
#define	M_BUFOFFSET(m)							\
	(((m)->m_flags & M_PKTHDR) ?					\
	 offsetof(struct mbuf, m_pktdat) : offsetof(struct mbuf, m_dat))

/*
 * Arrange to prepend space of size plen to mbuf m.
 * If a new mbuf must be allocated, how specifies whether to wait.
 * If how is M_DONTWAIT and allocation fails, the original mbuf chain
 * is freed and m is set to NULL.
 */
#define	M_PREPEND(m, plen, how)						\
do {									\
	if (M_LEADINGSPACE(m) >= (plen)) {				\
		(m)->m_data -= (plen);					\
		(m)->m_len += (plen);					\
	} else								\
		(m) = m_prepend((m), (plen), (how));			\
	if ((m) && (m)->m_flags & M_PKTHDR)				\
		(m)->m_pkthdr.len += (plen);				\
} while (/* CONSTCOND */ 0)
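
/*
 * Example (illustrative, not part of the original header): prepend a
 * protocol header to a chain; "struct xx_hdr" is hypothetical.
 *
 *	struct xx_hdr *xh;
 *
 *	M_PREPEND(m, sizeof(struct xx_hdr), M_DONTWAIT);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	xh = mtod(m, struct xx_hdr *);
 */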

/* change mbuf to new type */
#define MCHTYPE(m, t)							\
do {									\
	KASSERT((t) != MT_FREE);					\
	mbstat_type_add((m)->m_type, -1);				\
	mbstat_type_add(t, 1);						\
	(m)->m_type = t;						\
} while (/* CONSTCOND */ 0)

/* length to m_copy to copy all */
#define	M_COPYALL	-1

/* compatibility with 4.3 */
#define	m_copy(m, o, l)	m_copym((m), (o), (l), M_DONTWAIT)

/*
 * Allow drivers and/or protocols to store private context information.
 */
#define	M_GETCTX(m, t)		((t)(m)->m_pkthdr._rcvif.ctx)
#define	M_SETCTX(m, c)		((void)((m)->m_pkthdr._rcvif.ctx = (void *)(c)))
#define	M_CLEARCTX(m)		M_SETCTX((m), NULL)

#endif /* defined(_KERNEL) */

/*
 * Simple mbuf queueing system
 *
 * This is basically a SIMPLEQ adapted to mbuf use (i.e. it uses
 * m_nextpkt instead of field.sqe_next).
 *
 * m_next is ignored, so queueing chains of mbufs is possible.
 */
#define MBUFQ_HEAD(name)					\
struct name {							\
	struct mbuf *mq_first;					\
	struct mbuf **mq_last;					\
}

#define MBUFQ_INIT(q)		do {				\
	(q)->mq_first = NULL;					\
	(q)->mq_last = &(q)->mq_first;				\
} while (/*CONSTCOND*/0)

#define MBUFQ_ENQUEUE(q, m)	do {				\
	(m)->m_nextpkt = NULL;					\
	*(q)->mq_last = (m);					\
	(q)->mq_last = &(m)->m_nextpkt;				\
} while (/*CONSTCOND*/0)

#define MBUFQ_PREPEND(q, m)	do {				\
	if (((m)->m_nextpkt = (q)->mq_first) == NULL)		\
		(q)->mq_last = &(m)->m_nextpkt;			\
	(q)->mq_first = (m);					\
} while (/*CONSTCOND*/0)

#define MBUFQ_DEQUEUE(q, m)	do {				\
	if (((m) = (q)->mq_first) != NULL) {			\
		if (((q)->mq_first = (m)->m_nextpkt) == NULL)	\
			(q)->mq_last = &(q)->mq_first;		\
		else						\
			(m)->m_nextpkt = NULL;			\
	}							\
} while (/*CONSTCOND*/0)

#define MBUFQ_DRAIN(q)		do {				\
	struct mbuf *__m0;					\
	while ((__m0 = (q)->mq_first) != NULL) {		\
		(q)->mq_first = __m0->m_nextpkt;		\
		m_freem(__m0);					\
	}							\
	(q)->mq_last = &(q)->mq_first;				\
} while (/*CONSTCOND*/0)

#define MBUFQ_FIRST(q)		((q)->mq_first)
#define MBUFQ_NEXT(m)		((m)->m_nextpkt)
#define MBUFQ_LAST(q)		(*(q)->mq_last)
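
/*
 * Example (illustrative, not part of the original header; the "xx_"
 * names are hypothetical): a driver-private transmit queue built from
 * these macros.
 *
 *	MBUFQ_HEAD(xx_txq) sc_txq;
 *
 *	MBUFQ_INIT(&sc->sc_txq);
 *	MBUFQ_ENQUEUE(&sc->sc_txq, m);
 *	...
 *	MBUFQ_DEQUEUE(&sc->sc_txq, m);
 *	if (m != NULL)
 *		xx_transmit(sc, m);
 */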

/*
 * Mbuf statistics.
 * For statistics related to mbuf and cluster allocations, see also the
 * pool headers (mb_cache and mcl_cache).
 */
struct mbstat {
	u_long	_m_spare;	/* formerly m_mbufs */
	u_long	_m_spare1;	/* formerly m_clusters */
	u_long	_m_spare2;	/* spare field */
	u_long	_m_spare3;	/* formerly m_clfree - free clusters */
	u_long	m_drops;	/* times failed to find space */
	u_long	m_wait;		/* times waited for space */
	u_long	m_drain;	/* times drained protocols for space */
	u_short	m_mtypes[256];	/* type specific mbuf allocations */
};

struct mbstat_cpu {
	u_int	m_mtypes[256];	/* type specific mbuf allocations */
};

/*
 * Mbuf sysctl variables.
 */
#define	MBUF_MSIZE		1	/* int: mbuf base size */
#define	MBUF_MCLBYTES		2	/* int: mbuf cluster size */
#define	MBUF_NMBCLUSTERS	3	/* int: limit on the # of clusters */
#define	MBUF_MBLOWAT		4	/* int: mbuf low water mark */
#define	MBUF_MCLLOWAT		5	/* int: mbuf cluster low water mark */
#define	MBUF_STATS		6	/* struct: mbstat */
#define	MBUF_MOWNERS		7	/* struct: m_owner[] */
#define	MBUF_MAXID		8	/* number of valid MBUF ids */

#define	CTL_MBUF_NAMES {						\
	{ 0, 0 },							\
	{ "msize", CTLTYPE_INT },					\
	{ "mclbytes", CTLTYPE_INT },					\
	{ "nmbclusters", CTLTYPE_INT },					\
	{ "mblowat", CTLTYPE_INT },					\
	{ "mcllowat", CTLTYPE_INT },					\
	{ 0 /* "stats" */, CTLTYPE_STRUCT },				\
	{ 0 /* "mowners" */, CTLTYPE_STRUCT },				\
}

#ifdef _KERNEL
extern struct mbstat mbstat;
extern int	nmbclusters;		/* limit on the # of clusters */
extern int	mblowat;		/* mbuf low water mark */
extern int	mcllowat;		/* mbuf cluster low water mark */
extern int	max_linkhdr;		/* largest link-level header */
extern int	max_protohdr;		/* largest protocol header */
extern int	max_hdr;		/* largest link+protocol header */
extern int	max_datalen;		/* MHLEN - max_hdr */
extern const int msize;			/* mbuf base size */
extern const int mclbytes;		/* mbuf cluster size */
extern pool_cache_t mb_cache;
extern pool_cache_t mcl_cache;
#ifdef MBUFTRACE
LIST_HEAD(mownerhead, mowner);
extern struct mownerhead mowners;
extern struct mowner unknown_mowners[];
extern struct mowner revoked_mowner;
#endif

MALLOC_DECLARE(M_MBUF);
MALLOC_DECLARE(M_SONAME);

struct	mbuf *m_copym(struct mbuf *, int, int, int);
struct	mbuf *m_copypacket(struct mbuf *, int);
struct	mbuf *m_devget(char *, int, int, struct ifnet *,
		void (*copy)(const void *, void *, size_t));
struct	mbuf *m_dup(struct mbuf *, int, int, int);
struct	mbuf *m_free(struct mbuf *);
struct	mbuf *m_get(int, int);
struct	mbuf *m_getclr(int, int);
struct	mbuf *m_gethdr(int, int);
struct	mbuf *m_prepend(struct mbuf *, int, int);
struct	mbuf *m_pulldown(struct mbuf *, int, int, int *);
struct	mbuf *m_pullup(struct mbuf *, int);
struct	mbuf *m_copyup(struct mbuf *, int, int);
struct	mbuf *m_split(struct mbuf *, int, int);
struct	mbuf *m_getptr(struct mbuf *, int, int *);
void	m_adj(struct mbuf *, int);
struct	mbuf *m_defrag(struct mbuf *, int);
int	m_apply(struct mbuf *, int, int,
		int (*)(void *, void *, unsigned int), void *);
void	m_cat(struct mbuf *, struct mbuf *);
void	m_clget(struct mbuf *, int);
int	m_mballoc(int, int);
void	m_copyback(struct mbuf *, int, int, const void *);
struct	mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int);
int	m_makewritable(struct mbuf **, int, int, int);
struct	mbuf *m_getcl(int, int, int);
void	m_copydata(struct mbuf *, int, int, void *);
void	m_freem(struct mbuf *);
void	m_reclaim(void *, int);
void	mbinit(void);
void	m_ext_free(struct mbuf *);
char	*m_mapin(struct mbuf *);
void	m_move_pkthdr(struct mbuf *, struct mbuf *);

bool	m_ensure_contig(struct mbuf **, int);
struct	mbuf *m_add(struct mbuf *, struct mbuf *);
void	m_align(struct mbuf *, int);
int	m_append(struct mbuf *, int, const void *);

/* Inline routines. */
static __inline u_int m_length(const struct mbuf *) __unused;

/* Statistics */
void	mbstat_type_add(int, int);

/* Packet tag routines */
struct	m_tag *m_tag_get(int, int, int);
void	m_tag_free(struct m_tag *);
void	m_tag_prepend(struct mbuf *, struct m_tag *);
void	m_tag_unlink(struct mbuf *, struct m_tag *);
void	m_tag_delete(struct mbuf *, struct m_tag *);
void	m_tag_delete_chain(struct mbuf *, struct m_tag *);
void	m_tag_delete_nonpersistent(struct mbuf *);
struct	m_tag *m_tag_find(const struct mbuf *, int, struct m_tag *);
struct	m_tag *m_tag_copy(struct m_tag *);
int	m_tag_copy_chain(struct mbuf *, struct mbuf *);
void	m_tag_init(struct mbuf *);
struct	m_tag *m_tag_first(struct mbuf *);
struct	m_tag *m_tag_next(struct mbuf *, struct m_tag *);

/* Packet tag types */
#define	PACKET_TAG_NONE				0  /* Nothing */
#define	PACKET_TAG_VLAN				1  /* VLAN ID */
#define	PACKET_TAG_ENCAP			2  /* encapsulation data */
#define	PACKET_TAG_ESP				3  /* ESP information */
#define	PACKET_TAG_PF				11 /* packet filter */
#define	PACKET_TAG_ALTQ_QID			12 /* ALTQ queue id */

#define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		16
#define	PACKET_TAG_IPSEC_IN_DONE		17
#define	PACKET_TAG_IPSEC_OUT_DONE		18
#define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	19 /* NIC IPsec crypto req'ed */
#define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	20 /* NIC notifies IPsec */
#define	PACKET_TAG_IPSEC_PENDING_TDB		21 /* Reminder to do IPsec */

#define	PACKET_TAG_IPSEC_SOCKET			22 /* IPSEC socket ref */
#define	PACKET_TAG_IPSEC_HISTORY		23 /* IPSEC history */

#define	PACKET_TAG_IPSEC_NAT_T_PORTS		25 /* two uint16_t */

#define	PACKET_TAG_INET6			26 /* IPv6 info */

#define	PACKET_TAG_ECO_RETRYPARMS		27 /* Econet retry parameters */

#define	PACKET_TAG_TUNNEL_INFO			28 /* tunnel identification and
						    * protocol callback, for
						    * loop detection/recovery
						    */

#define	PACKET_TAG_MPLS				29 /* Indicate it's for MPLS */

/*
 * Return the number of bytes in the mbuf chain, m.
 */
static __inline u_int
m_length(const struct mbuf *m)
{
	const struct mbuf *m0;
	u_int pktlen;

	if ((m->m_flags & M_PKTHDR) != 0)
		return m->m_pkthdr.len;

	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;
	return pktlen;
}

static __inline void
m_hdr_init(struct mbuf *m, short type, struct mbuf *next, char *data, int len)
{

	KASSERT(m != NULL);

	mowner_init(m, type);
	m->m_ext_ref = m; /* default */
	m->m_type = type;
	m->m_len = len;
	m->m_next = next;
	m->m_nextpkt = NULL; /* default */
	m->m_data = data;
	m->m_flags = 0; /* default */
}

static __inline void
m_set_rcvif(struct mbuf *m, const struct ifnet *ifp)
{

	m->m_pkthdr.rcvif_index = ifp->if_index;
}

static __inline void
m_reset_rcvif(struct mbuf *m)
{

	/* A caller may expect the whole _rcvif union to be zeroed */
	/* m->m_pkthdr.rcvif_index = 0; */
	m->m_pkthdr._rcvif.ctx = NULL;
}

static __inline void
m_copy_rcvif(struct mbuf *m, const struct mbuf *n)
{

	m->m_pkthdr.rcvif_index = n->m_pkthdr.rcvif_index;
}

static __inline void
m_pkthdr_init(struct mbuf *m)
{

	KASSERT(m != NULL);

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;

	m_reset_rcvif(m);
	m->m_pkthdr.len = 0;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	m->m_pkthdr.pattr_class = NULL;
	m->m_pkthdr.pattr_af = AF_UNSPEC;
	m->m_pkthdr.pattr_hdr = NULL;
}

void m_print(const struct mbuf *, const char *, void (*)(const char *, ...)
    __printflike(1, 2));

/*
 * Get the rcvif of an mbuf.
 *
 * The caller must call m_put_rcvif after using the rcvif, and cannot
 * block or sleep while holding it.  As long as this constraint is
 * satisfied, the API ensures that the returned rcvif isn't freed until
 * m_put_rcvif is called.
 */
static __inline struct ifnet *
m_get_rcvif(const struct mbuf *m, int *s)
{

	*s = pserialize_read_enter();
	return if_byindex(m->m_pkthdr.rcvif_index);
}

static __inline void
m_put_rcvif(struct ifnet *ifp, int *s)
{

	if (ifp == NULL)
		return;
	pserialize_read_exit(*s);
}
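
/*
 * Example (illustrative, not part of the original header): briefly
 * inspect the receive interface under pserialize; no blocking is
 * allowed between the get and the put.  "ifindex" is hypothetical.
 *
 *	struct ifnet *ifp;
 *	int s;
 *
 *	ifp = m_get_rcvif(m, &s);
 *	if (ifp != NULL)
 *		ifindex = ifp->if_index;
 *	m_put_rcvif(ifp, &s);
 */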

/*
 * Get the rcvif of an mbuf.
 *
 * The caller must call m_put_rcvif_psref after using the rcvif.  The
 * API ensures that the returned rcvif isn't freed until
 * m_put_rcvif_psref is called.
 */
static __inline struct ifnet *
m_get_rcvif_psref(const struct mbuf *m, struct psref *psref)
{

	return if_get_byindex(m->m_pkthdr.rcvif_index, psref);
}

static __inline void
m_put_rcvif_psref(struct ifnet *ifp, struct psref *psref)
{

	if (ifp == NULL)
		return;
	if_put(ifp, psref);
}
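
/*
 * Example (illustrative, not part of the original header): the psref
 * variant takes a passive reference, so unlike the pserialize variant
 * above the rcvif can be used across code that may sleep (see
 * psref(9)).
 *
 *	struct psref psref;
 *	struct ifnet *ifp;
 *
 *	ifp = m_get_rcvif_psref(m, &psref);
 *	if (ifp == NULL)
 *		return;
 *	(... work that may sleep ...)
 *	m_put_rcvif_psref(ifp, &psref);
 */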

/*
 * Get the rcvif of an mbuf.
 *
 * This is NOT an MP-safe API and shouldn't be used where MP-safety
 * is required.
 */
static __inline struct ifnet *
m_get_rcvif_NOMPSAFE(const struct mbuf *m)
{

	return if_byindex(m->m_pkthdr.rcvif_index);
}

#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */