/*	$OpenBSD: pfvar_priv.h,v 1.36 2024/04/22 13:30:22 bluhm Exp $	*/

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2016 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef _NET_PFVAR_PRIV_H_
#define _NET_PFVAR_PRIV_H_

#ifdef _KERNEL

#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/percpu.h>

/*
 * Locks used to protect struct members in this file:
 *	L	pf_inp_mtx		link pf to inp mutex
 */

struct pfsync_deferral;

/*
 * pf state items - links from pf_state_key to pf_states
 */

struct pf_state_item {
	TAILQ_ENTRY(pf_state_item)
				 si_entry;
	struct pf_state		*si_st;
};

TAILQ_HEAD(pf_statelisthead, pf_state_item);
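
/*
 * a minimal sketch of visiting every state linked into a
 * pf_statelisthead through its pf_state_item entries, assuming `head'
 * points at such a list and the caller holds a lock that keeps it
 * stable (e.g. PF_STATE_LOCK for the sk_states list declared below):
 *
 *	struct pf_state_item *si;
 *
 *	TAILQ_FOREACH(si, head, si_entry) {
 *		struct pf_state *st = si->si_st;
 *		// look at the state
 *	}
 */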

/*
 * pf state keys - look up states by address
 */

struct pf_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	u_int16_t	 rdomain;
	u_int16_t	 hash;
	sa_family_t	 af;
	u_int8_t	 proto;

	RB_ENTRY(pf_state_key)	 sk_entry;
	struct pf_statelisthead	 sk_states;
	struct pf_state_key	*sk_reverse;
	struct inpcb		*sk_inp;	/* [L] */
	pf_refcnt_t		 sk_refcnt;
	u_int8_t		 sk_removed;
};

RBT_HEAD(pf_state_tree, pf_state_key);
RBT_PROTOTYPE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);

#define PF_REVERSED_KEY(key, family)				\
	((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) &&	\
	 (key[PF_SK_WIRE]->af != (family)))
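
/*
 * PF_REVERSED_KEY only ever fires for af-translated states (e.g.
 * NAT64), since those are the only states whose wire and stack keys
 * have different address families. a hypothetical example: a state
 * created from an IPv6 packet translated to IPv4 has
 * key[PF_SK_WIRE]->af == AF_INET6 and key[PF_SK_STACK]->af == AF_INET,
 * so a lookup for an AF_INET packet makes PF_REVERSED_KEY(key, AF_INET)
 * true, telling the caller to swap its use of the wire and stack keys.
 */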

/*
 * pf state
 *
 * Protection/ownership of pf_state members:
 *	I	immutable after pf_state_insert()
 *	M	pf_state mtx
 *	P	PF_STATE_LOCK
 *	S	pfsync
 *	L	pf_state_list
 *	g	pf_purge gc
 */

struct pf_state {
	u_int64_t		 id;		/* [I] */
	u_int32_t		 creatorid;	/* [I] */
	u_int8_t		 direction;	/* [I] */
	u_int8_t		 pad[3];

	TAILQ_ENTRY(pf_state)	 sync_list;	/* [S] */
	struct pfsync_deferral	*sync_defer;	/* [S] */
	TAILQ_ENTRY(pf_state)	 entry_list;	/* [L] */
	SLIST_ENTRY(pf_state)	 gc_list;	/* [g] */
	RB_ENTRY(pf_state)	 entry_id;	/* [P] */
	struct pf_state_peer	 src;
	struct pf_state_peer	 dst;
	struct pf_rule_slist	 match_rules;	/* [I] */
	union pf_rule_ptr	 rule;		/* [I] */
	union pf_rule_ptr	 anchor;	/* [I] */
	union pf_rule_ptr	 natrule;	/* [I] */
	struct pf_addr		 rt_addr;	/* [I] */
	struct pf_sn_head	 src_nodes;	/* [I] */
	struct pf_state_key	*key[2];	/* [I] stack and wire */
	struct pfi_kif		*kif;		/* [I] */
	struct mutex		 mtx;
	pf_refcnt_t		 refcnt;
	u_int64_t		 packets[2];
	u_int64_t		 bytes[2];
	int32_t			 creation;	/* [I] */
	int32_t			 expire;
	int32_t			 pfsync_time;	/* [S] */
	int			 rtableid[2];	/* [I] stack and wire */
	u_int16_t		 qid;		/* [I] */
	u_int16_t		 pqid;		/* [I] */
	u_int16_t		 tag;		/* [I] */
	u_int16_t		 state_flags;	/* [M] */
	u_int8_t		 log;		/* [I] */
	u_int8_t		 timeout;
	u_int8_t		 sync_state;	/* [S] PFSYNC_S_x */
	u_int8_t		 sync_updates;	/* [S] */
	u_int8_t		 min_ttl;	/* [I] */
	u_int8_t		 set_tos;	/* [I] */
	u_int8_t		 set_prio[2];	/* [I] */
	u_int16_t		 max_mss;	/* [I] */
	u_int16_t		 if_index_in;	/* [I] */
	u_int16_t		 if_index_out;	/* [I] */
	u_int16_t		 delay;		/* [I] */
	u_int8_t		 rt;		/* [I] */
};

RBT_HEAD(pf_state_tree_id, pf_state);
RBT_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
extern struct pf_state_tree_id tree_id;
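
/*
 * a minimal sketch of looking up a state by id in tree_id; the tree is
 * keyed on the immutable id and creatorid members, and the caller is
 * assumed to hold PF_STATE_LOCK since entry_id is protected by it:
 *
 *	struct pf_state key, *st;
 *
 *	key.id = id;
 *	key.creatorid = creatorid;
 *	st = RBT_FIND(pf_state_tree_id, &tree_id, &key);
 */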

/*
 * states are linked into a global list to support the following
 * functionality:
 *
 * - garbage collection
 * - pfsync bulk send operations
 * - bulk state fetches via the DIOCGETSTATES ioctl
 * - bulk state clearing via the DIOCCLRSTATES ioctl
 *
 * a state is inserted into the global pf_state_list once it has also
 * been successfully added to the various trees that make up the state
 * table. states are only removed from the pf_state_list by the garbage
 * collection process.
 *
 * the pf_state_list head and tail pointers (ie, the pfs_list TAILQ_HEAD
 * structure) and the pointers between the entries on the pf_state_list
 * are locked separately. at a high level, this allows for insertion
 * of new states into the pf_state_list while other contexts (eg, the
 * ioctls) are traversing the state items in the list. for garbage
 * collection to remove items from the pf_state_list, it has to exclude
 * both modifications to the list head and tail pointers, and traversal
 * of the links between the states.
 *
 * the head and tail pointers are protected by a mutex. the pointers
 * between states are protected by an rwlock.
 *
 * because insertions are only made to the end of the list, if we get
 * a snapshot of the head and tail of the list and prevent modifications
 * to the links between states, we can safely traverse between the
 * head and tail entries. subsequent insertions can add entries after
 * our view of the tail, but we don't look past our view.
 *
 * if both locks must be taken, the rwlock protecting the links between
 * states is taken before the mutex protecting the head and tail
 * pointer.
 *
 * insertion into the list follows this pattern:
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_INSERT_TAIL(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 * traversal of the list:
 *
 *	// lock against the gc removing an item from the list
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 *
 *	// get a snapshot view of the ends of the list
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	head = TAILQ_FIRST(&pf_state_list.pfs_list);
 *	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	state = NULL;
 *	next = head;
 *
 *	while (state != tail) {
 *		state = next;
 *		next = TAILQ_NEXT(state, entry_list);
 *
 *		// look at the state
 *	}
 *
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 *
 * removing an item from the list:
 *
 *	// wait for iterators (readers) to get out
 *	rw_enter_write(&pf_state_list.pfs_rwl);
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_REMOVE(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	rw_exit_write(&pf_state_list.pfs_rwl);
 *
 * the lock ordering for the pf_state_list locks and the rest of the pf
 * locks is:
 *
 * 1. KERNEL_LOCK
 * 2. NET_LOCK
 * 3. pf_state_list.pfs_rwl
 * 4. PF_LOCK
 * 5. PF_STATE_LOCK
 * 6. pf_state_list.pfs_mtx
 */

struct pf_state_list {
	/* the list of states in the system */
	struct pf_state_queue		pfs_list;

	/* serialise pfs_list head/tail access */
	struct mutex			pfs_mtx;

	/* serialise access to pointers between pfs_list entries */
	struct rwlock			pfs_rwl;
};

#define PF_STATE_LIST_INITIALIZER(_pfs) {				\
	.pfs_list	= TAILQ_HEAD_INITIALIZER(_pfs.pfs_list),	\
	.pfs_mtx	= MUTEX_INITIALIZER(IPL_SOFTNET),		\
	.pfs_rwl	= RWLOCK_INITIALIZER("pfstates"),		\
}
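
/*
 * a sketch of how the initializer is intended to be used to declare
 * the global list (pf.c defines pf_state_list along these lines):
 *
 *	struct pf_state_list pf_state_list =
 *	    PF_STATE_LIST_INITIALIZER(pf_state_list);
 */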

extern struct rwlock pf_lock;

struct pf_pdesc {
	struct {
		int	 done;
		uid_t	 uid;
		gid_t	 gid;
		pid_t	 pid;
	}		 lookup;
	u_int64_t	 tot_len;	/* total packet length */

	struct pf_addr	 nsaddr;	/* src address after NAT */
	struct pf_addr	 ndaddr;	/* dst address after NAT */

	struct pfi_kif	*kif;		/* incoming interface */
	struct mbuf	*m;		/* mbuf containing the packet */
	struct pf_addr	*src;		/* src address */
	struct pf_addr	*dst;		/* dst address */
	u_int16_t	*pcksum;	/* proto cksum */
	u_int16_t	*sport;
	u_int16_t	*dport;
	u_int16_t	 osport;
	u_int16_t	 odport;
	u_int16_t	 hash;
	u_int16_t	 nsport;	/* src port after NAT */
	u_int16_t	 ndport;	/* dst port after NAT */

	u_int32_t	 off;		/* protocol header offset */
	u_int32_t	 hdrlen;	/* protocol header length */
	u_int32_t	 p_len;		/* length of protocol payload */
	u_int32_t	 extoff;	/* extension header offset */
	u_int32_t	 fragoff;	/* fragment header offset */
	u_int32_t	 jumbolen;	/* length from v6 jumbo header */
	u_int32_t	 badopts;	/* v4 options or v6 routing headers */
#define PF_OPT_OTHER		0x0001
#define PF_OPT_JUMBO		0x0002
#define PF_OPT_ROUTER_ALERT	0x0004

	u_int16_t	 rdomain;	/* original routing domain */
	u_int16_t	 virtual_proto;
#define PF_VPROTO_FRAGMENT	256
	sa_family_t	 af;
	sa_family_t	 naf;
	u_int8_t	 proto;
	u_int8_t	 tos;
	u_int8_t	 ttl;
	u_int8_t	 dir;		/* direction */
	u_int8_t	 sidx;		/* key index for source */
	u_int8_t	 didx;		/* key index for destination */
	u_int8_t	 destchg;	/* flag set when destination changed */
	u_int8_t	 pflog;		/* flags for packet logging */
	union {
		struct tcphdr			tcp;
		struct udphdr			udp;
		struct icmp			icmp;
#ifdef INET6
		struct icmp6_hdr		icmp6;
		struct mld_hdr			mld;
		struct nd_neighbor_solicit	nd_ns;
#endif /* INET6 */
	} hdr;
};
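
/*
 * only the hdr member matching pd->proto is valid, so consumers
 * dispatch on the protocol before touching it. a minimal sketch,
 * assuming pd was set up by pf_setup_pdesc() and is not a fragment
 * (fragments carry pd->virtual_proto == PF_VPROTO_FRAGMENT and have
 * no protocol header):
 *
 *	switch (pd->proto) {
 *	case IPPROTO_TCP:
 *		flags = pd->hdr.tcp.th_flags;
 *		break;
 *	case IPPROTO_UDP:
 *		len = ntohs(pd->hdr.udp.uh_ulen);
 *		break;
 *	}
 */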

struct pf_anchor_stackframe {
	struct pf_ruleset	*sf_rs;
	union {
		struct pf_rule			*u_r;
		struct pf_anchor_stackframe	*u_stack_top;
	} u;
	struct pf_anchor	*sf_child;
	int			 sf_jump_target;
};
#define sf_r		u.u_r
#define sf_stack_top	u.u_stack_top
enum {
	PF_NEXT_RULE,
	PF_NEXT_CHILD
};

extern struct cpumem *pf_anchor_stack;
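
/*
 * the anchor stack lives in per-CPU memory so rule evaluation can
 * recurse into anchors without allocating. a hedged sketch of
 * borrowing it with the <sys/percpu.h> API included above:
 *
 *	struct pf_anchor_stackframe *stack;
 *
 *	stack = cpumem_enter(pf_anchor_stack);
 *	// push and pop frames while evaluating rules
 *	cpumem_leave(pf_anchor_stack, stack);
 */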

enum pf_trans_type {
	PF_TRANS_NONE,
	PF_TRANS_GETRULE,
	PF_TRANS_MAX
};

struct pf_trans {
	LIST_ENTRY(pf_trans)	pft_entry;
	uint32_t		pft_unit;		/* process id */
	uint64_t		pft_ticket;
	enum pf_trans_type	pft_type;
	union {
		struct {
			u_int32_t		 gr_version;
			struct pf_anchor	*gr_anchor;
			struct pf_rule		*gr_rule;
		} u_getrule;
	} u;
};

#define pftgr_version	u.u_getrule.gr_version
#define pftgr_anchor	u.u_getrule.gr_anchor
#define pftgr_rule	u.u_getrule.gr_rule
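
/*
 * the pftgr_* defines are shorthand for the u_getrule arm of the
 * union above. a hypothetical fragment stepping a PF_TRANS_GETRULE
 * transaction to the next rule in its anchor:
 *
 *	KASSERT(t->pft_type == PF_TRANS_GETRULE);
 *	t->pftgr_rule = TAILQ_NEXT(t->pftgr_rule, entries);
 */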

extern struct timeout	pf_purge_states_to;
extern struct task	pf_purge_task;
extern struct timeout	pf_purge_to;

struct pf_state		*pf_state_ref(struct pf_state *);
void			 pf_state_unref(struct pf_state *);
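
/*
 * a sketch of the reference pattern these provide: take a reference
 * while the state is known to be live (e.g. under the state lock),
 * then keep using it after the lock is dropped:
 *
 *	PF_STATE_ENTER_READ();
 *	st = pf_state_ref(st);
 *	PF_STATE_EXIT_READ();
 *
 *	// use st without the state lock
 *
 *	pf_state_unref(st);
 */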

extern struct rwlock	pf_lock;
extern struct rwlock	pf_state_lock;
extern struct mutex	pf_frag_mtx;
extern struct mutex	pf_inp_mtx;

#define PF_LOCK()		do {			\
		rw_enter_write(&pf_lock);		\
	} while (0)

#define PF_UNLOCK()		do {			\
		PF_ASSERT_LOCKED();			\
		rw_exit_write(&pf_lock);		\
	} while (0)

#define PF_ASSERT_LOCKED()	do {			\
		if (rw_status(&pf_lock) != RW_WRITE)	\
			splassert_fail(RW_WRITE,	\
			    rw_status(&pf_lock), __func__);\
	} while (0)

#define PF_ASSERT_UNLOCKED()	do {			\
		if (rw_status(&pf_lock) == RW_WRITE)	\
			splassert_fail(0, rw_status(&pf_lock), __func__);\
	} while (0)

#define PF_STATE_ENTER_READ()	do {			\
		rw_enter_read(&pf_state_lock);		\
	} while (0)

#define PF_STATE_EXIT_READ()	do {			\
		rw_exit_read(&pf_state_lock);		\
	} while (0)

#define PF_STATE_ENTER_WRITE()	do {			\
		rw_enter_write(&pf_state_lock);		\
	} while (0)

#define PF_STATE_EXIT_WRITE()	do {			\
		PF_STATE_ASSERT_LOCKED();		\
		rw_exit_write(&pf_state_lock);		\
	} while (0)

#define PF_STATE_ASSERT_LOCKED()	do {		\
		if (rw_status(&pf_state_lock) != RW_WRITE)\
			splassert_fail(RW_WRITE,	\
			    rw_status(&pf_state_lock), __func__);\
	} while (0)

#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
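
/*
 * a minimal sketch of taking these locks in the documented order
 * (NET_LOCK before PF_LOCK before PF_STATE_LOCK, per the pf_state_list
 * comment above):
 *
 *	NET_LOCK();
 *	PF_LOCK();
 *	PF_STATE_ENTER_WRITE();
 *	// modify the state table
 *	PF_STATE_EXIT_WRITE();
 *	PF_UNLOCK();
 *	NET_UNLOCK();
 */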

/* for copies to/from network byte order */
void			pf_state_peer_hton(const struct pf_state_peer *,
			    struct pfsync_state_peer *);
void			pf_state_peer_ntoh(const struct pfsync_state_peer *,
			    struct pf_state_peer *);
u_int16_t		pf_pkt_hash(sa_family_t, uint8_t,
			    const struct pf_addr *, const struct pf_addr *,
			    uint16_t, uint16_t);
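
/*
 * a hedged sketch of hashing a flow with pf_pkt_hash for a TCP or UDP
 * packet described by a pf_pdesc; assumes pd->sport and pd->dport are
 * valid (they are pointers and only set for protocols with ports):
 *
 *	pd->hash = pf_pkt_hash(pd->af, pd->proto, pd->src, pd->dst,
 *	    *pd->sport, *pd->dport);
 */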

#endif /* _KERNEL */

#endif /* _NET_PFVAR_PRIV_H_ */