/*	$OpenBSD: pfvar_priv.h,v 1.38 2024/09/07 22:41:55 aisha Exp $	*/

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 2016 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef _NET_PFVAR_PRIV_H_
#define _NET_PFVAR_PRIV_H_

#ifdef _KERNEL

#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/percpu.h>

/*
 * Locks used to protect struct members in this file:
 *	L	pf_inp_mtx		link pf to inp mutex
 */

struct pfsync_deferral;

/*
 * pf state items - links from pf_state_key to pf_states
 */

struct pf_state_item {
	TAILQ_ENTRY(pf_state_item)
				 si_entry;
	struct pf_state		*si_st;
};

TAILQ_HEAD(pf_statelisthead, pf_state_item);
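
/*
 * Illustrative sketch (not compiled in): given a pf_state_key *sk,
 * the pf_states sharing that key can be walked via sk_states, e.g.
 *
 *	struct pf_state_item *si;
 *
 *	TAILQ_FOREACH(si, &sk->sk_states, si_entry)
 *		inspect(si->si_st);
 *
 * where inspect() stands in for whatever the caller does per state.
 */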

/*
 * pf state keys - look up states by address
 */

struct pf_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	u_int16_t	 rdomain;
	u_int16_t	 hash;
	sa_family_t	 af;
	u_int8_t	 proto;

	RBT_ENTRY(pf_state_key)	 sk_entry;
	struct pf_statelisthead	 sk_states;
	struct pf_state_key	*sk_reverse;
	struct inpcb		*sk_inp;	/* [L] */
	pf_refcnt_t		 sk_refcnt;
	u_int8_t		 sk_removed;
};

RBT_HEAD(pf_state_tree, pf_state_key);
RBT_PROTOTYPE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);

#define PF_REVERSED_KEY(key, family)				\
	((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) &&	\
	 (key[PF_SK_WIRE]->af != (family)))
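
/*
 * Sketch of intended use (hypothetical caller): PF_REVERSED_KEY can
 * only be true for af-translated states, where the wire and stack
 * keys have different address families.  A caller holding a state st
 * and a packet family af might pick the key side matching the packet
 * along these lines:
 *
 *	int idx = PF_REVERSED_KEY(st->key, af) ? PF_SK_STACK : PF_SK_WIRE;
 *	struct pf_state_key *sk = st->key[idx];
 */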

/*
 * pf state
 *
 * Protection/ownership of pf_state members:
 *	I	immutable after pf_state_insert()
 *	M	pf_state mtx
 *	P	PF_STATE_LOCK
 *	S	pfsync
 *	L	pf_state_list
 *	g	pf_purge gc
 */

struct pf_state {
	u_int64_t		 id;		/* [I] */
	u_int32_t		 creatorid;	/* [I] */
	u_int8_t		 direction;	/* [I] */
	u_int8_t		 pad[3];

	TAILQ_ENTRY(pf_state)	 sync_list;	/* [S] */
	struct pfsync_deferral	*sync_defer;	/* [S] */
	TAILQ_ENTRY(pf_state)	 entry_list;	/* [L] */
	SLIST_ENTRY(pf_state)	 gc_list;	/* [g] */
	RBT_ENTRY(pf_state)	 entry_id;	/* [P] */
	struct pf_state_peer	 src;
	struct pf_state_peer	 dst;
	struct pf_rule_slist	 match_rules;	/* [I] */
	union pf_rule_ptr	 rule;		/* [I] */
	union pf_rule_ptr	 anchor;	/* [I] */
	union pf_rule_ptr	 natrule;	/* [I] */
	struct pf_addr		 rt_addr;	/* [I] */
	struct pf_sn_head	 src_nodes;	/* [I] */
	struct pf_state_key	*key[2];	/* [I] stack and wire */
	struct pfi_kif		*kif;		/* [I] */
	struct mutex		 mtx;
	pf_refcnt_t		 refcnt;
	u_int64_t		 packets[2];
	u_int64_t		 bytes[2];
	int32_t			 creation;	/* [I] */
	int32_t			 expire;
	int32_t			 pfsync_time;	/* [S] */
	int			 rtableid[2];	/* [I] stack and wire */
	u_int16_t		 qid;		/* [I] */
	u_int16_t		 pqid;		/* [I] */
	u_int16_t		 tag;		/* [I] */
	u_int16_t		 state_flags;	/* [M] */
	u_int8_t		 log;		/* [I] */
	u_int8_t		 timeout;
	u_int8_t		 sync_state;	/* [S] PFSYNC_S_x */
	u_int8_t		 sync_updates;	/* [S] */
	u_int8_t		 min_ttl;	/* [I] */
	u_int8_t		 set_tos;	/* [I] */
	u_int8_t		 set_prio[2];	/* [I] */
	u_int16_t		 max_mss;	/* [I] */
	u_int16_t		 if_index_in;	/* [I] */
	u_int16_t		 if_index_out;	/* [I] */
	u_int16_t		 delay;		/* [I] */
	u_int8_t		 rt;		/* [I] */
};

RBT_HEAD(pf_state_tree_id, pf_state);
RBT_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
extern struct pf_state_tree_id tree_id;
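
/*
 * Illustrative sketch (assumptions noted): with PF_STATE_LOCK held,
 * a state could be looked up by its id/creatorid pair through the
 * RBT API, roughly as a lookup helper like pf_find_state_byid()
 * would:
 *
 *	struct pf_state key, *st;
 *
 *	key.id = id;
 *	key.creatorid = creatorid;
 *	st = RBT_FIND(pf_state_tree_id, &tree_id, &key);
 *
 * this relies on the comparison function only inspecting the id and
 * creatorid members of the search key.
 */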

/*
 * states are linked into a global list to support the following
 * functionality:
 *
 * - garbage collection
 * - pfsync bulk send operations
 * - bulk state fetches via the DIOCGETSTATES ioctl
 * - bulk state clearing via the DIOCCLRSTATES ioctl
 *
 * a state is inserted into the global pf_state_list once it has also
 * been successfully added to the various trees that make up the state
 * table. states are only removed from the pf_state_list by the garbage
 * collection process.
 *
 * the pf_state_list head and tail pointers (ie, the pfs_list TAILQ_HEAD
 * structure) and the pointers between the entries on the pf_state_list
 * are locked separately. at a high level, this allows for insertion
 * of new states into the pf_state_list while other contexts (eg, the
 * ioctls) are traversing the state items in the list. for garbage
 * collection to remove items from the pf_state_list, it has to exclude
 * both modifications to the list head and tail pointers, and traversal
 * of the links between the states.
 *
 * the head and tail pointers are protected by a mutex. the pointers
 * between states are protected by an rwlock.
 *
 * because insertions are only made to the end of the list, if we get
 * a snapshot of the head and tail of the list and prevent modifications
 * to the links between states, we can safely traverse between the
 * head and tail entries. subsequent insertions can add entries after
 * our view of the tail, but we don't look past our view.
 *
 * if both locks must be taken, the rwlock protecting the links between
 * states is taken before the mutex protecting the head and tail
 * pointer.
 *
 * insertion into the list follows this pattern:
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_INSERT_TAIL(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 * traversal of the list:
 *
 *	// lock against the gc removing an item from the list
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 *
 *	// get a snapshot view of the ends of the list
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	head = TAILQ_FIRST(&pf_state_list.pfs_list);
 *	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	state = NULL;
 *	next = head;
 *
 *	while (state != tail) {
 *		state = next;
 *		next = TAILQ_NEXT(state, entry_list);
 *
 *		// look at the state
 *	}
 *
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 *
 * removing an item from the list:
 *
 *	// wait for iterators (readers) to get out
 *	rw_enter_write(&pf_state_list.pfs_rwl);
 *
 *	// serialise list head/tail modifications
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	TAILQ_REMOVE(&pf_state_list.pfs_list, state, entry_list);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	rw_exit_write(&pf_state_list.pfs_rwl);
 *
 * the lock ordering for the pf_state_list locks and the rest of the
 * pf locks is:
 *
 * 1. KERNEL_LOCK
 * 2. NET_LOCK
 * 3. pf_state_list.pfs_rwl
 * 4. PF_LOCK
 * 5. PF_STATE_LOCK
 * 6. pf_state_list.pfs_mtx
 */

struct pf_state_list {
	/* the list of states in the system */
	struct pf_state_queue		pfs_list;

	/* serialise pfs_list head/tail access */
	struct mutex			pfs_mtx;

	/* serialise access to pointers between pfs_list entries */
	struct rwlock			pfs_rwl;
};

#define PF_STATE_LIST_INITIALIZER(_pfs) {				\
	.pfs_list	= TAILQ_HEAD_INITIALIZER(_pfs.pfs_list),	\
	.pfs_mtx	= MUTEX_INITIALIZER(IPL_SOFTNET),		\
	.pfs_rwl	= RWLOCK_INITIALIZER("pfstates"),		\
}
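
/*
 * Usage sketch: the macro is intended for static initialisation of
 * the global list, e.g.
 *
 *	struct pf_state_list pf_state_list =
 *	    PF_STATE_LIST_INITIALIZER(pf_state_list);
 *
 * which sets up the TAILQ head, the IPL_SOFTNET mutex, and the
 * "pfstates" rwlock in one definition.
 */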

extern struct rwlock pf_lock;

struct pf_pdesc {
	struct {
		int	 done;
		uid_t	 uid;
		gid_t	 gid;
		pid_t	 pid;
	}		 lookup;
	u_int64_t	 tot_len;	/* total packet length */

	struct pf_addr	 nsaddr;	/* src address after NAT */
	struct pf_addr	 ndaddr;	/* dst address after NAT */

	struct pfi_kif	*kif;		/* incoming interface */
	struct mbuf	*m;		/* mbuf containing the packet */
	struct pf_addr	*src;		/* src address */
	struct pf_addr	*dst;		/* dst address */
	u_int16_t	*pcksum;	/* proto cksum */
	u_int16_t	*sport;
	u_int16_t	*dport;
	u_int16_t	 osport;
	u_int16_t	 odport;
	u_int16_t	 hash;
	u_int16_t	 nsport;	/* src port after NAT */
	u_int16_t	 ndport;	/* dst port after NAT */

	u_int32_t	 off;		/* protocol header offset */
	u_int32_t	 hdrlen;	/* protocol header length */
	u_int32_t	 p_len;		/* length of protocol payload */
	u_int32_t	 extoff;	/* extension header offset */
	u_int32_t	 fragoff;	/* fragment header offset */
	u_int32_t	 jumbolen;	/* length from v6 jumbo header */
	u_int32_t	 badopts;	/* v4 options or v6 routing headers */
#define PF_OPT_OTHER		0x0001
#define PF_OPT_JUMBO		0x0002
#define PF_OPT_ROUTER_ALERT	0x0004

	u_int16_t	 rdomain;	/* original routing domain */
	u_int16_t	 virtual_proto;
#define PF_VPROTO_FRAGMENT	256
	sa_family_t	 af;
	sa_family_t	 naf;
	u_int8_t	 proto;
	u_int8_t	 tos;
	u_int8_t	 ttl;
	u_int8_t	 dir;		/* direction */
	u_int8_t	 sidx;		/* key index for source */
	u_int8_t	 didx;		/* key index for destination */
	u_int8_t	 destchg;	/* flag set when destination changed */
	u_int8_t	 pflog;		/* flags for packet logging */
	union {
		struct tcphdr			tcp;
		struct udphdr			udp;
		struct icmp			icmp;
#ifdef INET6
		struct icmp6_hdr		icmp6;
		struct mld_hdr			mld;
		struct nd_neighbor_solicit	nd_ns;
#endif /* INET6 */
	} hdr;
};
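
/*
 * Illustrative sketch: once a packet has been parsed into a
 * pf_pdesc, the protocol header lives in the hdr union and proto
 * says which arm is valid, e.g. for TCP:
 *
 *	if (pd->proto == IPPROTO_TCP) {
 *		struct tcphdr *th = &pd->hdr.tcp;
 *		th_flags = th->th_flags;
 *	}
 *
 * (th_flags is a hypothetical local; reading another arm of the
 * union for a different proto would yield garbage.)
 */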

struct pf_anchor_stackframe {
	struct pf_ruleset	*sf_rs;
	struct pf_rule		*sf_anchor;
	union {
		struct pf_rule			*u_r;
		struct pf_anchor_stackframe	*u_stack_top;
	} u;
	struct pf_anchor	*sf_child;
	int			 sf_jump_target;
};
#define sf_r		u.u_r
#define sf_stack_top	u.u_stack_top
enum {
	PF_NEXT_RULE,
	PF_NEXT_CHILD
};

extern struct cpumem *pf_anchor_stack;
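
/*
 * Sketch (assuming the usual <sys/percpu.h> access pattern): the
 * anchor stack is per-CPU, so a caller borrows the current CPU's
 * stack via the cpumem API and returns it when done:
 *
 *	struct pf_anchor_stackframe *stack;
 *
 *	stack = cpumem_enter(pf_anchor_stack);
 *	... walk anchor rulesets using stack ...
 *	cpumem_leave(pf_anchor_stack, stack);
 */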

enum pf_trans_type {
	PF_TRANS_NONE,
	PF_TRANS_GETRULE,
	PF_TRANS_MAX
};

struct pf_trans {
	LIST_ENTRY(pf_trans)	pft_entry;
	uint32_t		pft_unit;		/* process id */
	uint64_t		pft_ticket;
	enum pf_trans_type	pft_type;
	union {
		struct {
			u_int32_t		 gr_version;
			struct pf_anchor	*gr_anchor;
			struct pf_rule		*gr_rule;
		} u_getrule;
	} u;
};

#define pftgr_version	u.u_getrule.gr_version
#define pftgr_anchor	u.u_getrule.gr_anchor
#define pftgr_rule	u.u_getrule.gr_rule
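
/*
 * For clarity: the pftgr_* shorthands simply name the u_getrule arm
 * of the union, so for a struct pf_trans *t of type PF_TRANS_GETRULE,
 *
 *	t->pftgr_rule
 *
 * expands to
 *
 *	t->u.u_getrule.gr_rule
 */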

extern struct timeout	pf_purge_states_to;
extern struct task	pf_purge_task;
extern struct timeout	pf_purge_to;

struct pf_state		*pf_state_ref(struct pf_state *);
void			 pf_state_unref(struct pf_state *);

extern struct rwlock	pf_lock;
extern struct rwlock	pf_state_lock;
extern struct mutex	pf_frag_mtx;
extern struct mutex	pf_inp_mtx;

#define PF_LOCK()		do {			\
		rw_enter_write(&pf_lock);		\
	} while (0)

#define PF_UNLOCK()		do {			\
		PF_ASSERT_LOCKED();			\
		rw_exit_write(&pf_lock);		\
	} while (0)

#define PF_ASSERT_LOCKED()	do {			\
		if (rw_status(&pf_lock) != RW_WRITE)	\
			splassert_fail(RW_WRITE,	\
			    rw_status(&pf_lock), __func__);\
	} while (0)

#define PF_ASSERT_UNLOCKED()	do {			\
		if (rw_status(&pf_lock) == RW_WRITE)	\
			splassert_fail(0, rw_status(&pf_lock), __func__);\
	} while (0)

#define PF_STATE_ENTER_READ()	do {			\
		rw_enter_read(&pf_state_lock);		\
	} while (0)

#define PF_STATE_EXIT_READ()	do {			\
		rw_exit_read(&pf_state_lock);		\
	} while (0)

#define PF_STATE_ENTER_WRITE()	do {			\
		rw_enter_write(&pf_state_lock);		\
	} while (0)

#define PF_STATE_EXIT_WRITE()	do {			\
		PF_STATE_ASSERT_LOCKED();		\
		rw_exit_write(&pf_state_lock);		\
	} while (0)

#define PF_STATE_ASSERT_LOCKED()	do {		\
		if (rw_status(&pf_state_lock) != RW_WRITE)\
			splassert_fail(RW_WRITE,	\
			    rw_status(&pf_state_lock), __func__);\
	} while (0)

#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
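
/*
 * Usage sketch (hypothetical caller), following the lock ordering
 * documented above (PF_LOCK before PF_STATE_LOCK):
 *
 *	PF_LOCK();
 *	PF_STATE_ENTER_WRITE();
 *	... modify the state table ...
 *	PF_STATE_EXIT_WRITE();
 *	PF_UNLOCK();
 */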

/* for copies to/from network byte order */
void			pf_state_peer_hton(const struct pf_state_peer *,
			    struct pfsync_state_peer *);
void			pf_state_peer_ntoh(const struct pfsync_state_peer *,
			    struct pf_state_peer *);
u_int16_t		pf_pkt_hash(sa_family_t, uint8_t,
			    const struct pf_addr *, const struct pf_addr *,
			    uint16_t, uint16_t);
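
/*
 * Call sketch (assuming the argument order in the prototype above is
 * af, proto, src, dst, sport, dport), e.g. hashing a flow from a
 * pf_pdesc *pd with valid port pointers:
 *
 *	pd->hash = pf_pkt_hash(pd->af, pd->proto,
 *	    pd->src, pd->dst, *pd->sport, *pd->dport);
 */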

#endif /* _KERNEL */

#endif /* _NET_PFVAR_PRIV_H_ */