1 /* $OpenBSD: pfvar_priv.h,v 1.38 2024/09/07 22:41:55 aisha Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org> 6 * Copyright (c) 2016 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * - Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * - Redistributions in binary form must reproduce the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer in the documentation and/or other materials provided 18 * with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
 *
 */

#ifndef _NET_PFVAR_PRIV_H_
#define _NET_PFVAR_PRIV_H_

#ifdef _KERNEL

#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/percpu.h>

/*
 * Locks used to protect struct members in this file:
 *	L	pf_inp_mtx		link pf to inp mutex
 */

struct pfsync_deferral;

/*
 * pf state items - links from pf_state_key to pf_states
 */

struct pf_state_item {
	TAILQ_ENTRY(pf_state_item)
				 si_entry;
	struct pf_state		*si_st;
};

TAILQ_HEAD(pf_statelisthead, pf_state_item);

/*
 * pf state keys - look up states by address
 */

struct pf_state_key {
	struct pf_addr	 addr[2];
	u_int16_t	 port[2];
	u_int16_t	 rdomain;
	u_int16_t	 hash;
	sa_family_t	 af;
	u_int8_t	 proto;

	RBT_ENTRY(pf_state_key)	 sk_entry;
	struct pf_statelisthead	 sk_states;
	struct pf_state_key	*sk_reverse;
	struct inpcb		*sk_inp;	/* [L] */
	pf_refcnt_t		 sk_refcnt;
	u_int8_t		 sk_removed;
};

RBT_HEAD(pf_state_tree, pf_state_key);
RBT_PROTOTYPE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);

/*
 * True when the wire and stack keys of a state pair belong to
 * different address families and the wire side is not "family",
 * i.e. the lookup key must be read through sk_reverse.
 */
#define PF_REVERSED_KEY(key, family)				\
	((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) &&	\
	    (key[PF_SK_WIRE]->af != (family)))

/*
 * pf state
 *
 * Protection/ownership of pf_state members:
 *	I	immutable after pf_state_insert()
 *	M	pf_state mtx
 *	P	PF_STATE_LOCK
 *	S	pfsync
 *	L	pf_state_list
 *	g	pf_purge gc
 */

struct pf_state {
	u_int64_t		 id;		/* [I] */
	u_int32_t		 creatorid;	/* [I] */
	u_int8_t		 direction;	/* [I] */
	u_int8_t		 pad[3];

	TAILQ_ENTRY(pf_state)	 sync_list;	/* [S] */
	struct pfsync_deferral	*sync_defer;	/* [S] */
	TAILQ_ENTRY(pf_state)	 entry_list;	/* [L] */
	SLIST_ENTRY(pf_state)	 gc_list;	/* [g] */
	RBT_ENTRY(pf_state)	 entry_id;	/* [P] */
	struct pf_state_peer	 src;
	struct pf_state_peer	 dst;
	struct pf_rule_slist	 match_rules;	/* [I] */
	union pf_rule_ptr	 rule;		/* [I] */
	union pf_rule_ptr	 anchor;	/* [I] */
	union pf_rule_ptr	 natrule;	/* [I] */
	struct pf_addr		 rt_addr;	/* [I] */
	struct pf_sn_head	 src_nodes;	/* [I] */
	struct pf_state_key	*key[2];	/* [I] stack and wire */
	struct pfi_kif		*kif;		/* [I] */
	struct mutex		 mtx;
	pf_refcnt_t		 refcnt;
	u_int64_t		 packets[2];	/* NOTE(review): no lock tag;
						 * presumably [M] like the
						 * other counters - confirm */
	u_int64_t		 bytes[2];	/* NOTE(review): see packets */
	int32_t			 creation;	/* [I] */
	int32_t			 expire;	/* NOTE(review): untagged -
						 * confirm locking in pf.c */
	int32_t			 pfsync_time;	/* [S] */
	int			 rtableid[2];	/* [I] stack and wire */
	u_int16_t		 qid;		/* [I] */
	u_int16_t		 pqid;		/* [I] */
	u_int16_t		 tag;		/* [I] */
	u_int16_t		 state_flags;	/* [M] */
	u_int8_t		 log;		/* [I] */
	u_int8_t		 timeout;	/* NOTE(review): untagged -
						 * confirm locking in pf.c */
	u_int8_t		 sync_state;	/* [S] PFSYNC_S_x */
	u_int8_t		 sync_updates;	/* [S] */
	u_int8_t		 min_ttl;	/* [I] */
	u_int8_t		 set_tos;	/* [I] */
	u_int8_t		 set_prio[2];	/* [I] */
	u_int16_t		 max_mss;	/* [I] */
	u_int16_t		 if_index_in;	/* [I] */
	u_int16_t		 if_index_out;	/* [I] */
	u_int16_t		 delay;		/* [I] */
	u_int8_t		 rt;		/* [I] */
};

RBT_HEAD(pf_state_tree_id, pf_state);
RBT_PROTOTYPE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);
extern struct pf_state_tree_id tree_id;

/*
 * states are linked into a global list to support the following
 * functionality:
 *
 *  - garbage collection
 *  - pfsync bulk send operations
 *  - bulk state fetches via the DIOCGETSTATES ioctl
 *  - bulk state clearing via the DIOCCLRSTATES ioctl
 *
 * states are inserted into the global pf_state_list once it has also
 * been successfully added to the various trees that make up the state
 * table. states are only removed from the pf_state_list by the garbage
 * collection process.
 *
 * the pf_state_list head and tail pointers (ie, the pfs_list TAILQ_HEAD
 * structure) and the pointers between the entries on the pf_state_list
 * are locked separately.
at a high level, this allows for insertion 170 * of new states into the pf_state_list while other contexts (eg, the 171 * ioctls) are traversing the state items in the list. for garbage 172 * collection to remove items from the pf_state_list, it has to exclude 173 * both modifications to the list head and tail pointers, and traversal 174 * of the links between the states. 175 * 176 * the head and tail pointers are protected by a mutex. the pointers 177 * between states are protected by an rwlock. 178 * 179 * because insertions are only made to the end of the list, if we get 180 * a snapshot of the head and tail of the list and prevent modifications 181 * to the links between states, we can safely traverse between the 182 * head and tail entries. subsequent insertions can add entries after 183 * our view of the tail, but we don't look past our view. 184 * 185 * if both locks must be taken, the rwlock protecting the links between 186 * states is taken before the mutex protecting the head and tail 187 * pointer. 
188 * 189 * insertion into the list follows this pattern: 190 * 191 * // serialise list head/tail modifications 192 * mtx_enter(&pf_state_list.pfs_mtx); 193 * TAILQ_INSERT_TAIL(&pf_state_list.pfs_list, state, entry_list); 194 * mtx_leave(&pf_state_list.pfs_mtx); 195 * 196 * traversal of the list: 197 * 198 * // lock against the gc removing an item from the list 199 * rw_enter_read(&pf_state_list.pfs_rwl); 200 * 201 * // get a snapshot view of the ends of the list 202 * mtx_enter(&pf_state_list.pfs_mtx); 203 * head = TAILQ_FIRST(&pf_state_list.pfs_list); 204 * tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue); 205 * mtx_leave(&pf_state_list.pfs_mtx); 206 * 207 * state = NULL; 208 * next = head; 209 * 210 * while (state != tail) { 211 * state = next; 212 * next = TAILQ_NEXT(state, entry_list); 213 * 214 * // look at the state 215 * } 216 * 217 * rw_exit_read(&pf_state_list.pfs_rwl); 218 * 219 * removing an item from the list: 220 * 221 * // wait for iterators (readers) to get out 222 * rw_enter_write(&pf_state_list.pfs_rwl); 223 * 224 * // serialise list head/tail modifications 225 * mtx_enter(&pf_state_list.pfs_mtx); 226 * TAILQ_REMOVE(&pf_state_list.pfs_list, state, entry_list); 227 * mtx_leave(&pf_state_list.pfs_mtx); 228 * 229 * rw_exit_write(&pf_state_list.pfs_rwl); 230 * 231 * the lock ordering for pf_state_list locks and the rest of the pf 232 * locks are: 233 * 234 * 1. KERNEL_LOCK 235 * 2. NET_LOCK 236 * 3. pf_state_list.pfs_rwl 237 * 4. PF_LOCK 238 * 5. PF_STATE_LOCK 239 * 6. 
pf_state_list.pfs_mtx 240 */ 241 242 struct pf_state_list { 243 /* the list of states in the system */ 244 struct pf_state_queue pfs_list; 245 246 /* serialise pfs_list head/tail access */ 247 struct mutex pfs_mtx; 248 249 /* serialise access to pointers between pfs_list entries */ 250 struct rwlock pfs_rwl; 251 }; 252 253 #define PF_STATE_LIST_INITIALIZER(_pfs) { \ 254 .pfs_list = TAILQ_HEAD_INITIALIZER(_pfs.pfs_list), \ 255 .pfs_mtx = MUTEX_INITIALIZER(IPL_SOFTNET), \ 256 .pfs_rwl = RWLOCK_INITIALIZER("pfstates"), \ 257 } 258 259 extern struct rwlock pf_lock; 260 261 struct pf_pdesc { 262 struct { 263 int done; 264 uid_t uid; 265 gid_t gid; 266 pid_t pid; 267 } lookup; 268 u_int64_t tot_len; /* Make Mickey money */ 269 270 struct pf_addr nsaddr; /* src address after NAT */ 271 struct pf_addr ndaddr; /* dst address after NAT */ 272 273 struct pfi_kif *kif; /* incoming interface */ 274 struct mbuf *m; /* mbuf containing the packet */ 275 struct pf_addr *src; /* src address */ 276 struct pf_addr *dst; /* dst address */ 277 u_int16_t *pcksum; /* proto cksum */ 278 u_int16_t *sport; 279 u_int16_t *dport; 280 u_int16_t osport; 281 u_int16_t odport; 282 u_int16_t hash; 283 u_int16_t nsport; /* src port after NAT */ 284 u_int16_t ndport; /* dst port after NAT */ 285 286 u_int32_t off; /* protocol header offset */ 287 u_int32_t hdrlen; /* protocol header length */ 288 u_int32_t p_len; /* length of protocol payload */ 289 u_int32_t extoff; /* extension header offset */ 290 u_int32_t fragoff; /* fragment header offset */ 291 u_int32_t jumbolen; /* length from v6 jumbo header */ 292 u_int32_t badopts; /* v4 options or v6 routing headers */ 293 #define PF_OPT_OTHER 0x0001 294 #define PF_OPT_JUMBO 0x0002 295 #define PF_OPT_ROUTER_ALERT 0x0004 296 297 u_int16_t rdomain; /* original routing domain */ 298 u_int16_t virtual_proto; 299 #define PF_VPROTO_FRAGMENT 256 300 sa_family_t af; 301 sa_family_t naf; 302 u_int8_t proto; 303 u_int8_t tos; 304 u_int8_t ttl; 305 u_int8_t dir; /* 
direction */ 306 u_int8_t sidx; /* key index for source */ 307 u_int8_t didx; /* key index for destination */ 308 u_int8_t destchg; /* flag set when destination changed */ 309 u_int8_t pflog; /* flags for packet logging */ 310 union { 311 struct tcphdr tcp; 312 struct udphdr udp; 313 struct icmp icmp; 314 #ifdef INET6 315 struct icmp6_hdr icmp6; 316 struct mld_hdr mld; 317 struct nd_neighbor_solicit nd_ns; 318 #endif /* INET6 */ 319 } hdr; 320 }; 321 322 struct pf_anchor_stackframe { 323 struct pf_ruleset *sf_rs; 324 struct pf_rule *sf_anchor; 325 union { 326 struct pf_rule *u_r; 327 struct pf_anchor_stackframe *u_stack_top; 328 } u; 329 struct pf_anchor *sf_child; 330 int sf_jump_target; 331 }; 332 #define sf_r u.u_r 333 #define sf_stack_top u.u_stack_top 334 enum { 335 PF_NEXT_RULE, 336 PF_NEXT_CHILD 337 }; 338 339 extern struct cpumem *pf_anchor_stack; 340 341 enum pf_trans_type { 342 PF_TRANS_NONE, 343 PF_TRANS_GETRULE, 344 PF_TRANS_MAX 345 }; 346 347 struct pf_trans { 348 LIST_ENTRY(pf_trans) pft_entry; 349 uint32_t pft_unit; /* process id */ 350 uint64_t pft_ticket; 351 enum pf_trans_type pft_type; 352 union { 353 struct { 354 u_int32_t gr_version; 355 struct pf_anchor *gr_anchor; 356 struct pf_rule *gr_rule; 357 } u_getrule; 358 } u; 359 }; 360 361 #define pftgr_version u.u_getrule.gr_version 362 #define pftgr_anchor u.u_getrule.gr_anchor 363 #define pftgr_rule u.u_getrule.gr_rule 364 365 extern struct timeout pf_purge_states_to; 366 extern struct task pf_purge_task; 367 extern struct timeout pf_purge_to; 368 369 struct pf_state *pf_state_ref(struct pf_state *); 370 void pf_state_unref(struct pf_state *); 371 372 extern struct rwlock pf_lock; 373 extern struct rwlock pf_state_lock; 374 extern struct mutex pf_frag_mtx; 375 extern struct mutex pf_inp_mtx; 376 377 #define PF_LOCK() do { \ 378 rw_enter_write(&pf_lock); \ 379 } while (0) 380 381 #define PF_UNLOCK() do { \ 382 PF_ASSERT_LOCKED(); \ 383 rw_exit_write(&pf_lock); \ 384 } while (0) 385 386 #define 
PF_ASSERT_LOCKED() do { \ 387 if (rw_status(&pf_lock) != RW_WRITE) \ 388 splassert_fail(RW_WRITE, \ 389 rw_status(&pf_lock),__func__);\ 390 } while (0) 391 392 #define PF_ASSERT_UNLOCKED() do { \ 393 if (rw_status(&pf_lock) == RW_WRITE) \ 394 splassert_fail(0, rw_status(&pf_lock), __func__);\ 395 } while (0) 396 397 #define PF_STATE_ENTER_READ() do { \ 398 rw_enter_read(&pf_state_lock); \ 399 } while (0) 400 401 #define PF_STATE_EXIT_READ() do { \ 402 rw_exit_read(&pf_state_lock); \ 403 } while (0) 404 405 #define PF_STATE_ENTER_WRITE() do { \ 406 rw_enter_write(&pf_state_lock); \ 407 } while (0) 408 409 #define PF_STATE_EXIT_WRITE() do { \ 410 PF_STATE_ASSERT_LOCKED(); \ 411 rw_exit_write(&pf_state_lock); \ 412 } while (0) 413 414 #define PF_STATE_ASSERT_LOCKED() do { \ 415 if (rw_status(&pf_state_lock) != RW_WRITE)\ 416 splassert_fail(RW_WRITE, \ 417 rw_status(&pf_state_lock), __func__);\ 418 } while (0) 419 420 #define PF_FRAG_LOCK() mtx_enter(&pf_frag_mtx) 421 #define PF_FRAG_UNLOCK() mtx_leave(&pf_frag_mtx) 422 423 /* for copies to/from network byte order */ 424 void pf_state_peer_hton(const struct pf_state_peer *, 425 struct pfsync_state_peer *); 426 void pf_state_peer_ntoh(const struct pfsync_state_peer *, 427 struct pf_state_peer *); 428 u_int16_t pf_pkt_hash(sa_family_t, uint8_t, 429 const struct pf_addr *, const struct pf_addr *, 430 uint16_t, uint16_t); 431 432 #endif /* _KERNEL */ 433 434 #endif /* _NET_PFVAR_PRIV_H_ */ 435