1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/filio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/kernel.h> 49 #include <sys/time.h> 50 #include <sys/sysctl.h> 51 #include <sys/endian.h> 52 #include <sys/proc.h> 53 #include <sys/kthread.h> 54 #include <sys/spinlock.h> 55 56 #include <sys/md5.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/netisr2.h> 62 #include <net/route.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/udp.h> 72 #include <netinet/ip_icmp.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/tcp_timer.h> 75 #include <netinet/tcp_var.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/icmp_var.h> 78 #include <netinet/if_ether.h> 79 80 #include <net/pf/pfvar.h> 81 #include <net/pf/if_pflog.h> 82 83 #include <net/pf/if_pfsync.h> 84 85 #ifdef INET6 86 #include <netinet/ip6.h> 87 #include <netinet/icmp6.h> 88 #include <netinet6/nd6.h> 89 #include <netinet6/ip6_var.h> 90 #include <netinet6/in6_pcb.h> 91 #endif /* INET6 */ 92 93 #include <sys/in_cksum.h> 94 #include <sys/ucred.h> 95 #include <machine/limits.h> 96 #include <sys/msgport2.h> 97 #include <sys/spinlock2.h> 98 #include <net/netmsg2.h> 99 #include <net/toeplitz2.h> 100 101 extern int ip_optcopy(struct ip *, struct ip *); 102 extern int debug_pfugidhack; 103 104 /* 105 * pf_token - shared lock for cpu-localized operations, 106 * exclusive lock otherwise. 107 * 108 * pf_gtoken- exclusive lock used for initialization. 109 * 110 * pf_spin - only used to atomically fetch and increment stateid 111 * on 32-bit systems. 
112 */ 113 struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token); 114 struct lwkt_token pf_gtoken = LWKT_TOKEN_INITIALIZER(pf_gtoken); 115 #if __SIZEOF_LONG__ != 8 116 struct spinlock pf_spin = SPINLOCK_INITIALIZER(pf_spin, "pf_spin"); 117 #endif 118 119 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x 120 121 #define FAIL(code) { error = (code); goto done; } 122 123 /* 124 * Global variables 125 */ 126 127 /* mask radix tree */ 128 struct radix_node_head *pf_maskhead; 129 130 /* state tables */ 131 struct pf_state_tree *pf_statetbl; /* incls one global table */ 132 struct pf_state **purge_cur; 133 struct pf_altqqueue pf_altqs[2]; 134 struct pf_palist pf_pabuf; 135 struct pf_altqqueue *pf_altqs_active; 136 struct pf_altqqueue *pf_altqs_inactive; 137 struct pf_status pf_status; 138 139 u_int32_t ticket_altqs_active; 140 u_int32_t ticket_altqs_inactive; 141 int altqs_inactive_open; 142 u_int32_t ticket_pabuf; 143 144 MD5_CTX pf_tcp_secret_ctx; 145 u_char pf_tcp_secret[16]; 146 int pf_tcp_secret_init; 147 int pf_tcp_iss_off; 148 149 struct pf_anchor_stackframe { 150 struct pf_ruleset *rs; 151 struct pf_rule *r; 152 struct pf_anchor_node *parent; 153 struct pf_anchor *child; 154 } pf_anchor_stack[64]; 155 156 struct malloc_type *pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl; 157 struct malloc_type *pf_state_pl, *pf_state_key_pl, *pf_state_item_pl; 158 struct malloc_type *pf_altq_pl; 159 160 void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); 161 162 void pf_init_threshold(struct pf_threshold *, u_int32_t, 163 u_int32_t); 164 void pf_add_threshold(struct pf_threshold *); 165 int pf_check_threshold(struct pf_threshold *); 166 167 void pf_change_ap(struct pf_addr *, u_int16_t *, 168 u_int16_t *, u_int16_t *, struct pf_addr *, 169 u_int16_t, u_int8_t, sa_family_t); 170 int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, 171 struct tcphdr *, struct pf_state_peer *); 172 #ifdef INET6 173 void pf_change_a6(struct pf_addr *, u_int16_t *, 174 
struct pf_addr *, u_int8_t); 175 #endif /* INET6 */ 176 void pf_change_icmp(struct pf_addr *, u_int16_t *, 177 struct pf_addr *, struct pf_addr *, u_int16_t, 178 u_int16_t *, u_int16_t *, u_int16_t *, 179 u_int16_t *, u_int8_t, sa_family_t); 180 void pf_send_tcp(const struct pf_rule *, sa_family_t, 181 const struct pf_addr *, const struct pf_addr *, 182 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 183 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 184 u_int16_t, struct ether_header *, struct ifnet *); 185 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 186 sa_family_t, struct pf_rule *); 187 struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, 188 int, int, struct pfi_kif *, 189 struct pf_addr *, u_int16_t, struct pf_addr *, 190 u_int16_t, int); 191 struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, 192 int, int, struct pfi_kif *, struct pf_src_node **, 193 struct pf_state_key **, struct pf_state_key **, 194 struct pf_state_key **, struct pf_state_key **, 195 struct pf_addr *, struct pf_addr *, 196 u_int16_t, u_int16_t); 197 void pf_detach_state(struct pf_state *); 198 int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, 199 struct pf_state_key **, struct pf_state_key **, 200 struct pf_state_key **, struct pf_state_key **, 201 struct pf_addr *, struct pf_addr *, 202 u_int16_t, u_int16_t); 203 void pf_state_key_detach(struct pf_state *, int); 204 u_int32_t pf_tcp_iss(struct pf_pdesc *); 205 int pf_test_rule(struct pf_rule **, struct pf_state **, 206 int, struct pfi_kif *, struct mbuf *, int, 207 void *, struct pf_pdesc *, struct pf_rule **, 208 struct pf_ruleset **, struct ifqueue *, struct inpcb *); 209 static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, 210 struct pf_rule *, struct pf_pdesc *, 211 struct pf_src_node *, struct pf_state_key *, 212 struct pf_state_key *, struct pf_state_key *, 213 struct pf_state_key *, struct mbuf *, int, 214 u_int16_t, u_int16_t, int *, struct pfi_kif *, 215 struct 
pf_state **, int, u_int16_t, u_int16_t, 216 int); 217 int pf_test_fragment(struct pf_rule **, int, 218 struct pfi_kif *, struct mbuf *, void *, 219 struct pf_pdesc *, struct pf_rule **, 220 struct pf_ruleset **); 221 int pf_tcp_track_full(struct pf_state_peer *, 222 struct pf_state_peer *, struct pf_state **, 223 struct pfi_kif *, struct mbuf *, int, 224 struct pf_pdesc *, u_short *, int *); 225 int pf_tcp_track_sloppy(struct pf_state_peer *, 226 struct pf_state_peer *, struct pf_state **, 227 struct pf_pdesc *, u_short *); 228 int pf_test_state_tcp(struct pf_state **, int, 229 struct pfi_kif *, struct mbuf *, int, 230 void *, struct pf_pdesc *, u_short *); 231 int pf_test_state_udp(struct pf_state **, int, 232 struct pfi_kif *, struct mbuf *, int, 233 void *, struct pf_pdesc *); 234 int pf_test_state_icmp(struct pf_state **, int, 235 struct pfi_kif *, struct mbuf *, int, 236 void *, struct pf_pdesc *, u_short *); 237 int pf_test_state_other(struct pf_state **, int, 238 struct pfi_kif *, struct mbuf *, struct pf_pdesc *); 239 void pf_step_into_anchor(int *, struct pf_ruleset **, int, 240 struct pf_rule **, struct pf_rule **, int *); 241 int pf_step_out_of_anchor(int *, struct pf_ruleset **, 242 int, struct pf_rule **, struct pf_rule **, 243 int *); 244 void pf_hash(struct pf_addr *, struct pf_addr *, 245 struct pf_poolhashkey *, sa_family_t); 246 int pf_map_addr(u_int8_t, struct pf_rule *, 247 struct pf_addr *, struct pf_addr *, 248 struct pf_addr *, struct pf_src_node **); 249 int pf_get_sport(struct pf_pdesc *, 250 sa_family_t, u_int8_t, struct pf_rule *, 251 struct pf_addr *, struct pf_addr *, 252 u_int16_t, u_int16_t, 253 struct pf_addr *, u_int16_t *, 254 u_int16_t, u_int16_t, 255 struct pf_src_node **); 256 void pf_route(struct mbuf **, struct pf_rule *, int, 257 struct ifnet *, struct pf_state *, 258 struct pf_pdesc *); 259 void pf_route6(struct mbuf **, struct pf_rule *, int, 260 struct ifnet *, struct pf_state *, 261 struct pf_pdesc *); 262 u_int8_t 
pf_get_wscale(struct mbuf *, int, u_int16_t, 263 sa_family_t); 264 u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, 265 sa_family_t); 266 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, 267 u_int16_t); 268 void pf_set_rt_ifp(struct pf_state *, 269 struct pf_addr *); 270 int pf_check_proto_cksum(struct mbuf *, int, int, 271 u_int8_t, sa_family_t); 272 struct pf_divert *pf_get_divert(struct mbuf *); 273 void pf_print_state_parts(struct pf_state *, 274 struct pf_state_key *, struct pf_state_key *); 275 int pf_addr_wrap_neq(struct pf_addr_wrap *, 276 struct pf_addr_wrap *); 277 struct pf_state *pf_find_state(struct pfi_kif *, 278 struct pf_state_key_cmp *, u_int, struct mbuf *); 279 int pf_src_connlimit(struct pf_state *); 280 int pf_check_congestion(struct ifqueue *); 281 282 extern int pf_end_threads; 283 284 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 285 { &pf_state_pl, PFSTATE_HIWAT }, 286 { &pf_src_tree_pl, PFSNODE_HIWAT }, 287 { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, 288 { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, 289 { &pfr_kentry_pl, PFR_KENTRY_HIWAT } 290 }; 291 292 /* 293 * If route-to and direction is out we match with no further processing 294 * (rt_kif must be assigned and not equal to the out interface) 295 * If reply-to and direction is in we match with no further processing 296 * (rt_kif must be assigned and not equal to the in interface) 297 */ 298 #define STATE_LOOKUP(i, k, d, s, m) \ 299 do { \ 300 s = pf_find_state(i, k, d, m); \ 301 if (s == NULL || (s)->timeout == PFTM_PURGE) \ 302 return (PF_DROP); \ 303 if (d == PF_OUT && \ 304 (((s)->rule.ptr->rt == PF_ROUTETO && \ 305 (s)->rule.ptr->direction == PF_OUT) || \ 306 ((s)->rule.ptr->rt == PF_REPLYTO && \ 307 (s)->rule.ptr->direction == PF_IN)) && \ 308 (s)->rt_kif != NULL && \ 309 (s)->rt_kif != i) \ 310 return (PF_PASS); \ 311 } while (0) 312 313 #define BOUND_IFACE(r, k) \ 314 ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all

/*
 * Bump the per-rule state counters for a newly created state on its main
 * rule, anchor rule, and nat rule (when present).  states_cur is adjusted
 * with atomic_add_int; states_tot is a lifetime statistic and is updated
 * non-atomically.
 */
#define STATE_INC_COUNTERS(s)				\
	do {						\
		atomic_add_int(&s->rule.ptr->states_cur, 1);	\
		s->rule.ptr->states_tot++;		\
		if (s->anchor.ptr != NULL) {		\
			atomic_add_int(&s->anchor.ptr->states_cur, 1);	\
			s->anchor.ptr->states_tot++;	\
		}					\
		if (s->nat_rule.ptr != NULL) {		\
			atomic_add_int(&s->nat_rule.ptr->states_cur, 1); \
			s->nat_rule.ptr->states_tot++;	\
		}					\
	} while (0)

/*
 * Reverse of STATE_INC_COUNTERS (states_cur only; states_tot is never
 * decremented).
 */
#define STATE_DEC_COUNTERS(s)				\
	do {						\
		if (s->nat_rule.ptr != NULL)		\
			atomic_add_int(&s->nat_rule.ptr->states_cur, -1); \
		if (s->anchor.ptr != NULL)		\
			atomic_add_int(&s->anchor.ptr->states_cur, -1); \
		atomic_add_int(&s->rule.ptr->states_cur, -1);	\
	} while (0)

/* kmalloc types backing the pf state / source-node / key / item objects */
static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list");
static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list");
static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list");
static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list");

/* comparators forward-declared for the RB_GENERATE expansions below */
static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
				struct pf_state_key *);
static __inline int pf_state_compare_rkey(struct pf_state_key *,
				struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
				struct pf_state *);

/*
 * Dynamically allocated tree/list bases; used as per-cpu arrays throughout
 * (indexed by gd_cpuid below) — presumably sized at init elsewhere in this
 * file.
 */
struct pf_src_tree *tree_src_tracking;
struct pf_state_tree_id *tree_id;
struct pf_state_queue *state_list;
struct pf_counters *pf_counters;

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_rtree, pf_state_key, entry, pf_state_compare_rkey);
RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);

/*
 * RB comparator for the source tracking tree: order by owning rule pointer,
 * then address family, then address (most-significant 32-bit word last for
 * IPv6, i.e. addr32[3] is compared first).
 */
static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr.addr32[0] > b->addr.addr32[0])
			return (1);
		if (a->addr.addr32[0] < b->addr.addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr.addr32[3] > b->addr.addr32[3])
			return (1);
		if (a->addr.addr32[3] < b->addr.addr32[3])
			return (-1);
		if (a->addr.addr32[2] > b->addr.addr32[2])
			return (1);
		if (a->addr.addr32[2] < b->addr.addr32[2])
			return (-1);
		if (a->addr.addr32[1] > b->addr.addr32[1])
			return (1);
		if (a->addr.addr32[1] < b->addr.addr32[1])
			return (-1);
		if (a->addr.addr32[0] > b->addr.addr32[0])
			return (1);
		if (a->addr.addr32[0] < b->addr.addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

/*
 * Hash a state key by its pointer identity (not its contents).  Returns a
 * non-zero value; 0 is reserved.
 */
u_int32_t
pf_state_hash(struct pf_state_key *sk)
{
	u_int32_t hv = (u_int32_t)(((intptr_t)sk >> 6) ^ ((intptr_t)sk >> 15));
	if (hv == 0)	/* disallow 0 */
		hv = 1;
	return(hv);
}

#ifdef INET6
/*
 * Copy a pf_addr.  Only the first 32-bit word is copied for AF_INET; all
 * four words for AF_INET6.  Other families are silently ignored.
 */
void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#endif /* INET */
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
	}
}
#endif /* INET6 */

/*
 * Initialize a rate threshold.  The limit is stored pre-scaled by
 * PF_THRESHOLD_MULT so pf_add_threshold() can decay the count with integer
 * arithmetic.
 */
void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = time_second;
}

/*
 * Record one event against the threshold: linearly decay the scaled count
 * over the elapsed time (reset entirely if a full window has passed), then
 * add one scaled event.
 */
void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = time_second, diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

/*
 * Return non-zero if the threshold has been exceeded.
 */
int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}

/*
 * Account a new connection against the state's source node and enforce the
 * rule's max-src-conn / max-src-conn-rate limits.  On violation the source
 * address may be inserted into the rule's overload table and, if the rule
 * requests a flush, matching states on the current cpu are marked for purge.
 * The offending state itself is always marked PFTM_PURGE/TCPS_CLOSED.
 *
 * Returns 1 if the state was killed, 0 if the limits were not exceeded.
 */
int
pf_src_connlimit(struct pf_state *state)
{
	int bad = 0;
	int cpu = mycpu->gd_cpuid;

	atomic_add_int(&state->src_node->conn, 1);
	state->src.tcp_est = 1;
	pf_add_threshold(&state->src_node->conn_rate);

	if (state->rule.ptr->max_src_conn &&
	    state->rule.ptr->max_src_conn <
	    state->src_node->conn) {
		PF_INC_LCOUNTER(LCNT_SRCCONN);
		bad++;
	}

	if (state->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&state->src_node->conn_rate)) {
		PF_INC_LCOUNTER(LCNT_SRCCONNRATE);
		bad++;
	}

	if (!bad)
		return 0;

	if (state->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		PF_INC_LCOUNTER(LCNT_OVERLOAD_TABLE);
		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf_src_connlimit: blocking address ");
			pf_print_host(&state->src_node->addr, 0,
			    state->key[PF_SK_WIRE]->af);
		}

		bzero(&p, sizeof(p));
		p.pfra_af = state->key[PF_SK_WIRE]->af;
		switch (state->key[PF_SK_WIRE]->af) {
#ifdef INET
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = state->src_node->addr.v4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = state->src_node->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry(state->rule.ptr->overload_tbl,
		    &p, time_second);

		/* kill existing states if that's required. */
		if (state->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			PF_INC_LCOUNTER(LCNT_OVERLOAD_FLUSH);
			RB_FOREACH(st, pf_state_tree_id, &tree_id[cpu]) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set).  (Only on current cpu).
				 */
				if (sk->af ==
				    state->key[PF_SK_WIRE]->af &&
				    ((state->direction == PF_OUT &&
				    PF_AEQ(&state->src_node->addr,
					&sk->addr[0], sk->af)) ||
				    (state->direction == PF_IN &&
				    PF_AEQ(&state->src_node->addr,
					&sk->addr[1], sk->af))) &&
				    (state->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    state->rule.ptr == st->rule.ptr)) {
					/* purge thread will reap the state */
					st->timeout = PFTM_PURGE;
					st->src.state = st->dst.state =
					    TCPS_CLOSED;
					killed++;
				}
			}
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf(", %u states killed", killed);
		}
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("\n");
	}

	/* kill this state */
	state->timeout = PFTM_PURGE;
	state->src.state = state->dst.state = TCPS_CLOSED;

	return 1;
}

/*
 * Look up (or create and insert) the source tracking node for (rule, src, af)
 * on the current cpu's tracking tree.  On entry *sn may already point at a
 * node, in which case only the max-src-states limit is checked.
 *
 * Returns 0 on success with *sn valid, -1 on allocation failure, duplicate
 * insert, or a max-src-nodes / max-src-states limit hit.
 */
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    struct pf_addr *src, sa_family_t af)
{
	struct pf_src_node	k;
	int cpu = mycpu->gd_cpuid;

	bzero(&k, sizeof(k));	/* avoid gcc warnings */
	if (*sn == NULL) {
		k.af = af;
		PF_ACPY(&k.addr, src, af);
		/* track per-rule only for source-track rule / sticky-address */
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = rule;
		else
			k.rule.ptr = NULL;
		PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = kmalloc(sizeof(struct pf_src_node),
					M_PFSRCTREEPL, M_NOWAIT|M_ZERO);
		else
			PF_INC_LCOUNTER(LCNT_SRCNODES);
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->af = af;
		if (rule->rule_flag & PFRULE_RULESRCTRACK ||
		    rule->rpool.opts & PF_POOL_STICKYADDR)
			(*sn)->rule.ptr = rule;
		else
			(*sn)->rule.ptr = NULL;
		PF_ACPY(&(*sn)->addr, src, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking[cpu], *sn) != NULL) {
			if (pf_status.debug >= PF_DEBUG_MISC) {
				kprintf("pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				kprintf("\n");
			}
			kfree(*sn, M_PFSRCTREEPL);
			return (-1);
		}

		/*
		 * Atomic op required to increment src_nodes in the rule
		 * because we hold a shared token here (decrements will use
		 * an exclusive token).
		 */
		(*sn)->creation = time_second;
		(*sn)->ruletype = rule->action;
		if ((*sn)->rule.ptr != NULL)
			atomic_add_int(&(*sn)->rule.ptr->src_nodes, 1);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_INSERT);
		atomic_add_int(&pf_status.src_nodes, 1);
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			PF_INC_LCOUNTER(LCNT_SRCSTATES);
			return (-1);
		}
	}
	return (0);
}

/*
 * state table (indexed by the pf_state_key structure), normal RBTREE
 * comparison.
 */
static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	/* order: proto, af, addr pair (addr32[3] first for v6), port pair */
	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr[0].addr32[3] > b->addr[0].addr32[3])
			return (1);
		if (a->addr[0].addr32[3] < b->addr[0].addr32[3])
			return (-1);
		if (a->addr[1].addr32[3] > b->addr[1].addr32[3])
			return (1);
		if (a->addr[1].addr32[3] < b->addr[1].addr32[3])
			return (-1);
		if (a->addr[0].addr32[2] > b->addr[0].addr32[2])
			return (1);
		if (a->addr[0].addr32[2] < b->addr[0].addr32[2])
			return (-1);
		if (a->addr[1].addr32[2] > b->addr[1].addr32[2])
			return (1);
		if (a->addr[1].addr32[2] < b->addr[1].addr32[2])
			return (-1);
		if (a->addr[0].addr32[1] > b->addr[0].addr32[1])
			return (1);
		if (a->addr[0].addr32[1] < b->addr[0].addr32[1])
			return (-1);
		if (a->addr[1].addr32[1] > b->addr[1].addr32[1])
			return (1);
		if (a->addr[1].addr32[1] < b->addr[1].addr32[1])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}

	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);

	return (0);
}

/*
 * Used for RB_FIND only, compare in the reverse direction.  The
 * element to be reversed is always (a), since we obviously can't
 * reverse the state tree depicted by (b).
 *
 * (a)'s addr[1]/port[1] is compared against (b)'s addr[0]/port[0] and
 * vice versa; otherwise identical ordering to pf_state_compare_key().
 */
static __inline int
pf_state_compare_rkey(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	switch (a->af) {
#ifdef INET
	case AF_INET:
		if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		if (a->addr[1].addr32[3] > b->addr[0].addr32[3])
			return (1);
		if (a->addr[1].addr32[3] < b->addr[0].addr32[3])
			return (-1);
		if (a->addr[0].addr32[3] > b->addr[1].addr32[3])
			return (1);
		if (a->addr[0].addr32[3] < b->addr[1].addr32[3])
			return (-1);
		if (a->addr[1].addr32[2] > b->addr[0].addr32[2])
			return (1);
		if (a->addr[1].addr32[2] < b->addr[0].addr32[2])
			return (-1);
		if (a->addr[0].addr32[2] > b->addr[1].addr32[2])
			return (1);
		if (a->addr[0].addr32[2] < b->addr[1].addr32[2])
			return (-1);
		if (a->addr[1].addr32[1] > b->addr[0].addr32[1])
			return (1);
		if (a->addr[1].addr32[1] < b->addr[0].addr32[1])
			return (-1);
		if (a->addr[0].addr32[1] > b->addr[1].addr32[1])
			return (1);
		if (a->addr[0].addr32[1] < b->addr[1].addr32[1])
			return (-1);
		if (a->addr[1].addr32[0] > b->addr[0].addr32[0])
			return (1);
		if (a->addr[1].addr32[0] < b->addr[0].addr32[0])
			return (-1);
		if (a->addr[0].addr32[0] > b->addr[1].addr32[0])
			return (1);
		if (a->addr[0].addr32[0] < b->addr[1].addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}

	if ((diff = a->port[1] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[1]) != 0)
		return (diff);

	return (0);
}

/*
 * RB comparator for the per-cpu id tree: order by state id, then creatorid.
 */
static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

/*
 * Attach state key (sk) to state (s) at slot (idx) (PF_SK_WIRE or
 * PF_SK_STACK), entering it into either the per-cpu or the global state
 * table.  On success s->key[idx] points at either sk or an existing
 * identical key (sk is then freed).  Returns 0 on success, -1 on
 * collision or allocation failure; in both failure cases sk has been
 * consumed (freed directly or via pf_state_key_detach()).
 */
int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	int cpu;
	int error;

	/*
	 * PFSTATE_STACK_GLOBAL is set when the state might not hash to the
	 * current cpu.  The keys are managed on the global statetbl tree
	 * for this case.  Only translations (RDR, NAT) can cause this.
	 *
	 * When this flag is not set we must still check the global statetbl
	 * for a collision, and if we find one we set the HALF_DUPLEX flag
	 * in the state.
	 */
	if (s->state_flags & PFSTATE_STACK_GLOBAL) {
		cpu = ncpus;	/* ncpus slot == the global table */
		lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
	} else {
		cpu = mycpu->gd_cpuid;
		lockmgr(&pf_global_statetbl_lock, LK_SHARED);
	}
	KKASSERT(s->key[idx] == NULL);	/* XXX handle this? */

	if (pf_status.debug >= PF_DEBUG_MISC) {
		kprintf("state_key attach cpu %d (%08x:%d) %s (%08x:%d)\n",
			cpu,
			ntohl(sk->addr[0].addr32[0]), ntohs(sk->port[0]),
			(idx == PF_SK_WIRE ? "->" : "<-"),
			ntohl(sk->addr[1].addr32[0]), ntohs(sk->port[1]));
	}

	/*
	 * Check whether (e.g.) a PASS rule being put on a per-cpu tree
	 * collides with a translation rule on the global tree.  This is
	 * NOT an error.  We *WANT* to establish state for this case so the
	 * packet path is short-cutted and doesn't need to scan the ruleset
	 * on every packet.  But the established state will only see one
	 * side of a two-way packet conversation.  To prevent this from
	 * causing problems (e.g. generating a RST), we force PFSTATE_SLOPPY
	 * to be set on the established state.
	 *
	 * A collision against RDR state can only occur with a PASS IN in the
	 * opposite direction or a PASS OUT in the forwards direction.  This
	 * is because RDRs are processed on the input side.
	 *
	 * A collision against NAT state can only occur with a PASS IN in the
	 * forwards direction or a PASS OUT in the opposite direction.  This
	 * is because NATs are processed on the output side.
	 *
	 * In both situations we need to do a reverse addr/port test because
	 * the PASS IN or PASS OUT only establishes if it doesn't match the
	 * established RDR state in the forwards direction.  The direction
	 * flag has to be ignored (it will be one way for a PASS IN and the
	 * other way for a PASS OUT).
	 *
	 * pf_global_statetbl_lock will be locked shared when testing and
	 * not entering into the global state table.
	 */
	if (cpu != ncpus &&
	    (cur = RB_FIND(pf_state_rtree,
			   (struct pf_state_rtree *)&pf_statetbl[ncpus],
			   sk)) != NULL) {
		TAILQ_FOREACH(si, &cur->states, entry) {
			/*
			 * NOTE: We must ignore direction mismatches.
			 */
			if (si->s->kif == s->kif) {
				s->state_flags |= PFSTATE_HALF_DUPLEX |
						  PFSTATE_SLOPPY;
				if (pf_status.debug >= PF_DEBUG_MISC) {
					kprintf(
					    "pf: %s key attach collision "
					    "on %s: ",
					    (idx == PF_SK_WIRE) ?
						"wire" : "stack",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					kprintf("\n");
				}
				break;
			}
		}
	}

	/*
	 * Enter into either the per-cpu or the global state table.
	 *
	 * pf_global_statetbl_lock will be locked exclusively when entering
	 * into the global state table.
	 */
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl[cpu], sk)) != NULL) {
		/* key exists.  check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry) {
			if (si->s->kif == s->kif &&
			    si->s->direction == s->direction) {
				if (pf_status.debug >= PF_DEBUG_MISC) {
					kprintf(
					    "pf: %s key attach failed on %s: ",
					    (idx == PF_SK_WIRE) ?
						"wire" : "stack",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					kprintf("\n");
				}
				kfree(sk, M_PFSTATEKEYPL);
				error = -1;
				goto failed;	/* collision! */
			}
		}
		kfree(sk, M_PFSTATEKEYPL);

		s->key[idx] = cur;	/* share the existing key */
	} else {
		s->key[idx] = sk;
	}

	if ((si = kmalloc(sizeof(struct pf_state_item),
			  M_PFSTATEITEMPL, M_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		error = -1;
		goto failed;	/* collision! */
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	error = 0;
failed:
	lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return error;
}

/*
 * NOTE: Can only be called indirectly via the purge thread with pf_token
 * exclusively locked.
 *
 * Detach both state keys from (s); when wire and stack share one key it is
 * only detached once (via the PF_SK_STACK slot).
 */
void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

/*
 * NOTE: Can only be called indirectly via the purge thread with pf_token
 * exclusively locked.
 */
void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	int cpu;

	/*
	 * PFSTATE_STACK_GLOBAL is set for translations when the translated
	 * address/port is not localized to the same cpu that the untranslated
	 * address/port is on.  The wire pf_state_key is managed on the global
	 * statetbl tree for this case.
	 */
	if (s->state_flags & PFSTATE_STACK_GLOBAL) {
		cpu = ncpus;	/* ncpus slot == the global table */
		lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE);
	} else {
		/* per-cpu tree; no global lock taken (see NOTE above) */
		cpu = mycpu->gd_cpuid;
	}

	/* unlink this state's item from the key's state list */
	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		kfree(si, M_PFSTATEITEMPL);
	}

	/* last user frees the key and clears back-references to it */
	if (TAILQ_EMPTY(&s->key[idx]->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl[cpu], s->key[idx]);
		if (s->key[idx]->reverse)
			s->key[idx]->reverse->reverse = NULL;
		if (s->key[idx]->inp)
			s->key[idx]->inp->inp_pf_sk = NULL;
		kfree(s->key[idx], M_PFSTATEKEYPL);
	}
	s->key[idx] = NULL;

	if (s->state_flags & PFSTATE_STACK_GLOBAL)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
}

/*
 * Allocate a state key and initialize its (empty) state-item list.
 * Returns NULL on allocation failure (pool_flags typically includes
 * M_NOWAIT).
 */
struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags);
	if (sk) {
		TAILQ_INIT(&sk->states);
	}
	return (sk);
}

/*
 * Build the state key (*skp) from the packet description and, when a
 * translation rule (nr) is present, a second key (*nkp) initialized as a
 * copy for the caller to rewrite; otherwise *nkp aliases *skp.  The
 * wire/stack pointers (*skw / *sks) are assigned from skp/nkp according
 * to the packet direction.
 *
 * Returns 0 on success or ENOMEM; if the second allocation fails the
 * first key is NOT freed here (caller must handle cleanup).
 */
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr,
	struct pf_state_key **skw, struct pf_state_key **sks,
	struct pf_state_key **skp, struct pf_state_key **nkp,
	struct pf_addr *saddr, struct pf_addr *daddr,
	u_int16_t sport, u_int16_t dport)
{
	KKASSERT((*skp == NULL && *nkp == NULL));

	if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
		return (ENOMEM);

	PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af);
	PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af);
	(*skp)->port[pd->sidx] = sport;
	(*skp)->port[pd->didx] = dport;
	(*skp)->proto = pd->proto;
	(*skp)->af = pd->af;

	if (nr != NULL) {
		if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL)
			return (ENOMEM); /* caller must handle cleanup */

		/* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */
		PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af);
		PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af);
		(*nkp)->port[0] = (*skp)->port[0];
		(*nkp)->port[1] = (*skp)->port[1];
		(*nkp)->proto = pd->proto;
		(*nkp)->af = pd->af;
	} else {
		*nkp = *skp;
	}

	if (pd->dir == PF_IN) {
		*skw = *skp;
		*sks = *nkp;
	} else {
		*sks = *skp;
		*skw = *nkp;
	}
	return (0);
}

/*
 * Insert pf_state with one or two state keys (allowing a reverse path lookup
 * which is used by NAT).  In the NAT case skw is the initiator (?) and
 * sks is the target.
 */
int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
		struct pf_state_key *sks, struct pf_state *s)
{
	int cpu = mycpu->gd_cpuid;

	s->kif = kif;
	s->cpuid = cpu;	/* state is owned by (localized to) this cpu */

	if (skw == sks) {
		/* no translation: one key serves both wire and stack slots */
		if (pf_state_key_attach(skw, s, PF_SK_WIRE))
			return (-1);
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		/*
		skw->reverse = sks;
		sks->reverse = skw;
		*/
		if (pf_state_key_attach(skw, s, PF_SK_WIRE)) {
			kfree(sks, M_PFSTATEKEYPL);
			return (-1);
		}
		if (pf_state_key_attach(sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			return (-1);
		}
	}

	if (s->id == 0 && s->creatorid == 0) {
		u_int64_t sid;

#if __SIZEOF_LONG__ == 8
		sid = atomic_fetchadd_long(&pf_status.stateid, 1);
#else
		/* no 64-bit atomic fetchadd on 32-bit; use pf_spin */
		spin_lock(&pf_spin);
		sid = pf_status.stateid++;
		spin_unlock(&pf_spin);
#endif
		s->id = htobe64(sid);
		s->creatorid = pf_status.hostid;
	}

	/*
	 * Calculate hash code for altq
	 */
	s->hash = crc32(s->key[PF_SK_WIRE], PF_STATE_KEY_HASH_LENGTH);

	if (RB_INSERT(pf_state_tree_id, &tree_id[cpu], s) != NULL) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: state insert failed: "
			    "id: %016jx creatorid: %08x",
			    (uintmax_t)be64toh(s->id), ntohl(s->creatorid));
			if (s->sync_flags & PFSTATE_FROMSYNC)
				kprintf(" (from sync)");
			kprintf("\n");
		}
		pf_detach_state(s);
		return (-1);
	}
	TAILQ_INSERT_TAIL(&state_list[cpu], s, entry_list);
	PF_INC_FCOUNTER(FCNT_STATE_INSERT);
	atomic_add_int(&pf_status.states, 1);
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	pfsync_insert_state(s);
	return (0);
}

/*
 * Look up a state by its id/creatorid in the current cpu's id tree.
 */
struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	int cpu = mycpu->gd_cpuid;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);

	return (RB_FIND(pf_state_tree_id, &tree_id[cpu],
			(struct pf_state *)key));
}

/*
 * WARNING! May return a state structure that was localized to another cpu,
 *	    destruction is typically protected by the callers pf_token.
 *	    The element can only be destroyed
 */
struct pf_state *
pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir,
	      struct mbuf *m)
{
	struct pf_state_key	*skey = (void *)key;
	struct pf_state_key	*sk;
	struct pf_state_item	*si;
	struct pf_state *s;
	int cpu = mycpu->gd_cpuid;
	int globalstl = 0;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);

	/*
	 * Fast path: outbound packets carry the cached statekey from the
	 * inbound side; its ->reverse pointer gives the key directly.
	 */
	if (dir == PF_OUT && m->m_pkthdr.pf.statekey &&
	    ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) {
		sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse;
	} else {
		/* per-cpu table first, then the global table (shared lock) */
		sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
		if (sk == NULL) {
			lockmgr(&pf_global_statetbl_lock, LK_SHARED);
			sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
			if (sk == NULL) {
				lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
				return (NULL);
			}
			globalstl = 1;
		}
		/* populate the reverse-lookup cache for next time */
		if (dir == PF_OUT && m->m_pkthdr.pf.statekey) {
			((struct pf_state_key *)
			    m->m_pkthdr.pf.statekey)->reverse = sk;
			sk->reverse = m->m_pkthdr.pf.statekey;
		}
	}
	if (dir == PF_OUT)
		m->m_pkthdr.pf.statekey = NULL;

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry) {
		if ((si->s->kif == pfi_all || si->s->kif == kif) &&
		    sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			   si->s->key[PF_SK_STACK])) {
			break;
		}
	}

	/*
	 * Extract state before potentially releasing the global statetbl
	 * lock.  Ignore the state if the create is still in-progress as
	 * it can be deleted out from under us by the owning localized cpu.
	 * However, if CREATEINPROG is not set, state can only be deleted
	 * by the purge thread which we are protected from via our shared
	 * pf_token.
	 */
	if (si) {
		s = si->s;
		if (s && (s->state_flags & PFSTATE_CREATEINPROG))
			s = NULL;
	} else {
		s = NULL;
	}
	if (globalstl)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return s;
}

/*
 * WARNING! May return a state structure that was localized to another cpu,
 *	    destruction is typically protected by the callers pf_token.
 */
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*skey = (void *)key;
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;
	struct pf_state		*s;
	int cpu = mycpu->gd_cpuid;
	int globalstl = 0;

	PF_INC_FCOUNTER(FCNT_STATE_SEARCH);

	sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey);
	if (sk == NULL) {
		lockmgr(&pf_global_statetbl_lock, LK_SHARED);
		sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey);
		globalstl = 1;
	}
	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
				    si->s->key[PF_SK_STACK]))) {
				/* with *more, keep counting extra matches */
				if (more == NULL) {
					ret = si;
					break;
				}
				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}

	/*
	 * Extract state before potentially releasing the global statetbl
	 * lock.  Ignore the state if the create is still in-progress as
	 * it can be deleted out from under us by the owning localized cpu.
	 * However, if CREATEINPROG is not set, state can only be deleted
	 * by the purge thread which we are protected from via our shared
	 * pf_token.
	 */
	if (ret) {
		s = ret->s;
		if (s && (s->state_flags & PFSTATE_CREATEINPROG))
			s = NULL;
	} else {
		s = NULL;
	}
	if (globalstl)
		lockmgr(&pf_global_statetbl_lock, LK_RELEASE);
	return s;
}

/* END state table stuff */

/*
 * Kernel thread: periodically walks every cpu and expires states,
 * fragments, and source nodes.  Migrates itself onto each cpu in turn
 * so per-cpu structures are always purged by their owning cpu.
 */
void
pf_purge_thread(void *v)
{
	globaldata_t save_gd = mycpu;
	int nloops = 0;
	int locked = 0;
	int nn;
	int endingit;

	for (;;) {
		tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);

		endingit = pf_end_threads;

		for (nn = 0; nn < ncpus; ++nn) {
			lwkt_setcpu_self(globaldata_find(nn));

			lwkt_gettoken(&pf_token);
			lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
			crit_enter();

			/*
			 * process a fraction of the state table every second
			 */
			if (!pf_purge_expired_states(
				1 + (pf_status.states /
				     pf_default_rule.timeout[
					PFTM_INTERVAL]), 0)) {
				pf_purge_expired_states(
				    1 + (pf_status.states /
					 pf_default_rule.timeout[
						PFTM_INTERVAL]), 1);
			}

			/*
			 * purge other expired types every PFTM_INTERVAL
			 * seconds
			 */
			if (++nloops >=
			    pf_default_rule.timeout[PFTM_INTERVAL]) {
				pf_purge_expired_fragments();
				if (!pf_purge_expired_src_nodes(locked)) {
					pf_purge_expired_src_nodes(1);
				}
				nloops = 0;
			}

			/*
			 * If terminating the thread, clean everything out
			 * (on all cpus).
			 */
			if (endingit) {
				pf_purge_expired_states(pf_status.states, 0);
				pf_purge_expired_fragments();
				pf_purge_expired_src_nodes(1);
			}

			crit_exit();
			lockmgr(&pf_consistency_lock, LK_RELEASE);
			lwkt_reltoken(&pf_token);
		}
		lwkt_setcpu_self(save_gd);
		if (endingit)
			break;
	}

	/*
	 * Thread termination
	 */
	pf_end_threads++;
	wakeup(pf_purge_thread);
	kthread_exit();
}

/*
 * Compute the absolute time (in seconds) at which a state expires,
 * applying adaptive-timeout scaling when the state count exceeds the
 * configured adaptive thresholds.
 */
u_int32_t
pf_state_expires(const struct pf_state *state)
{
	u_int32_t timeout;
	u_int32_t start;
	u_int32_t end;
	u_int32_t states;

	/* handle all PFTM_* > PFTM_MAX here */
	if (state->timeout == PFTM_PURGE)
		return (time_second);	/* expire immediately */
	if (state->timeout == PFTM_UNTIL_PACKET)
		return (0);
	KKASSERT(state->timeout != PFTM_UNLINKED);
	KKASSERT(state->timeout < PFTM_MAX);
	timeout = state->rule.ptr->timeout[state->timeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[state->timeout];
	/* per-rule adaptive settings override the global defaults */
	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}

	/*
	 * If the number of states exceeds allowed values, adaptively
	 * timeout the state more quickly.  This can be very dangerous
	 * to legitimate connections, however, so defray the timeout
	 * based on the packet count.
	 *
	 * Retain from 0-100% based on number of states.
	 *
	 * Recover up to 50% of the lost portion if there was
	 * packet traffic (100 pkts = 50%).
	 */
	if (end && states > start && start < end) {
		u_int32_t n;			/* timeout retention 0-100% */
		u_int64_t pkts;
#if 0
		static struct krate boorate = { .freq = 1 };
#endif

		/*
		 * Reduce timeout by n% (0-100)
		 */
		n = (states - start) * 100 / (end - start);
		if (n > 100)
			n = 0;
		else
			n = 100 - n;

		/*
		 * But claw back some of the reduction based on packet
		 * count associated with the state.
		 */
		pkts = state->packets[0] + state->packets[1];
		if (pkts > 100)
			pkts = 100;
#if 0
		krateprintf(&boorate, "timeout %-4u n=%u pkts=%-3lu -> %lu\n",
			timeout, n, pkts, n + (100 - n) * pkts / 200);
#endif

		n += (100 - n) * pkts / 200;	/* recover by up-to 50% */
		timeout = timeout * n / 100;

	}
	return (state->expire + timeout);
}

/*
 * Remove expired source-tracking nodes on the current cpu.  Takes
 * pf_consistency_lock lazily, only once an expired node is found.
 *
 * (called with exclusive pf_token)
 */
int
pf_purge_expired_src_nodes(int waslocked)
{
	struct pf_src_node *cur, *next;
	int locked = waslocked;
	int cpu = mycpu->gd_cpuid;

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking[cpu]);
	     cur;
	     cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking[cpu], cur);

		if (cur->states <= 0 && cur->expire <= time_second) {
			if (!locked) {
				lockmgr(&pf_consistency_lock, LK_EXCLUSIVE);
				/* re-fetch next, we may have blocked */
				next = RB_NEXT(pf_src_tree,
					       &tree_src_tracking[cpu], cur);
				locked = 1;
			}
			if (cur->rule.ptr != NULL) {
				/*
				 * decrements in rule should be ok, token is
				 * held exclusively in this code path.
				 */
				atomic_add_int(&cur->rule.ptr->src_nodes, -1);
				if (cur->rule.ptr->states_cur <= 0 &&
				    cur->rule.ptr->max_src_nodes <= 0)
					pf_rm_rule(NULL, cur->rule.ptr);
			}
			RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], cur);
			PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
			atomic_add_int(&pf_status.src_nodes, -1);
			kfree(cur, M_PFSRCTREEPL);
		}
	}
	if (locked && !waslocked)
		lockmgr(&pf_consistency_lock, LK_RELEASE);
	return(1);
}

/*
 * Drop a state's references on its source node(s); when a node's state
 * count reaches zero, arm its expiry timer so the purge thread can
 * reclaim it.
 */
void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t timeout;

	if (s->src_node != NULL) {
		if (s->src.tcp_est)
			atomic_add_int(&s->src_node->conn, -1);
		if (--s->src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout) {
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			}
			s->src_node->expire = time_second + timeout;
		}
	}
	if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
		if (--s->nat_src_node->states <= 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			s->nat_src_node->expire = time_second + timeout;
		}
	}
	s->src_node = s->nat_src_node = NULL;
}

/* callers should be at crit_enter() */
void
pf_unlink_state(struct pf_state *cur)
{
	int cpu = mycpu->gd_cpuid;

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		/* XXX wire key the right one? */
		/* tear down the proxied TCP connection with a RST */
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL);
	}
	RB_REMOVE(pf_state_tree_id, &tree_id[cpu], cur);
	if (cur->creatorid == pf_status.hostid)
		pfsync_delete_state(cur);
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

/*
 * callers should be at crit_enter() and hold pf_consistency_lock exclusively.
 * pf_token must also be held exclusively.
 */
void
pf_free_state(struct pf_state *cur)
{
	int cpu = mycpu->gd_cpuid;

	KKASSERT(cur->cpuid == cpu);

	/* pfsync bulk-update still references this state; defer the free */
	if (pfsyncif != NULL &&
	    (pfsyncif->sc_bulk_send_next == cur ||
	     pfsyncif->sc_bulk_terminator == cur))
		return;
	KKASSERT(cur->timeout == PFTM_UNLINKED);
	/*
	 * decrements in rule should be ok, token is
	 * held exclusively in this code path.
	 */
	if (--cur->rule.ptr->states_cur <= 0 &&
	    cur->rule.ptr->src_nodes <= 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->nat_rule.ptr != NULL) {
		if (--cur->nat_rule.ptr->states_cur <= 0 &&
		    cur->nat_rule.ptr->src_nodes <= 0) {
			pf_rm_rule(NULL, cur->nat_rule.ptr);
		}
	}
	if (cur->anchor.ptr != NULL) {
		if (--cur->anchor.ptr->states_cur <= 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);

	/*
	 * We may be freeing pf_purge_expired_states()'s saved scan entry,
	 * adjust it if necessary.
1556 */ 1557 if (purge_cur[cpu] == cur) { 1558 kprintf("PURGE CONFLICT\n"); 1559 purge_cur[cpu] = TAILQ_NEXT(purge_cur[cpu], entry_list); 1560 } 1561 TAILQ_REMOVE(&state_list[cpu], cur, entry_list); 1562 if (cur->tag) 1563 pf_tag_unref(cur->tag); 1564 kfree(cur, M_PFSTATEPL); 1565 PF_INC_FCOUNTER(FCNT_STATE_REMOVALS); 1566 atomic_add_int(&pf_status.states, -1); 1567 } 1568 1569 int 1570 pf_purge_expired_states(u_int32_t maxcheck, int waslocked) 1571 { 1572 struct pf_state *cur; 1573 int locked = waslocked; 1574 int cpu = mycpu->gd_cpuid; 1575 1576 while (maxcheck--) { 1577 /* 1578 * Wrap to start of list when we hit the end 1579 */ 1580 cur = purge_cur[cpu]; 1581 if (cur == NULL) { 1582 cur = TAILQ_FIRST(&state_list[cpu]); 1583 if (cur == NULL) 1584 break; /* list empty */ 1585 } 1586 1587 /* 1588 * Setup next (purge_cur) while we process this one. If 1589 * we block and something else deletes purge_cur, 1590 * pf_free_state() will adjust it further ahead. 1591 */ 1592 purge_cur[cpu] = TAILQ_NEXT(cur, entry_list); 1593 1594 if (cur->timeout == PFTM_UNLINKED) { 1595 /* free unlinked state */ 1596 if (! locked) { 1597 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 1598 locked = 1; 1599 } 1600 pf_free_state(cur); 1601 } else if (pf_state_expires(cur) <= time_second) { 1602 /* unlink and free expired state */ 1603 pf_unlink_state(cur); 1604 if (! 
locked) { 1605 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE)) 1606 return (0); 1607 locked = 1; 1608 } 1609 pf_free_state(cur); 1610 } 1611 } 1612 1613 if (locked) 1614 lockmgr(&pf_consistency_lock, LK_RELEASE); 1615 return (1); 1616 } 1617 1618 int 1619 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1620 { 1621 if (aw->type != PF_ADDR_TABLE) 1622 return (0); 1623 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) 1624 return (1); 1625 return (0); 1626 } 1627 1628 void 1629 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1630 { 1631 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1632 return; 1633 pfr_detach_table(aw->p.tbl); 1634 aw->p.tbl = NULL; 1635 } 1636 1637 void 1638 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1639 { 1640 struct pfr_ktable *kt = aw->p.tbl; 1641 1642 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1643 return; 1644 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1645 kt = kt->pfrkt_root; 1646 aw->p.tbl = NULL; 1647 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1648 kt->pfrkt_cnt : -1; 1649 } 1650 1651 void 1652 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1653 { 1654 switch (af) { 1655 #ifdef INET 1656 case AF_INET: { 1657 u_int32_t a = ntohl(addr->addr32[0]); 1658 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1659 (a>>8)&255, a&255); 1660 if (p) { 1661 p = ntohs(p); 1662 kprintf(":%u", p); 1663 } 1664 break; 1665 } 1666 #endif /* INET */ 1667 #ifdef INET6 1668 case AF_INET6: { 1669 u_int16_t b; 1670 u_int8_t i, curstart, curend, maxstart, maxend; 1671 curstart = curend = maxstart = maxend = 255; 1672 for (i = 0; i < 8; i++) { 1673 if (!addr->addr16[i]) { 1674 if (curstart == 255) 1675 curstart = i; 1676 curend = i; 1677 } else { 1678 if ((curend - curstart) > 1679 (maxend - maxstart)) { 1680 maxstart = curstart; 1681 maxend = curend; 1682 } 1683 curstart = curend = 255; 1684 } 1685 } 1686 if ((curend - curstart) > 1687 (maxend - maxstart)) { 1688 maxstart = curstart; 1689 maxend = curend; 1690 } 1691 for (i = 0; i < 8; i++) { 1692 if (i >= maxstart && i <= maxend) { 1693 if (i == 0) 1694 kprintf(":"); 1695 if (i == maxend) 1696 kprintf(":"); 1697 } else { 1698 b = ntohs(addr->addr16[i]); 1699 kprintf("%x", b); 1700 if (i < 7) 1701 kprintf(":"); 1702 } 1703 } 1704 if (p) { 1705 p = ntohs(p); 1706 kprintf("[%u]", p); 1707 } 1708 break; 1709 } 1710 #endif /* INET6 */ 1711 } 1712 } 1713 1714 void 1715 pf_print_state(struct pf_state *s) 1716 { 1717 pf_print_state_parts(s, NULL, NULL); 1718 } 1719 1720 void 1721 pf_print_state_parts(struct pf_state *s, 1722 struct pf_state_key *skwp, struct pf_state_key *sksp) 1723 { 1724 struct pf_state_key *skw, *sks; 1725 u_int8_t proto, dir; 1726 1727 /* Do our best to fill these, but they're skipped if NULL */ 1728 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1729 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1730 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1731 dir = s ? 
s->direction : 0; 1732 1733 switch (proto) { 1734 case IPPROTO_TCP: 1735 kprintf("TCP "); 1736 break; 1737 case IPPROTO_UDP: 1738 kprintf("UDP "); 1739 break; 1740 case IPPROTO_ICMP: 1741 kprintf("ICMP "); 1742 break; 1743 case IPPROTO_ICMPV6: 1744 kprintf("ICMPV6 "); 1745 break; 1746 default: 1747 kprintf("%u ", skw->proto); 1748 break; 1749 } 1750 switch (dir) { 1751 case PF_IN: 1752 kprintf(" in"); 1753 break; 1754 case PF_OUT: 1755 kprintf(" out"); 1756 break; 1757 } 1758 if (skw) { 1759 kprintf(" wire: "); 1760 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1761 kprintf(" "); 1762 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1763 } 1764 if (sks) { 1765 kprintf(" stack: "); 1766 if (sks != skw) { 1767 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1768 kprintf(" "); 1769 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1770 } else 1771 kprintf("-"); 1772 } 1773 if (s) { 1774 if (proto == IPPROTO_TCP) { 1775 kprintf(" [lo=%u high=%u win=%u modulator=%u", 1776 s->src.seqlo, s->src.seqhi, 1777 s->src.max_win, s->src.seqdiff); 1778 if (s->src.wscale && s->dst.wscale) 1779 kprintf(" wscale=%u", 1780 s->src.wscale & PF_WSCALE_MASK); 1781 kprintf("]"); 1782 kprintf(" [lo=%u high=%u win=%u modulator=%u", 1783 s->dst.seqlo, s->dst.seqhi, 1784 s->dst.max_win, s->dst.seqdiff); 1785 if (s->src.wscale && s->dst.wscale) 1786 kprintf(" wscale=%u", 1787 s->dst.wscale & PF_WSCALE_MASK); 1788 kprintf("]"); 1789 } 1790 kprintf(" %u:%u", s->src.state, s->dst.state); 1791 } 1792 } 1793 1794 void 1795 pf_print_flags(u_int8_t f) 1796 { 1797 if (f) 1798 kprintf(" "); 1799 if (f & TH_FIN) 1800 kprintf("F"); 1801 if (f & TH_SYN) 1802 kprintf("S"); 1803 if (f & TH_RST) 1804 kprintf("R"); 1805 if (f & TH_PUSH) 1806 kprintf("P"); 1807 if (f & TH_ACK) 1808 kprintf("A"); 1809 if (f & TH_URG) 1810 kprintf("U"); 1811 if (f & TH_ECE) 1812 kprintf("E"); 1813 if (f & TH_CWR) 1814 kprintf("W"); 1815 } 1816 1817 #define PF_SET_SKIP_STEPS(i) \ 1818 do { \ 1819 while (head[i] 
 != cur) {					\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

/*
 * Precompute per-field skip pointers: for each rule, skip[i] points to
 * the next rule that differs in criterion i, letting the evaluation
 * loop jump over runs of rules that cannot match.
 */
void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {

		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* terminate all remaining chains at end of list */
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

/*
 * Compare two address wrappers; returns 1 if they differ, 0 if equal.
 */
int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		kprintf("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/*
 * Incrementally patch a 16-bit Internet checksum for a 16-bit field
 * changing from old to new (RFC 1624 style).  For UDP (u != 0) a zero
 * checksum means "no checksum": keep 0 as-is on input, and map a
 * computed 0 to 0xFFFF on output.
 */
u_int16_t
pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
{
	u_int32_t	l;

	if (udp && !cksum)
		return (0x0000);
	l = cksum + old - new;
	l = (l >> 16) + (l & 65535);
	l = l & 65535;
	if (udp && !l)
		return (0xFFFF);
	return (l);
}

/*
 * Rewrite an address+port pair in place and patch the IP header
 * checksum (*ic, v4 only) and the TCP/UDP pseudo-header checksum (*pc)
 * accordingly.  u selects UDP zero-checksum semantics.
 */
void
pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
{
	struct pf_addr	ao;
	u_int16_t	po = *p;

	PF_ACPY(&ao, a, af);
	PF_ACPY(a, an, af);

	*p = pn;

	switch (af) {
#ifdef INET
	case AF_INET:
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    ao.addr16[0], an->addr16[0], 0),
		    ao.addr16[1], an->addr16[1], 0);
		*p = pn;
		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u),
		    po, pn, u);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
		    ao.addr16[0], an->addr16[0], u),
		    ao.addr16[1], an->addr16[1], u),
		    ao.addr16[2], an->addr16[2], u),
		    ao.addr16[3], an->addr16[3], u),
		    ao.addr16[4], an->addr16[4], u),
		    ao.addr16[5], an->addr16[5], u),
		    ao.addr16[6], an->addr16[6], u),
		    ao.addr16[7], an->addr16[7], u),
		    po, pn, u);
		break;
#endif /* INET6 */
	}
}


/* Changes a u_int32_t.
 Uses a void * so there are no align restrictions */
void
pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
{
	u_int32_t	ao;

	/* memcpy: a may be unaligned (e.g. inside TCP options) */
	memcpy(&ao, a, sizeof(ao));
	memcpy(a, &an, sizeof(u_int32_t));
	*c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u),
	    ao % 65536, an % 65536, u);
}

#ifdef INET6
/*
 * Replace an IPv6 address in place and patch checksum *c across all
 * eight 16-bit groups.
 */
void
pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
	struct pf_addr	ao;

	PF_ACPY(&ao, a, AF_INET6);
	PF_ACPY(a, an, AF_INET6);

	*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
	    pf_cksum_fixup(pf_cksum_fixup(*c,
	    ao.addr16[0], an->addr16[0], u),
	    ao.addr16[1], an->addr16[1], u),
	    ao.addr16[2], an->addr16[2], u),
	    ao.addr16[3], an->addr16[3], u),
	    ao.addr16[4], an->addr16[4], u),
	    ao.addr16[5], an->addr16[5], u),
	    ao.addr16[6], an->addr16[6], u),
	    ao.addr16[7], an->addr16[7], u);
}
#endif /* INET6 */

/*
 * Rewrite the addresses embedded in an ICMP error (the quoted inner
 * packet's address ia/port ip, and optionally the outer address oa) to
 * na/np, patching the inner protocol checksum (*pc), the quoted inner
 * IP header checksum (*h2c), the ICMP checksum (*ic) and the outer IP
 * header checksum (*hc) as needed.
 */
void
pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
    struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
    u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
{
	struct pf_addr	oia, ooa;

	PF_ACPY(&oia, ia, af);
	if (oa)
		PF_ACPY(&ooa, oa, af);

	/* Change inner protocol port, fix inner protocol checksum. */
	if (ip != NULL) {
		u_int16_t	oip = *ip;
		u_int32_t	opc = 0;

		if (pc != NULL)
			opc = *pc;
		*ip = np;
		if (pc != NULL)
			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
		/* the ICMP checksum covers the quoted payload too */
		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
		if (pc != NULL)
			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
	}
	/* Change inner ip address, fix inner ip and icmp checksums. */
	PF_ACPY(ia, na, af);
	switch (af) {
#ifdef INET
	case AF_INET: {
		u_int32_t	 oh2c = *h2c;

		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], 0),
		    oia.addr16[1], ia->addr16[1], 0);
		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
		    pf_cksum_fixup(pf_cksum_fixup(*ic,
		    oia.addr16[0], ia->addr16[0], u),
		    oia.addr16[1], ia->addr16[1], u),
		    oia.addr16[2], ia->addr16[2], u),
		    oia.addr16[3], ia->addr16[3], u),
		    oia.addr16[4], ia->addr16[4], u),
		    oia.addr16[5], ia->addr16[5], u),
		    oia.addr16[6], ia->addr16[6], u),
		    oia.addr16[7], ia->addr16[7], u);
		break;
#endif /* INET6 */
	}
	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
	if (oa) {
		PF_ACPY(oa, na, af);
		switch (af) {
#ifdef INET
		case AF_INET:
			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
			    ooa.addr16[0], oa->addr16[0], 0),
			    ooa.addr16[1], oa->addr16[1], 0);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
			    pf_cksum_fixup(pf_cksum_fixup(*ic,
			    ooa.addr16[0], oa->addr16[0], u),
			    ooa.addr16[1], oa->addr16[1], u),
			    ooa.addr16[2], oa->addr16[2], u),
			    ooa.addr16[3], oa->addr16[3], u),
			    ooa.addr16[4], oa->addr16[4], u),
			    ooa.addr16[5], oa->addr16[5], u),
			    ooa.addr16[6], oa->addr16[6], u),
			    ooa.addr16[7], oa->addr16[7], u);
			break;
#endif /* INET6 */
		}
	}
}


/*
 * Need to modulate the sequence numbers in the TCP SACK option
 * (credits to Krzysztof Pfaff for report and patch)
 */
int
pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *dst)
{
	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
	int copyback = 0, i, olen;
	struct raw_sackblock sack;

#define TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
	if (hlen < TCPOLEN_SACKLEN ||
	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
		return 0;

	/* walk the TCP options, rewriting each SACK block by -seqdiff */
	while (hlen >= TCPOLEN_SACKLEN) {
		olen = opt[1];
		switch (*opt) {
		case TCPOPT_EOL:	/* FALLTHROUGH */
		case TCPOPT_NOP:
			opt++;
			hlen--;
			break;
		case TCPOPT_SACK:
			if (olen > hlen)
				olen = hlen;
			if (olen >= TCPOLEN_SACKLEN) {
				for (i = 2; i + TCPOLEN_SACK <= olen;
				     i += TCPOLEN_SACK) {
					memcpy(&sack, &opt[i], sizeof(sack));
					pf_change_a(&sack.rblk_start,
					    &th->th_sum,
					    htonl(ntohl(sack.rblk_start) -
					    dst->seqdiff), 0);
					pf_change_a(&sack.rblk_end,
					    &th->th_sum,
					    htonl(ntohl(sack.rblk_end) -
					    dst->seqdiff), 0);
					memcpy(&opt[i], &sack, sizeof(sack));
				}
				copyback = 1;
			}
			/* FALLTHROUGH */
		default:
			if (olen < 2)
				olen = 2;
			hlen -= olen;
			opt += olen;
		}
	}

	if (copyback)
		m_copyback(m, off + sizeof(*th), thoptlen, opts);
	return (copyback);
}

/*
 * Build and transmit a bare TCP segment (e.g. a RST or a SYN proxy
 * reply).  If eh/ifp are given, the frame is sent back out ifp with
 * the ethernet addresses swapped; otherwise it is routed normally.
 */
void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp)
{
	struct mbuf	*m;
	int		 len = 0, tlen;
#ifdef INET
	struct ip	*h = NULL;
#endif /* INET */
#ifdef INET6
	struct ip6_hdr	*h6 = NULL;
#endif /* INET6 */
	struct tcphdr	*th = NULL;
	char		*opt;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	/* maximum segment size tcp option */
	tlen = sizeof(struct tcphdr);
	if (mss)
		tlen += 4;

	switch (af) {
#ifdef INET
	case AF_INET:
		len = sizeof(struct ip) + tlen;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		len = sizeof(struct ip6_hdr) + tlen;
		break;
#endif /* INET6 */
	}

	/*
	 * Create outgoing mbuf.
	 *
	 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 */
	m = m_gethdr(M_NOWAIT, MT_HEADER);
	if (m == NULL) {
		return;
	}
	if (tag)
		m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
	m->m_pkthdr.pf.flags = 0;
	m->m_pkthdr.pf.tag = rtag;
	/* XXX Recheck when upgrading to > 4.4 */
	m->m_pkthdr.pf.statekey = NULL;
	if (r != NULL && r->rtableid >= 0)
		m->m_pkthdr.pf.rtableid = r->rtableid;

#ifdef ALTQ
	if (r != NULL && r->qid) {
		m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
		m->m_pkthdr.pf.qid = r->qid;
		m->m_pkthdr.pf.ecn_af = af;
		m->m_pkthdr.pf.hdr = mtod(m, struct ip *);
	}
#endif /* ALTQ */
	m->m_data += max_linkhdr;
	m->m_pkthdr.len = m->m_len = len;
	m->m_pkthdr.rcvif = NULL;
	bzero(m->m_data, len);
	switch (af) {
#ifdef INET
	case AF_INET:
		h = mtod(m, struct ip *);

		/* IP header fields included in the TCP checksum */
		h->ip_p = IPPROTO_TCP;
		h->ip_len = tlen;	/* pseudo-header value; real length set later */
		h->ip_src.s_addr = saddr->v4.s_addr;
		h->ip_dst.s_addr = daddr->v4.s_addr;

		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		h6 = mtod(m, struct ip6_hdr *);

		/* IP header fields included in the TCP checksum */
		h6->ip6_nxt = IPPROTO_TCP;
		h6->ip6_plen = htons(tlen);
		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	}

	/* TCP header */
	th->th_sport = sport;
	th->th_dport = dport;
	th->th_seq = htonl(seq);
	th->th_ack = htonl(ack);
	th->th_off = tlen >> 2;
	th->th_flags = flags;
	th->th_win = htons(win);

	if (mss) {
		opt = (char *)(th + 1);
		opt[0] = TCPOPT_MAXSEG;
		opt[1] = 4;
		mss = htons(mss);
		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
	}

switch (af) { 2252 #ifdef INET 2253 case AF_INET: 2254 /* TCP checksum */ 2255 th->th_sum = in_cksum(m, len); 2256 2257 /* Finish the IP header */ 2258 h->ip_v = 4; 2259 h->ip_hl = sizeof(*h) >> 2; 2260 h->ip_tos = IPTOS_LOWDELAY; 2261 h->ip_len = len; 2262 h->ip_off = path_mtu_discovery ? IP_DF : 0; 2263 h->ip_ttl = ttl ? ttl : ip_defttl; 2264 h->ip_sum = 0; 2265 if (eh == NULL) { 2266 lwkt_reltoken(&pf_token); 2267 ip_output(m, NULL, NULL, 0, NULL, NULL); 2268 lwkt_gettoken(&pf_token); 2269 } else { 2270 struct route ro; 2271 struct rtentry rt; 2272 struct ether_header *e = (void *)ro.ro_dst.sa_data; 2273 2274 if (ifp == NULL) { 2275 m_freem(m); 2276 return; 2277 } 2278 rt.rt_ifp = ifp; 2279 ro.ro_rt = &rt; 2280 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 2281 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 2282 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 2283 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 2284 e->ether_type = eh->ether_type; 2285 /* XXX_IMPORT: later */ 2286 lwkt_reltoken(&pf_token); 2287 ip_output(m, NULL, &ro, 0, NULL, NULL); 2288 lwkt_gettoken(&pf_token); 2289 } 2290 break; 2291 #endif /* INET */ 2292 #ifdef INET6 2293 case AF_INET6: 2294 /* TCP checksum */ 2295 th->th_sum = in6_cksum(m, IPPROTO_TCP, 2296 sizeof(struct ip6_hdr), tlen); 2297 2298 h6->ip6_vfc |= IPV6_VERSION; 2299 h6->ip6_hlim = IPV6_DEFHLIM; 2300 2301 lwkt_reltoken(&pf_token); 2302 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2303 lwkt_gettoken(&pf_token); 2304 break; 2305 #endif /* INET6 */ 2306 } 2307 } 2308 2309 void 2310 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 2311 struct pf_rule *r) 2312 { 2313 struct mbuf *m0; 2314 2315 /* 2316 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 2317 * so make sure pf.flags is clear. 
2318 */ 2319 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) 2320 return; 2321 2322 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; 2323 m0->m_pkthdr.pf.flags = 0; 2324 /* XXX Re-Check when Upgrading to > 4.4 */ 2325 m0->m_pkthdr.pf.statekey = NULL; 2326 2327 if (r->rtableid >= 0) 2328 m0->m_pkthdr.pf.rtableid = r->rtableid; 2329 2330 #ifdef ALTQ 2331 if (r->qid) { 2332 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 2333 m0->m_pkthdr.pf.qid = r->qid; 2334 m0->m_pkthdr.pf.ecn_af = af; 2335 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 2336 } 2337 #endif /* ALTQ */ 2338 2339 switch (af) { 2340 #ifdef INET 2341 case AF_INET: 2342 icmp_error(m0, type, code, 0, 0); 2343 break; 2344 #endif /* INET */ 2345 #ifdef INET6 2346 case AF_INET6: 2347 icmp6_error(m0, type, code, 0); 2348 break; 2349 #endif /* INET6 */ 2350 } 2351 } 2352 2353 /* 2354 * Return 1 if the addresses a and b match (with mask m), otherwise return 0. 2355 * If n is 0, they match if they are equal. If n is != 0, they match if they 2356 * are different. 2357 */ 2358 int 2359 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2360 struct pf_addr *b, sa_family_t af) 2361 { 2362 int match = 0; 2363 2364 switch (af) { 2365 #ifdef INET 2366 case AF_INET: 2367 if ((a->addr32[0] & m->addr32[0]) == 2368 (b->addr32[0] & m->addr32[0])) 2369 match++; 2370 break; 2371 #endif /* INET */ 2372 #ifdef INET6 2373 case AF_INET6: 2374 if (((a->addr32[0] & m->addr32[0]) == 2375 (b->addr32[0] & m->addr32[0])) && 2376 ((a->addr32[1] & m->addr32[1]) == 2377 (b->addr32[1] & m->addr32[1])) && 2378 ((a->addr32[2] & m->addr32[2]) == 2379 (b->addr32[2] & m->addr32[2])) && 2380 ((a->addr32[3] & m->addr32[3]) == 2381 (b->addr32[3] & m->addr32[3]))) 2382 match++; 2383 break; 2384 #endif /* INET6 */ 2385 } 2386 if (match) { 2387 if (n) 2388 return (0); 2389 else 2390 return (1); 2391 } else { 2392 if (n) 2393 return (1); 2394 else 2395 return (0); 2396 } 2397 } 2398 2399 /* 2400 * Return 1 if b <= a <= e, otherwise return 0. 
2401 */ 2402 int 2403 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2404 struct pf_addr *a, sa_family_t af) 2405 { 2406 switch (af) { 2407 #ifdef INET 2408 case AF_INET: 2409 if ((a->addr32[0] < b->addr32[0]) || 2410 (a->addr32[0] > e->addr32[0])) 2411 return (0); 2412 break; 2413 #endif /* INET */ 2414 #ifdef INET6 2415 case AF_INET6: { 2416 int i; 2417 2418 /* check a >= b */ 2419 for (i = 0; i < 4; ++i) 2420 if (a->addr32[i] > b->addr32[i]) 2421 break; 2422 else if (a->addr32[i] < b->addr32[i]) 2423 return (0); 2424 /* check a <= e */ 2425 for (i = 0; i < 4; ++i) 2426 if (a->addr32[i] < e->addr32[i]) 2427 break; 2428 else if (a->addr32[i] > e->addr32[i]) 2429 return (0); 2430 break; 2431 } 2432 #endif /* INET6 */ 2433 } 2434 return (1); 2435 } 2436 2437 int 2438 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 2439 { 2440 switch (op) { 2441 case PF_OP_IRG: 2442 return ((p > a1) && (p < a2)); 2443 case PF_OP_XRG: 2444 return ((p < a1) || (p > a2)); 2445 case PF_OP_RRG: 2446 return ((p >= a1) && (p <= a2)); 2447 case PF_OP_EQ: 2448 return (p == a1); 2449 case PF_OP_NE: 2450 return (p != a1); 2451 case PF_OP_LT: 2452 return (p < a1); 2453 case PF_OP_LE: 2454 return (p <= a1); 2455 case PF_OP_GT: 2456 return (p > a1); 2457 case PF_OP_GE: 2458 return (p >= a1); 2459 } 2460 return (0); /* never reached */ 2461 } 2462 2463 int 2464 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 2465 { 2466 a1 = ntohs(a1); 2467 a2 = ntohs(a2); 2468 p = ntohs(p); 2469 return (pf_match(op, a1, a2, p)); 2470 } 2471 2472 int 2473 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 2474 { 2475 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2476 return (0); 2477 return (pf_match(op, a1, a2, u)); 2478 } 2479 2480 int 2481 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 2482 { 2483 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2484 return (0); 2485 return (pf_match(op, a1, a2, g)); 2486 } 2487 2488 int 2489 
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 2490 { 2491 if (*tag == -1) 2492 *tag = m->m_pkthdr.pf.tag; 2493 2494 return ((!r->match_tag_not && r->match_tag == *tag) || 2495 (r->match_tag_not && r->match_tag != *tag)); 2496 } 2497 2498 int 2499 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 2500 { 2501 if (tag <= 0 && rtableid < 0) 2502 return (0); 2503 2504 if (tag > 0) 2505 m->m_pkthdr.pf.tag = tag; 2506 if (rtableid >= 0) 2507 m->m_pkthdr.pf.rtableid = rtableid; 2508 2509 return (0); 2510 } 2511 2512 void 2513 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, 2514 struct pf_rule **r, struct pf_rule **a, int *match) 2515 { 2516 struct pf_anchor_stackframe *f; 2517 2518 (*r)->anchor->match = 0; 2519 if (match) 2520 *match = 0; 2521 if (*depth >= NELEM(pf_anchor_stack)) { 2522 kprintf("pf_step_into_anchor: stack overflow\n"); 2523 *r = TAILQ_NEXT(*r, entries); 2524 return; 2525 } else if (*depth == 0 && a != NULL) 2526 *a = *r; 2527 f = pf_anchor_stack + (*depth)++; 2528 f->rs = *rs; 2529 f->r = *r; 2530 if ((*r)->anchor_wildcard) { 2531 f->parent = &(*r)->anchor->children; 2532 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == 2533 NULL) { 2534 *r = NULL; 2535 return; 2536 } 2537 *rs = &f->child->ruleset; 2538 } else { 2539 f->parent = NULL; 2540 f->child = NULL; 2541 *rs = &(*r)->anchor->ruleset; 2542 } 2543 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2544 } 2545 2546 int 2547 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, 2548 struct pf_rule **r, struct pf_rule **a, int *match) 2549 { 2550 struct pf_anchor_stackframe *f; 2551 int quick = 0; 2552 2553 do { 2554 if (*depth <= 0) 2555 break; 2556 f = pf_anchor_stack + *depth - 1; 2557 if (f->parent != NULL && f->child != NULL) { 2558 if (f->child->match || 2559 (match != NULL && *match)) { 2560 f->r->anchor->match = 1; 2561 *match = 0; 2562 } 2563 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2564 if (f->child != NULL) { 2565 *rs = 
&f->child->ruleset; 2566 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2567 if (*r == NULL) 2568 continue; 2569 else 2570 break; 2571 } 2572 } 2573 (*depth)--; 2574 if (*depth == 0 && a != NULL) 2575 *a = NULL; 2576 *rs = f->rs; 2577 if (f->r->anchor->match || (match != NULL && *match)) 2578 quick = f->r->quick; 2579 *r = TAILQ_NEXT(f->r, entries); 2580 } while (*r == NULL); 2581 2582 return (quick); 2583 } 2584 2585 #ifdef INET6 2586 void 2587 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2588 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2589 { 2590 switch (af) { 2591 #ifdef INET 2592 case AF_INET: 2593 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2594 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2595 break; 2596 #endif /* INET */ 2597 case AF_INET6: 2598 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2599 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2600 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2601 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2602 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2603 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2604 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2605 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2606 break; 2607 } 2608 } 2609 2610 void 2611 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2612 { 2613 switch (af) { 2614 #ifdef INET 2615 case AF_INET: 2616 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2617 break; 2618 #endif /* INET */ 2619 case AF_INET6: 2620 if (addr->addr32[3] == 0xffffffff) { 2621 addr->addr32[3] = 0; 2622 if (addr->addr32[2] == 0xffffffff) { 2623 addr->addr32[2] = 0; 2624 if (addr->addr32[1] == 0xffffffff) { 2625 addr->addr32[1] = 0; 2626 addr->addr32[0] = 2627 htonl(ntohl(addr->addr32[0]) + 1); 2628 } else 2629 addr->addr32[1] = 2630 htonl(ntohl(addr->addr32[1]) + 1); 2631 } else 2632 addr->addr32[2] = 2633 htonl(ntohl(addr->addr32[2]) + 1); 
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
	}
}
#endif /* INET6 */

/* one round of the Jenkins-style mixing used by pf_hash() below */
#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)

/*
 * hash function based on bridge_hash in if_bridge.c
 *
 * Hashes 'inaddr' keyed by 'key' into 'hash' (one word for IPv4,
 * four words for IPv6).  Used by the source-hash pool type.
 */
void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#ifdef INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
#endif /* INET6 */
	}
}

/*
 * Select the translation address 'naddr' for 'saddr' from rule r's
 * address pool according to the pool type (none/bitmask/random/
 * source-hash/round-robin).  'init_addr' (may be NULL) remembers the
 * first address handed out so round-robin/random iteration can detect
 * wrap-around; '*sn' is the sticky-address source node, looked up
 * and/or updated here.  Returns 0 on success, 1 if no address could
 * be mapped.
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_pooladdr	*cur;
	struct pf_addr		*raddr;
	struct pf_addr		*rmask;
	struct pf_addr		 counter;
	struct pf_src_node	 k;
	int			 cpu = mycpu->gd_cpuid;
	int			 tblidx;

	bzero(hash, sizeof(hash));	/* avoid gcc warnings */

	/*
	 * NOTE! rpool->cur and rpool->tblidx can be iterators and thus
	 *	 may represent a SMP race due to the shared nature of the
	 *	 rpool structure.  We allow the race and ensure that updates
	 *	 do not create a fatal condition.
	 */
	cpu_ccfence();
	cur = acur;
	raddr = &cur->addr.v.a.addr;
	rmask = &cur->addr.v.a.mask;

	/*
	 * Sticky address: if a source node already maps this source,
	 * reuse its recorded translation address.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				kprintf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				kprintf(" to ");
				pf_print_host(naddr, 0, af);
				kprintf("\n");
			}
			return (0);
		}
	}

	if (cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (cur->addr.type == PF_ADDR_DYNIFTL) {
		/* dynamic interface address: take the tracked addr/mask */
		switch (af) {
#ifdef INET
		case AF_INET:
			if (cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &cur->addr.p.dyn->pfid_addr4;
			rmask = &cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &cur->addr.p.dyn->pfid_addr6;
			rmask = &cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		}
	} else if (cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1);	/* unsupported */
	} else {
		raddr = &cur->addr.v.a.addr;
		rmask = &cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			/* first pick: randomize the host part */
			switch (af) {
#ifdef INET
			case AF_INET:
				counter.addr32[0] = htonl(karc4random());
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				/*
				 * Randomize only words not fully covered
				 * by the mask, least significant first.
				 */
				if (rmask->addr32[3] != 0xffffffff)
					counter.addr32[3] =
					    htonl(karc4random());
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					counter.addr32[2] =
					    htonl(karc4random());
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					counter.addr32[1] =
					    htonl(karc4random());
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					counter.addr32[0] =
					    htonl(karc4random());
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &counter, af);
			PF_ACPY(init_addr, naddr, af);

		} else {
			/* subsequent picks: walk sequentially (racy, ok) */
			counter = rpool->counter;
			cpu_ccfence();
			PF_AINC(&counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &counter, af);
			rpool->counter = counter;
		}
		break;
	case PF_POOL_SRCHASH:
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		tblidx = rpool->tblidx;
		counter = rpool->counter;
		if (cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(cur->addr.p.tbl,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(cur->addr.p.dyn->pfid_kt,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask,
		    &counter, af)) {
			goto get_addr;
		}

	try_next:
		/* advance to the next pool entry, wrapping to the head */
		if ((cur = TAILQ_NEXT(cur, entries)) == NULL)
			cur = TAILQ_FIRST(&rpool->list);
		if (cur->addr.type == PF_ADDR_TABLE) {
			tblidx = -1;
			if (pfr_pool_get(cur->addr.p.tbl,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (cur != acur)
					goto try_next;
				return (1);
			}
		} else if (cur->addr.type == PF_ADDR_DYNIFTL) {
			tblidx = -1;
			if (pfr_pool_get(cur->addr.p.dyn->pfid_kt,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &cur->addr.v.a.addr;
			rmask = &cur->addr.v.a.mask;
			PF_ACPY(&counter, raddr, af);
		}

	get_addr:
		/* publish iterator state (racy by design, see NOTE above) */
		rpool->cur = cur;
		rpool->tblidx = tblidx;
		PF_ACPY(naddr, &counter, af);
		if (init_addr != NULL && PF_AZERO(init_addr, af))
			PF_ACPY(init_addr, naddr, af);
		PF_AINC(&counter, af);
		rpool->counter = counter;
		break;
	}
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		kprintf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		kprintf("\n");
	}

	return (0);
}

/*
 * Choose the translation address (via pf_map_addr()) and source port
 * for a NAT mapping, writing them to *naddr/*nport.  [low, high] is
 * the allowed proxy port range; low == high == 0 means "keep the
 * original port".  Returns 0 on success, 1 if no free port/address
 * combination exists.
 */
int
pf_get_sport(struct pf_pdesc *pd, sa_family_t af,
    u_int8_t proto, struct pf_rule *r,
    struct pf_addr *saddr, struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport,
    struct pf_addr *naddr, u_int16_t *nport,
    u_int16_t low, u_int16_t high, struct pf_src_node **sn)
{
	struct pf_state_key_cmp	key;
	struct pf_addr		init_addr;
	u_int16_t		cut;
	u_int32_t		hash_base = 0;
	int			do_hash = 0;

	bzero(&init_addr, sizeof(init_addr));
	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
		return (1);

	if (proto == IPPROTO_ICMP) {
		low = 1;
		high = 65535;
	}

	/* template state key: wire side fixed, NAT side varied below */
	bzero(&key, sizeof(key));
	key.af = af;
	key.proto = proto;
	key.port[0] = dport;
	PF_ACPY(&key.addr[0], daddr, key.af);

	do {
		PF_ACPY(&key.addr[1], naddr, key.af);

		/*
		 * We want to select a port that calculates to a toeplitz hash
		 * that masks to the same cpu, otherwise the response may
		 * not see the new state.
		 *
		 * We can still do this even if the kernel is disregarding
		 * the hash and vectoring the packets to a specific cpu,
		 * but it will reduce the number of ports we can use.
		 */
		switch(af) {
		case AF_INET:
			if (proto == IPPROTO_TCP) {
				do_hash = 1;
				hash_base = toeplitz_piecemeal_port(dport) ^
				    toeplitz_piecemeal_addr(daddr->v4.s_addr) ^
				    toeplitz_piecemeal_addr(naddr->v4.s_addr);
			}
			break;
		case AF_INET6:
			/* XXX TODO XXX */
		default:
			/* XXX TODO XXX */
			break;
		}

		/*
		 * Port search; start random, step; similar to the port loop
		 * in in_pcbbind.
		 *
		 * WARNING! We try to match such that the kernel will
		 *	    dispatch the translated host/port to the same
		 *	    cpu, but this might not be possible.
		 *
		 *	    In the case where the port is fixed, or for the
		 *	    UDP case (whose toeplitz hash does not incorporate
		 *	    the port), we set not_cpu_localized.
		 *	    NOTE(review): the original comment trailed off
		 *	    here ("causes the pf_state_tree element"); the
		 *	    flag presumably marks the state as not
		 *	    cpu-localized — confirm against pfvar.h.
		 *
		 * XXX fixed ports present a problem for cpu localization.
		 */
		if (!(proto == IPPROTO_TCP ||
		      proto == IPPROTO_UDP ||
		      proto == IPPROTO_ICMP)) {
			/*
			 * non-specific protocol, leave port intact.
			 */
			key.port[1] = sport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = sport;
				pd->not_cpu_localized = 1;
				return (0);
			}
		} else if (low == 0 && high == 0) {
			/*
			 * static-port same as originator.
			 */
			key.port[1] = sport;
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = sport;
				pd->not_cpu_localized = 1;
				return (0);
			}
		} else if (low == high) {
			/*
			 * specific port as specified.
			 */
			key.port[1] = htons(low);
			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
				*nport = htons(low);
				pd->not_cpu_localized = 1;
				return (0);
			}
		} else {
			/*
			 * normal dynamic port
			 */
			u_int16_t tmp;

			if (low > high) {
				tmp = low;
				low = high;
				high = tmp;
			}
			/* low < high */
			cut = htonl(karc4random()) % (1 + high - low) + low;
			/* low <= cut <= high */
			for (tmp = cut; tmp <= high; ++(tmp)) {
				key.port[1] = htons(tmp);
				if (do_hash) {
					uint32_t hash;

					hash = hash_base ^
					toeplitz_piecemeal_port(key.port[1]);
					/* must land on our cpu */
					if (netisr_hashcpu(hash) != mycpuid)
						continue;
				}
				if (pf_find_state_all(&key, PF_IN, NULL) ==
				    NULL && !in_baddynamic(tmp, proto)) {
					if (proto == IPPROTO_UDP)
						pd->not_cpu_localized = 1;
					*nport = htons(tmp);
					return (0);
				}
			}
			/* wrap: search downward from the random cut */
			for (tmp = cut - 1; tmp >= low; --(tmp)) {
				key.port[1] = htons(tmp);
				if (do_hash) {
					uint32_t hash;

					hash = hash_base ^
					toeplitz_piecemeal_port(key.port[1]);
					if (netisr_hashcpu(hash) != mycpuid)
						continue;
				}
				if (pf_find_state_all(&key, PF_IN, NULL) ==
				    NULL && !in_baddynamic(tmp, proto)) {
					if (proto == IPPROTO_UDP)
						pd->not_cpu_localized = 1;
					*nport = htons(tmp);
					return (0);
				}
			}
		}

		/*
		 * Next address
		 */
		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
		case PF_POOL_RANDOM:
		case PF_POOL_ROUNDROBIN:
			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn))
				return (1);
			break;
		case PF_POOL_NONE:
		case PF_POOL_SRCHASH:
		case PF_POOL_BITMASK:
		default:
			return (1);
		}
	} while (! PF_AEQ(&init_addr, naddr, af) );
	return (1);					/* none available */
}

/*
 * Walk the active translation ruleset 'rs_num' (NAT/BINAT/RDR) and
 * return the first matching rule for the given packet, descending into
 * anchors as needed.  Returns NULL if nothing matches or the match is a
 * "no nat/binat/rdr" rule.  Also applies tag/rtableid side effects via
 * pf_tag_packet().
 */
struct pf_rule *
pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
    int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport,
    struct pf_addr *daddr, u_int16_t dport, int rs_num)
{
	struct pf_rule		*r, *rm = NULL;
	struct pf_ruleset	*ruleset = NULL;
	int			 tag = -1;
	int			 rtableid = -1;
	int			 asd = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
	while (r && rm == NULL) {
		struct pf_rule_addr	*src = NULL, *dst = NULL;
		struct pf_addr_wrap	*xdst = NULL;
		struct pf_pooladdr	*cur;

		if (r->action == PF_BINAT && direction == PF_IN) {
			/* inbound binat: match against the rule's pool addr */
			src = &r->dst;
			cur = r->rpool.cur;	/* SMP race possible */
			cpu_ccfence();
			if (cur)
				xdst = &cur->addr;
		} else {
			src = &r->src;
			dst = &r->dst;
		}

		r->evaluations++;
		/* skip-step evaluation: each mismatch jumps ahead */
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != pd->af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
		    src->neg, kif))
			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
			    PF_SKIP_DST_ADDR].ptr;
		else if (src->port_op && !pf_match_port(src->port_op,
		    src->port[0], src->port[1], sport))
			r = r->skip[src == &r->src ?
			    PF_SKIP_SRC_PORT :
			    PF_SKIP_DST_PORT].ptr;
		else if (dst != NULL &&
		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
		    0, NULL))
			r = TAILQ_NEXT(r, entries);
		else if (dst != NULL && dst->port_op &&
		    !pf_match_port(dst->port_op, dst->port[0],
		    dst->port[1], dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
		    off, pd->hdr.tcp), r->os_fingerprint)))
			r = TAILQ_NEXT(r, entries);
		else {
			/* full match: record side effects, stop or descend */
			if (r->tag)
				tag = r->tag;
			if (r->rtableid >= 0)
				rtableid = r->rtableid;
			if (r->anchor == NULL) {
				rm = r;
			} else
				pf_step_into_anchor(&asd, &ruleset, rs_num,
				    &r, NULL, NULL);
		}
		if (r == NULL)
			pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r,
			    NULL, NULL);
	}
	if (pf_tag_packet(m, tag, rtableid))
		return (NULL);
	if (rm != NULL && (rm->action == PF_NONAT ||
	    rm->action == PF_NORDR || rm->action == PF_NOBINAT))
		return (NULL);
	return (rm);
}

/*
 * Top-level translation lookup: find the applicable nat/binat/rdr rule
 * for the packet (outbound: binat then nat; inbound: rdr then binat),
 * set up the state keys via pf_state_key_setup(), and compute the
 * translated address/port into the NAT-side state key (*nkp).
 * Returns the matched rule, or NULL if no translation applies or
 * address/port selection failed.
 */
struct pf_rule *
pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction,
    struct pfi_kif *kif, struct pf_src_node **sn,
    struct pf_state_key **skw, struct pf_state_key **sks,
    struct pf_state_key **skp, struct pf_state_key **nkp,
    struct pf_addr *saddr, struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport)
{
	struct pf_rule	*r = NULL;

	if (direction == PF_OUT) {
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sport, daddr, dport, PF_RULESET_BINAT);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sport, daddr, dport, PF_RULESET_NAT);
	} else {
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sport, daddr, dport, PF_RULESET_RDR);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
	}

	if (r != NULL) {
		struct pf_addr	*naddr;
		u_int16_t	*nport;

		if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
		    saddr, daddr, sport, dport))
			return r;

		/* XXX We only modify one side for now. */
		naddr = &(*nkp)->addr[1];
		nport = &(*nkp)->port[1];

		/*
		 * NOTE: Currently all translations will clear
		 *	 BRIDGE_MBUF_TAGGED, telling the bridge to
		 *	 ignore the original input encapsulation.
		 */
		switch (r->action) {
		case PF_NONAT:
		case PF_NOBINAT:
		case PF_NORDR:
			return (NULL);
		case PF_NAT:
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			if (pf_get_sport(pd, pd->af, pd->proto, r,
			    saddr, daddr, sport, dport,
			    naddr, nport, r->rpool.proxy_port[0],
			    r->rpool.proxy_port[1], sn)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return (NULL);
			}
			break;
		case PF_BINAT:
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			switch (direction) {
			case PF_OUT:
				/* outbound: rewrite source from the pool */
				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
					switch (pd->af) {
#ifdef INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				break;
			case PF_IN:
				/* inbound: undo the mapping toward r->src */
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#ifdef INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				break;
			}
			break;
		case PF_RDR: {
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
				return (NULL);
			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
			    PF_POOL_BITMASK)
				PF_POOLMASK(naddr, naddr,
				    &r->rpool.cur->addr.v.a.mask, daddr,
				    pd->af);

			if (r->rpool.proxy_port[1]) {
				u_int32_t	tmp_nport;

				/* map dport into the configured port range */
				tmp_nport = ((ntohs(dport) -
				    ntohs(r->dst.port[0])) %
				    (r->rpool.proxy_port[1] -
				    r->rpool.proxy_port[0] + 1)) +
				    r->rpool.proxy_port[0];

				/* wrap around if necessary */
				if (tmp_nport > 65535)
					tmp_nport -= 65535;
				*nport = htons((u_int16_t)tmp_nport);
			} else if (r->rpool.proxy_port[0]) {
				*nport = htons(r->rpool.proxy_port[0]);
			}
			pd->not_cpu_localized = 1;
			break;
		}
		default:
			return (NULL);
		}
	}

	return (r);
}

/*
 * Message used to run an inpcb hash lookup on the cpu that owns the
 * pcbinfo table (see pf_socket_lookup()).
 */
struct netmsg_hashlookup {
	struct netmsg_base	base;
	struct inpcb		**nm_pinp;	/* out: found pcb */
	struct inpcbinfo	*nm_pcbinfo;
	struct pf_addr		*nm_saddr;	/* foreign (wire) address */
	struct pf_addr		*nm_daddr;	/* local address */
	uint16_t		nm_sport;	/* ports in network byte order */
	uint16_t		nm_dport;
	sa_family_t		nm_af;		/* AF_INET or AF_INET6 */
};

#ifdef PF_SOCKET_LOOKUP_DOMSG
/*
 * Remote PCB-hash lookup handler: executed on the netisr cpu that owns
 * the target tcbinfo via lwkt_domsg() and stores the result through
 * nm_pinp before replying.
 */
static void
in_pcblookup_hash_handler(netmsg_t msg)
{
	struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg;

	if (rmsg->nm_af == AF_INET)
		*rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo,
		    rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4,
		    rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
#ifdef INET6
	else
		*rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo,
		    &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6,
		    rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL);
#endif /* INET6 */
	lwkt_replymsg(&rmsg->base.lmsg, 0);
}
#endif /* PF_SOCKET_LOOKUP_DOMSG */

/*
 * Find the inpcb (socket) matching this packet and record the owning
 * uid/gid in pd->lookup for user/group rule matching.
 *
 * Returns 1 on success (pd->lookup filled in), -1 on any failure
 * (unsupported protocol, no matching PCB, or the PCB lives on another
 * cpu and remote lookup is disabled).
 */
int
pf_socket_lookup(int direction, struct pf_pdesc *pd)
{
	struct pf_addr		*saddr, *daddr;
	u_int16_t		 sport, dport;
	struct inpcbinfo	*pi;
	struct inpcb		*inp;
	struct netmsg_hashlookup *msg = NULL;
#ifdef PF_SOCKET_LOOKUP_DOMSG
	struct netmsg_hashlookup msg0;
#endif
	int			 pi_cpu = 0;

	if (pd == NULL)
		return (-1);
	/* defaults: "no owner found" */
	pd->lookup.uid = UID_MAX;
	pd->lookup.gid = GID_MAX;
	pd->lookup.pid = NO_PID;
	/* orient the tuple so (saddr,sport) is the foreign end */
	if (direction == PF_IN) {
		saddr = pd->src;
		daddr = pd->dst;
	} else {
		saddr = pd->dst;
		daddr = pd->src;
	}
	switch (pd->proto) {
	case IPPROTO_TCP:
		if (pd->hdr.tcp == NULL)
			return (-1);
		sport = pd->hdr.tcp->th_sport;
		dport = pd->hdr.tcp->th_dport;

		/*
		 * TCP PCB tables are per-cpu; hash the 4-tuple to find
		 * the owning cpu's tcbinfo.
		 * NOTE(review): v4 fields are used regardless of af here —
		 * presumably only meaningful for AF_INET; verify for INET6.
		 */
		pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport);
		pi = &tcbinfo[pi_cpu];
		/*
		 * Our netstack runs lockless on MP systems
		 * (only for TCP connections at the moment).
		 *
		 * As we are not allowed to read another CPU's tcbinfo,
		 * we have to ask that CPU via remote call to search the
		 * table for us.
		 *
		 * Prepare a msg iff data belongs to another CPU.
		 */
		if (pi_cpu != mycpu->gd_cpuid) {
#ifdef PF_SOCKET_LOOKUP_DOMSG
			/*
			 * NOTE:
			 *
			 * Following lwkt_domsg() is dangerous and could
			 * lockup the network system, e.g.
			 *
			 * On 2 CPU system:
			 * netisr0 domsg to netisr1 (due to lookup)
			 * netisr1 domsg to netisr0 (due to lookup)
			 *
			 * We simply return -1 here, since we are probably
			 * called before NAT, so the TCP packet should
			 * already be on the correct CPU.
			 */
			msg = &msg0;
			netmsg_init(&msg->base, NULL, &curthread->td_msgport,
			    0, in_pcblookup_hash_handler);
			msg->nm_pinp = &inp;
			msg->nm_pcbinfo = pi;
			msg->nm_saddr = saddr;
			msg->nm_sport = sport;
			msg->nm_daddr = daddr;
			msg->nm_dport = dport;
			msg->nm_af = pd->af;
#else /* !PF_SOCKET_LOOKUP_DOMSG */
			kprintf("pf_socket_lookup: tcp packet not on the "
			    "correct cpu %d, cur cpu %d\n",
			    pi_cpu, mycpuid);
			print_backtrace(-1);
			return -1;
#endif /* PF_SOCKET_LOOKUP_DOMSG */
		}
		break;
	case IPPROTO_UDP:
		if (pd->hdr.udp == NULL)
			return (-1);
		sport = pd->hdr.udp->uh_sport;
		dport = pd->hdr.udp->uh_dport;
		pi = &udbinfo[mycpuid];
		break;
	default:
		return (-1);
	}
	/* PCB hash is keyed (foreign, local); swap for outbound packets */
	if (direction != PF_IN) {
		u_int16_t	p;

		p = sport;
		sport = dport;
		dport = p;
	}
	switch (pd->af) {
#ifdef INET6
	case AF_INET6:
		/*
		 * Query other CPU, second part
		 *
		 * msg only gets initialized when:
		 * 1) packet is TCP
		 * 2) the info belongs to another CPU
		 *
		 * Use some switch/case magic to avoid code duplication.
3480 */ 3481 if (msg == NULL) { 3482 inp = in6_pcblookup_hash(pi, &saddr->v6, sport, 3483 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); 3484 3485 if (inp == NULL) 3486 return (-1); 3487 break; 3488 } 3489 /* FALLTHROUGH if SMP and on other CPU */ 3490 #endif /* INET6 */ 3491 case AF_INET: 3492 if (msg != NULL) { 3493 lwkt_domsg(netisr_cpuport(pi_cpu), 3494 &msg->base.lmsg, 0); 3495 } else 3496 { 3497 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, 3498 dport, INPLOOKUP_WILDCARD, NULL); 3499 } 3500 if (inp == NULL) 3501 return (-1); 3502 break; 3503 3504 default: 3505 return (-1); 3506 } 3507 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid; 3508 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0]; 3509 return (1); 3510 } 3511 3512 u_int8_t 3513 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 3514 { 3515 int hlen; 3516 u_int8_t hdr[60]; 3517 u_int8_t *opt, optlen; 3518 u_int8_t wscale = 0; 3519 3520 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 3521 if (hlen <= sizeof(struct tcphdr)) 3522 return (0); 3523 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 3524 return (0); 3525 opt = hdr + sizeof(struct tcphdr); 3526 hlen -= sizeof(struct tcphdr); 3527 while (hlen >= 3) { 3528 switch (*opt) { 3529 case TCPOPT_EOL: 3530 case TCPOPT_NOP: 3531 ++opt; 3532 --hlen; 3533 break; 3534 case TCPOPT_WINDOW: 3535 wscale = opt[2]; 3536 if (wscale > TCP_MAX_WINSHIFT) 3537 wscale = TCP_MAX_WINSHIFT; 3538 wscale |= PF_WSCALE_FLAG; 3539 /* FALLTHROUGH */ 3540 default: 3541 optlen = opt[1]; 3542 if (optlen < 2) 3543 optlen = 2; 3544 hlen -= optlen; 3545 opt += optlen; 3546 break; 3547 } 3548 } 3549 return (wscale); 3550 } 3551 3552 u_int16_t 3553 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 3554 { 3555 int hlen; 3556 u_int8_t hdr[60]; 3557 u_int8_t *opt, optlen; 3558 u_int16_t mss = tcp_mssdflt; 3559 3560 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 3561 if (hlen <= sizeof(struct tcphdr)) 3562 return (0); 3563 if 
(!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 3564 return (0); 3565 opt = hdr + sizeof(struct tcphdr); 3566 hlen -= sizeof(struct tcphdr); 3567 while (hlen >= TCPOLEN_MAXSEG) { 3568 switch (*opt) { 3569 case TCPOPT_EOL: 3570 case TCPOPT_NOP: 3571 ++opt; 3572 --hlen; 3573 break; 3574 case TCPOPT_MAXSEG: 3575 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); 3576 /* FALLTHROUGH */ 3577 default: 3578 optlen = opt[1]; 3579 if (optlen < 2) 3580 optlen = 2; 3581 hlen -= optlen; 3582 opt += optlen; 3583 break; 3584 } 3585 } 3586 return (mss); 3587 } 3588 3589 u_int16_t 3590 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) 3591 { 3592 #ifdef INET 3593 struct sockaddr_in *dst; 3594 struct route ro; 3595 #endif /* INET */ 3596 #ifdef INET6 3597 struct sockaddr_in6 *dst6; 3598 struct route_in6 ro6; 3599 #endif /* INET6 */ 3600 struct rtentry *rt = NULL; 3601 int hlen = 0; 3602 u_int16_t mss = tcp_mssdflt; 3603 3604 switch (af) { 3605 #ifdef INET 3606 case AF_INET: 3607 hlen = sizeof(struct ip); 3608 bzero(&ro, sizeof(ro)); 3609 dst = (struct sockaddr_in *)&ro.ro_dst; 3610 dst->sin_family = AF_INET; 3611 dst->sin_len = sizeof(*dst); 3612 dst->sin_addr = addr->v4; 3613 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); 3614 rt = ro.ro_rt; 3615 break; 3616 #endif /* INET */ 3617 #ifdef INET6 3618 case AF_INET6: 3619 hlen = sizeof(struct ip6_hdr); 3620 bzero(&ro6, sizeof(ro6)); 3621 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; 3622 dst6->sin6_family = AF_INET6; 3623 dst6->sin6_len = sizeof(*dst6); 3624 dst6->sin6_addr = addr->v6; 3625 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); 3626 rt = ro6.ro_rt; 3627 break; 3628 #endif /* INET6 */ 3629 } 3630 3631 if (rt && rt->rt_ifp) { 3632 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 3633 mss = max(tcp_mssdflt, mss); 3634 RTFREE(rt); 3635 } 3636 mss = min(mss, offer); 3637 mss = max(mss, 64); /* sanity - at least max opt space */ 3638 return (mss); 3639 } 3640 3641 void 3642 
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 3643 { 3644 struct pf_rule *r = s->rule.ptr; 3645 3646 s->rt_kif = NULL; 3647 if (!r->rt || r->rt == PF_FASTROUTE) 3648 return; 3649 switch (s->key[PF_SK_WIRE]->af) { 3650 #ifdef INET 3651 case AF_INET: 3652 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, 3653 &s->nat_src_node); 3654 s->rt_kif = r->rpool.cur->kif; 3655 break; 3656 #endif /* INET */ 3657 #ifdef INET6 3658 case AF_INET6: 3659 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, 3660 &s->nat_src_node); 3661 s->rt_kif = r->rpool.cur->kif; 3662 break; 3663 #endif /* INET6 */ 3664 } 3665 } 3666 3667 u_int32_t 3668 pf_tcp_iss(struct pf_pdesc *pd) 3669 { 3670 MD5_CTX ctx; 3671 u_int32_t digest[4]; 3672 3673 if (pf_tcp_secret_init == 0) { 3674 lwkt_gettoken(&pf_gtoken); 3675 if (pf_tcp_secret_init == 0) { 3676 karc4rand(pf_tcp_secret, sizeof(pf_tcp_secret)); 3677 MD5Init(&pf_tcp_secret_ctx); 3678 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3679 sizeof(pf_tcp_secret)); 3680 pf_tcp_secret_init = 1; 3681 } 3682 lwkt_reltoken(&pf_gtoken); 3683 } 3684 ctx = pf_tcp_secret_ctx; 3685 3686 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); 3687 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); 3688 if (pd->af == AF_INET6) { 3689 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); 3690 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); 3691 } else { 3692 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); 3693 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); 3694 } 3695 MD5Final((u_char *)digest, &ctx); 3696 pf_tcp_iss_off += 4096; 3697 3698 return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off); 3699 } 3700 3701 int 3702 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 3703 struct pfi_kif *kif, struct mbuf *m, int off, void *h, 3704 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, 3705 struct ifqueue *ifq, struct inpcb *inp) 3706 { 3707 
struct pf_rule *nr = NULL; 3708 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 3709 sa_family_t af = pd->af; 3710 struct pf_rule *r, *a = NULL; 3711 struct pf_ruleset *ruleset = NULL; 3712 struct pf_src_node *nsn = NULL; 3713 struct tcphdr *th = pd->hdr.tcp; 3714 struct pf_state_key *skw = NULL, *sks = NULL; 3715 struct pf_state_key *sk = NULL, *nk = NULL; 3716 u_short reason; 3717 int rewrite = 0, hdrlen = 0; 3718 int tag = -1, rtableid = -1; 3719 int asd = 0; 3720 int match = 0; 3721 int state_icmp = 0; 3722 u_int16_t sport = 0, dport = 0; 3723 u_int16_t bproto_sum = 0, bip_sum = 0; 3724 u_int8_t icmptype = 0, icmpcode = 0; 3725 3726 3727 if (direction == PF_IN && pf_check_congestion(ifq)) { 3728 REASON_SET(&reason, PFRES_CONGEST); 3729 return (PF_DROP); 3730 } 3731 3732 if (inp != NULL) 3733 pd->lookup.done = pf_socket_lookup(direction, pd); 3734 else if (debug_pfugidhack) { 3735 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); 3736 pd->lookup.done = pf_socket_lookup(direction, pd); 3737 } 3738 3739 switch (pd->proto) { 3740 case IPPROTO_TCP: 3741 sport = th->th_sport; 3742 dport = th->th_dport; 3743 hdrlen = sizeof(*th); 3744 break; 3745 case IPPROTO_UDP: 3746 sport = pd->hdr.udp->uh_sport; 3747 dport = pd->hdr.udp->uh_dport; 3748 hdrlen = sizeof(*pd->hdr.udp); 3749 break; 3750 #ifdef INET 3751 case IPPROTO_ICMP: 3752 if (pd->af != AF_INET) 3753 break; 3754 sport = dport = pd->hdr.icmp->icmp_id; 3755 hdrlen = sizeof(*pd->hdr.icmp); 3756 icmptype = pd->hdr.icmp->icmp_type; 3757 icmpcode = pd->hdr.icmp->icmp_code; 3758 3759 if (icmptype == ICMP_UNREACH || 3760 icmptype == ICMP_SOURCEQUENCH || 3761 icmptype == ICMP_REDIRECT || 3762 icmptype == ICMP_TIMXCEED || 3763 icmptype == ICMP_PARAMPROB) 3764 state_icmp++; 3765 break; 3766 #endif /* INET */ 3767 #ifdef INET6 3768 case IPPROTO_ICMPV6: 3769 if (af != AF_INET6) 3770 break; 3771 sport = dport = pd->hdr.icmp6->icmp6_id; 3772 hdrlen = sizeof(*pd->hdr.icmp6); 3773 icmptype = pd->hdr.icmp6->icmp6_type; 3774 
icmpcode = pd->hdr.icmp6->icmp6_code; 3775 3776 if (icmptype == ICMP6_DST_UNREACH || 3777 icmptype == ICMP6_PACKET_TOO_BIG || 3778 icmptype == ICMP6_TIME_EXCEEDED || 3779 icmptype == ICMP6_PARAM_PROB) 3780 state_icmp++; 3781 break; 3782 #endif /* INET6 */ 3783 default: 3784 sport = dport = hdrlen = 0; 3785 break; 3786 } 3787 3788 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 3789 3790 /* check packet for BINAT/NAT/RDR */ 3791 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, 3792 &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { 3793 if (nk == NULL || sk == NULL) { 3794 REASON_SET(&reason, PFRES_MEMORY); 3795 goto cleanup; 3796 } 3797 3798 if (pd->ip_sum) 3799 bip_sum = *pd->ip_sum; 3800 3801 m->m_flags &= ~M_HASH; 3802 switch (pd->proto) { 3803 case IPPROTO_TCP: 3804 bproto_sum = th->th_sum; 3805 pd->proto_sum = &th->th_sum; 3806 3807 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3808 nk->port[pd->sidx] != sport) { 3809 pf_change_ap(saddr, &th->th_sport, pd->ip_sum, 3810 &th->th_sum, &nk->addr[pd->sidx], 3811 nk->port[pd->sidx], 0, af); 3812 pd->sport = &th->th_sport; 3813 sport = th->th_sport; 3814 } 3815 3816 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3817 nk->port[pd->didx] != dport) { 3818 pf_change_ap(daddr, &th->th_dport, pd->ip_sum, 3819 &th->th_sum, &nk->addr[pd->didx], 3820 nk->port[pd->didx], 0, af); 3821 dport = th->th_dport; 3822 pd->dport = &th->th_dport; 3823 } 3824 rewrite++; 3825 break; 3826 case IPPROTO_UDP: 3827 bproto_sum = pd->hdr.udp->uh_sum; 3828 pd->proto_sum = &pd->hdr.udp->uh_sum; 3829 3830 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3831 nk->port[pd->sidx] != sport) { 3832 pf_change_ap(saddr, &pd->hdr.udp->uh_sport, 3833 pd->ip_sum, &pd->hdr.udp->uh_sum, 3834 &nk->addr[pd->sidx], 3835 nk->port[pd->sidx], 1, af); 3836 sport = pd->hdr.udp->uh_sport; 3837 pd->sport = &pd->hdr.udp->uh_sport; 3838 } 3839 3840 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3841 nk->port[pd->didx] != 
dport) { 3842 pf_change_ap(daddr, &pd->hdr.udp->uh_dport, 3843 pd->ip_sum, &pd->hdr.udp->uh_sum, 3844 &nk->addr[pd->didx], 3845 nk->port[pd->didx], 1, af); 3846 dport = pd->hdr.udp->uh_dport; 3847 pd->dport = &pd->hdr.udp->uh_dport; 3848 } 3849 rewrite++; 3850 break; 3851 #ifdef INET 3852 case IPPROTO_ICMP: 3853 nk->port[0] = nk->port[1]; 3854 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) 3855 pf_change_a(&saddr->v4.s_addr, pd->ip_sum, 3856 nk->addr[pd->sidx].v4.s_addr, 0); 3857 3858 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) 3859 pf_change_a(&daddr->v4.s_addr, pd->ip_sum, 3860 nk->addr[pd->didx].v4.s_addr, 0); 3861 3862 if (nk->port[1] != pd->hdr.icmp->icmp_id) { 3863 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( 3864 pd->hdr.icmp->icmp_cksum, sport, 3865 nk->port[1], 0); 3866 pd->hdr.icmp->icmp_id = nk->port[1]; 3867 pd->sport = &pd->hdr.icmp->icmp_id; 3868 } 3869 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 3870 break; 3871 #endif /* INET */ 3872 #ifdef INET6 3873 case IPPROTO_ICMPV6: 3874 nk->port[0] = nk->port[1]; 3875 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) 3876 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, 3877 &nk->addr[pd->sidx], 0); 3878 3879 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) 3880 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, 3881 &nk->addr[pd->didx], 0); 3882 rewrite++; 3883 break; 3884 #endif /* INET */ 3885 default: 3886 switch (af) { 3887 #ifdef INET 3888 case AF_INET: 3889 if (PF_ANEQ(saddr, 3890 &nk->addr[pd->sidx], AF_INET)) 3891 pf_change_a(&saddr->v4.s_addr, 3892 pd->ip_sum, 3893 nk->addr[pd->sidx].v4.s_addr, 0); 3894 3895 if (PF_ANEQ(daddr, 3896 &nk->addr[pd->didx], AF_INET)) 3897 pf_change_a(&daddr->v4.s_addr, 3898 pd->ip_sum, 3899 nk->addr[pd->didx].v4.s_addr, 0); 3900 break; 3901 #endif /* INET */ 3902 #ifdef INET6 3903 case AF_INET6: 3904 if (PF_ANEQ(saddr, 3905 &nk->addr[pd->sidx], AF_INET6)) 3906 PF_ACPY(saddr, &nk->addr[pd->sidx], af); 3907 3908 if (PF_ANEQ(daddr, 3909 
&nk->addr[pd->didx], AF_INET6)) 3910 PF_ACPY(saddr, &nk->addr[pd->didx], af); 3911 break; 3912 #endif /* INET */ 3913 } 3914 break; 3915 } 3916 if (nr->natpass) 3917 r = NULL; 3918 pd->nat_rule = nr; 3919 } 3920 3921 while (r != NULL) { 3922 r->evaluations++; 3923 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3924 r = r->skip[PF_SKIP_IFP].ptr; 3925 else if (r->direction && r->direction != direction) 3926 r = r->skip[PF_SKIP_DIR].ptr; 3927 else if (r->af && r->af != af) 3928 r = r->skip[PF_SKIP_AF].ptr; 3929 else if (r->proto && r->proto != pd->proto) 3930 r = r->skip[PF_SKIP_PROTO].ptr; 3931 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, 3932 r->src.neg, kif)) 3933 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 3934 /* tcp/udp only. port_op always 0 in other cases */ 3935 else if (r->src.port_op && !pf_match_port(r->src.port_op, 3936 r->src.port[0], r->src.port[1], sport)) 3937 r = r->skip[PF_SKIP_SRC_PORT].ptr; 3938 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, 3939 r->dst.neg, NULL)) 3940 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3941 /* tcp/udp only. port_op always 0 in other cases */ 3942 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 3943 r->dst.port[0], r->dst.port[1], dport)) 3944 r = r->skip[PF_SKIP_DST_PORT].ptr; 3945 /* icmp only. type always 0 in other cases */ 3946 else if (r->type && r->type != icmptype + 1) 3947 r = TAILQ_NEXT(r, entries); 3948 /* icmp only. type always 0 in other cases */ 3949 else if (r->code && r->code != icmpcode + 1) 3950 r = TAILQ_NEXT(r, entries); 3951 else if (r->tos && !(r->tos == pd->tos)) 3952 r = TAILQ_NEXT(r, entries); 3953 else if (r->rule_flag & PFRULE_FRAGMENT) 3954 r = TAILQ_NEXT(r, entries); 3955 else if (pd->proto == IPPROTO_TCP && 3956 (r->flagset & th->th_flags) != r->flags) 3957 r = TAILQ_NEXT(r, entries); 3958 /* tcp/udp only. 
uid.op always 0 in other cases */ 3959 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = 3960 pf_socket_lookup(direction, pd), 1)) && 3961 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], 3962 pd->lookup.uid)) 3963 r = TAILQ_NEXT(r, entries); 3964 /* tcp/udp only. gid.op always 0 in other cases */ 3965 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = 3966 pf_socket_lookup(direction, pd), 1)) && 3967 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], 3968 pd->lookup.gid)) 3969 r = TAILQ_NEXT(r, entries); 3970 else if (r->prob && 3971 r->prob <= karc4random()) 3972 r = TAILQ_NEXT(r, entries); 3973 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3974 r = TAILQ_NEXT(r, entries); 3975 else if (r->os_fingerprint != PF_OSFP_ANY && 3976 (pd->proto != IPPROTO_TCP || !pf_osfp_match( 3977 pf_osfp_fingerprint(pd, m, off, th), 3978 r->os_fingerprint))) 3979 r = TAILQ_NEXT(r, entries); 3980 else { 3981 if (r->tag) 3982 tag = r->tag; 3983 if (r->rtableid >= 0) 3984 rtableid = r->rtableid; 3985 if (r->anchor == NULL) { 3986 match = 1; 3987 *rm = r; 3988 *am = a; 3989 *rsm = ruleset; 3990 if ((*rm)->quick) 3991 break; 3992 r = TAILQ_NEXT(r, entries); 3993 } else 3994 pf_step_into_anchor(&asd, &ruleset, 3995 PF_RULESET_FILTER, &r, &a, &match); 3996 } 3997 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3998 PF_RULESET_FILTER, &r, &a, &match)) 3999 break; 4000 } 4001 r = *rm; 4002 a = *am; 4003 ruleset = *rsm; 4004 4005 REASON_SET(&reason, PFRES_MATCH); 4006 4007 if (r->log || (nr != NULL && nr->log)) { 4008 if (rewrite) 4009 m_copyback(m, off, hdrlen, pd->hdr.any); 4010 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, 4011 a, ruleset, pd); 4012 } 4013 4014 if ((r->action == PF_DROP) && 4015 ((r->rule_flag & PFRULE_RETURNRST) || 4016 (r->rule_flag & PFRULE_RETURNICMP) || 4017 (r->rule_flag & PFRULE_RETURN))) { 4018 /* undo NAT changes, if they have taken place */ 4019 if (nr != NULL) { 4020 PF_ACPY(saddr, &sk->addr[pd->sidx], af); 4021 PF_ACPY(daddr, &sk->addr[pd->didx], af); 4022 if (pd->sport) 4023 *pd->sport = sk->port[pd->sidx]; 4024 if (pd->dport) 4025 *pd->dport = sk->port[pd->didx]; 4026 if (pd->proto_sum) 4027 *pd->proto_sum = bproto_sum; 4028 if (pd->ip_sum) 4029 *pd->ip_sum = bip_sum; 4030 m_copyback(m, off, hdrlen, pd->hdr.any); 4031 } 4032 if (pd->proto == IPPROTO_TCP && 4033 ((r->rule_flag & PFRULE_RETURNRST) || 4034 (r->rule_flag & PFRULE_RETURN)) && 4035 !(th->th_flags & TH_RST)) { 4036 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 4037 int len = 0; 4038 struct ip *h4; 4039 #ifdef INET6 4040 struct ip6_hdr *h6; 4041 #endif 4042 switch (af) { 4043 case AF_INET: 4044 h4 = mtod(m, struct ip *); 4045 len = h4->ip_len - off; 4046 break; 4047 #ifdef INET6 4048 case AF_INET6: 4049 h6 = mtod(m, struct ip6_hdr *); 4050 len = h6->ip6_plen - (off - sizeof(*h6)); 4051 break; 4052 #endif 4053 } 4054 4055 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) 4056 REASON_SET(&reason, PFRES_PROTCKSUM); 4057 else { 4058 if (th->th_flags & TH_SYN) 4059 ack++; 4060 if (th->th_flags & TH_FIN) 4061 ack++; 4062 pf_send_tcp(r, af, pd->dst, 4063 pd->src, th->th_dport, th->th_sport, 4064 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 4065 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); 4066 } 4067 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && 4068 r->return_icmp) 4069 pf_send_icmp(m, r->return_icmp >> 8, 4070 r->return_icmp & 255, af, r); 4071 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && 4072 r->return_icmp6) 4073 pf_send_icmp(m, r->return_icmp6 >> 8, 4074 r->return_icmp6 & 255, af, r); 4075 } 4076 4077 if (r->action == PF_DROP) 4078 goto cleanup; 4079 4080 if 
(pf_tag_packet(m, tag, rtableid)) { 4081 REASON_SET(&reason, PFRES_MEMORY); 4082 goto cleanup; 4083 } 4084 4085 if (!state_icmp && (r->keep_state || nr != NULL || 4086 (pd->flags & PFDESC_TCP_NORM))) { 4087 int action; 4088 action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, 4089 off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, 4090 bip_sum, hdrlen); 4091 if (action != PF_PASS) 4092 return (action); 4093 } 4094 4095 /* copy back packet headers if we performed NAT operations */ 4096 if (rewrite) 4097 m_copyback(m, off, hdrlen, pd->hdr.any); 4098 4099 return (PF_PASS); 4100 4101 cleanup: 4102 if (sk != NULL) 4103 kfree(sk, M_PFSTATEKEYPL); 4104 if (nk != NULL) 4105 kfree(nk, M_PFSTATEKEYPL); 4106 return (PF_DROP); 4107 } 4108 4109 static __inline int 4110 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, 4111 struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, 4112 struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, 4113 struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, 4114 struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, 4115 u_int16_t bip_sum, int hdrlen) 4116 { 4117 struct pf_state *s = NULL; 4118 struct pf_src_node *sn = NULL; 4119 struct tcphdr *th = pd->hdr.tcp; 4120 u_int16_t mss = tcp_mssdflt; 4121 u_short reason; 4122 int cpu = mycpu->gd_cpuid; 4123 4124 /* check maximums */ 4125 if (r->max_states && (r->states_cur >= r->max_states)) { 4126 PF_INC_LCOUNTER(LCNT_STATES); 4127 REASON_SET(&reason, PFRES_MAXSTATES); 4128 return (PF_DROP); 4129 } 4130 /* src node for filter rule */ 4131 if ((r->rule_flag & PFRULE_SRCTRACK || 4132 r->rpool.opts & PF_POOL_STICKYADDR) && 4133 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { 4134 REASON_SET(&reason, PFRES_SRCLIMIT); 4135 goto csfailed; 4136 } 4137 /* src node for translation rule */ 4138 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && 4139 pf_insert_src_node(&nsn, nr, 
	    &sk->addr[pd->sidx], pd->af)) {
		REASON_SET(&reason, PFRES_SRCLIMIT);
		goto csfailed;
	}
	s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO);
	if (s == NULL) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}
	lockinit(&s->lk, "pfstlk", 0, 0);
	s->id = 0; /* XXX Do we really need that? not in OpenBSD */
	s->creatorid = 0;
	s->rule.ptr = r;
	s->nat_rule.ptr = nr;
	s->anchor.ptr = a;
	/* keep the state invisible until fully constructed */
	s->state_flags = PFSTATE_CREATEINPROG;
	STATE_INC_COUNTERS(s);
	if (r->allow_opts)
		s->state_flags |= PFSTATE_ALLOWOPTS;
	if (r->rule_flag & PFRULE_STATESLOPPY)
		s->state_flags |= PFSTATE_SLOPPY;
	if (pd->not_cpu_localized)
		s->state_flags |= PFSTATE_STACK_GLOBAL;

	s->log = r->log & PF_LOG_ALL;
	if (nr != NULL)
		s->log |= nr->log & PF_LOG_ALL;
	/* protocol-specific initial peer states and timeouts */
	switch (pd->proto) {
	case IPPROTO_TCP:
		s->src.seqlo = ntohl(th->th_seq);
		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    r->keep_state == PF_STATE_MODULATE) {
			/* Generate sequence number modulator */
			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
			    0)
				s->src.seqdiff = 1;
			pf_change_a(&th->th_seq, &th->th_sum,
			    htonl(s->src.seqlo + s->src.seqdiff), 0);
			*rewrite = 1;
		} else
			s->src.seqdiff = 0;
		if (th->th_flags & TH_SYN) {
			s->src.seqhi++;
			s->src.wscale = pf_get_wscale(m, off,
			    th->th_off, pd->af);
		}
		s->src.max_win = MAX(ntohs(th->th_win), 1);
		if (s->src.wscale & PF_WSCALE_MASK) {
			/* Remove scale factor from initial window */
			int win = s->src.max_win;
			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
			s->src.max_win = (win - 1) >>
			    (s->src.wscale & PF_WSCALE_MASK);
		}
		if (th->th_flags & TH_FIN)
			s->src.seqhi++;
		s->dst.seqhi = 1;
		s->dst.max_win = 1;
		s->src.state = TCPS_SYN_SENT;
		s->dst.state = TCPS_CLOSED;
		s->timeout = PFTM_TCP_FIRST_PACKET;
		break;
	case IPPROTO_UDP:
		s->src.state = PFUDPS_SINGLE;
		s->dst.state = PFUDPS_NO_TRAFFIC;
		s->timeout = PFTM_UDP_FIRST_PACKET;
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif
		s->timeout = PFTM_ICMP_FIRST_PACKET;
		break;
	default:
		s->src.state = PFOTHERS_SINGLE;
		s->dst.state = PFOTHERS_NO_TRAFFIC;
		s->timeout = PFTM_OTHER_FIRST_PACKET;
	}

	s->creation = time_second;
	s->expire = time_second;

	if (sn != NULL) {
		s->src_node = sn;
		s->src_node->states++;
	}
	if (nsn != NULL) {
		/* XXX We only modify one side for now. */
		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
		s->nat_src_node = nsn;
		s->nat_src_node->states++;
	}
	if (pd->proto == IPPROTO_TCP) {
		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
		    off, pd, th, &s->src, &s->dst)) {
			REASON_SET(&reason, PFRES_MEMORY);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			kfree(s, M_PFSTATEPL);
			return (PF_DROP);
		}
		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
		    &s->src, &s->dst, rewrite)) {
			/* This really shouldn't happen!!! */
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_normalize_tcp_stateful failed on first pkt"));
			pf_normalize_tcp_cleanup(s);
			pf_src_tree_remove_state(s);
			STATE_DEC_COUNTERS(s);
			kfree(s, M_PFSTATEPL);
			return (PF_DROP);
		}
	}
	s->direction = pd->dir;

	/* no translation rule set up keys yet: build identity keys now */
	if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk,
	    pd->src, pd->dst, sport, dport)) {
		REASON_SET(&reason, PFRES_MEMORY);
		goto csfailed;
	}

	if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) {
		if (pd->proto == IPPROTO_TCP)
			pf_normalize_tcp_cleanup(s);
		REASON_SET(&reason, PFRES_STATEINS);
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		kfree(s, M_PFSTATEPL);
		return (PF_DROP);
	} else
		*sm = s;

	pf_set_rt_ifp(s, pd->src);	/* needs s->state_key set */
	if (tag > 0) {
		pf_tag_ref(tag);
		s->tag = tag;
	}
	/* synproxy: answer the initial SYN ourselves, drop the original */
	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
		s->src.state = PF_TCPS_PROXY_SRC;
		/* undo NAT changes, if they have taken place */
		if (nr != NULL) {
			struct pf_state_key *skt = s->key[PF_SK_WIRE];
			if (pd->dir == PF_OUT)
				skt = s->key[PF_SK_STACK];
			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
			if (pd->sport)
				*pd->sport = skt->port[pd->sidx];
			if (pd->dport)
				*pd->dport = skt->port[pd->didx];
			if (pd->proto_sum)
				*pd->proto_sum = bproto_sum;
			if (pd->ip_sum)
				*pd->ip_sum = bip_sum;
			m->m_flags &= ~M_HASH;
			m_copyback(m, off, hdrlen, pd->hdr.any);
		}
		s->src.seqhi = htonl(karc4random());
		/* Find mss option */
		mss = pf_get_mss(m, off, th->th_off, pd->af);
		mss = pf_calc_mss(pd->src, pd->af, mss);
		mss = pf_calc_mss(pd->dst, pd->af, mss);
		s->src.mss = mss;
		s->state_flags &= ~PFSTATE_CREATEINPROG;
		pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	s->state_flags &= ~PFSTATE_CREATEINPROG;
	return (PF_PASS);

csfailed:
	/* release keys and any source nodes created by this call */
	if (sk != NULL)
		kfree(sk, M_PFSTATEKEYPL);
	if (nk != NULL)
		kfree(nk, M_PFSTATEKEYPL);

	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], sn);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
		atomic_add_int(&pf_status.src_nodes, -1);
		kfree(sn, M_PFSRCTREEPL);
	}
	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], nsn);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
		atomic_add_int(&pf_status.src_nodes, -1);
		kfree(nsn, M_PFSRCTREEPL);
	}
	if (s) {
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		kfree(s, M_PFSTATEPL);
	}

	return (PF_DROP);
}

/*
 * Rule evaluation for fragments (no ports/flags available): walk the
 * filter ruleset, skipping rules that require transport-layer fields.
 * Returns PF_PASS or PF_DROP; sets *rm/*am/*rsm on match.
 */
int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	sa_family_t		 af = pd->af;
	u_short			 reason;
	int			 tag = -1;
	int			 asd = 0;
	int			 match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->tos && !(r->tos == pd->tos))
			r = TAILQ_NEXT(r, entries);
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		/* rules needing ports/flags/type can never match a fragment */
		else if (pd->proto == IPPROTO_UDP &&
		    (r->src.port_op || r->dst.port_op))
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->src.port_op || r->dst.port_op || r->flagset))
			r = TAILQ_NEXT(r, entries);
		else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <= karc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log)
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
		    pd);

	if (r->action != PF_PASS)
		return (PF_DROP);

	if (pf_tag_packet(m, tag, -1)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Called with state locked
 */
int
pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
    struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
    struct pf_pdesc *pd, u_short *reason, int *copyback)
{
	struct tcphdr		*th =
pd->hdr.tcp; 4438 u_int16_t win = ntohs(th->th_win); 4439 u_int32_t ack, end, seq, orig_seq; 4440 u_int8_t sws, dws; 4441 int ackskew; 4442 4443 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4444 sws = src->wscale & PF_WSCALE_MASK; 4445 dws = dst->wscale & PF_WSCALE_MASK; 4446 } else { 4447 sws = dws = 0; 4448 } 4449 4450 /* 4451 * Sequence tracking algorithm from Guido van Rooij's paper: 4452 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4453 * tcp_filtering.ps 4454 */ 4455 4456 orig_seq = seq = ntohl(th->th_seq); 4457 if (src->seqlo == 0) { 4458 /* First packet from this end. Set its state */ 4459 4460 if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && 4461 src->scrub == NULL) { 4462 if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { 4463 REASON_SET(reason, PFRES_MEMORY); 4464 return (PF_DROP); 4465 } 4466 } 4467 4468 /* Deferred generation of sequence number modulator */ 4469 if (dst->seqdiff && !src->seqdiff) { 4470 /* use random iss for the TCP server */ 4471 while ((src->seqdiff = karc4random() - seq) == 0) 4472 ; 4473 ack = ntohl(th->th_ack) - dst->seqdiff; 4474 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4475 src->seqdiff), 0); 4476 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4477 *copyback = 1; 4478 } else { 4479 ack = ntohl(th->th_ack); 4480 } 4481 4482 end = seq + pd->p_len; 4483 if (th->th_flags & TH_SYN) { 4484 end++; 4485 (*state)->sync_flags |= PFSTATE_GOT_SYN2; 4486 if (dst->wscale & PF_WSCALE_FLAG) { 4487 src->wscale = pf_get_wscale(m, off, th->th_off, 4488 pd->af); 4489 if (src->wscale & PF_WSCALE_FLAG) { 4490 /* Remove scale factor from initial 4491 * window */ 4492 sws = src->wscale & PF_WSCALE_MASK; 4493 win = ((u_int32_t)win + (1 << sws) - 1) 4494 >> sws; 4495 dws = dst->wscale & PF_WSCALE_MASK; 4496 } else { 4497 /* fixup other window */ 4498 dst->max_win <<= dst->wscale & 4499 PF_WSCALE_MASK; 4500 /* in case of a retrans SYN|ACK */ 4501 dst->wscale = 0; 4502 } 4503 } 4504 } 4505 if 
(th->th_flags & TH_FIN) 4506 end++; 4507 4508 src->seqlo = seq; 4509 if (src->state < TCPS_SYN_SENT) 4510 src->state = TCPS_SYN_SENT; 4511 4512 /* 4513 * May need to slide the window (seqhi may have been set by 4514 * the crappy stack check or if we picked up the connection 4515 * after establishment) 4516 */ 4517 if (src->seqhi == 1 || 4518 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4519 src->seqhi = end + MAX(1, dst->max_win << dws); 4520 if (win > src->max_win) 4521 src->max_win = win; 4522 4523 } else { 4524 ack = ntohl(th->th_ack) - dst->seqdiff; 4525 if (src->seqdiff) { 4526 /* Modulate sequence numbers */ 4527 pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + 4528 src->seqdiff), 0); 4529 pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); 4530 *copyback = 1; 4531 } 4532 end = seq + pd->p_len; 4533 if (th->th_flags & TH_SYN) 4534 end++; 4535 if (th->th_flags & TH_FIN) 4536 end++; 4537 } 4538 4539 if ((th->th_flags & TH_ACK) == 0) { 4540 /* Let it pass through the ack skew check */ 4541 ack = dst->seqlo; 4542 } else if ((ack == 0 && 4543 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 4544 /* broken tcp stacks do not set ack */ 4545 (dst->state < TCPS_SYN_SENT)) { 4546 /* 4547 * Many stacks (ours included) will set the ACK number in an 4548 * FIN|ACK if the SYN times out -- no sequence to ACK. 4549 */ 4550 ack = dst->seqlo; 4551 } 4552 4553 if (seq == end) { 4554 /* Ease sequencing restrictions on no data packets */ 4555 seq = src->seqlo; 4556 end = seq; 4557 } 4558 4559 ackskew = dst->seqlo - ack; 4560 4561 4562 /* 4563 * Need to demodulate the sequence numbers in any TCP SACK options 4564 * (Selective ACK). We could optionally validate the SACK values 4565 * against the current ACK window, either forwards or backwards, but 4566 * I'm not confident that SACK has been implemented properly 4567 * everywhere. It wouldn't surprise me if several stacks accidently 4568 * SACK too far backwards of previously ACKed data. 
There really aren't 4569 * any security implications of bad SACKing unless the target stack 4570 * doesn't validate the option length correctly. Someone trying to 4571 * spoof into a TCP connection won't bother blindly sending SACK 4572 * options anyway. 4573 */ 4574 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 4575 if (pf_modulate_sack(m, off, pd, th, dst)) 4576 *copyback = 1; 4577 } 4578 4579 4580 #define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 4581 if (SEQ_GEQ(src->seqhi, end) && 4582 /* Last octet inside other's window space */ 4583 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 4584 /* Retrans: not more than one window back */ 4585 (ackskew >= -MAXACKWINDOW) && 4586 /* Acking not more than one reassembled fragment backwards */ 4587 (ackskew <= (MAXACKWINDOW << sws)) && 4588 /* Acking not more than one window forward */ 4589 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 4590 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || 4591 (pd->flags & PFDESC_IP_REAS) == 0)) { 4592 /* Require an exact/+1 sequence match on resets when possible */ 4593 4594 if (dst->scrub || src->scrub) { 4595 if (pf_normalize_tcp_stateful(m, off, pd, reason, th, 4596 *state, src, dst, copyback)) 4597 return (PF_DROP); 4598 } 4599 4600 /* update max window */ 4601 if (src->max_win < win) 4602 src->max_win = win; 4603 /* synchronize sequencing */ 4604 if (SEQ_GT(end, src->seqlo)) 4605 src->seqlo = end; 4606 /* slide the window of what the other end can send */ 4607 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4608 dst->seqhi = ack + MAX((win << sws), 1); 4609 4610 4611 /* update states */ 4612 if (th->th_flags & TH_SYN) 4613 if (src->state < TCPS_SYN_SENT) 4614 src->state = TCPS_SYN_SENT; 4615 if (th->th_flags & TH_FIN) 4616 if (src->state < TCPS_CLOSING) 4617 src->state = TCPS_CLOSING; 4618 if (th->th_flags & TH_ACK) { 4619 if (dst->state == TCPS_SYN_SENT) { 4620 dst->state = TCPS_ESTABLISHED; 4621 if 
(src->state == TCPS_ESTABLISHED && 4622 (*state)->src_node != NULL && 4623 pf_src_connlimit(*state)) { 4624 REASON_SET(reason, PFRES_SRCLIMIT); 4625 return (PF_DROP); 4626 } 4627 } else if (dst->state == TCPS_CLOSING) 4628 dst->state = TCPS_FIN_WAIT_2; 4629 } 4630 if (th->th_flags & TH_RST) 4631 src->state = dst->state = TCPS_TIME_WAIT; 4632 4633 /* update expire time */ 4634 (*state)->expire = time_second; 4635 if (src->state >= TCPS_FIN_WAIT_2 && 4636 dst->state >= TCPS_FIN_WAIT_2) 4637 (*state)->timeout = PFTM_TCP_CLOSED; 4638 else if (src->state >= TCPS_CLOSING && 4639 dst->state >= TCPS_CLOSING) 4640 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4641 else if (src->state < TCPS_ESTABLISHED || 4642 dst->state < TCPS_ESTABLISHED) 4643 (*state)->timeout = PFTM_TCP_OPENING; 4644 else if (src->state >= TCPS_CLOSING || 4645 dst->state >= TCPS_CLOSING) 4646 (*state)->timeout = PFTM_TCP_CLOSING; 4647 else if ((th->th_flags & TH_SYN) && 4648 ((*state)->state_flags & PFSTATE_SLOPPY)) 4649 (*state)->timeout = PFTM_TCP_FIRST_PACKET; 4650 else 4651 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4652 4653 /* Fall through to PASS packet */ 4654 4655 } else if ((dst->state < TCPS_SYN_SENT || 4656 dst->state >= TCPS_FIN_WAIT_2 || 4657 src->state >= TCPS_FIN_WAIT_2) && 4658 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && 4659 /* Within a window forward of the originating packet */ 4660 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4661 /* Within a window backward of the originating packet */ 4662 4663 /* 4664 * This currently handles three situations: 4665 * 1) Stupid stacks will shotgun SYNs before their peer 4666 * replies. 4667 * 2) When PF catches an already established stream (the 4668 * firewall rebooted, the state table was flushed, routes 4669 * changed...) 
4670 * 3) Packets get funky immediately after the connection 4671 * closes (this should catch Solaris spurious ACK|FINs 4672 * that web servers like to spew after a close) 4673 * 4674 * This must be a little more careful than the above code 4675 * since packet floods will also be caught here. We don't 4676 * update the TTL here to mitigate the damage of a packet 4677 * flood and so the same code can handle awkward establishment 4678 * and a loosened connection close. 4679 * In the establishment case, a correct peer response will 4680 * validate the connection, go through the normal state code 4681 * and keep updating the state TTL. 4682 */ 4683 4684 if (pf_status.debug >= PF_DEBUG_MISC) { 4685 kprintf("pf: loose state match: "); 4686 pf_print_state(*state); 4687 pf_print_flags(th->th_flags); 4688 kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4689 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, 4690 ackskew, (unsigned long long)(*state)->packets[0], 4691 (unsigned long long)(*state)->packets[1], 4692 pd->dir == PF_IN ? "in" : "out", 4693 pd->dir == (*state)->direction ? "fwd" : "rev"); 4694 } 4695 4696 if (dst->scrub || src->scrub) { 4697 if (pf_normalize_tcp_stateful(m, off, pd, reason, th, 4698 *state, src, dst, copyback)) 4699 return (PF_DROP); 4700 } 4701 4702 /* update max window */ 4703 if (src->max_win < win) 4704 src->max_win = win; 4705 /* synchronize sequencing */ 4706 if (SEQ_GT(end, src->seqlo)) 4707 src->seqlo = end; 4708 /* slide the window of what the other end can send */ 4709 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4710 dst->seqhi = ack + MAX((win << sws), 1); 4711 4712 /* 4713 * Cannot set dst->seqhi here since this could be a shotgunned 4714 * SYN and not an already established connection. 
4715 */ 4716 4717 if (th->th_flags & TH_FIN) 4718 if (src->state < TCPS_CLOSING) 4719 src->state = TCPS_CLOSING; 4720 if (th->th_flags & TH_RST) 4721 src->state = dst->state = TCPS_TIME_WAIT; 4722 4723 /* Fall through to PASS packet */ 4724 4725 } else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY || 4726 ((*state)->pickup_mode == PF_PICKUPS_ENABLED && 4727 ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) != 4728 PFSTATE_GOT_SYN_MASK)) { 4729 /* 4730 * If pickup mode is hash only, do not fail on sequence checks. 4731 * 4732 * If pickup mode is enabled and we did not see the SYN in 4733 * both direction, do not fail on sequence checks because 4734 * we do not have complete information on window scale. 4735 * 4736 * Adjust expiration and fall through to PASS packet. 4737 * XXX Add a FIN check to reduce timeout? 4738 */ 4739 (*state)->expire = time_second; 4740 } else { 4741 /* 4742 * Failure processing 4743 */ 4744 if ((*state)->dst.state == TCPS_SYN_SENT && 4745 (*state)->src.state == TCPS_SYN_SENT) { 4746 /* Send RST for state mismatches during handshake */ 4747 if (!(th->th_flags & TH_RST)) 4748 pf_send_tcp((*state)->rule.ptr, pd->af, 4749 pd->dst, pd->src, th->th_dport, 4750 th->th_sport, ntohl(th->th_ack), 0, 4751 TH_RST, 0, 0, 4752 (*state)->rule.ptr->return_ttl, 1, 0, 4753 pd->eh, kif->pfik_ifp); 4754 src->seqlo = 0; 4755 src->seqhi = 1; 4756 src->max_win = 1; 4757 } else if (pf_status.debug >= PF_DEBUG_MISC) { 4758 kprintf("pf: BAD state: "); 4759 pf_print_state(*state); 4760 pf_print_flags(th->th_flags); 4761 kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4762 "pkts=%llu:%llu dir=%s,%s\n", 4763 seq, orig_seq, ack, pd->p_len, ackskew, 4764 (unsigned long long)(*state)->packets[0], 4765 (unsigned long long)(*state)->packets[1], 4766 pd->dir == PF_IN ? "in" : "out", 4767 pd->dir == (*state)->direction ? "fwd" : "rev"); 4768 kprintf("pf: State failure on: %c %c %c %c | %c %c\n", 4769 SEQ_GEQ(src->seqhi, end) ? 
' ' : '1', 4770 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 4771 ' ': '2', 4772 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 4773 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 4774 SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', 4775 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6'); 4776 } 4777 REASON_SET(reason, PFRES_BADSTATE); 4778 return (PF_DROP); 4779 } 4780 4781 return (PF_PASS); 4782 } 4783 4784 /* 4785 * Called with state locked 4786 */ 4787 int 4788 pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, 4789 struct pf_state **state, struct pf_pdesc *pd, u_short *reason) 4790 { 4791 struct tcphdr *th = pd->hdr.tcp; 4792 4793 if (th->th_flags & TH_SYN) 4794 if (src->state < TCPS_SYN_SENT) 4795 src->state = TCPS_SYN_SENT; 4796 if (th->th_flags & TH_FIN) 4797 if (src->state < TCPS_CLOSING) 4798 src->state = TCPS_CLOSING; 4799 if (th->th_flags & TH_ACK) { 4800 if (dst->state == TCPS_SYN_SENT) { 4801 dst->state = TCPS_ESTABLISHED; 4802 if (src->state == TCPS_ESTABLISHED && 4803 (*state)->src_node != NULL && 4804 pf_src_connlimit(*state)) { 4805 REASON_SET(reason, PFRES_SRCLIMIT); 4806 return (PF_DROP); 4807 } 4808 } else if (dst->state == TCPS_CLOSING) { 4809 dst->state = TCPS_FIN_WAIT_2; 4810 } else if (src->state == TCPS_SYN_SENT && 4811 dst->state < TCPS_SYN_SENT) { 4812 /* 4813 * Handle a special sloppy case where we only see one 4814 * half of the connection. If there is a ACK after 4815 * the initial SYN without ever seeing a packet from 4816 * the destination, set the connection to established. 
4817 */ 4818 dst->state = src->state = TCPS_ESTABLISHED; 4819 if ((*state)->src_node != NULL && 4820 pf_src_connlimit(*state)) { 4821 REASON_SET(reason, PFRES_SRCLIMIT); 4822 return (PF_DROP); 4823 } 4824 } else if (src->state == TCPS_CLOSING && 4825 dst->state == TCPS_ESTABLISHED && 4826 dst->seqlo == 0) { 4827 /* 4828 * Handle the closing of half connections where we 4829 * don't see the full bidirectional FIN/ACK+ACK 4830 * handshake. 4831 */ 4832 dst->state = TCPS_CLOSING; 4833 } 4834 } 4835 if (th->th_flags & TH_RST) 4836 src->state = dst->state = TCPS_TIME_WAIT; 4837 4838 /* update expire time */ 4839 (*state)->expire = time_second; 4840 if (src->state >= TCPS_FIN_WAIT_2 && 4841 dst->state >= TCPS_FIN_WAIT_2) 4842 (*state)->timeout = PFTM_TCP_CLOSED; 4843 else if (src->state >= TCPS_CLOSING && 4844 dst->state >= TCPS_CLOSING) 4845 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4846 else if (src->state < TCPS_ESTABLISHED || 4847 dst->state < TCPS_ESTABLISHED) 4848 (*state)->timeout = PFTM_TCP_OPENING; 4849 else if (src->state >= TCPS_CLOSING || 4850 dst->state >= TCPS_CLOSING) 4851 (*state)->timeout = PFTM_TCP_CLOSING; 4852 else if ((th->th_flags & TH_SYN) && 4853 ((*state)->state_flags & PFSTATE_SLOPPY)) 4854 (*state)->timeout = PFTM_TCP_FIRST_PACKET; 4855 else 4856 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4857 4858 return (PF_PASS); 4859 } 4860 4861 /* 4862 * Test TCP connection state. Caller must hold the state locked. 
4863 */ 4864 int 4865 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, 4866 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 4867 u_short *reason) 4868 { 4869 struct pf_state_key_cmp key; 4870 struct tcphdr *th = pd->hdr.tcp; 4871 int copyback = 0; 4872 int error; 4873 struct pf_state_peer *src, *dst; 4874 struct pf_state_key *sk; 4875 4876 bzero(&key, sizeof(key)); 4877 key.af = pd->af; 4878 key.proto = IPPROTO_TCP; 4879 if (direction == PF_IN) { /* wire side, straight */ 4880 PF_ACPY(&key.addr[0], pd->src, key.af); 4881 PF_ACPY(&key.addr[1], pd->dst, key.af); 4882 key.port[0] = th->th_sport; 4883 key.port[1] = th->th_dport; 4884 if (pf_status.debug >= PF_DEBUG_MISC) { 4885 kprintf("test-tcp IN (%08x:%d) -> (%08x:%d)\n", 4886 ntohl(key.addr[0].addr32[0]), 4887 ntohs(key.port[0]), 4888 ntohl(key.addr[1].addr32[0]), 4889 ntohs(key.port[1])); 4890 } 4891 } else { /* stack side, reverse */ 4892 PF_ACPY(&key.addr[1], pd->src, key.af); 4893 PF_ACPY(&key.addr[0], pd->dst, key.af); 4894 key.port[1] = th->th_sport; 4895 key.port[0] = th->th_dport; 4896 if (pf_status.debug >= PF_DEBUG_MISC) { 4897 kprintf("test-tcp OUT (%08x:%d) <- (%08x:%d)\n", 4898 ntohl(key.addr[0].addr32[0]), 4899 ntohs(key.port[0]), 4900 ntohl(key.addr[1].addr32[0]), 4901 ntohs(key.port[1])); 4902 } 4903 } 4904 4905 STATE_LOOKUP(kif, &key, direction, *state, m); 4906 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 4907 4908 if (direction == (*state)->direction) { 4909 src = &(*state)->src; 4910 dst = &(*state)->dst; 4911 } else { 4912 src = &(*state)->dst; 4913 dst = &(*state)->src; 4914 } 4915 4916 sk = (*state)->key[pd->didx]; 4917 4918 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4919 if (direction != (*state)->direction) { 4920 REASON_SET(reason, PFRES_SYNPROXY); 4921 FAIL (PF_SYNPROXY_DROP); 4922 } 4923 if (th->th_flags & TH_SYN) { 4924 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4925 REASON_SET(reason, PFRES_SYNPROXY); 4926 FAIL (PF_DROP); 4927 } 4928 
pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4929 pd->src, th->th_dport, th->th_sport, 4930 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4931 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4932 0, NULL, NULL); 4933 REASON_SET(reason, PFRES_SYNPROXY); 4934 FAIL (PF_SYNPROXY_DROP); 4935 } else if (!(th->th_flags & TH_ACK) || 4936 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4937 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4938 REASON_SET(reason, PFRES_SYNPROXY); 4939 FAIL (PF_DROP); 4940 } else if ((*state)->src_node != NULL && 4941 pf_src_connlimit(*state)) { 4942 REASON_SET(reason, PFRES_SRCLIMIT); 4943 FAIL (PF_DROP); 4944 } else 4945 (*state)->src.state = PF_TCPS_PROXY_DST; 4946 } 4947 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4948 if (direction == (*state)->direction) { 4949 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4950 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4951 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4952 REASON_SET(reason, PFRES_SYNPROXY); 4953 FAIL (PF_DROP); 4954 } 4955 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4956 if ((*state)->dst.seqhi == 1) 4957 (*state)->dst.seqhi = htonl(karc4random()); 4958 pf_send_tcp((*state)->rule.ptr, pd->af, 4959 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4960 sk->port[pd->sidx], sk->port[pd->didx], 4961 (*state)->dst.seqhi, 0, TH_SYN, 0, 4962 (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); 4963 REASON_SET(reason, PFRES_SYNPROXY); 4964 FAIL (PF_SYNPROXY_DROP); 4965 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4966 (TH_SYN|TH_ACK)) || 4967 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4968 REASON_SET(reason, PFRES_SYNPROXY); 4969 FAIL (PF_DROP); 4970 } else { 4971 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4972 (*state)->dst.seqlo = ntohl(th->th_seq); 4973 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4974 pd->src, th->th_dport, th->th_sport, 4975 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4976 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4977 
(*state)->tag, NULL, NULL); 4978 pf_send_tcp((*state)->rule.ptr, pd->af, 4979 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4980 sk->port[pd->sidx], sk->port[pd->didx], 4981 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4982 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4983 0, NULL, NULL); 4984 (*state)->src.seqdiff = (*state)->dst.seqhi - 4985 (*state)->src.seqlo; 4986 (*state)->dst.seqdiff = (*state)->src.seqhi - 4987 (*state)->dst.seqlo; 4988 (*state)->src.seqhi = (*state)->src.seqlo + 4989 (*state)->dst.max_win; 4990 (*state)->dst.seqhi = (*state)->dst.seqlo + 4991 (*state)->src.max_win; 4992 (*state)->src.wscale = (*state)->dst.wscale = 0; 4993 (*state)->src.state = (*state)->dst.state = 4994 TCPS_ESTABLISHED; 4995 REASON_SET(reason, PFRES_SYNPROXY); 4996 FAIL (PF_SYNPROXY_DROP); 4997 } 4998 } 4999 5000 /* 5001 * Check for connection (addr+port pair) reuse. We can't actually 5002 * unlink the state if we don't own it. 5003 */ 5004 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 5005 dst->state >= TCPS_FIN_WAIT_2 && 5006 src->state >= TCPS_FIN_WAIT_2) { 5007 if (pf_status.debug >= PF_DEBUG_MISC) { 5008 kprintf("pf: state reuse "); 5009 pf_print_state(*state); 5010 pf_print_flags(th->th_flags); 5011 kprintf("\n"); 5012 } 5013 /* XXX make sure it's the same direction ?? 
*/ 5014 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 5015 if ((*state)->cpuid == mycpu->gd_cpuid) { 5016 pf_unlink_state(*state); 5017 *state = NULL; 5018 } else { 5019 (*state)->timeout = PFTM_PURGE; 5020 } 5021 FAIL (PF_DROP); 5022 } 5023 5024 if ((*state)->state_flags & PFSTATE_SLOPPY) { 5025 if (pf_tcp_track_sloppy(src, dst, state, pd, 5026 reason) == PF_DROP) { 5027 FAIL (PF_DROP); 5028 } 5029 } else { 5030 if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, 5031 reason, ©back) == PF_DROP) { 5032 FAIL (PF_DROP); 5033 } 5034 } 5035 5036 /* translate source/destination address, if necessary */ 5037 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5038 struct pf_state_key *nk = (*state)->key[pd->didx]; 5039 5040 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5041 nk->port[pd->sidx] != th->th_sport) { 5042 /* 5043 * The translated source address may be completely 5044 * unrelated to the saved link header, make sure 5045 * a bridge doesn't try to use it. 5046 */ 5047 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5048 pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, 5049 &th->th_sum, &nk->addr[pd->sidx], 5050 nk->port[pd->sidx], 0, pd->af); 5051 } 5052 5053 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5054 nk->port[pd->didx] != th->th_dport) { 5055 /* 5056 * If we don't redispatch the packet will go into 5057 * the protocol stack on the wrong cpu for the 5058 * post-translated address. 
5059 */ 5060 pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, 5061 &th->th_sum, &nk->addr[pd->didx], 5062 nk->port[pd->didx], 0, pd->af); 5063 } 5064 copyback = 1; 5065 } 5066 5067 /* Copyback sequence modulation or stateful scrub changes if needed */ 5068 if (copyback) { 5069 m->m_flags &= ~M_HASH; 5070 m_copyback(m, off, sizeof(*th), (caddr_t)th); 5071 } 5072 5073 pfsync_update_state(*state); 5074 error = PF_PASS; 5075 done: 5076 if (*state) 5077 lockmgr(&(*state)->lk, LK_RELEASE); 5078 return (error); 5079 } 5080 5081 /* 5082 * Test UDP connection state. Caller must hold the state locked. 5083 */ 5084 int 5085 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, 5086 struct mbuf *m, int off, void *h, struct pf_pdesc *pd) 5087 { 5088 struct pf_state_peer *src, *dst; 5089 struct pf_state_key_cmp key; 5090 struct udphdr *uh = pd->hdr.udp; 5091 5092 bzero(&key, sizeof(key)); 5093 key.af = pd->af; 5094 key.proto = IPPROTO_UDP; 5095 if (direction == PF_IN) { /* wire side, straight */ 5096 PF_ACPY(&key.addr[0], pd->src, key.af); 5097 PF_ACPY(&key.addr[1], pd->dst, key.af); 5098 key.port[0] = uh->uh_sport; 5099 key.port[1] = uh->uh_dport; 5100 } else { /* stack side, reverse */ 5101 PF_ACPY(&key.addr[1], pd->src, key.af); 5102 PF_ACPY(&key.addr[0], pd->dst, key.af); 5103 key.port[1] = uh->uh_sport; 5104 key.port[0] = uh->uh_dport; 5105 } 5106 5107 STATE_LOOKUP(kif, &key, direction, *state, m); 5108 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5109 5110 if (direction == (*state)->direction) { 5111 src = &(*state)->src; 5112 dst = &(*state)->dst; 5113 } else { 5114 src = &(*state)->dst; 5115 dst = &(*state)->src; 5116 } 5117 5118 /* update states */ 5119 if (src->state < PFUDPS_SINGLE) 5120 src->state = PFUDPS_SINGLE; 5121 if (dst->state == PFUDPS_SINGLE) 5122 dst->state = PFUDPS_MULTIPLE; 5123 5124 /* update expire time */ 5125 (*state)->expire = time_second; 5126 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) 5127 
(*state)->timeout = PFTM_UDP_MULTIPLE; 5128 else 5129 (*state)->timeout = PFTM_UDP_SINGLE; 5130 5131 /* translate source/destination address, if necessary */ 5132 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5133 struct pf_state_key *nk = (*state)->key[pd->didx]; 5134 5135 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5136 nk->port[pd->sidx] != uh->uh_sport) { 5137 /* 5138 * The translated source address may be completely 5139 * unrelated to the saved link header, make sure 5140 * a bridge doesn't try to use it. 5141 */ 5142 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5143 m->m_flags &= ~M_HASH; 5144 pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, 5145 &uh->uh_sum, &nk->addr[pd->sidx], 5146 nk->port[pd->sidx], 1, pd->af); 5147 } 5148 5149 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5150 nk->port[pd->didx] != uh->uh_dport) { 5151 /* 5152 * If we don't redispatch the packet will go into 5153 * the protocol stack on the wrong cpu for the 5154 * post-translated address. 5155 */ 5156 m->m_flags &= ~M_HASH; 5157 pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, 5158 &uh->uh_sum, &nk->addr[pd->didx], 5159 nk->port[pd->didx], 1, pd->af); 5160 } 5161 m_copyback(m, off, sizeof(*uh), (caddr_t)uh); 5162 } 5163 5164 pfsync_update_state(*state); 5165 lockmgr(&(*state)->lk, LK_RELEASE); 5166 return (PF_PASS); 5167 } 5168 5169 /* 5170 * Test ICMP connection state. Caller must hold the state locked. 
5171 */ 5172 int 5173 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, 5174 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 5175 u_short *reason) 5176 { 5177 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 5178 u_int16_t icmpid = 0, *icmpsum = NULL; 5179 u_int8_t icmptype = 0; 5180 int state_icmp = 0; 5181 int error; 5182 struct pf_state_key_cmp key; 5183 5184 bzero(&key, sizeof(key)); 5185 5186 switch (pd->proto) { 5187 #ifdef INET 5188 case IPPROTO_ICMP: 5189 icmptype = pd->hdr.icmp->icmp_type; 5190 icmpid = pd->hdr.icmp->icmp_id; 5191 icmpsum = &pd->hdr.icmp->icmp_cksum; 5192 5193 if (icmptype == ICMP_UNREACH || 5194 icmptype == ICMP_SOURCEQUENCH || 5195 icmptype == ICMP_REDIRECT || 5196 icmptype == ICMP_TIMXCEED || 5197 icmptype == ICMP_PARAMPROB) 5198 state_icmp++; 5199 break; 5200 #endif /* INET */ 5201 #ifdef INET6 5202 case IPPROTO_ICMPV6: 5203 icmptype = pd->hdr.icmp6->icmp6_type; 5204 icmpid = pd->hdr.icmp6->icmp6_id; 5205 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 5206 5207 if (icmptype == ICMP6_DST_UNREACH || 5208 icmptype == ICMP6_PACKET_TOO_BIG || 5209 icmptype == ICMP6_TIME_EXCEEDED || 5210 icmptype == ICMP6_PARAM_PROB) 5211 state_icmp++; 5212 break; 5213 #endif /* INET6 */ 5214 } 5215 5216 if (!state_icmp) { 5217 5218 /* 5219 * ICMP query/reply message not related to a TCP/UDP packet. 5220 * Search for an ICMP state. 
5221 */ 5222 key.af = pd->af; 5223 key.proto = pd->proto; 5224 key.port[0] = key.port[1] = icmpid; 5225 if (direction == PF_IN) { /* wire side, straight */ 5226 PF_ACPY(&key.addr[0], pd->src, key.af); 5227 PF_ACPY(&key.addr[1], pd->dst, key.af); 5228 } else { /* stack side, reverse */ 5229 PF_ACPY(&key.addr[1], pd->src, key.af); 5230 PF_ACPY(&key.addr[0], pd->dst, key.af); 5231 } 5232 5233 STATE_LOOKUP(kif, &key, direction, *state, m); 5234 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5235 5236 (*state)->expire = time_second; 5237 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5238 5239 /* translate source/destination address, if necessary */ 5240 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5241 struct pf_state_key *nk = (*state)->key[pd->didx]; 5242 5243 switch (pd->af) { 5244 #ifdef INET 5245 case AF_INET: 5246 if (PF_ANEQ(pd->src, 5247 &nk->addr[pd->sidx], AF_INET)) 5248 pf_change_a(&saddr->v4.s_addr, 5249 pd->ip_sum, 5250 nk->addr[pd->sidx].v4.s_addr, 0); 5251 5252 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], 5253 AF_INET)) 5254 pf_change_a(&daddr->v4.s_addr, 5255 pd->ip_sum, 5256 nk->addr[pd->didx].v4.s_addr, 0); 5257 5258 if (nk->port[0] != 5259 pd->hdr.icmp->icmp_id) { 5260 pd->hdr.icmp->icmp_cksum = 5261 pf_cksum_fixup( 5262 pd->hdr.icmp->icmp_cksum, icmpid, 5263 nk->port[pd->sidx], 0); 5264 pd->hdr.icmp->icmp_id = 5265 nk->port[pd->sidx]; 5266 } 5267 5268 m->m_flags &= ~M_HASH; 5269 m_copyback(m, off, ICMP_MINLEN, 5270 (caddr_t)pd->hdr.icmp); 5271 break; 5272 #endif /* INET */ 5273 #ifdef INET6 5274 case AF_INET6: 5275 if (PF_ANEQ(pd->src, 5276 &nk->addr[pd->sidx], AF_INET6)) 5277 pf_change_a6(saddr, 5278 &pd->hdr.icmp6->icmp6_cksum, 5279 &nk->addr[pd->sidx], 0); 5280 5281 if (PF_ANEQ(pd->dst, 5282 &nk->addr[pd->didx], AF_INET6)) 5283 pf_change_a6(daddr, 5284 &pd->hdr.icmp6->icmp6_cksum, 5285 &nk->addr[pd->didx], 0); 5286 5287 m->m_flags &= ~M_HASH; 5288 m_copyback(m, off, 5289 sizeof(struct icmp6_hdr), 5290 (caddr_t)pd->hdr.icmp6); 5291 break; 5292 
#endif /* INET6 */ 5293 } 5294 } 5295 } else { 5296 /* 5297 * ICMP error message in response to a TCP/UDP packet. 5298 * Extract the inner TCP/UDP header and search for that state. 5299 */ 5300 5301 struct pf_pdesc pd2; 5302 #ifdef INET 5303 struct ip h2; 5304 #endif /* INET */ 5305 #ifdef INET6 5306 struct ip6_hdr h2_6; 5307 int terminal = 0; 5308 #endif /* INET6 */ 5309 int ipoff2; 5310 int off2; 5311 5312 pd2.not_cpu_localized = 1; 5313 pd2.af = pd->af; 5314 /* Payload packet is from the opposite direction. */ 5315 pd2.sidx = (direction == PF_IN) ? 1 : 0; 5316 pd2.didx = (direction == PF_IN) ? 0 : 1; 5317 switch (pd->af) { 5318 #ifdef INET 5319 case AF_INET: 5320 /* offset of h2 in mbuf chain */ 5321 ipoff2 = off + ICMP_MINLEN; 5322 5323 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 5324 NULL, reason, pd2.af)) { 5325 DPFPRINTF(PF_DEBUG_MISC, 5326 ("pf: ICMP error message too short " 5327 "(ip)\n")); 5328 FAIL (PF_DROP); 5329 } 5330 /* 5331 * ICMP error messages don't refer to non-first 5332 * fragments 5333 */ 5334 if (h2.ip_off & htons(IP_OFFMASK)) { 5335 REASON_SET(reason, PFRES_FRAG); 5336 FAIL (PF_DROP); 5337 } 5338 5339 /* offset of protocol header that follows h2 */ 5340 off2 = ipoff2 + (h2.ip_hl << 2); 5341 5342 pd2.proto = h2.ip_p; 5343 pd2.src = (struct pf_addr *)&h2.ip_src; 5344 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5345 pd2.ip_sum = &h2.ip_sum; 5346 break; 5347 #endif /* INET */ 5348 #ifdef INET6 5349 case AF_INET6: 5350 ipoff2 = off + sizeof(struct icmp6_hdr); 5351 5352 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 5353 NULL, reason, pd2.af)) { 5354 DPFPRINTF(PF_DEBUG_MISC, 5355 ("pf: ICMP error message too short " 5356 "(ip6)\n")); 5357 FAIL (PF_DROP); 5358 } 5359 pd2.proto = h2_6.ip6_nxt; 5360 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5361 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5362 pd2.ip_sum = NULL; 5363 off2 = ipoff2 + sizeof(h2_6); 5364 do { 5365 switch (pd2.proto) { 5366 case IPPROTO_FRAGMENT: 5367 /* 5368 * ICMPv6 error messages for 
5369 * non-first fragments 5370 */ 5371 REASON_SET(reason, PFRES_FRAG); 5372 FAIL (PF_DROP); 5373 case IPPROTO_AH: 5374 case IPPROTO_HOPOPTS: 5375 case IPPROTO_ROUTING: 5376 case IPPROTO_DSTOPTS: { 5377 /* get next header and header length */ 5378 struct ip6_ext opt6; 5379 5380 if (!pf_pull_hdr(m, off2, &opt6, 5381 sizeof(opt6), NULL, reason, 5382 pd2.af)) { 5383 DPFPRINTF(PF_DEBUG_MISC, 5384 ("pf: ICMPv6 short opt\n")); 5385 FAIL (PF_DROP); 5386 } 5387 if (pd2.proto == IPPROTO_AH) 5388 off2 += (opt6.ip6e_len + 2) * 4; 5389 else 5390 off2 += (opt6.ip6e_len + 1) * 8; 5391 pd2.proto = opt6.ip6e_nxt; 5392 /* goto the next header */ 5393 break; 5394 } 5395 default: 5396 terminal++; 5397 break; 5398 } 5399 } while (!terminal); 5400 break; 5401 #endif /* INET6 */ 5402 default: 5403 DPFPRINTF(PF_DEBUG_MISC, 5404 ("pf: ICMP AF %d unknown (ip6)\n", pd->af)); 5405 FAIL (PF_DROP); 5406 break; 5407 } 5408 5409 switch (pd2.proto) { 5410 case IPPROTO_TCP: { 5411 struct tcphdr th; 5412 u_int32_t seq; 5413 struct pf_state_peer *src, *dst; 5414 u_int8_t dws; 5415 int copyback = 0; 5416 5417 /* 5418 * Only the first 8 bytes of the TCP header can be 5419 * expected. Don't access any TCP header fields after 5420 * th_seq, an ackskew test is not possible. 
5421 */ 5422 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 5423 pd2.af)) { 5424 DPFPRINTF(PF_DEBUG_MISC, 5425 ("pf: ICMP error message too short " 5426 "(tcp)\n")); 5427 FAIL (PF_DROP); 5428 } 5429 5430 key.af = pd2.af; 5431 key.proto = IPPROTO_TCP; 5432 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5433 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5434 key.port[pd2.sidx] = th.th_sport; 5435 key.port[pd2.didx] = th.th_dport; 5436 5437 STATE_LOOKUP(kif, &key, direction, *state, m); 5438 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5439 5440 if (direction == (*state)->direction) { 5441 src = &(*state)->dst; 5442 dst = &(*state)->src; 5443 } else { 5444 src = &(*state)->src; 5445 dst = &(*state)->dst; 5446 } 5447 5448 if (src->wscale && dst->wscale) 5449 dws = dst->wscale & PF_WSCALE_MASK; 5450 else 5451 dws = 0; 5452 5453 /* Demodulate sequence number */ 5454 seq = ntohl(th.th_seq) - src->seqdiff; 5455 if (src->seqdiff) { 5456 pf_change_a(&th.th_seq, icmpsum, 5457 htonl(seq), 0); 5458 copyback = 1; 5459 } 5460 5461 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5462 (!SEQ_GEQ(src->seqhi, seq) || 5463 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { 5464 if (pf_status.debug >= PF_DEBUG_MISC) { 5465 kprintf("pf: BAD ICMP %d:%d ", 5466 icmptype, pd->hdr.icmp->icmp_code); 5467 pf_print_host(pd->src, 0, pd->af); 5468 kprintf(" -> "); 5469 pf_print_host(pd->dst, 0, pd->af); 5470 kprintf(" state: "); 5471 pf_print_state(*state); 5472 kprintf(" seq=%u\n", seq); 5473 } 5474 REASON_SET(reason, PFRES_BADSTATE); 5475 FAIL (PF_DROP); 5476 } else { 5477 if (pf_status.debug >= PF_DEBUG_MISC) { 5478 kprintf("pf: OK ICMP %d:%d ", 5479 icmptype, pd->hdr.icmp->icmp_code); 5480 pf_print_host(pd->src, 0, pd->af); 5481 kprintf(" -> "); 5482 pf_print_host(pd->dst, 0, pd->af); 5483 kprintf(" state: "); 5484 pf_print_state(*state); 5485 kprintf(" seq=%u\n", seq); 5486 } 5487 } 5488 5489 /* translate source/destination address, if necessary */ 5490 if ((*state)->key[PF_SK_WIRE] != 5491 
(*state)->key[PF_SK_STACK]) { 5492 struct pf_state_key *nk = 5493 (*state)->key[pd->didx]; 5494 5495 if (PF_ANEQ(pd2.src, 5496 &nk->addr[pd2.sidx], pd2.af) || 5497 nk->port[pd2.sidx] != th.th_sport) 5498 pf_change_icmp(pd2.src, &th.th_sport, 5499 daddr, &nk->addr[pd2.sidx], 5500 nk->port[pd2.sidx], NULL, 5501 pd2.ip_sum, icmpsum, 5502 pd->ip_sum, 0, pd2.af); 5503 5504 if (PF_ANEQ(pd2.dst, 5505 &nk->addr[pd2.didx], pd2.af) || 5506 nk->port[pd2.didx] != th.th_dport) 5507 pf_change_icmp(pd2.dst, &th.th_dport, 5508 NULL, /* XXX Inbound NAT? */ 5509 &nk->addr[pd2.didx], 5510 nk->port[pd2.didx], NULL, 5511 pd2.ip_sum, icmpsum, 5512 pd->ip_sum, 0, pd2.af); 5513 copyback = 1; 5514 } 5515 5516 if (copyback) { 5517 switch (pd2.af) { 5518 #ifdef INET 5519 case AF_INET: 5520 m_copyback(m, off, ICMP_MINLEN, 5521 (caddr_t)pd->hdr.icmp); 5522 m_copyback(m, ipoff2, sizeof(h2), 5523 (caddr_t)&h2); 5524 break; 5525 #endif /* INET */ 5526 #ifdef INET6 5527 case AF_INET6: 5528 m_copyback(m, off, 5529 sizeof(struct icmp6_hdr), 5530 (caddr_t)pd->hdr.icmp6); 5531 m_copyback(m, ipoff2, sizeof(h2_6), 5532 (caddr_t)&h2_6); 5533 break; 5534 #endif /* INET6 */ 5535 } 5536 m->m_flags &= ~M_HASH; 5537 m_copyback(m, off2, 8, (caddr_t)&th); 5538 } 5539 break; 5540 } 5541 case IPPROTO_UDP: { 5542 struct udphdr uh; 5543 5544 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 5545 NULL, reason, pd2.af)) { 5546 DPFPRINTF(PF_DEBUG_MISC, 5547 ("pf: ICMP error message too short " 5548 "(udp)\n")); 5549 return (PF_DROP); 5550 } 5551 5552 key.af = pd2.af; 5553 key.proto = IPPROTO_UDP; 5554 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5555 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5556 key.port[pd2.sidx] = uh.uh_sport; 5557 key.port[pd2.didx] = uh.uh_dport; 5558 5559 STATE_LOOKUP(kif, &key, direction, *state, m); 5560 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5561 5562 /* translate source/destination address, if necessary */ 5563 if ((*state)->key[PF_SK_WIRE] != 5564 (*state)->key[PF_SK_STACK]) { 5565 struct 
pf_state_key *nk = 5566 (*state)->key[pd->didx]; 5567 5568 if (PF_ANEQ(pd2.src, 5569 &nk->addr[pd2.sidx], pd2.af) || 5570 nk->port[pd2.sidx] != uh.uh_sport) 5571 pf_change_icmp(pd2.src, &uh.uh_sport, 5572 daddr, &nk->addr[pd2.sidx], 5573 nk->port[pd2.sidx], &uh.uh_sum, 5574 pd2.ip_sum, icmpsum, 5575 pd->ip_sum, 1, pd2.af); 5576 5577 if (PF_ANEQ(pd2.dst, 5578 &nk->addr[pd2.didx], pd2.af) || 5579 nk->port[pd2.didx] != uh.uh_dport) 5580 pf_change_icmp(pd2.dst, &uh.uh_dport, 5581 NULL, /* XXX Inbound NAT? */ 5582 &nk->addr[pd2.didx], 5583 nk->port[pd2.didx], &uh.uh_sum, 5584 pd2.ip_sum, icmpsum, 5585 pd->ip_sum, 1, pd2.af); 5586 5587 switch (pd2.af) { 5588 #ifdef INET 5589 case AF_INET: 5590 m_copyback(m, off, ICMP_MINLEN, 5591 (caddr_t)pd->hdr.icmp); 5592 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5593 break; 5594 #endif /* INET */ 5595 #ifdef INET6 5596 case AF_INET6: 5597 m_copyback(m, off, 5598 sizeof(struct icmp6_hdr), 5599 (caddr_t)pd->hdr.icmp6); 5600 m_copyback(m, ipoff2, sizeof(h2_6), 5601 (caddr_t)&h2_6); 5602 break; 5603 #endif /* INET6 */ 5604 } 5605 m->m_flags &= ~M_HASH; 5606 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); 5607 } 5608 break; 5609 } 5610 #ifdef INET 5611 case IPPROTO_ICMP: { 5612 struct icmp iih; 5613 5614 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 5615 NULL, reason, pd2.af)) { 5616 DPFPRINTF(PF_DEBUG_MISC, 5617 ("pf: ICMP error message too short i" 5618 "(icmp)\n")); 5619 return (PF_DROP); 5620 } 5621 5622 key.af = pd2.af; 5623 key.proto = IPPROTO_ICMP; 5624 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5625 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5626 key.port[0] = key.port[1] = iih.icmp_id; 5627 5628 STATE_LOOKUP(kif, &key, direction, *state, m); 5629 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5630 5631 /* translate source/destination address, if necessary */ 5632 if ((*state)->key[PF_SK_WIRE] != 5633 (*state)->key[PF_SK_STACK]) { 5634 struct pf_state_key *nk = 5635 (*state)->key[pd->didx]; 5636 5637 if (PF_ANEQ(pd2.src, 5638 
&nk->addr[pd2.sidx], pd2.af) || 5639 nk->port[pd2.sidx] != iih.icmp_id) 5640 pf_change_icmp(pd2.src, &iih.icmp_id, 5641 daddr, &nk->addr[pd2.sidx], 5642 nk->port[pd2.sidx], NULL, 5643 pd2.ip_sum, icmpsum, 5644 pd->ip_sum, 0, AF_INET); 5645 5646 if (PF_ANEQ(pd2.dst, 5647 &nk->addr[pd2.didx], pd2.af) || 5648 nk->port[pd2.didx] != iih.icmp_id) 5649 pf_change_icmp(pd2.dst, &iih.icmp_id, 5650 NULL, /* XXX Inbound NAT? */ 5651 &nk->addr[pd2.didx], 5652 nk->port[pd2.didx], NULL, 5653 pd2.ip_sum, icmpsum, 5654 pd->ip_sum, 0, AF_INET); 5655 5656 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 5657 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5658 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); 5659 m->m_flags &= ~M_HASH; 5660 } 5661 break; 5662 } 5663 #endif /* INET */ 5664 #ifdef INET6 5665 case IPPROTO_ICMPV6: { 5666 struct icmp6_hdr iih; 5667 5668 if (!pf_pull_hdr(m, off2, &iih, 5669 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5670 DPFPRINTF(PF_DEBUG_MISC, 5671 ("pf: ICMP error message too short " 5672 "(icmp6)\n")); 5673 FAIL (PF_DROP); 5674 } 5675 5676 key.af = pd2.af; 5677 key.proto = IPPROTO_ICMPV6; 5678 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5679 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5680 key.port[0] = key.port[1] = iih.icmp6_id; 5681 5682 STATE_LOOKUP(kif, &key, direction, *state, m); 5683 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5684 5685 /* translate source/destination address, if necessary */ 5686 if ((*state)->key[PF_SK_WIRE] != 5687 (*state)->key[PF_SK_STACK]) { 5688 struct pf_state_key *nk = 5689 (*state)->key[pd->didx]; 5690 5691 if (PF_ANEQ(pd2.src, 5692 &nk->addr[pd2.sidx], pd2.af) || 5693 nk->port[pd2.sidx] != iih.icmp6_id) 5694 pf_change_icmp(pd2.src, &iih.icmp6_id, 5695 daddr, &nk->addr[pd2.sidx], 5696 nk->port[pd2.sidx], NULL, 5697 pd2.ip_sum, icmpsum, 5698 pd->ip_sum, 0, AF_INET6); 5699 5700 if (PF_ANEQ(pd2.dst, 5701 &nk->addr[pd2.didx], pd2.af) || 5702 nk->port[pd2.didx] != iih.icmp6_id) 5703 pf_change_icmp(pd2.dst, 
&iih.icmp6_id, 5704 NULL, /* XXX Inbound NAT? */ 5705 &nk->addr[pd2.didx], 5706 nk->port[pd2.didx], NULL, 5707 pd2.ip_sum, icmpsum, 5708 pd->ip_sum, 0, AF_INET6); 5709 5710 m_copyback(m, off, sizeof(struct icmp6_hdr), 5711 (caddr_t)pd->hdr.icmp6); 5712 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); 5713 m_copyback(m, off2, sizeof(struct icmp6_hdr), 5714 (caddr_t)&iih); 5715 m->m_flags &= ~M_HASH; 5716 } 5717 break; 5718 } 5719 #endif /* INET6 */ 5720 default: { 5721 key.af = pd2.af; 5722 key.proto = pd2.proto; 5723 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5724 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5725 key.port[0] = key.port[1] = 0; 5726 5727 STATE_LOOKUP(kif, &key, direction, *state, m); 5728 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5729 5730 /* translate source/destination address, if necessary */ 5731 if ((*state)->key[PF_SK_WIRE] != 5732 (*state)->key[PF_SK_STACK]) { 5733 struct pf_state_key *nk = 5734 (*state)->key[pd->didx]; 5735 5736 if (PF_ANEQ(pd2.src, 5737 &nk->addr[pd2.sidx], pd2.af)) 5738 pf_change_icmp(pd2.src, NULL, daddr, 5739 &nk->addr[pd2.sidx], 0, NULL, 5740 pd2.ip_sum, icmpsum, 5741 pd->ip_sum, 0, pd2.af); 5742 5743 if (PF_ANEQ(pd2.dst, 5744 &nk->addr[pd2.didx], pd2.af)) 5745 pf_change_icmp(pd2.src, NULL, 5746 NULL, /* XXX Inbound NAT? 
*/ 5747 &nk->addr[pd2.didx], 0, NULL, 5748 pd2.ip_sum, icmpsum, 5749 pd->ip_sum, 0, pd2.af); 5750 5751 switch (pd2.af) { 5752 #ifdef INET 5753 case AF_INET: 5754 m_copyback(m, off, ICMP_MINLEN, 5755 (caddr_t)pd->hdr.icmp); 5756 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5757 m->m_flags &= ~M_HASH; 5758 break; 5759 #endif /* INET */ 5760 #ifdef INET6 5761 case AF_INET6: 5762 m_copyback(m, off, 5763 sizeof(struct icmp6_hdr), 5764 (caddr_t)pd->hdr.icmp6); 5765 m_copyback(m, ipoff2, sizeof(h2_6), 5766 (caddr_t)&h2_6); 5767 m->m_flags &= ~M_HASH; 5768 break; 5769 #endif /* INET6 */ 5770 } 5771 } 5772 break; 5773 } 5774 } 5775 } 5776 5777 pfsync_update_state(*state); 5778 error = PF_PASS; 5779 done: 5780 if (*state) 5781 lockmgr(&(*state)->lk, LK_RELEASE); 5782 return (error); 5783 } 5784 5785 /* 5786 * Test other connection state. Caller must hold the state locked. 5787 */ 5788 int 5789 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, 5790 struct mbuf *m, struct pf_pdesc *pd) 5791 { 5792 struct pf_state_peer *src, *dst; 5793 struct pf_state_key_cmp key; 5794 5795 bzero(&key, sizeof(key)); 5796 key.af = pd->af; 5797 key.proto = pd->proto; 5798 if (direction == PF_IN) { 5799 PF_ACPY(&key.addr[0], pd->src, key.af); 5800 PF_ACPY(&key.addr[1], pd->dst, key.af); 5801 key.port[0] = key.port[1] = 0; 5802 } else { 5803 PF_ACPY(&key.addr[1], pd->src, key.af); 5804 PF_ACPY(&key.addr[0], pd->dst, key.af); 5805 key.port[1] = key.port[0] = 0; 5806 } 5807 5808 STATE_LOOKUP(kif, &key, direction, *state, m); 5809 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5810 5811 if (direction == (*state)->direction) { 5812 src = &(*state)->src; 5813 dst = &(*state)->dst; 5814 } else { 5815 src = &(*state)->dst; 5816 dst = &(*state)->src; 5817 } 5818 5819 /* update states */ 5820 if (src->state < PFOTHERS_SINGLE) 5821 src->state = PFOTHERS_SINGLE; 5822 if (dst->state == PFOTHERS_SINGLE) 5823 dst->state = PFOTHERS_MULTIPLE; 5824 5825 /* update expire time */ 5826 
(*state)->expire = time_second; 5827 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 5828 (*state)->timeout = PFTM_OTHER_MULTIPLE; 5829 else 5830 (*state)->timeout = PFTM_OTHER_SINGLE; 5831 5832 /* translate source/destination address, if necessary */ 5833 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5834 struct pf_state_key *nk = (*state)->key[pd->didx]; 5835 5836 KKASSERT(nk); 5837 KKASSERT(pd); 5838 KKASSERT(pd->src); 5839 KKASSERT(pd->dst); 5840 switch (pd->af) { 5841 #ifdef INET 5842 case AF_INET: 5843 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5844 pf_change_a(&pd->src->v4.s_addr, 5845 pd->ip_sum, 5846 nk->addr[pd->sidx].v4.s_addr, 5847 0); 5848 5849 5850 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 5851 pf_change_a(&pd->dst->v4.s_addr, 5852 pd->ip_sum, 5853 nk->addr[pd->didx].v4.s_addr, 5854 0); 5855 5856 break; 5857 #endif /* INET */ 5858 #ifdef INET6 5859 case AF_INET6: 5860 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5861 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); 5862 5863 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 5864 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); 5865 #endif /* INET6 */ 5866 } 5867 } 5868 5869 pfsync_update_state(*state); 5870 lockmgr(&(*state)->lk, LK_RELEASE); 5871 return (PF_PASS); 5872 } 5873 5874 /* 5875 * ipoff and off are measured from the start of the mbuf chain. 5876 * h must be at "ipoff" on the mbuf chain. 
 */
/*
 * Pull "len" bytes of a protocol header at offset "off" out of mbuf chain
 * "m" into the caller-supplied buffer "p".  Returns p on success, or NULL
 * on failure with *actionp/*reasonp set (when non-NULL).  For AF_INET,
 * non-first fragments are passed through (PF_PASS) when the requested
 * header lies entirely in an earlier fragment, otherwise dropped.
 * NOTE(review): the h->ip_off / h->ip_len comparisons assume the IP header
 * fields are already in host byte order here — confirm against ip_input().
 */
void *
pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
    u_short *actionp, u_short *reasonp, sa_family_t af)
{
	switch (af) {
#ifdef INET
	case AF_INET: {
		struct ip	*h = mtod(m, struct ip *);
		u_int16_t	 fragoff = (h->ip_off & IP_OFFMASK) << 3;

		if (fragoff) {
			/* non-first fragment: header cannot be in it */
			if (fragoff >= len)
				ACTION_SET(actionp, PF_PASS);
			else {
				ACTION_SET(actionp, PF_DROP);
				REASON_SET(reasonp, PFRES_FRAG);
			}
			return (NULL);
		}
		if (m->m_pkthdr.len < off + len ||
		    h->ip_len < off + len) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr	*h = mtod(m, struct ip6_hdr *);

		/* check both the mbuf chain and the claimed payload length */
		if (m->m_pkthdr.len < off + len ||
		    (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) <
		    (unsigned)(off + len)) {
			ACTION_SET(actionp, PF_DROP);
			REASON_SET(reasonp, PFRES_SHORT);
			return (NULL);
		}
		break;
	}
#endif /* INET6 */
	}
	m_copydata(m, off, len, p);
	return (p);
}

/*
 * Check whether "addr" is routable, and when "kif" is given, perform a
 * uRPF check: the route towards the address must point back out the
 * given interface.  Returns 1 when routable/passing, 0 otherwise.
 */
int
pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif)
{
	struct sockaddr_in	*dst;
	int			 ret = 1;
	int			 check_mpath;
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	struct radix_node	*rn;
	struct rtentry		*rt;
	struct ifnet		*ifp;

	/* multipath checking is not implemented; loop below runs once */
	check_mpath = 0;
	bzero(&ro, sizeof(ro));
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		/*
		 * Skip check for addresses with embedded interface scope,
		 * as they would always match anyway.
		 */
		if (IN6_IS_SCOPE_EMBED(&addr->v6))
			goto out;
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	/* Skip checks for ipsec interfaces */
	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
		goto out;

	rtalloc_ign((struct route *)&ro, 0);

	if (ro.ro_rt != NULL) {
		/* No interface given, this is a no-route check */
		if (kif == NULL)
			goto out;

		if (kif->pfik_ifp == NULL) {
			ret = 0;
			goto out;
		}

		/* Perform uRPF check if passed input interface */
		ret = 0;
		rn = (struct radix_node *)ro.ro_rt;
		do {
			rt = (struct rtentry *)rn;
			ifp = rt->rt_ifp;

			if (kif->pfik_ifp == ifp)
				ret = 1;
			rn = NULL;
		} while (check_mpath == 1 && rn != NULL && ret == 0);
	} else
		ret = 0;
out:
	if (ro.ro_rt != NULL)
		RTFREE(ro.ro_rt);
	return (ret);
}

/*
 * Match a route label on the route towards "addr".  Route labels are not
 * supported on DragonFly: "ret" is never set after the lookup, so this
 * always returns 0 (no match); the lookup is only performed and released.
 */
int
pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw)
{
	struct sockaddr_in	*dst;
#ifdef INET6
	struct sockaddr_in6	*dst6;
	struct route_in6	 ro;
#else
	struct route		 ro;
#endif
	int			 ret = 0;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	bzero(&ro, sizeof(ro));
	switch (af) {
	case AF_INET:
		dst = satosin(&ro.ro_dst);
		dst->sin_family = AF_INET;
		dst->sin_len = sizeof(*dst);
		dst->sin_addr = addr->v4;
		break;
#ifdef INET6
	case AF_INET6:
		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
		dst6->sin6_family = AF_INET6;
		dst6->sin6_len = sizeof(*dst6);
		dst6->sin6_addr = addr->v6;
		break;
#endif /* INET6 */
	default:
		return (0);
	}

	rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING));

	if (ro.ro_rt != NULL) {
		RTFREE(ro.ro_rt);
	}

	return (ret);
}

#ifdef INET
/*
 * Route an IPv4 packet according to a route-to/reply-to/dup-to rule "r"
 * (and state "s" when given).  Consumes/clears *m except for PF_DUPTO,
 * where a duplicate is routed and the original is left to continue.
 * Handles checksum offload fixups and fragmentation on output.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
	int sw_csum;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* limit re-routing of the same packet to avoid loops */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		/* route a copy, leave the original alone */
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
			return;
		}
	} else {
		/* reply-to applies only against the rule direction */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
			return;
		}
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* plain routing table lookup */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		/* next-hop comes from the rule's address pool or the state */
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	/* re-filter when leaving through a different interface */
	if (oifp != ifp) {
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		ip = mtod(m0, struct ip *);
	}

	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
	m0->m_pkthdr.csum_iphlen = (ip->ip_hl << 2);

	/*
	 * WARNING!  We cannot fragment if the packet was modified from an
	 *	     original which expected to be using TSO.  In this
	 *	     situation we pray that the target interface is
	 *	     compatible with the originating interface.
	 */
	if (ip->ip_len <= ifp->if_mtu ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO) ||
	    ((ifp->if_hwassist & CSUM_FRAGMENT) &&
	    (ip->ip_off & IP_DF) == 0)) {
		/* fits (or hw fragments): convert to net order and send */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (ip->ip_v == IPVERSION &&
			    (ip->ip_hl << 2) == sizeof(*ip)) {
				ip->ip_sum = in_cksum_hdr(ip);
			} else {
				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			}
		}
		/* drop pf_token around the potentially blocking output */
		lwkt_reltoken(&pf_token);
		error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt);
		lwkt_gettoken(&pf_token);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & IP_DF) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
			    ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	m1 = m0;
	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
	if (error) {
		goto bad;
	}

	/* send the fragment chain; free the rest after the first error */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			lwkt_reltoken(&pf_token);
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
			    NULL);
			lwkt_gettoken(&pf_token);
		} else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */

#ifdef INET6
/*
 * IPv6 counterpart of pf_route().  No fragmentation is performed: packets
 * larger than the interface MTU are answered with ICMP6_PACKET_TOO_BIG
 * (except for dup-to copies, which are simply dropped).
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* limit re-routing of the same packet to avoid loops */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL)
			return;
	} else {
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/*
	 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 *
	 * Cheat. XXX why only in the v6 case???
	 */
	if (r->rt == PF_FASTROUTE) {
		m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
		m0->m_pkthdr.pf.flags = 0;
		/* XXX Re-Check when Upgrading to > 4.4 */
		m0->m_pkthdr.pf.statekey = NULL;
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		return;
	}

	/* next-hop comes from the rule's address pool or the state */
	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ?
		    r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	/* re-filter when leaving through a different interface */
	if (oifp != ifp) {
		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		nd6_output(ifp, ifp, m0, dst, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO)
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		else
			goto bad;
	}

done:
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */


/*
 * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag
 * off is the offset where the protocol header starts
 * len is the total length of protocol header plus payload
 * returns 0 when the checksum is valid, otherwise returns 1.
 */
/*
 * XXX
 * FreeBSD supports cksum offload for the following drivers.
 * em(4), gx(4), lge(4), nge(4), ti(4), xl(4)
 * If we can make full use of it we would outperform ipfw/ipfilter in
 * very heavy traffic.
 * I have not tested 'cause I don't have NICs that supports cksum offload.
 * (There might be problems. Typical phenomena would be
 * 1. No route message for UDP packet.
 * 2.
 * No connection acceptance from external hosts regardless of rule set.)
 */
/*
 * Verify the transport-protocol checksum of the packet in 'm'.
 *
 * p is the protocol (TCP/UDP/ICMP, and ICMPV6 when INET6), off the offset
 * of the transport header, len the length to checksum, af the address
 * family.  For TCP/UDP, a hardware-offload result (CSUM_DATA_VALID, with
 * or without CSUM_PSEUDO_HDR) is used when present; otherwise the sum is
 * computed in software.  A good software-computed TCP/UDP sum is cached
 * back into the mbuf as CSUM_DATA_VALID|CSUM_PSEUDO_HDR.
 *
 * Returns 0 if the checksum verifies, 1 on a bad checksum, an
 * unsupported protocol/family, or malformed offsets/lengths.  Bad
 * checksums increment the per-protocol statistics counters.
 */
int
pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p,
    sa_family_t af)
{
	u_int16_t sum = 0;
	int hw_assist = 0;
	struct ip *ip;

	/* reject impossible geometry before touching the packet */
	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
		return (1);
	if (m->m_pkthdr.len < off + len)
		return (1);

	switch (p) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
				/* hardware already folded in the pseudo-header */
				sum = m->m_pkthdr.csum_data;
			} else {
				/* add the pseudo-header to the hw partial sum */
				ip = mtod(m, struct ip *);
				sum = in_pseudo(ip->ip_src.s_addr,
				    ip->ip_dst.s_addr, htonl((u_short)len +
				    m->m_pkthdr.csum_data + p));
			}
			sum ^= 0xffff;
			++hw_assist;
		}
		break;
	case IPPROTO_ICMP:
#ifdef INET6
	case IPPROTO_ICMPV6:
#endif /* INET6 */
		break;
	default:
		return (1);
	}

	if (!hw_assist) {
		switch (af) {
		case AF_INET:
			if (p == IPPROTO_ICMP) {
				if (m->m_len < off)
					return (1);
				/*
				 * ICMP has no pseudo-header: temporarily
				 * advance the mbuf data pointer and sum the
				 * payload directly, then restore it.
				 */
				m->m_data += off;
				m->m_len -= off;
				sum = in_cksum(m, len);
				m->m_data -= off;
				m->m_len += off;
			} else {
				if (m->m_len < sizeof(struct ip))
					return (1);
				sum = in_cksum_range(m, p, off, len);
				if (sum == 0) {
					/* cache the good result in the mbuf */
					m->m_pkthdr.csum_flags |=
					    (CSUM_DATA_VALID |
					     CSUM_PSEUDO_HDR);
					m->m_pkthdr.csum_data = 0xffff;
				}
			}
			break;
#ifdef INET6
		case AF_INET6:
			if (m->m_len < sizeof(struct ip6_hdr))
				return (1);
			sum = in6_cksum(m, p, off, len);
			/*
			 * XXX
			 * IPv6 H/W cksum off-load not supported yet!
			 *
			 * if (sum == 0) {
			 *	m->m_pkthdr.csum_flags |=
			 *	    (CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
			 *	m->m_pkthdr.csum_data = 0xffff;
			 *}
			 */
			break;
#endif /* INET6 */
		default:
			return (1);
		}
	}
	if (sum) {
		/* bad checksum: account it per protocol and reject */
		switch (p) {
		case IPPROTO_TCP:
			tcpstat.tcps_rcvbadsum++;
			break;
		case IPPROTO_UDP:
			udp_stat.udps_badsum++;
			break;
		case IPPROTO_ICMP:
			icmpstat.icps_checksum++;
			break;
#ifdef INET6
		case IPPROTO_ICMPV6:
			icmp6stat.icp6s_checksum++;
			break;
#endif /* INET6 */
		}
		return (1);
	}
	return (0);
}

/*
 * Return the pf_divert information attached to 'm' via a
 * PACKET_TAG_PF_DIVERT packet tag, or NULL if no such tag is present.
 * The pf_divert structure is stored immediately after the m_tag header.
 */
struct pf_divert *
pf_find_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
		return (NULL);

	return ((struct pf_divert *)(mtag + 1));
}

/*
 * Like pf_find_divert(), but allocate and prepend a new zeroed divert
 * tag when none exists yet.  Returns NULL only if the M_NOWAIT tag
 * allocation fails.
 */
struct pf_divert *
pf_get_divert(struct mbuf *m)
{
	struct m_tag	*mtag;

	if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
		mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
		    M_NOWAIT);
		if (mtag == NULL)
			return (NULL);
		bzero(mtag + 1, sizeof(struct pf_divert));
		m_tag_prepend(m, mtag);
	}

	return ((struct pf_divert *)(mtag + 1));
}

#ifdef INET

/*
 * Main pf entry point for IPv4 packets.
 *
 * dir is PF_IN or PF_OUT, ifp the interface, *m0 the packet, eh the
 * optional ethernet header, inp the optional originating pcb.  The
 * packet is normalized/reassembled, matched against states and rules,
 * and the resulting PF_* action is returned.  On PF_SYNPROXY_DROP the
 * mbuf is freed and *m0 set to NULL; pf_route() may likewise consume
 * the mbuf, so callers must re-check *m0 after return.
 *
 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE
 */
int
pf_test(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0;
	struct ip		*h = NULL;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, dirndx;
#ifdef ALTQ
	int			 pqid = 0;
#endif

	if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
		/* Skip us; continue in ipfw. */
		return (PF_PASS);
	}

	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof(pd));
#ifdef foo
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	/* too short to hold an IPv4 header */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/*
	 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 */
	if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
		return (PF_PASS);
	m->m_pkthdr.pf.flags = 0;
	/* Re-Check when updating to > 4.4 */
	m->m_pkthdr.pf.statekey = NULL;

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip *);

	/* header-length sanity (ip_hl is in 32-bit words) */
	off = h->ip_hl << 2;
	if (off < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/* fill in the packet descriptor used by the matching code */
	pd.src = (struct pf_addr *)&h->ip_src;
	pd.dst = (struct pf_addr *)&h->ip_dst;
	pd.sport = pd.dport = NULL;
	pd.ip_sum = &h->ip_sum;
	pd.proto_sum = NULL;
	pd.proto = h->ip_p;
	pd.dir = dir;
	pd.sidx = (dir == PF_IN) ? 0 : 1;
	pd.didx = (dir == PF_IN) ? 1 : 0;
	pd.af = AF_INET;
	pd.tos = h->ip_tos;
	pd.tot_len = h->ip_len;
	pd.eh = eh;

	/* handle fragments that didn't get reassembled by normalization */
	if (h->ip_off & (IP_MF | IP_OFFMASK)) {
		action = pf_test_fragment(&r, dir, kif, m, h,
		    &pd, &a, &ruleset);
		goto done;
	}

	switch (h->ip_p) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
#ifdef ALTQ
		/* pure ACKs go to the priority queue */
		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
#endif
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		/* existing state first, then a full rule-set evaluation */
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		/* reject zero dport and inconsistent UDP lengths */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	case IPPROTO_ICMP: {
		struct icmp	ih;

		/* only ICMP_MINLEN bytes are pulled; body parsed later */
		pd.hdr.icmp = &ih;
		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
		    &action, &reason, AF_INET)) {
			log = action != PF_PASS;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	default:
		action = pf_test_state_other(&s, dir, kif, m, &pd);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL, inp);
		}
		break;
	}

done:
	/* IP options are dropped unless a state/rule explicitly allows them */
	if (action == PF_PASS && h->ip_hl > 5 &&
	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with ip options\n"));
	}

	if ((s && s->tag) || r->rtableid)
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);

#if 0
	if (dir == PF_IN && s && s->key[PF_SK_STACK])
		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
#endif

#ifdef ALTQ
	/*
	 * Generate a hash code and qid request for ALTQ.  A qid of 0
	 * is allowed and will cause altq to select the default queue.
	 */
	if (action == PF_PASS) {
		m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
		if (pqid || (pd.tos & IPTOS_LOWDELAY))
			m->m_pkthdr.pf.qid = r->pqid;
		else
			m->m_pkthdr.pf.qid = r->qid;
		m->m_pkthdr.pf.ecn_af = AF_INET;
		m->m_pkthdr.pf.hdr = h;
		/* add connection hash for fairq */
		if (s) {
			/* for fairq */
			m->m_pkthdr.pf.state_hash = s->hash;
			m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
		}
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see tcp_input() and in_pcblookup_listen().
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;

	/* tag the packet for divert(4) if the matching rule requests it */
	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(m))) {
			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->addr.ipv4 = r->divert.addr.v4;
		}
	}

	if (log) {
		struct pf_rule *lr;

		/* a NAT rule with "log (all)" overrides the match rule */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* per-interface counters: [af==inet][out?][blocked?] */
	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;

	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* state counters are indexed by state direction */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL && r == &pf_default_rule)
			tr = nr;
		/* update table statistics for table-based rule addresses */
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl,
			    (s == NULL) ? pd.src :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_OUT)],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl,
			    (s == NULL) ? pd.dst :
			    &s->key[(s->direction == PF_IN)]->
				addr[(s->direction == PF_IN)],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->dst.neg);
	}


	if (action == PF_SYNPROXY_DROP) {
		/* synproxy consumed the packet; report PASS with *m0 NULL */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt) {
		/* pf_route can free the mbuf causing *m0 to become NULL */
		pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
	}

	return (action);
}
#endif /* INET */

#ifdef INET6

/*
 * Main pf entry point for IPv6 packets; the IPv6 counterpart of
 * pf_test().  Walks the extension-header chain (dropping packets with
 * multiple routing headers or a type-0 routing header), then matches
 * states and rules.  Same mbuf-ownership caveats as pf_test(): on
 * PF_SYNPROXY_DROP the mbuf is freed and *m0 set to NULL, and
 * pf_route6() may consume the mbuf.
 *
 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE
 */
int
pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0,
    struct ether_header *eh, struct inpcb *inp)
{
	struct pfi_kif		*kif;
	u_short			 action, reason = 0, log = 0;
	struct mbuf		*m = *m0, *n = NULL;
	struct ip6_hdr		*h = NULL;
	struct pf_rule		*a = NULL, *r = &pf_default_rule, *tr, *nr;
	struct pf_state		*s = NULL;
	struct pf_ruleset	*ruleset = NULL;
	struct pf_pdesc		 pd;
	int			 off, terminal = 0, dirndx, rh_cnt = 0;

	if (!pf_status.running)
		return (PF_PASS);

	memset(&pd, 0, sizeof(pd));
#ifdef foo
	if (ifp->if_type == IFT_CARP && ifp->if_carpdev)
		kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif;
	else
#endif
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test6");
#endif /* DIAGNOSTIC */

	/* too short to hold an IPv6 header */
	if (m->m_pkthdr.len < (int)sizeof(*h)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_SHORT);
		log = 1;
		goto done;
	}

	/*
	 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 */
	if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED)
		return (PF_PASS);
	m->m_pkthdr.pf.flags = 0;
	/* Re-Check when updating to > 4.4 */
	m->m_pkthdr.pf.statekey = NULL;

	/* We do IP header normalization and packet reassembly here */
	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
		action = PF_DROP;
		goto done;
	}
	m = *m0;	/* pf_normalize messes with m0 */
	h = mtod(m, struct ip6_hdr *);

#if 1
	/*
	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
	 * will do something bad, so drop the packet for now.
	 */
	if (htons(h->ip6_plen) == 0) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
		goto done;
	}
#endif

	/* fill in the packet descriptor used by the matching code */
	pd.src = (struct pf_addr *)&h->ip6_src;
	pd.dst = (struct pf_addr *)&h->ip6_dst;
	pd.sport = pd.dport = NULL;
	pd.ip_sum = NULL;
	pd.proto_sum = NULL;
	pd.dir = dir;
	pd.sidx = (dir == PF_IN) ? 0 : 1;
	pd.didx = (dir == PF_IN) ? 1 : 0;
	pd.af = AF_INET6;
	pd.tos = 0;
	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
	pd.eh = eh;

	/* walk the extension-header chain to the transport header */
	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
	pd.proto = h->ip6_nxt;
	do {
		switch (pd.proto) {
		case IPPROTO_FRAGMENT:
			action = pf_test_fragment(&r, dir, kif, m, h,
			    &pd, &a, &ruleset);
			if (action == PF_DROP)
				REASON_SET(&reason, PFRES_FRAG);
			goto done;
		case IPPROTO_ROUTING: {
			struct ip6_rthdr rthdr;

			/* at most one routing header is tolerated */
			if (rh_cnt++) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 more than one rthdr\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
				log = 1;
				goto done;
			}
			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
			    &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short rthdr\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_SHORT);
				log = 1;
				goto done;
			}
			/* type-0 routing headers are a known attack vector */
			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 rthdr0\n"));
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
				log = 1;
				goto done;
			}
			/* FALLTHROUGH */
		}
		case IPPROTO_AH:
		case IPPROTO_HOPOPTS:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct ip6_ext	opt6;

			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
			    NULL, &reason, pd.af)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: IPv6 short opt\n"));
				action = PF_DROP;
				log = 1;
				goto done;
			}
			/* AH length is in 32-bit words, others in 64-bit */
			if (pd.proto == IPPROTO_AH)
				off += (opt6.ip6e_len + 2) * 4;
			else
				off += (opt6.ip6e_len + 1) * 8;
			pd.proto = opt6.ip6e_nxt;
			/* goto the next header */
			break;
		}
		default:
			terminal++;
			break;
		}
	} while (!terminal);

	/* if there's no routing header, use unmodified mbuf for checksumming */
	/*
	 * NOTE(review): nothing in this function ever assigns 'n' a
	 * different mbuf, so the "n != m" cleanup at done: is currently
	 * a no-op — confirm against upstream if the rthdr copy path
	 * was intentionally removed.
	 */
	if (!n)
		n = m;

	switch (pd.proto) {

	case IPPROTO_TCP: {
		struct tcphdr	th;

		pd.hdr.tcp = &th;
		if (!pf_pull_hdr(m, off, &th, sizeof(th),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		pd.p_len = pd.tot_len - off - (th.th_off << 2);
		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
		if (action == PF_DROP)
			goto done;
		/* existing state first, then a full rule-set evaluation */
		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
		    &reason);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	case IPPROTO_UDP: {
		struct udphdr	uh;

		pd.hdr.udp = &uh;
		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		/* reject zero dport and inconsistent UDP lengths */
		if (uh.uh_dport == 0 ||
		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
			action = PF_DROP;
			REASON_SET(&reason, PFRES_SHORT);
			goto done;
		}
		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	case IPPROTO_ICMPV6: {
		struct icmp6_hdr	ih;

		pd.hdr.icmp6 = &ih;
		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
		    &action, &reason, AF_INET6)) {
			log = action != PF_PASS;
			goto done;
		}
		action = pf_test_state_icmp(&s, dir, kif,
		    m, off, h, &pd, &reason);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif,
			    m, off, h, &pd, &a,
			    &ruleset, NULL, inp);
		}
		break;
	}

	default:
		action = pf_test_state_other(&s, dir, kif, m, &pd);
		if (action == PF_PASS) {
			r = s->rule.ptr;
			a = s->anchor.ptr;
			log = s->log;
		} else if (s == NULL) {
			action = pf_test_rule(&r, &s, dir, kif, m, off, h,
			    &pd, &a, &ruleset, NULL, inp);
		}
		break;
	}

done:
	if (n != m) {
		m_freem(n);
		n = NULL;
	}

	/* handle dangerous IPv6 extension headers. */
	if (action == PF_PASS && rh_cnt &&
	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
		action = PF_DROP;
		REASON_SET(&reason, PFRES_IPOPTIONS);
		log = 1;
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pf: dropping packet with dangerous v6 headers\n"));
	}

	if ((s && s->tag) || r->rtableid)
		pf_tag_packet(m, s ? s->tag : 0, r->rtableid);

#if 0
	if (dir == PF_IN && s && s->key[PF_SK_STACK])
		m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
#endif

#ifdef ALTQ
	/*
	 * Generate a hash code and qid request for ALTQ.  A qid of 0
	 * is allowed and will cause altq to select the default queue.
	 */
	if (action == PF_PASS) {
		m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE;
		if (pd.tos & IPTOS_LOWDELAY)
			m->m_pkthdr.pf.qid = r->pqid;
		else
			m->m_pkthdr.pf.qid = r->qid;
		m->m_pkthdr.pf.ecn_af = AF_INET6;
		m->m_pkthdr.pf.hdr = h;
		if (s) {
			/* for fairq */
			m->m_pkthdr.pf.state_hash = s->hash;
			m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED;
		}
	}
#endif /* ALTQ */

	/*
	 * connections redirected to loopback should not match
	 * loopback-bound sockets; see the pf_test() counterpart.
	 */
	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
	    (s->nat_rule.ptr->action == PF_RDR ||
	    s->nat_rule.ptr->action == PF_BINAT) &&
	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
		m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;

	/* tag the packet for divert(4) if the matching rule requests it */
	if (dir == PF_IN && action == PF_PASS && r->divert.port) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(m))) {
			m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->port = r->divert.port;
			divert->addr.ipv6 = r->divert.addr.v6;
		}
	}

	if (log) {
		struct pf_rule *lr;

		/* a NAT rule with "log (all)" overrides the match rule */
		if (s != NULL && s->nat_rule.ptr != NULL &&
		    s->nat_rule.ptr->log & PF_LOG_ALL)
			lr = s->nat_rule.ptr;
		else
			lr = r;
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset,
		    &pd);
	}

	/* per-interface counters: [af==inet6][out?][blocked?] */
	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;

	if (action == PF_PASS || r->action == PF_DROP) {
		dirndx = (dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd.tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd.tot_len;
		}
		if (s != NULL) {
			if (s->nat_rule.ptr != NULL) {
				s->nat_rule.ptr->packets[dirndx]++;
				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
			}
			if (s->src_node != NULL) {
				s->src_node->packets[dirndx]++;
				s->src_node->bytes[dirndx] += pd.tot_len;
			}
			if (s->nat_src_node != NULL) {
				s->nat_src_node->packets[dirndx]++;
				s->nat_src_node->bytes[dirndx] += pd.tot_len;
			}
			/* state counters are indexed by state direction */
			dirndx = (dir == s->direction) ? 0 : 1;
			s->packets[dirndx]++;
			s->bytes[dirndx] += pd.tot_len;
		}
		tr = r;
		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
		if (nr != NULL && r == &pf_default_rule)
			tr = nr;
		/* update table statistics for table-based rule addresses */
		if (tr->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->src.addr.p.tbl,
			    (s == NULL) ? pd.src :
			    &s->key[(s->direction == PF_IN)]->addr[0],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->src.neg);
		if (tr->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(tr->dst.addr.p.tbl,
			    (s == NULL) ? pd.dst :
			    &s->key[(s->direction == PF_IN)]->addr[1],
			    pd.af, pd.tot_len, dir == PF_OUT,
			    r->action == PF_PASS, tr->dst.neg);
	}


	if (action == PF_SYNPROXY_DROP) {
		/* synproxy consumed the packet; report PASS with *m0 NULL */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else if (r->rt)
		/* pf_route6 can free the mbuf causing *m0 to become NULL */
		pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);

	return (action);
}
#endif /* INET6 */

/*
 * Stub congestion check: always reports "no congestion" on the queue.
 */
int
pf_check_congestion(struct ifqueue *ifq)
{
	return (0);
}