1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 
35 * 36 */ 37 38 #include "opt_inet.h" 39 #include "opt_inet6.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/filio.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/kernel.h> 49 #include <sys/time.h> 50 #include <sys/sysctl.h> 51 #include <sys/endian.h> 52 #include <sys/proc.h> 53 #include <sys/kthread.h> 54 #include <sys/spinlock.h> 55 56 #include <sys/md5.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/netisr2.h> 62 #include <net/route.h> 63 64 #include <netinet/in.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/udp.h> 72 #include <netinet/ip_icmp.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/tcp_timer.h> 75 #include <netinet/tcp_var.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/icmp_var.h> 78 #include <netinet/if_ether.h> 79 80 #include <net/pf/pfvar.h> 81 #include <net/pf/if_pflog.h> 82 83 #include <net/pf/if_pfsync.h> 84 85 #ifdef INET6 86 #include <netinet/ip6.h> 87 #include <netinet/icmp6.h> 88 #include <netinet6/nd6.h> 89 #include <netinet6/ip6_var.h> 90 #include <netinet6/in6_pcb.h> 91 #endif /* INET6 */ 92 93 #include <sys/in_cksum.h> 94 #include <sys/ucred.h> 95 #include <machine/limits.h> 96 #include <sys/msgport2.h> 97 #include <sys/spinlock2.h> 98 #include <net/netmsg2.h> 99 #include <net/toeplitz2.h> 100 101 extern int ip_optcopy(struct ip *, struct ip *); 102 extern int debug_pfugidhack; 103 104 /* 105 * pf_token - shared lock for cpu-localized operations, 106 * exclusive lock otherwise. 107 * 108 * pf_gtoken- exclusive lock used for initialization. 
109 */ 110 struct lwkt_token pf_token = LWKT_TOKEN_INITIALIZER(pf_token); 111 struct lwkt_token pf_gtoken = LWKT_TOKEN_INITIALIZER(pf_gtoken); 112 113 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) kprintf x 114 115 #define FAIL(code) { error = (code); goto done; } 116 117 /* 118 * Global variables 119 */ 120 121 /* mask radix tree */ 122 struct radix_node_head *pf_maskhead; 123 124 /* state tables */ 125 struct pf_state_tree *pf_statetbl; /* incls one global table */ 126 struct pf_state **purge_cur; 127 struct pf_altqqueue pf_altqs[2]; 128 struct pf_palist pf_pabuf; 129 struct pf_altqqueue *pf_altqs_active; 130 struct pf_altqqueue *pf_altqs_inactive; 131 struct pf_status pf_status; 132 133 u_int32_t ticket_altqs_active; 134 u_int32_t ticket_altqs_inactive; 135 int altqs_inactive_open; 136 u_int32_t ticket_pabuf; 137 138 MD5_CTX pf_tcp_secret_ctx; 139 u_char pf_tcp_secret[16]; 140 int pf_tcp_secret_init; 141 int pf_tcp_iss_off; 142 143 struct pf_anchor_stackframe { 144 struct pf_ruleset *rs; 145 struct pf_rule *r; 146 struct pf_anchor_node *parent; 147 struct pf_anchor *child; 148 } pf_anchor_stack[64]; 149 150 struct malloc_type *pf_src_tree_pl, *pf_rule_pl, *pf_pooladdr_pl; 151 struct malloc_type *pf_state_pl, *pf_state_key_pl, *pf_state_item_pl; 152 struct malloc_type *pf_altq_pl; 153 154 void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); 155 156 void pf_init_threshold(struct pf_threshold *, u_int32_t, 157 u_int32_t); 158 void pf_add_threshold(struct pf_threshold *); 159 int pf_check_threshold(struct pf_threshold *); 160 161 void pf_change_ap(struct pf_addr *, u_int16_t *, 162 u_int16_t *, u_int16_t *, struct pf_addr *, 163 u_int16_t, u_int8_t, sa_family_t); 164 int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, 165 struct tcphdr *, struct pf_state_peer *); 166 #ifdef INET6 167 void pf_change_a6(struct pf_addr *, u_int16_t *, 168 struct pf_addr *, u_int8_t); 169 #endif /* INET6 */ 170 void pf_change_icmp(struct pf_addr *, u_int16_t *, 171 
struct pf_addr *, struct pf_addr *, u_int16_t, 172 u_int16_t *, u_int16_t *, u_int16_t *, 173 u_int16_t *, u_int8_t, sa_family_t); 174 void pf_send_tcp(const struct pf_rule *, sa_family_t, 175 const struct pf_addr *, const struct pf_addr *, 176 u_int16_t, u_int16_t, u_int32_t, u_int32_t, 177 u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, 178 u_int16_t, struct ether_header *, struct ifnet *); 179 void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, 180 sa_family_t, struct pf_rule *); 181 struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, 182 int, int, struct pfi_kif *, 183 struct pf_addr *, u_int16_t, struct pf_addr *, 184 u_int16_t, int); 185 struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, 186 int, int, struct pfi_kif *, struct pf_src_node **, 187 struct pf_state_key **, struct pf_state_key **, 188 struct pf_state_key **, struct pf_state_key **, 189 struct pf_addr *, struct pf_addr *, 190 u_int16_t, u_int16_t); 191 void pf_detach_state(struct pf_state *); 192 int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, 193 struct pf_state_key **, struct pf_state_key **, 194 struct pf_state_key **, struct pf_state_key **, 195 struct pf_addr *, struct pf_addr *, 196 u_int16_t, u_int16_t); 197 void pf_state_key_detach(struct pf_state *, int); 198 u_int32_t pf_tcp_iss(struct pf_pdesc *); 199 int pf_test_rule(struct pf_rule **, struct pf_state **, 200 int, struct pfi_kif *, struct mbuf *, int, 201 void *, struct pf_pdesc *, struct pf_rule **, 202 struct pf_ruleset **, struct ifqueue *, struct inpcb *); 203 static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, 204 struct pf_rule *, struct pf_pdesc *, 205 struct pf_src_node *, struct pf_state_key *, 206 struct pf_state_key *, struct pf_state_key *, 207 struct pf_state_key *, struct mbuf *, int, 208 u_int16_t, u_int16_t, int *, struct pfi_kif *, 209 struct pf_state **, int, u_int16_t, u_int16_t, 210 int); 211 int pf_test_fragment(struct pf_rule **, int, 212 struct 
pfi_kif *, struct mbuf *, void *, 213 struct pf_pdesc *, struct pf_rule **, 214 struct pf_ruleset **); 215 int pf_tcp_track_full(struct pf_state_peer *, 216 struct pf_state_peer *, struct pf_state **, 217 struct pfi_kif *, struct mbuf *, int, 218 struct pf_pdesc *, u_short *, int *); 219 int pf_tcp_track_sloppy(struct pf_state_peer *, 220 struct pf_state_peer *, struct pf_state **, 221 struct pf_pdesc *, u_short *); 222 int pf_test_state_tcp(struct pf_state **, int, 223 struct pfi_kif *, struct mbuf *, int, 224 void *, struct pf_pdesc *, u_short *); 225 int pf_test_state_udp(struct pf_state **, int, 226 struct pfi_kif *, struct mbuf *, int, 227 void *, struct pf_pdesc *); 228 int pf_test_state_icmp(struct pf_state **, int, 229 struct pfi_kif *, struct mbuf *, int, 230 void *, struct pf_pdesc *, u_short *); 231 int pf_test_state_other(struct pf_state **, int, 232 struct pfi_kif *, struct mbuf *, struct pf_pdesc *); 233 void pf_step_into_anchor(int *, struct pf_ruleset **, int, 234 struct pf_rule **, struct pf_rule **, int *); 235 int pf_step_out_of_anchor(int *, struct pf_ruleset **, 236 int, struct pf_rule **, struct pf_rule **, 237 int *); 238 void pf_hash(struct pf_addr *, struct pf_addr *, 239 struct pf_poolhashkey *, sa_family_t); 240 int pf_map_addr(u_int8_t, struct pf_rule *, 241 struct pf_addr *, struct pf_addr *, 242 struct pf_addr *, struct pf_src_node **); 243 int pf_get_sport(struct pf_pdesc *, 244 sa_family_t, u_int8_t, struct pf_rule *, 245 struct pf_addr *, struct pf_addr *, 246 u_int16_t, u_int16_t, 247 struct pf_addr *, u_int16_t *, 248 u_int16_t, u_int16_t, 249 struct pf_src_node **); 250 void pf_route(struct mbuf **, struct pf_rule *, int, 251 struct ifnet *, struct pf_state *, 252 struct pf_pdesc *); 253 void pf_route6(struct mbuf **, struct pf_rule *, int, 254 struct ifnet *, struct pf_state *, 255 struct pf_pdesc *); 256 u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, 257 sa_family_t); 258 u_int16_t pf_get_mss(struct mbuf *, int, 
u_int16_t, 259 sa_family_t); 260 u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, 261 u_int16_t); 262 void pf_set_rt_ifp(struct pf_state *, 263 struct pf_addr *); 264 int pf_check_proto_cksum(struct mbuf *, int, int, 265 u_int8_t, sa_family_t); 266 struct pf_divert *pf_get_divert(struct mbuf *); 267 void pf_print_state_parts(struct pf_state *, 268 struct pf_state_key *, struct pf_state_key *); 269 int pf_addr_wrap_neq(struct pf_addr_wrap *, 270 struct pf_addr_wrap *); 271 struct pf_state *pf_find_state(struct pfi_kif *, 272 struct pf_state_key_cmp *, u_int, struct mbuf *); 273 int pf_src_connlimit(struct pf_state *); 274 int pf_check_congestion(struct ifqueue *); 275 276 extern int pf_end_threads; 277 278 struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 279 { &pf_state_pl, PFSTATE_HIWAT }, 280 { &pf_src_tree_pl, PFSNODE_HIWAT }, 281 { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, 282 { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, 283 { &pfr_kentry_pl, PFR_KENTRY_HIWAT } 284 }; 285 286 /* 287 * If route-to and direction is out we match with no further processing 288 * (rt_kif must be assigned and not equal to the out interface) 289 * If reply-to and direction is in we match with no further processing 290 * (rt_kif must be assigned and not equal to the in interface) 291 */ 292 #define STATE_LOOKUP(i, k, d, s, m) \ 293 do { \ 294 s = pf_find_state(i, k, d, m); \ 295 if (s == NULL || (s)->timeout == PFTM_PURGE) \ 296 return (PF_DROP); \ 297 if (d == PF_OUT && \ 298 (((s)->rule.ptr->rt == PF_ROUTETO && \ 299 (s)->rule.ptr->direction == PF_OUT) || \ 300 ((s)->rule.ptr->rt == PF_REPLYTO && \ 301 (s)->rule.ptr->direction == PF_IN)) && \ 302 (s)->rt_kif != NULL && \ 303 (s)->rt_kif != i) \ 304 return (PF_PASS); \ 305 } while (0) 306 307 #define BOUND_IFACE(r, k) \ 308 ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all 309 310 #define STATE_INC_COUNTERS(s) \ 311 do { \ 312 atomic_add_int(&s->rule.ptr->states_cur, 1); \ 313 s->rule.ptr->states_tot++; \ 314 if (s->anchor.ptr != NULL) { \ 315 atomic_add_int(&s->anchor.ptr->states_cur, 1); \ 316 s->anchor.ptr->states_tot++; \ 317 } \ 318 if (s->nat_rule.ptr != NULL) { \ 319 atomic_add_int(&s->nat_rule.ptr->states_cur, 1); \ 320 s->nat_rule.ptr->states_tot++; \ 321 } \ 322 } while (0) 323 324 #define STATE_DEC_COUNTERS(s) \ 325 do { \ 326 if (s->nat_rule.ptr != NULL) \ 327 atomic_add_int(&s->nat_rule.ptr->states_cur, -1); \ 328 if (s->anchor.ptr != NULL) \ 329 atomic_add_int(&s->anchor.ptr->states_cur, -1); \ 330 atomic_add_int(&s->rule.ptr->states_cur, -1); \ 331 } while (0) 332 333 static MALLOC_DEFINE(M_PFSTATEPL, "pfstatepl", "pf state pool list"); 334 static MALLOC_DEFINE(M_PFSRCTREEPL, "pfsrctpl", "pf source tree pool list"); 335 static MALLOC_DEFINE(M_PFSTATEKEYPL, "pfstatekeypl", "pf state key pool list"); 336 static MALLOC_DEFINE(M_PFSTATEITEMPL, "pfstateitempl", "pf state item pool list"); 337 338 static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 339 static __inline int pf_state_compare_key(struct pf_state_key *, 340 struct pf_state_key *); 341 static __inline int pf_state_compare_rkey(struct pf_state_key *, 342 struct pf_state_key *); 343 static __inline int pf_state_compare_id(struct pf_state *, 344 struct pf_state *); 345 346 struct pf_src_tree *tree_src_tracking; 347 struct pf_state_tree_id *tree_id; 348 struct pf_state_queue *state_list; 349 struct pf_counters *pf_counters; 350 351 RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 352 RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); 353 RB_GENERATE(pf_state_rtree, pf_state_key, entry, pf_state_compare_rkey); 354 RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); 355 356 static __inline int 357 pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 358 { 359 int diff; 
360 361 if (a->rule.ptr > b->rule.ptr) 362 return (1); 363 if (a->rule.ptr < b->rule.ptr) 364 return (-1); 365 if ((diff = a->af - b->af) != 0) 366 return (diff); 367 switch (a->af) { 368 #ifdef INET 369 case AF_INET: 370 if (a->addr.addr32[0] > b->addr.addr32[0]) 371 return (1); 372 if (a->addr.addr32[0] < b->addr.addr32[0]) 373 return (-1); 374 break; 375 #endif /* INET */ 376 #ifdef INET6 377 case AF_INET6: 378 if (a->addr.addr32[3] > b->addr.addr32[3]) 379 return (1); 380 if (a->addr.addr32[3] < b->addr.addr32[3]) 381 return (-1); 382 if (a->addr.addr32[2] > b->addr.addr32[2]) 383 return (1); 384 if (a->addr.addr32[2] < b->addr.addr32[2]) 385 return (-1); 386 if (a->addr.addr32[1] > b->addr.addr32[1]) 387 return (1); 388 if (a->addr.addr32[1] < b->addr.addr32[1]) 389 return (-1); 390 if (a->addr.addr32[0] > b->addr.addr32[0]) 391 return (1); 392 if (a->addr.addr32[0] < b->addr.addr32[0]) 393 return (-1); 394 break; 395 #endif /* INET6 */ 396 } 397 return (0); 398 } 399 400 u_int32_t 401 pf_state_hash(struct pf_state_key *sk) 402 { 403 u_int32_t hv = (u_int32_t)(((intptr_t)sk >> 6) ^ ((intptr_t)sk >> 15)); 404 if (hv == 0) /* disallow 0 */ 405 hv = 1; 406 return(hv); 407 } 408 409 #ifdef INET6 410 void 411 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 412 { 413 switch (af) { 414 #ifdef INET 415 case AF_INET: 416 dst->addr32[0] = src->addr32[0]; 417 break; 418 #endif /* INET */ 419 case AF_INET6: 420 dst->addr32[0] = src->addr32[0]; 421 dst->addr32[1] = src->addr32[1]; 422 dst->addr32[2] = src->addr32[2]; 423 dst->addr32[3] = src->addr32[3]; 424 break; 425 } 426 } 427 #endif /* INET6 */ 428 429 void 430 pf_init_threshold(struct pf_threshold *threshold, 431 u_int32_t limit, u_int32_t seconds) 432 { 433 threshold->limit = limit * PF_THRESHOLD_MULT; 434 threshold->seconds = seconds; 435 threshold->count = 0; 436 threshold->last = time_second; 437 } 438 439 void 440 pf_add_threshold(struct pf_threshold *threshold) 441 { 442 u_int32_t t = 
time_second, diff = t - threshold->last; 443 444 if (diff >= threshold->seconds) 445 threshold->count = 0; 446 else 447 threshold->count -= threshold->count * diff / 448 threshold->seconds; 449 threshold->count += PF_THRESHOLD_MULT; 450 threshold->last = t; 451 } 452 453 int 454 pf_check_threshold(struct pf_threshold *threshold) 455 { 456 return (threshold->count > threshold->limit); 457 } 458 459 int 460 pf_src_connlimit(struct pf_state *state) 461 { 462 int bad = 0; 463 int cpu = mycpu->gd_cpuid; 464 465 atomic_add_int(&state->src_node->conn, 1); 466 state->src.tcp_est = 1; 467 pf_add_threshold(&state->src_node->conn_rate); 468 469 if (state->rule.ptr->max_src_conn && 470 state->rule.ptr->max_src_conn < 471 state->src_node->conn) { 472 PF_INC_LCOUNTER(LCNT_SRCCONN); 473 bad++; 474 } 475 476 if (state->rule.ptr->max_src_conn_rate.limit && 477 pf_check_threshold(&state->src_node->conn_rate)) { 478 PF_INC_LCOUNTER(LCNT_SRCCONNRATE); 479 bad++; 480 } 481 482 if (!bad) 483 return 0; 484 485 if (state->rule.ptr->overload_tbl) { 486 struct pfr_addr p; 487 u_int32_t killed = 0; 488 489 PF_INC_LCOUNTER(LCNT_OVERLOAD_TABLE); 490 if (pf_status.debug >= PF_DEBUG_MISC) { 491 kprintf("pf_src_connlimit: blocking address "); 492 pf_print_host(&state->src_node->addr, 0, 493 state->key[PF_SK_WIRE]->af); 494 } 495 496 bzero(&p, sizeof(p)); 497 p.pfra_af = state->key[PF_SK_WIRE]->af; 498 switch (state->key[PF_SK_WIRE]->af) { 499 #ifdef INET 500 case AF_INET: 501 p.pfra_net = 32; 502 p.pfra_ip4addr = state->src_node->addr.v4; 503 break; 504 #endif /* INET */ 505 #ifdef INET6 506 case AF_INET6: 507 p.pfra_net = 128; 508 p.pfra_ip6addr = state->src_node->addr.v6; 509 break; 510 #endif /* INET6 */ 511 } 512 513 pfr_insert_kentry(state->rule.ptr->overload_tbl, 514 &p, time_second); 515 516 /* kill existing states if that's required. 
*/ 517 if (state->rule.ptr->flush) { 518 struct pf_state_key *sk; 519 struct pf_state *st; 520 521 PF_INC_LCOUNTER(LCNT_OVERLOAD_FLUSH); 522 RB_FOREACH(st, pf_state_tree_id, &tree_id[cpu]) { 523 sk = st->key[PF_SK_WIRE]; 524 /* 525 * Kill states from this source. (Only those 526 * from the same rule if PF_FLUSH_GLOBAL is not 527 * set). (Only on current cpu). 528 */ 529 if (sk->af == 530 state->key[PF_SK_WIRE]->af && 531 ((state->direction == PF_OUT && 532 PF_AEQ(&state->src_node->addr, 533 &sk->addr[0], sk->af)) || 534 (state->direction == PF_IN && 535 PF_AEQ(&state->src_node->addr, 536 &sk->addr[1], sk->af))) && 537 (state->rule.ptr->flush & 538 PF_FLUSH_GLOBAL || 539 state->rule.ptr == st->rule.ptr)) { 540 st->timeout = PFTM_PURGE; 541 st->src.state = st->dst.state = 542 TCPS_CLOSED; 543 killed++; 544 } 545 } 546 if (pf_status.debug >= PF_DEBUG_MISC) 547 kprintf(", %u states killed", killed); 548 } 549 if (pf_status.debug >= PF_DEBUG_MISC) 550 kprintf("\n"); 551 } 552 553 /* kill this state */ 554 state->timeout = PFTM_PURGE; 555 state->src.state = state->dst.state = TCPS_CLOSED; 556 557 return 1; 558 } 559 560 int 561 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 562 struct pf_addr *src, sa_family_t af) 563 { 564 struct pf_src_node k; 565 int cpu = mycpu->gd_cpuid; 566 567 bzero(&k, sizeof(k)); /* avoid gcc warnings */ 568 if (*sn == NULL) { 569 k.af = af; 570 PF_ACPY(&k.addr, src, af); 571 if (rule->rule_flag & PFRULE_RULESRCTRACK || 572 rule->rpool.opts & PF_POOL_STICKYADDR) 573 k.rule.ptr = rule; 574 else 575 k.rule.ptr = NULL; 576 PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH); 577 *sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k); 578 } 579 if (*sn == NULL) { 580 if (!rule->max_src_nodes || 581 rule->src_nodes < rule->max_src_nodes) 582 (*sn) = kmalloc(sizeof(struct pf_src_node), 583 M_PFSRCTREEPL, M_NOWAIT|M_ZERO); 584 else 585 PF_INC_LCOUNTER(LCNT_SRCNODES); 586 if ((*sn) == NULL) 587 return (-1); 588 589 
pf_init_threshold(&(*sn)->conn_rate, 590 rule->max_src_conn_rate.limit, 591 rule->max_src_conn_rate.seconds); 592 593 (*sn)->af = af; 594 if (rule->rule_flag & PFRULE_RULESRCTRACK || 595 rule->rpool.opts & PF_POOL_STICKYADDR) 596 (*sn)->rule.ptr = rule; 597 else 598 (*sn)->rule.ptr = NULL; 599 PF_ACPY(&(*sn)->addr, src, af); 600 if (RB_INSERT(pf_src_tree, 601 &tree_src_tracking[cpu], *sn) != NULL) { 602 if (pf_status.debug >= PF_DEBUG_MISC) { 603 kprintf("pf: src_tree insert failed: "); 604 pf_print_host(&(*sn)->addr, 0, af); 605 kprintf("\n"); 606 } 607 kfree(*sn, M_PFSRCTREEPL); 608 return (-1); 609 } 610 611 /* 612 * Atomic op required to increment src_nodes in the rule 613 * because we hold a shared token here (decrements will use 614 * an exclusive token). 615 */ 616 (*sn)->creation = time_second; 617 (*sn)->ruletype = rule->action; 618 if ((*sn)->rule.ptr != NULL) 619 atomic_add_int(&(*sn)->rule.ptr->src_nodes, 1); 620 PF_INC_SCOUNTER(SCNT_SRC_NODE_INSERT); 621 atomic_add_int(&pf_status.src_nodes, 1); 622 } else { 623 if (rule->max_src_states && 624 (*sn)->states >= rule->max_src_states) { 625 PF_INC_LCOUNTER(LCNT_SRCSTATES); 626 return (-1); 627 } 628 } 629 return (0); 630 } 631 632 /* 633 * state table (indexed by the pf_state_key structure), normal RBTREE 634 * comparison. 
635 */ 636 static __inline int 637 pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b) 638 { 639 int diff; 640 641 if ((diff = a->proto - b->proto) != 0) 642 return (diff); 643 if ((diff = a->af - b->af) != 0) 644 return (diff); 645 switch (a->af) { 646 #ifdef INET 647 case AF_INET: 648 if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) 649 return (1); 650 if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) 651 return (-1); 652 if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) 653 return (1); 654 if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) 655 return (-1); 656 break; 657 #endif /* INET */ 658 #ifdef INET6 659 case AF_INET6: 660 if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) 661 return (1); 662 if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) 663 return (-1); 664 if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) 665 return (1); 666 if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) 667 return (-1); 668 if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) 669 return (1); 670 if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) 671 return (-1); 672 if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) 673 return (1); 674 if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) 675 return (-1); 676 if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) 677 return (1); 678 if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) 679 return (-1); 680 if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) 681 return (1); 682 if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) 683 return (-1); 684 if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) 685 return (1); 686 if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) 687 return (-1); 688 if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) 689 return (1); 690 if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) 691 return (-1); 692 break; 693 #endif /* INET6 */ 694 } 695 696 if ((diff = a->port[0] - b->port[0]) != 0) 697 return (diff); 698 if ((diff = a->port[1] - b->port[1]) != 0) 699 return (diff); 700 701 return (0); 702 } 703 704 /* 705 * Used for RB_FIND 
only, compare in the reverse direction. The 706 * element to be reversed is always (a), since we obviously can't 707 * reverse the state tree depicted by (b). 708 */ 709 static __inline int 710 pf_state_compare_rkey(struct pf_state_key *a, struct pf_state_key *b) 711 { 712 int diff; 713 714 if ((diff = a->proto - b->proto) != 0) 715 return (diff); 716 if ((diff = a->af - b->af) != 0) 717 return (diff); 718 switch (a->af) { 719 #ifdef INET 720 case AF_INET: 721 if (a->addr[1].addr32[0] > b->addr[0].addr32[0]) 722 return (1); 723 if (a->addr[1].addr32[0] < b->addr[0].addr32[0]) 724 return (-1); 725 if (a->addr[0].addr32[0] > b->addr[1].addr32[0]) 726 return (1); 727 if (a->addr[0].addr32[0] < b->addr[1].addr32[0]) 728 return (-1); 729 break; 730 #endif /* INET */ 731 #ifdef INET6 732 case AF_INET6: 733 if (a->addr[1].addr32[3] > b->addr[0].addr32[3]) 734 return (1); 735 if (a->addr[1].addr32[3] < b->addr[0].addr32[3]) 736 return (-1); 737 if (a->addr[0].addr32[3] > b->addr[1].addr32[3]) 738 return (1); 739 if (a->addr[0].addr32[3] < b->addr[1].addr32[3]) 740 return (-1); 741 if (a->addr[1].addr32[2] > b->addr[0].addr32[2]) 742 return (1); 743 if (a->addr[1].addr32[2] < b->addr[0].addr32[2]) 744 return (-1); 745 if (a->addr[0].addr32[2] > b->addr[1].addr32[2]) 746 return (1); 747 if (a->addr[0].addr32[2] < b->addr[1].addr32[2]) 748 return (-1); 749 if (a->addr[1].addr32[1] > b->addr[0].addr32[1]) 750 return (1); 751 if (a->addr[1].addr32[1] < b->addr[0].addr32[1]) 752 return (-1); 753 if (a->addr[0].addr32[1] > b->addr[1].addr32[1]) 754 return (1); 755 if (a->addr[0].addr32[1] < b->addr[1].addr32[1]) 756 return (-1); 757 if (a->addr[1].addr32[0] > b->addr[0].addr32[0]) 758 return (1); 759 if (a->addr[1].addr32[0] < b->addr[0].addr32[0]) 760 return (-1); 761 if (a->addr[0].addr32[0] > b->addr[1].addr32[0]) 762 return (1); 763 if (a->addr[0].addr32[0] < b->addr[1].addr32[0]) 764 return (-1); 765 break; 766 #endif /* INET6 */ 767 } 768 769 if ((diff = a->port[1] - 
b->port[0]) != 0) 770 return (diff); 771 if ((diff = a->port[0] - b->port[1]) != 0) 772 return (diff); 773 774 return (0); 775 } 776 777 static __inline int 778 pf_state_compare_id(struct pf_state *a, struct pf_state *b) 779 { 780 if (a->id > b->id) 781 return (1); 782 if (a->id < b->id) 783 return (-1); 784 if (a->creatorid > b->creatorid) 785 return (1); 786 if (a->creatorid < b->creatorid) 787 return (-1); 788 789 return (0); 790 } 791 792 int 793 pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) 794 { 795 struct pf_state_item *si; 796 struct pf_state_key *cur; 797 int cpu; 798 int error; 799 800 /* 801 * PFSTATE_STACK_GLOBAL is set when the state might not hash to the 802 * current cpu. The keys are managed on the global statetbl tree 803 * for this case. Only translations (RDR, NAT) can cause this. 804 * 805 * When this flag is not set we must still check the global statetbl 806 * for a collision, and if we find one we set the HALF_DUPLEX flag 807 * in the state. 808 */ 809 if (s->state_flags & PFSTATE_STACK_GLOBAL) { 810 cpu = ncpus; 811 lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE); 812 } else { 813 cpu = mycpu->gd_cpuid; 814 lockmgr(&pf_global_statetbl_lock, LK_SHARED); 815 } 816 KKASSERT(s->key[idx] == NULL); /* XXX handle this? */ 817 818 if (pf_status.debug >= PF_DEBUG_MISC) { 819 kprintf("state_key attach cpu %d (%08x:%d) %s (%08x:%d)\n", 820 cpu, 821 ntohl(sk->addr[0].addr32[0]), ntohs(sk->port[0]), 822 (idx == PF_SK_WIRE ? "->" : "<-"), 823 ntohl(sk->addr[1].addr32[0]), ntohs(sk->port[1])); 824 } 825 826 /* 827 * Check whether (e.g.) a PASS rule being put on a per-cpu tree 828 * collides with a translation rule on the global tree. This is 829 * NOT an error. We *WANT* to establish state for this case so the 830 * packet path is short-cutted and doesn't need to scan the ruleset 831 * on every packet. But the established state will only see one 832 * side of a two-way packet conversation. 
To prevent this from 833 * causing problems (e.g. generating a RST), we force PFSTATE_SLOPPY 834 * to be set on the established state. 835 * 836 * A collision against RDR state can only occur with a PASS IN in the 837 * opposite direction or a PASS OUT in the forwards direction. This 838 * is because RDRs are processed on the input side. 839 * 840 * A collision against NAT state can only occur with a PASS IN in the 841 * forwards direction or a PASS OUT in the opposite direction. This 842 * is because NATs are processed on the output side. 843 * 844 * In both situations we need to do a reverse addr/port test because 845 * the PASS IN or PASS OUT only establishes if it doesn't match the 846 * established RDR state in the forwards direction. The direction 847 * flag has to be ignored (it will be one way for a PASS IN and the 848 * other way for a PASS OUT). 849 * 850 * pf_global_statetbl_lock will be locked shared when testing and 851 * not entering into the global state table. 852 */ 853 if (cpu != ncpus && 854 (cur = RB_FIND(pf_state_rtree, 855 (struct pf_state_rtree *)&pf_statetbl[ncpus], 856 sk)) != NULL) { 857 TAILQ_FOREACH(si, &cur->states, entry) { 858 /* 859 * NOTE: We must ignore direction mismatches. 860 */ 861 if (si->s->kif == s->kif) { 862 s->state_flags |= PFSTATE_HALF_DUPLEX | 863 PFSTATE_SLOPPY; 864 if (pf_status.debug >= PF_DEBUG_MISC) { 865 kprintf( 866 "pf: %s key attach collision " 867 "on %s: ", 868 (idx == PF_SK_WIRE) ? 869 "wire" : "stack", 870 s->kif->pfik_name); 871 pf_print_state_parts(s, 872 (idx == PF_SK_WIRE) ? sk : NULL, 873 (idx == PF_SK_STACK) ? sk : NULL); 874 kprintf("\n"); 875 } 876 break; 877 } 878 } 879 } 880 881 /* 882 * Enter into either the per-cpu or the global state table. 883 * 884 * pf_global_statetbl_lock will be locked exclusively when entering 885 * into the global state table. 886 */ 887 if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl[cpu], sk)) != NULL) { 888 /* key exists. 
check for same kif, if none, add to key */ 889 TAILQ_FOREACH(si, &cur->states, entry) { 890 if (si->s->kif == s->kif && 891 si->s->direction == s->direction) { 892 if (pf_status.debug >= PF_DEBUG_MISC) { 893 kprintf( 894 "pf: %s key attach failed on %s: ", 895 (idx == PF_SK_WIRE) ? 896 "wire" : "stack", 897 s->kif->pfik_name); 898 pf_print_state_parts(s, 899 (idx == PF_SK_WIRE) ? sk : NULL, 900 (idx == PF_SK_STACK) ? sk : NULL); 901 kprintf("\n"); 902 } 903 kfree(sk, M_PFSTATEKEYPL); 904 error = -1; 905 goto failed; /* collision! */ 906 } 907 } 908 kfree(sk, M_PFSTATEKEYPL); 909 910 s->key[idx] = cur; 911 } else { 912 s->key[idx] = sk; 913 } 914 915 if ((si = kmalloc(sizeof(struct pf_state_item), 916 M_PFSTATEITEMPL, M_NOWAIT)) == NULL) { 917 pf_state_key_detach(s, idx); 918 error = -1; 919 goto failed; /* collision! */ 920 } 921 si->s = s; 922 923 /* list is sorted, if-bound states before floating */ 924 if (s->kif == pfi_all) 925 TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); 926 else 927 TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); 928 929 error = 0; 930 failed: 931 lockmgr(&pf_global_statetbl_lock, LK_RELEASE); 932 return error; 933 } 934 935 /* 936 * NOTE: Can only be called indirectly via the purge thread with pf_token 937 * exclusively locked. 938 */ 939 void 940 pf_detach_state(struct pf_state *s) 941 { 942 if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) 943 s->key[PF_SK_WIRE] = NULL; 944 945 if (s->key[PF_SK_STACK] != NULL) 946 pf_state_key_detach(s, PF_SK_STACK); 947 948 if (s->key[PF_SK_WIRE] != NULL) 949 pf_state_key_detach(s, PF_SK_WIRE); 950 } 951 952 /* 953 * NOTE: Can only be called indirectly via the purge thread with pf_token 954 * exclusively locked. 
955 */ 956 void 957 pf_state_key_detach(struct pf_state *s, int idx) 958 { 959 struct pf_state_item *si; 960 int cpu; 961 962 /* 963 * PFSTATE_STACK_GLOBAL is set for translations when the translated 964 * address/port is not localized to the same cpu that the untranslated 965 * address/port is on. The wire pf_state_key is managed on the global 966 * statetbl tree for this case. 967 */ 968 if (s->state_flags & PFSTATE_STACK_GLOBAL) { 969 cpu = ncpus; 970 lockmgr(&pf_global_statetbl_lock, LK_EXCLUSIVE); 971 } else { 972 cpu = mycpu->gd_cpuid; 973 } 974 975 si = TAILQ_FIRST(&s->key[idx]->states); 976 while (si && si->s != s) 977 si = TAILQ_NEXT(si, entry); 978 979 if (si) { 980 TAILQ_REMOVE(&s->key[idx]->states, si, entry); 981 kfree(si, M_PFSTATEITEMPL); 982 } 983 984 if (TAILQ_EMPTY(&s->key[idx]->states)) { 985 RB_REMOVE(pf_state_tree, &pf_statetbl[cpu], s->key[idx]); 986 if (s->key[idx]->reverse) 987 s->key[idx]->reverse->reverse = NULL; 988 if (s->key[idx]->inp) 989 s->key[idx]->inp->inp_pf_sk = NULL; 990 kfree(s->key[idx], M_PFSTATEKEYPL); 991 } 992 s->key[idx] = NULL; 993 994 if (s->state_flags & PFSTATE_STACK_GLOBAL) 995 lockmgr(&pf_global_statetbl_lock, LK_RELEASE); 996 } 997 998 struct pf_state_key * 999 pf_alloc_state_key(int pool_flags) 1000 { 1001 struct pf_state_key *sk; 1002 1003 sk = kmalloc(sizeof(struct pf_state_key), M_PFSTATEKEYPL, pool_flags); 1004 if (sk) { 1005 TAILQ_INIT(&sk->states); 1006 } 1007 return (sk); 1008 } 1009 1010 int 1011 pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, 1012 struct pf_state_key **skw, struct pf_state_key **sks, 1013 struct pf_state_key **skp, struct pf_state_key **nkp, 1014 struct pf_addr *saddr, struct pf_addr *daddr, 1015 u_int16_t sport, u_int16_t dport) 1016 { 1017 KKASSERT((*skp == NULL && *nkp == NULL)); 1018 1019 if ((*skp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL) 1020 return (ENOMEM); 1021 1022 PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); 1023 PF_ACPY(&(*skp)->addr[pd->didx], daddr, 
pd->af); 1024 (*skp)->port[pd->sidx] = sport; 1025 (*skp)->port[pd->didx] = dport; 1026 (*skp)->proto = pd->proto; 1027 (*skp)->af = pd->af; 1028 1029 if (nr != NULL) { 1030 if ((*nkp = pf_alloc_state_key(M_NOWAIT | M_ZERO)) == NULL) 1031 return (ENOMEM); /* caller must handle cleanup */ 1032 1033 /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ 1034 PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); 1035 PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); 1036 (*nkp)->port[0] = (*skp)->port[0]; 1037 (*nkp)->port[1] = (*skp)->port[1]; 1038 (*nkp)->proto = pd->proto; 1039 (*nkp)->af = pd->af; 1040 } else { 1041 *nkp = *skp; 1042 } 1043 1044 if (pd->dir == PF_IN) { 1045 *skw = *skp; 1046 *sks = *nkp; 1047 } else { 1048 *sks = *skp; 1049 *skw = *nkp; 1050 } 1051 return (0); 1052 } 1053 1054 /* 1055 * Insert pf_state with one or two state keys (allowing a reverse path lookup 1056 * which is used by NAT). In the NAT case skw is the initiator (?) and 1057 * sks is the target. 1058 */ 1059 int 1060 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, 1061 struct pf_state_key *sks, struct pf_state *s) 1062 { 1063 int cpu = mycpu->gd_cpuid; 1064 1065 s->kif = kif; 1066 s->cpuid = cpu; 1067 1068 if (skw == sks) { 1069 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) 1070 return (-1); 1071 s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; 1072 } else { 1073 /* 1074 skw->reverse = sks; 1075 sks->reverse = skw; 1076 */ 1077 if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { 1078 kfree(sks, M_PFSTATEKEYPL); 1079 return (-1); 1080 } 1081 if (pf_state_key_attach(sks, s, PF_SK_STACK)) { 1082 pf_state_key_detach(s, PF_SK_WIRE); 1083 return (-1); 1084 } 1085 } 1086 1087 if (s->id == 0 && s->creatorid == 0) { 1088 u_int64_t sid; 1089 1090 sid = atomic_fetchadd_long(&pf_status.stateid, 1); 1091 s->id = htobe64(sid); 1092 s->creatorid = pf_status.hostid; 1093 } 1094 1095 /* 1096 * Calculate hash code for altq 1097 */ 1098 s->hash = crc32(s->key[PF_SK_WIRE], 
PF_STATE_KEY_HASH_LENGTH); 1099 1100 if (RB_INSERT(pf_state_tree_id, &tree_id[cpu], s) != NULL) { 1101 if (pf_status.debug >= PF_DEBUG_MISC) { 1102 kprintf("pf: state insert failed: " 1103 "id: %016jx creatorid: %08x", 1104 (uintmax_t)be64toh(s->id), ntohl(s->creatorid)); 1105 if (s->sync_flags & PFSTATE_FROMSYNC) 1106 kprintf(" (from sync)"); 1107 kprintf("\n"); 1108 } 1109 pf_detach_state(s); 1110 return (-1); 1111 } 1112 TAILQ_INSERT_TAIL(&state_list[cpu], s, entry_list); 1113 PF_INC_FCOUNTER(FCNT_STATE_INSERT); 1114 atomic_add_int(&pf_status.states, 1); 1115 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 1116 pfsync_insert_state(s); 1117 return (0); 1118 } 1119 1120 struct pf_state * 1121 pf_find_state_byid(struct pf_state_cmp *key) 1122 { 1123 int cpu = mycpu->gd_cpuid; 1124 1125 PF_INC_FCOUNTER(FCNT_STATE_SEARCH); 1126 1127 return (RB_FIND(pf_state_tree_id, &tree_id[cpu], 1128 (struct pf_state *)key)); 1129 } 1130 1131 /* 1132 * WARNING! May return a state structure that was localized to another cpu, 1133 * destruction is typically protected by the callers pf_token. 
1134 * The element can only be destroyed 1135 */ 1136 struct pf_state * 1137 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, 1138 struct mbuf *m) 1139 { 1140 struct pf_state_key *skey = (void *)key; 1141 struct pf_state_key *sk; 1142 struct pf_state_item *si; 1143 struct pf_state *s; 1144 int cpu = mycpu->gd_cpuid; 1145 int globalstl = 0; 1146 1147 PF_INC_FCOUNTER(FCNT_STATE_SEARCH); 1148 1149 if (dir == PF_OUT && m->m_pkthdr.pf.statekey && 1150 ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) { 1151 sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; 1152 } else { 1153 sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey); 1154 if (sk == NULL) { 1155 lockmgr(&pf_global_statetbl_lock, LK_SHARED); 1156 sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey); 1157 if (sk == NULL) { 1158 lockmgr(&pf_global_statetbl_lock, LK_RELEASE); 1159 return (NULL); 1160 } 1161 globalstl = 1; 1162 } 1163 if (dir == PF_OUT && m->m_pkthdr.pf.statekey) { 1164 ((struct pf_state_key *) 1165 m->m_pkthdr.pf.statekey)->reverse = sk; 1166 sk->reverse = m->m_pkthdr.pf.statekey; 1167 } 1168 } 1169 if (dir == PF_OUT) 1170 m->m_pkthdr.pf.statekey = NULL; 1171 1172 /* list is sorted, if-bound states before floating ones */ 1173 TAILQ_FOREACH(si, &sk->states, entry) { 1174 if ((si->s->kif == pfi_all || si->s->kif == kif) && 1175 sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 1176 si->s->key[PF_SK_STACK])) { 1177 break; 1178 } 1179 } 1180 1181 /* 1182 * Extract state before potentially releasing the global statetbl 1183 * lock. Ignore the state if the create is still in-progress as 1184 * it can be deleted out from under us by the owning localized cpu. 1185 * However, if CREATEINPROG is not set, state can only be deleted 1186 * by the purge thread which we are protected from via our shared 1187 * pf_token. 
1188 */ 1189 if (si) { 1190 s = si->s; 1191 if (s && (s->state_flags & PFSTATE_CREATEINPROG)) 1192 s = NULL; 1193 } else { 1194 s = NULL; 1195 } 1196 if (globalstl) 1197 lockmgr(&pf_global_statetbl_lock, LK_RELEASE); 1198 return s; 1199 } 1200 1201 /* 1202 * WARNING! May return a state structure that was localized to another cpu, 1203 * destruction is typically protected by the callers pf_token. 1204 */ 1205 struct pf_state * 1206 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1207 { 1208 struct pf_state_key *skey = (void *)key; 1209 struct pf_state_key *sk; 1210 struct pf_state_item *si, *ret = NULL; 1211 struct pf_state *s; 1212 int cpu = mycpu->gd_cpuid; 1213 int globalstl = 0; 1214 1215 PF_INC_FCOUNTER(FCNT_STATE_SEARCH); 1216 1217 sk = RB_FIND(pf_state_tree, &pf_statetbl[cpu], skey); 1218 if (sk == NULL) { 1219 lockmgr(&pf_global_statetbl_lock, LK_SHARED); 1220 sk = RB_FIND(pf_state_tree, &pf_statetbl[ncpus], skey); 1221 globalstl = 1; 1222 } 1223 if (sk != NULL) { 1224 TAILQ_FOREACH(si, &sk->states, entry) 1225 if (dir == PF_INOUT || 1226 (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : 1227 si->s->key[PF_SK_STACK]))) { 1228 if (more == NULL) { 1229 ret = si; 1230 break; 1231 } 1232 if (ret) 1233 (*more)++; 1234 else 1235 ret = si; 1236 } 1237 } 1238 1239 /* 1240 * Extract state before potentially releasing the global statetbl 1241 * lock. Ignore the state if the create is still in-progress as 1242 * it can be deleted out from under us by the owning localized cpu. 1243 * However, if CREATEINPROG is not set, state can only be deleted 1244 * by the purge thread which we are protected from via our shared 1245 * pf_token. 
1246 */ 1247 if (ret) { 1248 s = ret->s; 1249 if (s && (s->state_flags & PFSTATE_CREATEINPROG)) 1250 s = NULL; 1251 } else { 1252 s = NULL; 1253 } 1254 if (globalstl) 1255 lockmgr(&pf_global_statetbl_lock, LK_RELEASE); 1256 return s; 1257 } 1258 1259 /* END state table stuff */ 1260 1261 void 1262 pf_purge_thread(void *v) 1263 { 1264 globaldata_t save_gd = mycpu; 1265 int nloops = 0; 1266 int locked = 0; 1267 int nn; 1268 int endingit; 1269 1270 for (;;) { 1271 tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz); 1272 1273 endingit = pf_end_threads; 1274 1275 for (nn = 0; nn < ncpus; ++nn) { 1276 lwkt_setcpu_self(globaldata_find(nn)); 1277 1278 lwkt_gettoken(&pf_token); 1279 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 1280 crit_enter(); 1281 1282 /* 1283 * process a fraction of the state table every second 1284 */ 1285 if(!pf_purge_expired_states( 1286 1 + (pf_status.states / 1287 pf_default_rule.timeout[ 1288 PFTM_INTERVAL]), 0)) { 1289 pf_purge_expired_states( 1290 1 + (pf_status.states / 1291 pf_default_rule.timeout[ 1292 PFTM_INTERVAL]), 1); 1293 } 1294 1295 /* 1296 * purge other expired types every PFTM_INTERVAL 1297 * seconds 1298 */ 1299 if (++nloops >= 1300 pf_default_rule.timeout[PFTM_INTERVAL]) { 1301 pf_purge_expired_fragments(); 1302 if (!pf_purge_expired_src_nodes(locked)) { 1303 pf_purge_expired_src_nodes(1); 1304 } 1305 nloops = 0; 1306 } 1307 1308 /* 1309 * If terminating the thread, clean everything out 1310 * (on all cpus). 
1311 */ 1312 if (endingit) { 1313 pf_purge_expired_states(pf_status.states, 0); 1314 pf_purge_expired_fragments(); 1315 pf_purge_expired_src_nodes(1); 1316 } 1317 1318 crit_exit(); 1319 lockmgr(&pf_consistency_lock, LK_RELEASE); 1320 lwkt_reltoken(&pf_token); 1321 } 1322 lwkt_setcpu_self(save_gd); 1323 if (endingit) 1324 break; 1325 } 1326 1327 /* 1328 * Thread termination 1329 */ 1330 pf_end_threads++; 1331 wakeup(pf_purge_thread); 1332 kthread_exit(); 1333 } 1334 1335 u_int32_t 1336 pf_state_expires(const struct pf_state *state) 1337 { 1338 u_int32_t timeout; 1339 u_int32_t start; 1340 u_int32_t end; 1341 u_int32_t states; 1342 1343 /* handle all PFTM_* > PFTM_MAX here */ 1344 if (state->timeout == PFTM_PURGE) 1345 return (time_second); 1346 if (state->timeout == PFTM_UNTIL_PACKET) 1347 return (0); 1348 KKASSERT(state->timeout != PFTM_UNLINKED); 1349 KKASSERT(state->timeout < PFTM_MAX); 1350 timeout = state->rule.ptr->timeout[state->timeout]; 1351 if (!timeout) 1352 timeout = pf_default_rule.timeout[state->timeout]; 1353 start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 1354 if (start) { 1355 end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 1356 states = state->rule.ptr->states_cur; 1357 } else { 1358 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 1359 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 1360 states = pf_status.states; 1361 } 1362 1363 /* 1364 * If the number of states exceeds allowed values, adaptively 1365 * timeout the state more quickly. This can be very dangerous 1366 * to legitimate connections, however, so defray the timeout 1367 * based on the packet count. 1368 * 1369 * Retain from 0-100% based on number of states. 1370 * 1371 * Recover up to 50% of the lost portion if there was 1372 * packet traffic (100 pkts = 50%). 
1373 */ 1374 if (end && states > start && start < end) { 1375 u_int32_t n; /* timeout retention 0-100% */ 1376 u_int64_t pkts; 1377 #if 0 1378 static struct krate boorate = { .freq = 1 }; 1379 #endif 1380 1381 /* 1382 * Reduce timeout by n% (0-100) 1383 */ 1384 n = (states - start) * 100 / (end - start); 1385 if (n > 100) 1386 n = 0; 1387 else 1388 n = 100 - n; 1389 1390 /* 1391 * But claw back some of the reduction based on packet 1392 * count associated with the state. 1393 */ 1394 pkts = state->packets[0] + state->packets[1]; 1395 if (pkts > 100) 1396 pkts = 100; 1397 #if 0 1398 krateprintf(&boorate, "timeout %-4u n=%u pkts=%-3lu -> %lu\n", 1399 timeout, n, pkts, n + (100 - n) * pkts / 200); 1400 #endif 1401 1402 n += (100 - n) * pkts / 200; /* recover by up-to 50% */ 1403 timeout = timeout * n / 100; 1404 1405 } 1406 return (state->expire + timeout); 1407 } 1408 1409 /* 1410 * (called with exclusive pf_token) 1411 */ 1412 int 1413 pf_purge_expired_src_nodes(int waslocked) 1414 { 1415 struct pf_src_node *cur, *next; 1416 int locked = waslocked; 1417 int cpu = mycpu->gd_cpuid; 1418 1419 for (cur = RB_MIN(pf_src_tree, &tree_src_tracking[cpu]); 1420 cur; 1421 cur = next) { 1422 next = RB_NEXT(pf_src_tree, &tree_src_tracking[cpu], cur); 1423 1424 if (cur->states <= 0 && cur->expire <= time_second) { 1425 if (!locked) { 1426 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 1427 next = RB_NEXT(pf_src_tree, 1428 &tree_src_tracking[cpu], cur); 1429 locked = 1; 1430 } 1431 if (cur->rule.ptr != NULL) { 1432 /* 1433 * decrements in rule should be ok, token is 1434 * held exclusively in this code path. 
1435 */ 1436 atomic_add_int(&cur->rule.ptr->src_nodes, -1); 1437 if (cur->rule.ptr->states_cur <= 0 && 1438 cur->rule.ptr->max_src_nodes <= 0) 1439 pf_rm_rule(NULL, cur->rule.ptr); 1440 } 1441 RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], cur); 1442 PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS); 1443 atomic_add_int(&pf_status.src_nodes, -1); 1444 kfree(cur, M_PFSRCTREEPL); 1445 } 1446 } 1447 if (locked && !waslocked) 1448 lockmgr(&pf_consistency_lock, LK_RELEASE); 1449 return(1); 1450 } 1451 1452 void 1453 pf_src_tree_remove_state(struct pf_state *s) 1454 { 1455 u_int32_t timeout; 1456 1457 if (s->src_node != NULL) { 1458 if (s->src.tcp_est) 1459 atomic_add_int(&s->src_node->conn, -1); 1460 if (--s->src_node->states <= 0) { 1461 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1462 if (!timeout) { 1463 timeout = 1464 pf_default_rule.timeout[PFTM_SRC_NODE]; 1465 } 1466 s->src_node->expire = time_second + timeout; 1467 } 1468 } 1469 if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { 1470 if (--s->nat_src_node->states <= 0) { 1471 timeout = s->rule.ptr->timeout[PFTM_SRC_NODE]; 1472 if (!timeout) 1473 timeout = 1474 pf_default_rule.timeout[PFTM_SRC_NODE]; 1475 s->nat_src_node->expire = time_second + timeout; 1476 } 1477 } 1478 s->src_node = s->nat_src_node = NULL; 1479 } 1480 1481 /* callers should be at crit_enter() */ 1482 void 1483 pf_unlink_state(struct pf_state *cur) 1484 { 1485 int cpu = mycpu->gd_cpuid; 1486 1487 if (cur->src.state == PF_TCPS_PROXY_DST) { 1488 /* XXX wire key the right one? 
*/ 1489 pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, 1490 &cur->key[PF_SK_WIRE]->addr[1], 1491 &cur->key[PF_SK_WIRE]->addr[0], 1492 cur->key[PF_SK_WIRE]->port[1], 1493 cur->key[PF_SK_WIRE]->port[0], 1494 cur->src.seqhi, cur->src.seqlo + 1, 1495 TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); 1496 } 1497 RB_REMOVE(pf_state_tree_id, &tree_id[cpu], cur); 1498 if (cur->creatorid == pf_status.hostid) 1499 pfsync_delete_state(cur); 1500 cur->timeout = PFTM_UNLINKED; 1501 pf_src_tree_remove_state(cur); 1502 pf_detach_state(cur); 1503 } 1504 1505 /* 1506 * callers should be at crit_enter() and hold pf_consistency_lock exclusively. 1507 * pf_token must also be held exclusively. 1508 */ 1509 void 1510 pf_free_state(struct pf_state *cur) 1511 { 1512 int cpu = mycpu->gd_cpuid; 1513 1514 KKASSERT(cur->cpuid == cpu); 1515 1516 if (pfsyncif != NULL && 1517 (pfsyncif->sc_bulk_send_next == cur || 1518 pfsyncif->sc_bulk_terminator == cur)) 1519 return; 1520 KKASSERT(cur->timeout == PFTM_UNLINKED); 1521 /* 1522 * decrements in rule should be ok, token is 1523 * held exclusively in this code path. 1524 */ 1525 if (--cur->rule.ptr->states_cur <= 0 && 1526 cur->rule.ptr->src_nodes <= 0) 1527 pf_rm_rule(NULL, cur->rule.ptr); 1528 if (cur->nat_rule.ptr != NULL) { 1529 if (--cur->nat_rule.ptr->states_cur <= 0 && 1530 cur->nat_rule.ptr->src_nodes <= 0) { 1531 pf_rm_rule(NULL, cur->nat_rule.ptr); 1532 } 1533 } 1534 if (cur->anchor.ptr != NULL) { 1535 if (--cur->anchor.ptr->states_cur <= 0) 1536 pf_rm_rule(NULL, cur->anchor.ptr); 1537 } 1538 pf_normalize_tcp_cleanup(cur); 1539 pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); 1540 1541 /* 1542 * We may be freeing pf_purge_expired_states()'s saved scan entry, 1543 * adjust it if necessary. 
1544 */ 1545 if (purge_cur[cpu] == cur) { 1546 kprintf("PURGE CONFLICT\n"); 1547 purge_cur[cpu] = TAILQ_NEXT(purge_cur[cpu], entry_list); 1548 } 1549 TAILQ_REMOVE(&state_list[cpu], cur, entry_list); 1550 if (cur->tag) 1551 pf_tag_unref(cur->tag); 1552 kfree(cur, M_PFSTATEPL); 1553 PF_INC_FCOUNTER(FCNT_STATE_REMOVALS); 1554 atomic_add_int(&pf_status.states, -1); 1555 } 1556 1557 int 1558 pf_purge_expired_states(u_int32_t maxcheck, int waslocked) 1559 { 1560 struct pf_state *cur; 1561 int locked = waslocked; 1562 int cpu = mycpu->gd_cpuid; 1563 1564 while (maxcheck--) { 1565 /* 1566 * Wrap to start of list when we hit the end 1567 */ 1568 cur = purge_cur[cpu]; 1569 if (cur == NULL) { 1570 cur = TAILQ_FIRST(&state_list[cpu]); 1571 if (cur == NULL) 1572 break; /* list empty */ 1573 } 1574 1575 /* 1576 * Setup next (purge_cur) while we process this one. If 1577 * we block and something else deletes purge_cur, 1578 * pf_free_state() will adjust it further ahead. 1579 */ 1580 purge_cur[cpu] = TAILQ_NEXT(cur, entry_list); 1581 1582 if (cur->timeout == PFTM_UNLINKED) { 1583 /* free unlinked state */ 1584 if (! locked) { 1585 lockmgr(&pf_consistency_lock, LK_EXCLUSIVE); 1586 locked = 1; 1587 } 1588 pf_free_state(cur); 1589 } else if (pf_state_expires(cur) <= time_second) { 1590 /* unlink and free expired state */ 1591 pf_unlink_state(cur); 1592 if (! 
locked) { 1593 if (!lockmgr(&pf_consistency_lock, LK_EXCLUSIVE)) 1594 return (0); 1595 locked = 1; 1596 } 1597 pf_free_state(cur); 1598 } 1599 } 1600 1601 if (locked) 1602 lockmgr(&pf_consistency_lock, LK_RELEASE); 1603 return (1); 1604 } 1605 1606 int 1607 pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) 1608 { 1609 if (aw->type != PF_ADDR_TABLE) 1610 return (0); 1611 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) 1612 return (1); 1613 return (0); 1614 } 1615 1616 void 1617 pf_tbladdr_remove(struct pf_addr_wrap *aw) 1618 { 1619 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 1620 return; 1621 pfr_detach_table(aw->p.tbl); 1622 aw->p.tbl = NULL; 1623 } 1624 1625 void 1626 pf_tbladdr_copyout(struct pf_addr_wrap *aw) 1627 { 1628 struct pfr_ktable *kt = aw->p.tbl; 1629 1630 if (aw->type != PF_ADDR_TABLE || kt == NULL) 1631 return; 1632 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 1633 kt = kt->pfrkt_root; 1634 aw->p.tbl = NULL; 1635 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 
1636 kt->pfrkt_cnt : -1; 1637 } 1638 1639 void 1640 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 1641 { 1642 switch (af) { 1643 #ifdef INET 1644 case AF_INET: { 1645 u_int32_t a = ntohl(addr->addr32[0]); 1646 kprintf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 1647 (a>>8)&255, a&255); 1648 if (p) { 1649 p = ntohs(p); 1650 kprintf(":%u", p); 1651 } 1652 break; 1653 } 1654 #endif /* INET */ 1655 #ifdef INET6 1656 case AF_INET6: { 1657 u_int16_t b; 1658 u_int8_t i, curstart, curend, maxstart, maxend; 1659 curstart = curend = maxstart = maxend = 255; 1660 for (i = 0; i < 8; i++) { 1661 if (!addr->addr16[i]) { 1662 if (curstart == 255) 1663 curstart = i; 1664 curend = i; 1665 } else { 1666 if ((curend - curstart) > 1667 (maxend - maxstart)) { 1668 maxstart = curstart; 1669 maxend = curend; 1670 } 1671 curstart = curend = 255; 1672 } 1673 } 1674 if ((curend - curstart) > 1675 (maxend - maxstart)) { 1676 maxstart = curstart; 1677 maxend = curend; 1678 } 1679 for (i = 0; i < 8; i++) { 1680 if (i >= maxstart && i <= maxend) { 1681 if (i == 0) 1682 kprintf(":"); 1683 if (i == maxend) 1684 kprintf(":"); 1685 } else { 1686 b = ntohs(addr->addr16[i]); 1687 kprintf("%x", b); 1688 if (i < 7) 1689 kprintf(":"); 1690 } 1691 } 1692 if (p) { 1693 p = ntohs(p); 1694 kprintf("[%u]", p); 1695 } 1696 break; 1697 } 1698 #endif /* INET6 */ 1699 } 1700 } 1701 1702 void 1703 pf_print_state(struct pf_state *s) 1704 { 1705 pf_print_state_parts(s, NULL, NULL); 1706 } 1707 1708 void 1709 pf_print_state_parts(struct pf_state *s, 1710 struct pf_state_key *skwp, struct pf_state_key *sksp) 1711 { 1712 struct pf_state_key *skw, *sks; 1713 u_int8_t proto, dir; 1714 1715 /* Do our best to fill these, but they're skipped if NULL */ 1716 skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); 1717 sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); 1718 proto = skw ? skw->proto : (sks ? sks->proto : 0); 1719 dir = s ? 
s->direction : 0; 1720 1721 switch (proto) { 1722 case IPPROTO_TCP: 1723 kprintf("TCP "); 1724 break; 1725 case IPPROTO_UDP: 1726 kprintf("UDP "); 1727 break; 1728 case IPPROTO_ICMP: 1729 kprintf("ICMP "); 1730 break; 1731 case IPPROTO_ICMPV6: 1732 kprintf("ICMPV6 "); 1733 break; 1734 default: 1735 kprintf("%u ", skw->proto); 1736 break; 1737 } 1738 switch (dir) { 1739 case PF_IN: 1740 kprintf(" in"); 1741 break; 1742 case PF_OUT: 1743 kprintf(" out"); 1744 break; 1745 } 1746 if (skw) { 1747 kprintf(" wire: "); 1748 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 1749 kprintf(" "); 1750 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 1751 } 1752 if (sks) { 1753 kprintf(" stack: "); 1754 if (sks != skw) { 1755 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 1756 kprintf(" "); 1757 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 1758 } else 1759 kprintf("-"); 1760 } 1761 if (s) { 1762 if (proto == IPPROTO_TCP) { 1763 kprintf(" [lo=%u high=%u win=%u modulator=%u", 1764 s->src.seqlo, s->src.seqhi, 1765 s->src.max_win, s->src.seqdiff); 1766 if (s->src.wscale && s->dst.wscale) 1767 kprintf(" wscale=%u", 1768 s->src.wscale & PF_WSCALE_MASK); 1769 kprintf("]"); 1770 kprintf(" [lo=%u high=%u win=%u modulator=%u", 1771 s->dst.seqlo, s->dst.seqhi, 1772 s->dst.max_win, s->dst.seqdiff); 1773 if (s->src.wscale && s->dst.wscale) 1774 kprintf(" wscale=%u", 1775 s->dst.wscale & PF_WSCALE_MASK); 1776 kprintf("]"); 1777 } 1778 kprintf(" %u:%u", s->src.state, s->dst.state); 1779 } 1780 } 1781 1782 void 1783 pf_print_flags(u_int8_t f) 1784 { 1785 if (f) 1786 kprintf(" "); 1787 if (f & TH_FIN) 1788 kprintf("F"); 1789 if (f & TH_SYN) 1790 kprintf("S"); 1791 if (f & TH_RST) 1792 kprintf("R"); 1793 if (f & TH_PUSH) 1794 kprintf("P"); 1795 if (f & TH_ACK) 1796 kprintf("A"); 1797 if (f & TH_URG) 1798 kprintf("U"); 1799 if (f & TH_ECE) 1800 kprintf("E"); 1801 if (f & TH_CWR) 1802 kprintf("W"); 1803 } 1804 1805 #define PF_SET_SKIP_STEPS(i) \ 1806 do { \ 1807 while (head[i] 
!= cur) { \ 1808 head[i]->skip[i].ptr = cur; \ 1809 head[i] = TAILQ_NEXT(head[i], entries); \ 1810 } \ 1811 } while (0) 1812 1813 void 1814 pf_calc_skip_steps(struct pf_rulequeue *rules) 1815 { 1816 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 1817 int i; 1818 1819 cur = TAILQ_FIRST(rules); 1820 prev = cur; 1821 for (i = 0; i < PF_SKIP_COUNT; ++i) 1822 head[i] = cur; 1823 while (cur != NULL) { 1824 1825 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 1826 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 1827 if (cur->direction != prev->direction) 1828 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 1829 if (cur->af != prev->af) 1830 PF_SET_SKIP_STEPS(PF_SKIP_AF); 1831 if (cur->proto != prev->proto) 1832 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 1833 if (cur->src.neg != prev->src.neg || 1834 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 1835 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 1836 if (cur->src.port[0] != prev->src.port[0] || 1837 cur->src.port[1] != prev->src.port[1] || 1838 cur->src.port_op != prev->src.port_op) 1839 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 1840 if (cur->dst.neg != prev->dst.neg || 1841 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 1842 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 1843 if (cur->dst.port[0] != prev->dst.port[0] || 1844 cur->dst.port[1] != prev->dst.port[1] || 1845 cur->dst.port_op != prev->dst.port_op) 1846 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 1847 1848 prev = cur; 1849 cur = TAILQ_NEXT(cur, entries); 1850 } 1851 for (i = 0; i < PF_SKIP_COUNT; ++i) 1852 PF_SET_SKIP_STEPS(i); 1853 } 1854 1855 int 1856 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 1857 { 1858 if (aw1->type != aw2->type) 1859 return (1); 1860 switch (aw1->type) { 1861 case PF_ADDR_ADDRMASK: 1862 case PF_ADDR_RANGE: 1863 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) 1864 return (1); 1865 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) 1866 return (1); 1867 return (0); 1868 case PF_ADDR_DYNIFTL: 1869 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 1870 case 
PF_ADDR_NOROUTE: 1871 case PF_ADDR_URPFFAILED: 1872 return (0); 1873 case PF_ADDR_TABLE: 1874 return (aw1->p.tbl != aw2->p.tbl); 1875 case PF_ADDR_RTLABEL: 1876 return (aw1->v.rtlabel != aw2->v.rtlabel); 1877 default: 1878 kprintf("invalid address type: %d\n", aw1->type); 1879 return (1); 1880 } 1881 } 1882 1883 u_int16_t 1884 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) 1885 { 1886 u_int32_t l; 1887 1888 if (udp && !cksum) 1889 return (0x0000); 1890 l = cksum + old - new; 1891 l = (l >> 16) + (l & 65535); 1892 l = l & 65535; 1893 if (udp && !l) 1894 return (0xFFFF); 1895 return (l); 1896 } 1897 1898 void 1899 pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc, 1900 struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af) 1901 { 1902 struct pf_addr ao; 1903 u_int16_t po = *p; 1904 1905 PF_ACPY(&ao, a, af); 1906 PF_ACPY(a, an, af); 1907 1908 *p = pn; 1909 1910 switch (af) { 1911 #ifdef INET 1912 case AF_INET: 1913 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 1914 ao.addr16[0], an->addr16[0], 0), 1915 ao.addr16[1], an->addr16[1], 0); 1916 *p = pn; 1917 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1918 ao.addr16[0], an->addr16[0], u), 1919 ao.addr16[1], an->addr16[1], u), 1920 po, pn, u); 1921 break; 1922 #endif /* INET */ 1923 #ifdef INET6 1924 case AF_INET6: 1925 *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1926 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1927 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, 1928 ao.addr16[0], an->addr16[0], u), 1929 ao.addr16[1], an->addr16[1], u), 1930 ao.addr16[2], an->addr16[2], u), 1931 ao.addr16[3], an->addr16[3], u), 1932 ao.addr16[4], an->addr16[4], u), 1933 ao.addr16[5], an->addr16[5], u), 1934 ao.addr16[6], an->addr16[6], u), 1935 ao.addr16[7], an->addr16[7], u), 1936 po, pn, u); 1937 break; 1938 #endif /* INET6 */ 1939 } 1940 } 1941 1942 1943 /* Changes a u_int32_t. 
Uses a void * so there are no align restrictions */ 1944 void 1945 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) 1946 { 1947 u_int32_t ao; 1948 1949 memcpy(&ao, a, sizeof(ao)); 1950 memcpy(a, &an, sizeof(u_int32_t)); 1951 *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), 1952 ao % 65536, an % 65536, u); 1953 } 1954 1955 #ifdef INET6 1956 void 1957 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) 1958 { 1959 struct pf_addr ao; 1960 1961 PF_ACPY(&ao, a, AF_INET6); 1962 PF_ACPY(a, an, AF_INET6); 1963 1964 *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1965 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 1966 pf_cksum_fixup(pf_cksum_fixup(*c, 1967 ao.addr16[0], an->addr16[0], u), 1968 ao.addr16[1], an->addr16[1], u), 1969 ao.addr16[2], an->addr16[2], u), 1970 ao.addr16[3], an->addr16[3], u), 1971 ao.addr16[4], an->addr16[4], u), 1972 ao.addr16[5], an->addr16[5], u), 1973 ao.addr16[6], an->addr16[6], u), 1974 ao.addr16[7], an->addr16[7], u); 1975 } 1976 #endif /* INET6 */ 1977 1978 void 1979 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, 1980 struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, 1981 u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) 1982 { 1983 struct pf_addr oia, ooa; 1984 1985 PF_ACPY(&oia, ia, af); 1986 if (oa) 1987 PF_ACPY(&ooa, oa, af); 1988 1989 /* Change inner protocol port, fix inner protocol checksum. */ 1990 if (ip != NULL) { 1991 u_int16_t oip = *ip; 1992 u_int32_t opc = 0; 1993 1994 if (pc != NULL) 1995 opc = *pc; 1996 *ip = np; 1997 if (pc != NULL) 1998 *pc = pf_cksum_fixup(*pc, oip, *ip, u); 1999 *ic = pf_cksum_fixup(*ic, oip, *ip, 0); 2000 if (pc != NULL) 2001 *ic = pf_cksum_fixup(*ic, opc, *pc, 0); 2002 } 2003 /* Change inner ip address, fix inner ip and icmp checksums. 
*/ 2004 PF_ACPY(ia, na, af); 2005 switch (af) { 2006 #ifdef INET 2007 case AF_INET: { 2008 u_int32_t oh2c = *h2c; 2009 2010 *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, 2011 oia.addr16[0], ia->addr16[0], 0), 2012 oia.addr16[1], ia->addr16[1], 0); 2013 *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, 2014 oia.addr16[0], ia->addr16[0], 0), 2015 oia.addr16[1], ia->addr16[1], 0); 2016 *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); 2017 break; 2018 } 2019 #endif /* INET */ 2020 #ifdef INET6 2021 case AF_INET6: 2022 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2023 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2024 pf_cksum_fixup(pf_cksum_fixup(*ic, 2025 oia.addr16[0], ia->addr16[0], u), 2026 oia.addr16[1], ia->addr16[1], u), 2027 oia.addr16[2], ia->addr16[2], u), 2028 oia.addr16[3], ia->addr16[3], u), 2029 oia.addr16[4], ia->addr16[4], u), 2030 oia.addr16[5], ia->addr16[5], u), 2031 oia.addr16[6], ia->addr16[6], u), 2032 oia.addr16[7], ia->addr16[7], u); 2033 break; 2034 #endif /* INET6 */ 2035 } 2036 /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ 2037 if (oa) { 2038 PF_ACPY(oa, na, af); 2039 switch (af) { 2040 #ifdef INET 2041 case AF_INET: 2042 *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, 2043 ooa.addr16[0], oa->addr16[0], 0), 2044 ooa.addr16[1], oa->addr16[1], 0); 2045 break; 2046 #endif /* INET */ 2047 #ifdef INET6 2048 case AF_INET6: 2049 *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2050 pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( 2051 pf_cksum_fixup(pf_cksum_fixup(*ic, 2052 ooa.addr16[0], oa->addr16[0], u), 2053 ooa.addr16[1], oa->addr16[1], u), 2054 ooa.addr16[2], oa->addr16[2], u), 2055 ooa.addr16[3], oa->addr16[3], u), 2056 ooa.addr16[4], oa->addr16[4], u), 2057 ooa.addr16[5], oa->addr16[5], u), 2058 ooa.addr16[6], oa->addr16[6], u), 2059 ooa.addr16[7], oa->addr16[7], u); 2060 break; 2061 #endif /* INET6 */ 2062 } 2063 } 2064 } 2065 2066 2067 /* 2068 * Need to modulate the sequence numbers in the TCP SACK option 2069 * (credits to Krzysztof Pfaff for report and patch) 2070 */ 2071 int 2072 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, 2073 struct tcphdr *th, struct pf_state_peer *dst) 2074 { 2075 int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen; 2076 u_int8_t opts[TCP_MAXOLEN], *opt = opts; 2077 int copyback = 0, i, olen; 2078 struct raw_sackblock sack; 2079 2080 #define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) 2081 if (hlen < TCPOLEN_SACKLEN || 2082 !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af)) 2083 return 0; 2084 2085 while (hlen >= TCPOLEN_SACKLEN) { 2086 olen = opt[1]; 2087 switch (*opt) { 2088 case TCPOPT_EOL: /* FALLTHROUGH */ 2089 case TCPOPT_NOP: 2090 opt++; 2091 hlen--; 2092 break; 2093 case TCPOPT_SACK: 2094 if (olen > hlen) 2095 olen = hlen; 2096 if (olen >= TCPOLEN_SACKLEN) { 2097 for (i = 2; i + TCPOLEN_SACK <= olen; 2098 i += TCPOLEN_SACK) { 2099 memcpy(&sack, &opt[i], sizeof(sack)); 2100 pf_change_a(&sack.rblk_start, &th->th_sum, 2101 htonl(ntohl(sack.rblk_start) - 2102 dst->seqdiff), 0); 2103 pf_change_a(&sack.rblk_end, &th->th_sum, 
2104 htonl(ntohl(sack.rblk_end) - 2105 dst->seqdiff), 0); 2106 memcpy(&opt[i], &sack, sizeof(sack)); 2107 } 2108 copyback = 1; 2109 } 2110 /* FALLTHROUGH */ 2111 default: 2112 if (olen < 2) 2113 olen = 2; 2114 hlen -= olen; 2115 opt += olen; 2116 } 2117 } 2118 2119 if (copyback) 2120 m_copyback(m, off + sizeof(*th), thoptlen, opts); 2121 return (copyback); 2122 } 2123 2124 void 2125 pf_send_tcp(const struct pf_rule *r, sa_family_t af, 2126 const struct pf_addr *saddr, const struct pf_addr *daddr, 2127 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2128 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2129 u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) 2130 { 2131 struct mbuf *m; 2132 int len = 0, tlen; 2133 #ifdef INET 2134 struct ip *h = NULL; 2135 #endif /* INET */ 2136 #ifdef INET6 2137 struct ip6_hdr *h6 = NULL; 2138 #endif /* INET6 */ 2139 struct tcphdr *th = NULL; 2140 char *opt; 2141 2142 ASSERT_LWKT_TOKEN_HELD(&pf_token); 2143 2144 /* maximum segment size tcp option */ 2145 tlen = sizeof(struct tcphdr); 2146 if (mss) 2147 tlen += 4; 2148 2149 switch (af) { 2150 #ifdef INET 2151 case AF_INET: 2152 len = sizeof(struct ip) + tlen; 2153 break; 2154 #endif /* INET */ 2155 #ifdef INET6 2156 case AF_INET6: 2157 len = sizeof(struct ip6_hdr) + tlen; 2158 break; 2159 #endif /* INET6 */ 2160 } 2161 2162 /* 2163 * Create outgoing mbuf. 2164 * 2165 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 2166 * so make sure pf.flags is clear. 
2167 */ 2168 m = m_gethdr(M_NOWAIT, MT_HEADER); 2169 if (m == NULL) { 2170 return; 2171 } 2172 if (tag) 2173 m->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; 2174 m->m_pkthdr.pf.flags = 0; 2175 m->m_pkthdr.pf.tag = rtag; 2176 /* XXX Recheck when upgrading to > 4.4 */ 2177 m->m_pkthdr.pf.statekey = NULL; 2178 if (r != NULL && r->rtableid >= 0) 2179 m->m_pkthdr.pf.rtableid = r->rtableid; 2180 2181 #ifdef ALTQ 2182 if (r != NULL && r->qid) { 2183 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 2184 m->m_pkthdr.pf.qid = r->qid; 2185 m->m_pkthdr.pf.ecn_af = af; 2186 m->m_pkthdr.pf.hdr = mtod(m, struct ip *); 2187 } 2188 #endif /* ALTQ */ 2189 m->m_data += max_linkhdr; 2190 m->m_pkthdr.len = m->m_len = len; 2191 m->m_pkthdr.rcvif = NULL; 2192 bzero(m->m_data, len); 2193 switch (af) { 2194 #ifdef INET 2195 case AF_INET: 2196 h = mtod(m, struct ip *); 2197 2198 /* IP header fields included in the TCP checksum */ 2199 h->ip_p = IPPROTO_TCP; 2200 h->ip_len = htons(tlen); 2201 h->ip_src.s_addr = saddr->v4.s_addr; 2202 h->ip_dst.s_addr = daddr->v4.s_addr; 2203 2204 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip)); 2205 break; 2206 #endif /* INET */ 2207 #ifdef INET6 2208 case AF_INET6: 2209 h6 = mtod(m, struct ip6_hdr *); 2210 2211 /* IP header fields included in the TCP checksum */ 2212 h6->ip6_nxt = IPPROTO_TCP; 2213 h6->ip6_plen = htons(tlen); 2214 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr)); 2215 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr)); 2216 2217 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr)); 2218 break; 2219 #endif /* INET6 */ 2220 } 2221 2222 /* TCP header */ 2223 th->th_sport = sport; 2224 th->th_dport = dport; 2225 th->th_seq = htonl(seq); 2226 th->th_ack = htonl(ack); 2227 th->th_off = tlen >> 2; 2228 th->th_flags = flags; 2229 th->th_win = htons(win); 2230 2231 if (mss) { 2232 opt = (char *)(th + 1); 2233 opt[0] = TCPOPT_MAXSEG; 2234 opt[1] = 4; 2235 mss = htons(mss); 2236 bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); 2237 } 2238 
2239 switch (af) { 2240 #ifdef INET 2241 case AF_INET: 2242 /* TCP checksum */ 2243 th->th_sum = in_cksum(m, len); 2244 2245 /* Finish the IP header */ 2246 h->ip_v = 4; 2247 h->ip_hl = sizeof(*h) >> 2; 2248 h->ip_tos = IPTOS_LOWDELAY; 2249 h->ip_len = htons(len); 2250 h->ip_off = path_mtu_discovery ? htons(IP_DF) : 0; 2251 h->ip_ttl = ttl ? ttl : ip_defttl; 2252 h->ip_sum = 0; 2253 if (eh == NULL) { 2254 lwkt_reltoken(&pf_token); 2255 ip_output(m, NULL, NULL, 0, NULL, NULL); 2256 lwkt_gettoken(&pf_token); 2257 } else { 2258 struct route ro; 2259 struct rtentry rt; 2260 struct ether_header *e = (void *)ro.ro_dst.sa_data; 2261 2262 if (ifp == NULL) { 2263 m_freem(m); 2264 return; 2265 } 2266 rt.rt_ifp = ifp; 2267 ro.ro_rt = &rt; 2268 ro.ro_dst.sa_len = sizeof(ro.ro_dst); 2269 ro.ro_dst.sa_family = pseudo_AF_HDRCMPLT; 2270 bcopy(eh->ether_dhost, e->ether_shost, ETHER_ADDR_LEN); 2271 bcopy(eh->ether_shost, e->ether_dhost, ETHER_ADDR_LEN); 2272 e->ether_type = eh->ether_type; 2273 /* XXX_IMPORT: later */ 2274 lwkt_reltoken(&pf_token); 2275 ip_output(m, NULL, &ro, 0, NULL, NULL); 2276 lwkt_gettoken(&pf_token); 2277 } 2278 break; 2279 #endif /* INET */ 2280 #ifdef INET6 2281 case AF_INET6: 2282 /* TCP checksum */ 2283 th->th_sum = in6_cksum(m, IPPROTO_TCP, 2284 sizeof(struct ip6_hdr), tlen); 2285 2286 h6->ip6_vfc |= IPV6_VERSION; 2287 h6->ip6_hlim = IPV6_DEFHLIM; 2288 2289 lwkt_reltoken(&pf_token); 2290 ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 2291 lwkt_gettoken(&pf_token); 2292 break; 2293 #endif /* INET6 */ 2294 } 2295 } 2296 2297 void 2298 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, 2299 struct pf_rule *r) 2300 { 2301 struct mbuf *m0; 2302 2303 /* 2304 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 2305 * so make sure pf.flags is clear. 
2306 */ 2307 if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) 2308 return; 2309 2310 m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED; 2311 m0->m_pkthdr.pf.flags = 0; 2312 /* XXX Re-Check when Upgrading to > 4.4 */ 2313 m0->m_pkthdr.pf.statekey = NULL; 2314 2315 if (r->rtableid >= 0) 2316 m0->m_pkthdr.pf.rtableid = r->rtableid; 2317 2318 #ifdef ALTQ 2319 if (r->qid) { 2320 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 2321 m0->m_pkthdr.pf.qid = r->qid; 2322 m0->m_pkthdr.pf.ecn_af = af; 2323 m0->m_pkthdr.pf.hdr = mtod(m0, struct ip *); 2324 } 2325 #endif /* ALTQ */ 2326 2327 switch (af) { 2328 #ifdef INET 2329 case AF_INET: 2330 icmp_error(m0, type, code, 0, 0); 2331 break; 2332 #endif /* INET */ 2333 #ifdef INET6 2334 case AF_INET6: 2335 icmp6_error(m0, type, code, 0); 2336 break; 2337 #endif /* INET6 */ 2338 } 2339 } 2340 2341 /* 2342 * Return 1 if the addresses a and b match (with mask m), otherwise return 0. 2343 * If n is 0, they match if they are equal. If n is != 0, they match if they 2344 * are different. 2345 */ 2346 int 2347 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 2348 struct pf_addr *b, sa_family_t af) 2349 { 2350 int match = 0; 2351 2352 switch (af) { 2353 #ifdef INET 2354 case AF_INET: 2355 if ((a->addr32[0] & m->addr32[0]) == 2356 (b->addr32[0] & m->addr32[0])) 2357 match++; 2358 break; 2359 #endif /* INET */ 2360 #ifdef INET6 2361 case AF_INET6: 2362 if (((a->addr32[0] & m->addr32[0]) == 2363 (b->addr32[0] & m->addr32[0])) && 2364 ((a->addr32[1] & m->addr32[1]) == 2365 (b->addr32[1] & m->addr32[1])) && 2366 ((a->addr32[2] & m->addr32[2]) == 2367 (b->addr32[2] & m->addr32[2])) && 2368 ((a->addr32[3] & m->addr32[3]) == 2369 (b->addr32[3] & m->addr32[3]))) 2370 match++; 2371 break; 2372 #endif /* INET6 */ 2373 } 2374 if (match) { 2375 if (n) 2376 return (0); 2377 else 2378 return (1); 2379 } else { 2380 if (n) 2381 return (1); 2382 else 2383 return (0); 2384 } 2385 } 2386 2387 /* 2388 * Return 1 if b <= a <= e, otherwise return 0. 
2389 */ 2390 int 2391 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 2392 struct pf_addr *a, sa_family_t af) 2393 { 2394 switch (af) { 2395 #ifdef INET 2396 case AF_INET: 2397 if ((a->addr32[0] < b->addr32[0]) || 2398 (a->addr32[0] > e->addr32[0])) 2399 return (0); 2400 break; 2401 #endif /* INET */ 2402 #ifdef INET6 2403 case AF_INET6: { 2404 int i; 2405 2406 /* check a >= b */ 2407 for (i = 0; i < 4; ++i) 2408 if (a->addr32[i] > b->addr32[i]) 2409 break; 2410 else if (a->addr32[i] < b->addr32[i]) 2411 return (0); 2412 /* check a <= e */ 2413 for (i = 0; i < 4; ++i) 2414 if (a->addr32[i] < e->addr32[i]) 2415 break; 2416 else if (a->addr32[i] > e->addr32[i]) 2417 return (0); 2418 break; 2419 } 2420 #endif /* INET6 */ 2421 } 2422 return (1); 2423 } 2424 2425 int 2426 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 2427 { 2428 switch (op) { 2429 case PF_OP_IRG: 2430 return ((p > a1) && (p < a2)); 2431 case PF_OP_XRG: 2432 return ((p < a1) || (p > a2)); 2433 case PF_OP_RRG: 2434 return ((p >= a1) && (p <= a2)); 2435 case PF_OP_EQ: 2436 return (p == a1); 2437 case PF_OP_NE: 2438 return (p != a1); 2439 case PF_OP_LT: 2440 return (p < a1); 2441 case PF_OP_LE: 2442 return (p <= a1); 2443 case PF_OP_GT: 2444 return (p > a1); 2445 case PF_OP_GE: 2446 return (p >= a1); 2447 } 2448 return (0); /* never reached */ 2449 } 2450 2451 int 2452 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 2453 { 2454 a1 = ntohs(a1); 2455 a2 = ntohs(a2); 2456 p = ntohs(p); 2457 return (pf_match(op, a1, a2, p)); 2458 } 2459 2460 int 2461 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 2462 { 2463 if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2464 return (0); 2465 return (pf_match(op, a1, a2, u)); 2466 } 2467 2468 int 2469 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 2470 { 2471 if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) 2472 return (0); 2473 return (pf_match(op, a1, a2, g)); 2474 } 2475 2476 int 2477 
pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 2478 { 2479 if (*tag == -1) 2480 *tag = m->m_pkthdr.pf.tag; 2481 2482 return ((!r->match_tag_not && r->match_tag == *tag) || 2483 (r->match_tag_not && r->match_tag != *tag)); 2484 } 2485 2486 int 2487 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 2488 { 2489 if (tag <= 0 && rtableid < 0) 2490 return (0); 2491 2492 if (tag > 0) 2493 m->m_pkthdr.pf.tag = tag; 2494 if (rtableid >= 0) 2495 m->m_pkthdr.pf.rtableid = rtableid; 2496 2497 return (0); 2498 } 2499 2500 void 2501 pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, 2502 struct pf_rule **r, struct pf_rule **a, int *match) 2503 { 2504 struct pf_anchor_stackframe *f; 2505 2506 (*r)->anchor->match = 0; 2507 if (match) 2508 *match = 0; 2509 if (*depth >= NELEM(pf_anchor_stack)) { 2510 kprintf("pf_step_into_anchor: stack overflow\n"); 2511 *r = TAILQ_NEXT(*r, entries); 2512 return; 2513 } else if (*depth == 0 && a != NULL) 2514 *a = *r; 2515 f = pf_anchor_stack + (*depth)++; 2516 f->rs = *rs; 2517 f->r = *r; 2518 if ((*r)->anchor_wildcard) { 2519 f->parent = &(*r)->anchor->children; 2520 if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == 2521 NULL) { 2522 *r = NULL; 2523 return; 2524 } 2525 *rs = &f->child->ruleset; 2526 } else { 2527 f->parent = NULL; 2528 f->child = NULL; 2529 *rs = &(*r)->anchor->ruleset; 2530 } 2531 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2532 } 2533 2534 int 2535 pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, 2536 struct pf_rule **r, struct pf_rule **a, int *match) 2537 { 2538 struct pf_anchor_stackframe *f; 2539 int quick = 0; 2540 2541 do { 2542 if (*depth <= 0) 2543 break; 2544 f = pf_anchor_stack + *depth - 1; 2545 if (f->parent != NULL && f->child != NULL) { 2546 if (f->child->match || 2547 (match != NULL && *match)) { 2548 f->r->anchor->match = 1; 2549 *match = 0; 2550 } 2551 f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); 2552 if (f->child != NULL) { 2553 *rs = 
&f->child->ruleset; 2554 *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); 2555 if (*r == NULL) 2556 continue; 2557 else 2558 break; 2559 } 2560 } 2561 (*depth)--; 2562 if (*depth == 0 && a != NULL) 2563 *a = NULL; 2564 *rs = f->rs; 2565 if (f->r->anchor->match || (match != NULL && *match)) 2566 quick = f->r->quick; 2567 *r = TAILQ_NEXT(f->r, entries); 2568 } while (*r == NULL); 2569 2570 return (quick); 2571 } 2572 2573 #ifdef INET6 2574 void 2575 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 2576 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 2577 { 2578 switch (af) { 2579 #ifdef INET 2580 case AF_INET: 2581 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2582 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2583 break; 2584 #endif /* INET */ 2585 case AF_INET6: 2586 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 2587 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 2588 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 2589 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 2590 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 2591 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 2592 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 2593 ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); 2594 break; 2595 } 2596 } 2597 2598 void 2599 pf_addr_inc(struct pf_addr *addr, sa_family_t af) 2600 { 2601 switch (af) { 2602 #ifdef INET 2603 case AF_INET: 2604 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 2605 break; 2606 #endif /* INET */ 2607 case AF_INET6: 2608 if (addr->addr32[3] == 0xffffffff) { 2609 addr->addr32[3] = 0; 2610 if (addr->addr32[2] == 0xffffffff) { 2611 addr->addr32[2] = 0; 2612 if (addr->addr32[1] == 0xffffffff) { 2613 addr->addr32[1] = 0; 2614 addr->addr32[0] = 2615 htonl(ntohl(addr->addr32[0]) + 1); 2616 } else 2617 addr->addr32[1] = 2618 htonl(ntohl(addr->addr32[1]) + 1); 2619 } else 2620 addr->addr32[2] = 2621 htonl(ntohl(addr->addr32[2]) + 1); 
		} else
			addr->addr32[3] =
			    htonl(ntohl(addr->addr32[3]) + 1);
		break;
	}
}
#endif /* INET6 */

/*
 * Mixing step used by pf_hash().  Operates in place on the three
 * lvalues a, b and c; each argument is evaluated many times, so only
 * plain variables may be passed.
 */
#define mix(a,b,c) \
	do {					\
		a -= b; a -= c; a ^= (c >> 13);	\
		b -= c; b -= a; b ^= (a << 8);	\
		c -= a; c -= b; c ^= (b >> 13);	\
		a -= b; a -= c; a ^= (c >> 12);	\
		b -= c; b -= a; b ^= (a << 16);	\
		c -= a; c -= b; c ^= (b >> 5);	\
		a -= b; a -= c; a ^= (c >> 3);	\
		b -= c; b -= a; b ^= (a << 10);	\
		c -= a; c -= b; c ^= (b >> 15);	\
	} while (0)

/*
 * hash function based on bridge_hash in if_bridge.c
 *
 * Hashes inaddr together with the pool key into *hash.  For AF_INET
 * only hash->addr32[0] is written; for AF_INET6 all four words are
 * written, each mixing round feeding the next.
 */
void
pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
    struct pf_poolhashkey *key, sa_family_t af)
{
	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];

	switch (af) {
#ifdef INET
	case AF_INET:
		a += inaddr->addr32[0];
		b += key->key32[1];
		mix(a, b, c);
		hash->addr32[0] = c + key->key32[2];
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		a += inaddr->addr32[0];
		b += inaddr->addr32[2];
		mix(a, b, c);
		hash->addr32[0] = c;
		a += inaddr->addr32[1];
		b += inaddr->addr32[3];
		c += key->key32[1];
		mix(a, b, c);
		hash->addr32[1] = c;
		a += inaddr->addr32[2];
		b += inaddr->addr32[1];
		c += key->key32[2];
		mix(a, b, c);
		hash->addr32[2] = c;
		a += inaddr->addr32[3];
		b += inaddr->addr32[0];
		c += key->key32[3];
		mix(a, b, c);
		hash->addr32[3] = c;
		break;
#endif /* INET6 */
	}
}

/*
 * Choose a translation address from rule r's address pool according to
 * the pool type (none, bitmask, random, source-hash, round-robin) and
 * store it in *naddr.
 *
 * If init_addr is non-NULL and still zero, it receives the first
 * address chosen by the random and round-robin pool types.  With
 * sticky-address pools, *sn is looked up and/or updated with the
 * chosen address.
 *
 * Returns 0 on success and 1 if no usable address is available.
 */
int
pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
    struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn)
{
	unsigned char		 hash[16];
	struct pf_pool		*rpool = &r->rpool;
	struct pf_pooladdr	*acur = rpool->cur;
	struct pf_pooladdr	*cur;
	struct pf_addr		*raddr;
	struct pf_addr		*rmask;
	struct pf_addr		counter;
	struct pf_src_node	 k;
	int cpu = mycpu->gd_cpuid;
	int tblidx;

	bzero(hash, sizeof(hash));	/* avoid gcc warnings */

	/*
	 * NOTE! rpool->cur and rpool->tblidx can be iterators and thus
	 *	 may represent a SMP race due to the shared nature of the
	 *	 rpool structure.  We allow the race and ensure that updates
	 *	 do not create a fatal condition.
	 */
	cpu_ccfence();
	cur = acur;
	raddr = &cur->addr.v.a.addr;
	rmask = &cur->addr.v.a.mask;

	/*
	 * Sticky-address pools: reuse the address previously recorded for
	 * this source in the per-cpu source tracking tree, if there is one.
	 */
	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		k.af = af;
		PF_ACPY(&k.addr, saddr, af);
		if (r->rule_flag & PFRULE_RULESRCTRACK ||
		    r->rpool.opts & PF_POOL_STICKYADDR)
			k.rule.ptr = r;
		else
			k.rule.ptr = NULL;
		PF_INC_SCOUNTER(SCNT_SRC_NODE_SEARCH);
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking[cpu], &k);
		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
			PF_ACPY(naddr, &(*sn)->raddr, af);
			if (pf_status.debug >= PF_DEBUG_MISC) {
				kprintf("pf_map_addr: src tracking maps ");
				pf_print_host(&k.addr, 0, af);
				kprintf(" to ");
				pf_print_host(naddr, 0, af);
				kprintf("\n");
			}
			return (0);
		}
	}

	/* Resolve raddr/rmask for the current pool entry. */
	if (cur->addr.type == PF_ADDR_NOROUTE)
		return (1);
	if (cur->addr.type == PF_ADDR_DYNIFTL) {
		switch (af) {
#ifdef INET
		case AF_INET:
			if (cur->addr.p.dyn->pfid_acnt4 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &cur->addr.p.dyn->pfid_addr4;
			rmask = &cur->addr.p.dyn->pfid_mask4;
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			if (cur->addr.p.dyn->pfid_acnt6 < 1 &&
			    (rpool->opts & PF_POOL_TYPEMASK) !=
			    PF_POOL_ROUNDROBIN)
				return (1);
			raddr = &cur->addr.p.dyn->pfid_addr6;
			rmask = &cur->addr.p.dyn->pfid_mask6;
			break;
#endif /* INET6 */
		}
	} else if (cur->addr.type == PF_ADDR_TABLE) {
		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
			return (1); /* unsupported */
	} else {
		raddr = &cur->addr.v.a.addr;
		rmask = &cur->addr.v.a.mask;
	}

	switch (rpool->opts & PF_POOL_TYPEMASK) {
	case PF_POOL_NONE:
		PF_ACPY(naddr, raddr, af);
		break;
	case PF_POOL_BITMASK:
		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
		break;
	case PF_POOL_RANDOM:
		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
			/*
			 * First call: seed the host part with random data.
			 * For IPv6 the words are filled from least to most
			 * significant, stopping at the first word that is
			 * fully covered by the mask; the words left unset
			 * are masked out by PF_POOLMASK() below.
			 */
			switch (af) {
#ifdef INET
			case AF_INET:
				counter.addr32[0] = htonl(karc4random());
				break;
#endif /* INET */
#ifdef INET6
			case AF_INET6:
				if (rmask->addr32[3] != 0xffffffff)
					counter.addr32[3] =
						htonl(karc4random());
				else
					break;
				if (rmask->addr32[2] != 0xffffffff)
					counter.addr32[2] =
						htonl(karc4random());
				else
					break;
				if (rmask->addr32[1] != 0xffffffff)
					counter.addr32[1] =
						htonl(karc4random());
				else
					break;
				if (rmask->addr32[0] != 0xffffffff)
					counter.addr32[0] =
						htonl(karc4random());
				break;
#endif /* INET6 */
			}
			PF_POOLMASK(naddr, raddr, rmask, &counter, af);
			PF_ACPY(init_addr, naddr, af);

		} else {
			/*
			 * Subsequent calls step the shared pool counter.
			 * Work on a local copy and store it back (see the
			 * SMP note above).
			 */
			counter = rpool->counter;
			cpu_ccfence();
			PF_AINC(&counter, af);
			PF_POOLMASK(naddr, raddr, rmask, &counter, af);
			rpool->counter = counter;
		}
		break;
	case PF_POOL_SRCHASH:
		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
		break;
	case PF_POOL_ROUNDROBIN:
		/*
		 * Work on local copies of the iterator state; it is only
		 * written back at get_addr.
		 */
		tblidx = rpool->tblidx;
		counter = rpool->counter;
		/* Is the current counter still valid for the current entry? */
		if (cur->addr.type == PF_ADDR_TABLE) {
			if (!pfr_pool_get(cur->addr.p.tbl,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (cur->addr.type == PF_ADDR_DYNIFTL) {
			if (!pfr_pool_get(cur->addr.p.dyn->pfid_kt,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				goto get_addr;
			}
		} else if (pf_match_addr(0, raddr, rmask,
					 &counter, af)) {
			goto get_addr;
		}

	try_next:
		/* Advance to the next pool entry, wrapping to the first. */
		if ((cur = TAILQ_NEXT(cur, entries)) == NULL)
			cur = TAILQ_FIRST(&rpool->list);
		if (cur->addr.type == PF_ADDR_TABLE) {
			tblidx = -1;
			if (pfr_pool_get(cur->addr.p.tbl,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (cur != acur)
					goto try_next;
				return (1);
			}
		} else if (cur->addr.type == PF_ADDR_DYNIFTL) {
			tblidx = -1;
			if (pfr_pool_get(cur->addr.p.dyn->pfid_kt,
			    &tblidx, &counter,
			    &raddr, &rmask, af)) {
				/* table contains no address of type 'af' */
				if (cur != acur)
					goto try_next;
				return (1);
			}
		} else {
			raddr = &cur->addr.v.a.addr;
			rmask = &cur->addr.v.a.mask;
			PF_ACPY(&counter, raddr, af);
		}

	get_addr:
		/* Publish the iterator state and hand out the address. */
		rpool->cur = cur;
		rpool->tblidx = tblidx;
		PF_ACPY(naddr, &counter, af);
		if (init_addr != NULL && PF_AZERO(init_addr, af))
			PF_ACPY(init_addr, naddr, af);
		PF_AINC(&counter, af);
		rpool->counter = counter;
		break;
	}
	/* Remember the choice in the source node, if we have one. */
	if (*sn != NULL)
		PF_ACPY(&(*sn)->raddr, naddr, af);

	if (pf_status.debug >= PF_DEBUG_MISC &&
	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
		kprintf("pf_map_addr: selected address ");
		pf_print_host(naddr, 0, af);
		kprintf("\n");
	}

	return (0);
}

/*
 * Pick a translation address (via pf_map_addr()) and a source port in
 * the range low..high for which no conflicting state exists.
 *
 * On success the address is stored in *naddr, the port (network byte
 * order) in *nport, and 0 is returned.  Returns 1 if no address/port
 * combination is available.
 */
int
pf_get_sport(struct pf_pdesc *pd, sa_family_t af,
	     u_int8_t proto, struct pf_rule *r,
	     struct pf_addr *saddr, struct pf_addr *daddr,
	     u_int16_t sport, u_int16_t dport,
	     struct pf_addr *naddr, u_int16_t *nport,
	     u_int16_t low, u_int16_t high, struct pf_src_node **sn)
{
	struct pf_state_key_cmp	key;
	struct pf_addr		init_addr;
	u_int16_t		cut;
	u_int32_t		hash_base = 0;
	int			do_hash = 0;

2911 bzero(&init_addr, sizeof(init_addr)); 2912 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 2913 return (1); 2914 2915 if (proto == IPPROTO_ICMP) { 2916 low = 1; 2917 high = 65535; 2918 } 2919 2920 bzero(&key, sizeof(key)); 2921 key.af = af; 2922 key.proto = proto; 2923 key.port[0] = dport; 2924 PF_ACPY(&key.addr[0], daddr, key.af); 2925 2926 do { 2927 PF_ACPY(&key.addr[1], naddr, key.af); 2928 2929 /* 2930 * We want to select a port that calculates to a toeplitz hash 2931 * that masks to the same cpu, otherwise the response may 2932 * not see the new state. 2933 * 2934 * We can still do this even if the kernel is disregarding 2935 * the hash and vectoring the packets to a specific cpu, 2936 * but it will reduce the number of ports we can use. 2937 */ 2938 switch(af) { 2939 case AF_INET: 2940 if (proto == IPPROTO_TCP) { 2941 do_hash = 1; 2942 hash_base = toeplitz_piecemeal_port(dport) ^ 2943 toeplitz_piecemeal_addr(daddr->v4.s_addr) ^ 2944 toeplitz_piecemeal_addr(naddr->v4.s_addr); 2945 } 2946 break; 2947 case AF_INET6: 2948 /* XXX TODO XXX */ 2949 default: 2950 /* XXX TODO XXX */ 2951 break; 2952 } 2953 2954 /* 2955 * port search; start random, step; 2956 * similar 2 portloop in in_pcbbind 2957 * 2958 * WARNING! We try to match such that the kernel will 2959 * dispatch the translated host/port to the same 2960 * cpu, but this might not be possible. 2961 * 2962 * In the case where the port is fixed, or for the 2963 * UDP case (whos toeplitz does not incorporate the 2964 * port), we set not_cpu_localized which ultimately 2965 * causes the pf_state_tree element 2966 * 2967 * XXX fixed ports present a problem for cpu localization. 2968 */ 2969 if (!(proto == IPPROTO_TCP || 2970 proto == IPPROTO_UDP || 2971 proto == IPPROTO_ICMP)) { 2972 /* 2973 * non-specific protocol, leave port intact. 
2974 */ 2975 key.port[1] = sport; 2976 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 2977 *nport = sport; 2978 pd->not_cpu_localized = 1; 2979 return (0); 2980 } 2981 } else if (low == 0 && high == 0) { 2982 /* 2983 * static-port same as originator. 2984 */ 2985 key.port[1] = sport; 2986 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 2987 *nport = sport; 2988 pd->not_cpu_localized = 1; 2989 return (0); 2990 } 2991 } else if (low == high) { 2992 /* 2993 * specific port as specified. 2994 */ 2995 key.port[1] = htons(low); 2996 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 2997 *nport = htons(low); 2998 pd->not_cpu_localized = 1; 2999 return (0); 3000 } 3001 } else { 3002 /* 3003 * normal dynamic port 3004 */ 3005 u_int16_t tmp; 3006 3007 if (low > high) { 3008 tmp = low; 3009 low = high; 3010 high = tmp; 3011 } 3012 /* low < high */ 3013 cut = htonl(karc4random()) % (1 + high - low) + low; 3014 /* low <= cut <= high */ 3015 for (tmp = cut; tmp <= high; ++(tmp)) { 3016 key.port[1] = htons(tmp); 3017 if (do_hash) { 3018 uint32_t hash; 3019 3020 hash = hash_base ^ 3021 toeplitz_piecemeal_port(key.port[1]); 3022 if (netisr_hashcpu(hash) != mycpuid) 3023 continue; 3024 } 3025 if (pf_find_state_all(&key, PF_IN, NULL) == 3026 NULL && !in_baddynamic(tmp, proto)) { 3027 if (proto == IPPROTO_UDP) 3028 pd->not_cpu_localized = 1; 3029 *nport = htons(tmp); 3030 return (0); 3031 } 3032 } 3033 for (tmp = cut - 1; tmp >= low; --(tmp)) { 3034 key.port[1] = htons(tmp); 3035 if (do_hash) { 3036 uint32_t hash; 3037 3038 hash = hash_base ^ 3039 toeplitz_piecemeal_port(key.port[1]); 3040 if (netisr_hashcpu(hash) != mycpuid) 3041 continue; 3042 } 3043 if (pf_find_state_all(&key, PF_IN, NULL) == 3044 NULL && !in_baddynamic(tmp, proto)) { 3045 if (proto == IPPROTO_UDP) 3046 pd->not_cpu_localized = 1; 3047 *nport = htons(tmp); 3048 return (0); 3049 } 3050 } 3051 } 3052 3053 /* 3054 * Next address 3055 */ 3056 switch (r->rpool.opts & PF_POOL_TYPEMASK) { 3057 case 
PF_POOL_RANDOM: 3058 case PF_POOL_ROUNDROBIN: 3059 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) 3060 return (1); 3061 break; 3062 case PF_POOL_NONE: 3063 case PF_POOL_SRCHASH: 3064 case PF_POOL_BITMASK: 3065 default: 3066 return (1); 3067 } 3068 } while (! PF_AEQ(&init_addr, naddr, af) ); 3069 return (1); /* none available */ 3070 } 3071 3072 struct pf_rule * 3073 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, 3074 int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, 3075 struct pf_addr *daddr, u_int16_t dport, int rs_num) 3076 { 3077 struct pf_rule *r, *rm = NULL; 3078 struct pf_ruleset *ruleset = NULL; 3079 int tag = -1; 3080 int rtableid = -1; 3081 int asd = 0; 3082 3083 r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); 3084 while (r && rm == NULL) { 3085 struct pf_rule_addr *src = NULL, *dst = NULL; 3086 struct pf_addr_wrap *xdst = NULL; 3087 struct pf_pooladdr *cur; 3088 3089 if (r->action == PF_BINAT && direction == PF_IN) { 3090 src = &r->dst; 3091 cur = r->rpool.cur; /* SMP race possible */ 3092 cpu_ccfence(); 3093 if (cur) 3094 xdst = &cur->addr; 3095 } else { 3096 src = &r->src; 3097 dst = &r->dst; 3098 } 3099 3100 r->evaluations++; 3101 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3102 r = r->skip[PF_SKIP_IFP].ptr; 3103 else if (r->direction && r->direction != direction) 3104 r = r->skip[PF_SKIP_DIR].ptr; 3105 else if (r->af && r->af != pd->af) 3106 r = r->skip[PF_SKIP_AF].ptr; 3107 else if (r->proto && r->proto != pd->proto) 3108 r = r->skip[PF_SKIP_PROTO].ptr; 3109 else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, 3110 src->neg, kif)) 3111 r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : 3112 PF_SKIP_DST_ADDR].ptr; 3113 else if (src->port_op && !pf_match_port(src->port_op, 3114 src->port[0], src->port[1], sport)) 3115 r = r->skip[src == &r->src ? 
PF_SKIP_SRC_PORT : 3116 PF_SKIP_DST_PORT].ptr; 3117 else if (dst != NULL && 3118 PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) 3119 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3120 else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, 3121 0, NULL)) 3122 r = TAILQ_NEXT(r, entries); 3123 else if (dst != NULL && dst->port_op && 3124 !pf_match_port(dst->port_op, dst->port[0], 3125 dst->port[1], dport)) 3126 r = r->skip[PF_SKIP_DST_PORT].ptr; 3127 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3128 r = TAILQ_NEXT(r, entries); 3129 else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != 3130 IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, 3131 off, pd->hdr.tcp), r->os_fingerprint))) 3132 r = TAILQ_NEXT(r, entries); 3133 else { 3134 if (r->tag) 3135 tag = r->tag; 3136 if (r->rtableid >= 0) 3137 rtableid = r->rtableid; 3138 if (r->anchor == NULL) { 3139 rm = r; 3140 } else 3141 pf_step_into_anchor(&asd, &ruleset, rs_num, 3142 &r, NULL, NULL); 3143 } 3144 if (r == NULL) 3145 pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, 3146 NULL, NULL); 3147 } 3148 if (pf_tag_packet(m, tag, rtableid)) 3149 return (NULL); 3150 if (rm != NULL && (rm->action == PF_NONAT || 3151 rm->action == PF_NORDR || rm->action == PF_NOBINAT)) 3152 return (NULL); 3153 return (rm); 3154 } 3155 3156 struct pf_rule * 3157 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, 3158 struct pfi_kif *kif, struct pf_src_node **sn, 3159 struct pf_state_key **skw, struct pf_state_key **sks, 3160 struct pf_state_key **skp, struct pf_state_key **nkp, 3161 struct pf_addr *saddr, struct pf_addr *daddr, 3162 u_int16_t sport, u_int16_t dport) 3163 { 3164 struct pf_rule *r = NULL; 3165 3166 if (direction == PF_OUT) { 3167 r = pf_match_translation(pd, m, off, direction, kif, saddr, 3168 sport, daddr, dport, PF_RULESET_BINAT); 3169 if (r == NULL) 3170 r = pf_match_translation(pd, m, off, direction, kif, 3171 saddr, sport, daddr, dport, PF_RULESET_NAT); 3172 } else { 
		r = pf_match_translation(pd, m, off, direction, kif, saddr,
		    sport, daddr, dport, PF_RULESET_RDR);
		if (r == NULL)
			r = pf_match_translation(pd, m, off, direction, kif,
			    saddr, sport, daddr, dport, PF_RULESET_BINAT);
	}

	if (r != NULL) {
		struct pf_addr	*naddr;
		u_int16_t	*nport;

		/* On state key setup failure the rule is returned as-is. */
		if (pf_state_key_setup(pd, r, skw, sks, skp, nkp,
		    saddr, daddr, sport, dport))
			return r;

		/* XXX We only modify one side for now. */
		naddr = &(*nkp)->addr[1];
		nport = &(*nkp)->port[1];

		/*
		 * NOTE: Currently all translations will clear
		 *	 BRIDGE_MBUF_TAGGED, telling the bridge to
		 *	 ignore the original input encapsulation.
		 */
		switch (r->action) {
		case PF_NONAT:
		case PF_NOBINAT:
		case PF_NORDR:
			return (NULL);
		case PF_NAT:
			/* Source NAT: pick a pool address and a free port. */
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			if (pf_get_sport(pd, pd->af, pd->proto, r,
			    saddr, daddr, sport, dport,
			    naddr, nport, r->rpool.proxy_port[0],
			    r->rpool.proxy_port[1], sn)) {
				DPFPRINTF(PF_DEBUG_MISC,
				    ("pf: NAT proxy port allocation "
				    "(%u-%u) failed\n",
				    r->rpool.proxy_port[0],
				    r->rpool.proxy_port[1]));
				return (NULL);
			}
			break;
		case PF_BINAT:
			/*
			 * Bidirectional mapping.  Outbound: the network part
			 * comes from the pool address and the host part from
			 * saddr.  Inbound: the network part comes from the
			 * rule's src address and the host part from daddr.
			 */
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			switch (direction) {
			case PF_OUT:
				if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
					switch (pd->af) {
#ifdef INET
					case AF_INET:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr4,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask4,
						    saddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->rpool.cur->addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_addr6,
						    &r->rpool.cur->addr.p.dyn->
						    pfid_mask6,
						    saddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->rpool.cur->addr.v.a.addr,
					    &r->rpool.cur->addr.v.a.mask,
					    saddr, pd->af);
				break;
			case PF_IN:
				if (r->src.addr.type == PF_ADDR_DYNIFTL) {
					switch (pd->af) {
#ifdef INET
					case AF_INET:
						if (r->src.addr.p.dyn->
						    pfid_acnt4 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr4,
						    &r->src.addr.p.dyn->
						    pfid_mask4,
						    daddr, AF_INET);
						break;
#endif /* INET */
#ifdef INET6
					case AF_INET6:
						if (r->src.addr.p.dyn->
						    pfid_acnt6 < 1)
							return (NULL);
						PF_POOLMASK(naddr,
						    &r->src.addr.p.dyn->
						    pfid_addr6,
						    &r->src.addr.p.dyn->
						    pfid_mask6,
						    daddr, AF_INET6);
						break;
#endif /* INET6 */
					}
				} else
					PF_POOLMASK(naddr,
					    &r->src.addr.v.a.addr,
					    &r->src.addr.v.a.mask, daddr,
					    pd->af);
				break;
			}
			break;
		case PF_RDR: {
			/* Redirect: pick the new destination address/port. */
			m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
			if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn))
				return (NULL);
			if ((r->rpool.opts & PF_POOL_TYPEMASK) ==
			    PF_POOL_BITMASK)
				PF_POOLMASK(naddr, naddr,
				    &r->rpool.cur->addr.v.a.mask, daddr,
				    pd->af);

			if (r->rpool.proxy_port[1]) {
				/*
				 * Port range: map the offset of dport from
				 * the start of the rule's destination port
				 * range into the proxy port range.
				 */
				u_int32_t	tmp_nport;

				tmp_nport = ((ntohs(dport) -
				    ntohs(r->dst.port[0])) %
				    (r->rpool.proxy_port[1] -
				    r->rpool.proxy_port[0] + 1)) +
				    r->rpool.proxy_port[0];

				/* wrap around if necessary */
				if (tmp_nport > 65535)
					tmp_nport -= 65535;
				*nport = htons((u_int16_t)tmp_nport);
			} else if (r->rpool.proxy_port[0]) {
				/* Single fixed target port. */
				*nport = htons(r->rpool.proxy_port[0]);
			}
			pd->not_cpu_localized = 1;
			break;
		}
		default:
			return (NULL);
		}
	}

	return (r);
}

/*
 * Message used to ask another cpu to perform an inpcb hash lookup on
 * our behalf (see in_pcblookup_hash_handler()).  The result is stored
 * through nm_pinp.
 */
struct netmsg_hashlookup {
	struct netmsg_base	base;
	struct inpcb		**nm_pinp;
	struct inpcbinfo    	*nm_pcbinfo;
struct pf_addr *nm_saddr; 3335 struct pf_addr *nm_daddr; 3336 uint16_t nm_sport; 3337 uint16_t nm_dport; 3338 sa_family_t nm_af; 3339 }; 3340 3341 #ifdef PF_SOCKET_LOOKUP_DOMSG 3342 static void 3343 in_pcblookup_hash_handler(netmsg_t msg) 3344 { 3345 struct netmsg_hashlookup *rmsg = (struct netmsg_hashlookup *)msg; 3346 3347 if (rmsg->nm_af == AF_INET) 3348 *rmsg->nm_pinp = in_pcblookup_hash(rmsg->nm_pcbinfo, 3349 rmsg->nm_saddr->v4, rmsg->nm_sport, rmsg->nm_daddr->v4, 3350 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 3351 #ifdef INET6 3352 else 3353 *rmsg->nm_pinp = in6_pcblookup_hash(rmsg->nm_pcbinfo, 3354 &rmsg->nm_saddr->v6, rmsg->nm_sport, &rmsg->nm_daddr->v6, 3355 rmsg->nm_dport, INPLOOKUP_WILDCARD, NULL); 3356 #endif /* INET6 */ 3357 lwkt_replymsg(&rmsg->base.lmsg, 0); 3358 } 3359 #endif /* PF_SOCKET_LOOKUP_DOMSG */ 3360 3361 int 3362 pf_socket_lookup(int direction, struct pf_pdesc *pd) 3363 { 3364 struct pf_addr *saddr, *daddr; 3365 u_int16_t sport, dport; 3366 struct inpcbinfo *pi; 3367 struct inpcb *inp; 3368 struct netmsg_hashlookup *msg = NULL; 3369 #ifdef PF_SOCKET_LOOKUP_DOMSG 3370 struct netmsg_hashlookup msg0; 3371 #endif 3372 int pi_cpu = 0; 3373 3374 if (pd == NULL) 3375 return (-1); 3376 pd->lookup.uid = UID_MAX; 3377 pd->lookup.gid = GID_MAX; 3378 pd->lookup.pid = NO_PID; 3379 if (direction == PF_IN) { 3380 saddr = pd->src; 3381 daddr = pd->dst; 3382 } else { 3383 saddr = pd->dst; 3384 daddr = pd->src; 3385 } 3386 switch (pd->proto) { 3387 case IPPROTO_TCP: 3388 if (pd->hdr.tcp == NULL) 3389 return (-1); 3390 sport = pd->hdr.tcp->th_sport; 3391 dport = pd->hdr.tcp->th_dport; 3392 3393 pi_cpu = tcp_addrcpu(saddr->v4.s_addr, sport, daddr->v4.s_addr, dport); 3394 pi = &tcbinfo[pi_cpu]; 3395 /* 3396 * Our netstack runs lockless on MP systems 3397 * (only for TCP connections at the moment). 3398 * 3399 * As we are not allowed to read another CPU's tcbinfo, 3400 * we have to ask that CPU via remote call to search the 3401 * table for us. 
3402 * 3403 * Prepare a msg iff data belongs to another CPU. 3404 */ 3405 if (pi_cpu != mycpu->gd_cpuid) { 3406 #ifdef PF_SOCKET_LOOKUP_DOMSG 3407 /* 3408 * NOTE: 3409 * 3410 * Following lwkt_domsg() is dangerous and could 3411 * lockup the network system, e.g. 3412 * 3413 * On 2 CPU system: 3414 * netisr0 domsg to netisr1 (due to lookup) 3415 * netisr1 domsg to netisr0 (due to lookup) 3416 * 3417 * We simply return -1 here, since we are probably 3418 * called before NAT, so the TCP packet should 3419 * already be on the correct CPU. 3420 */ 3421 msg = &msg0; 3422 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 3423 0, in_pcblookup_hash_handler); 3424 msg->nm_pinp = &inp; 3425 msg->nm_pcbinfo = pi; 3426 msg->nm_saddr = saddr; 3427 msg->nm_sport = sport; 3428 msg->nm_daddr = daddr; 3429 msg->nm_dport = dport; 3430 msg->nm_af = pd->af; 3431 #else /* !PF_SOCKET_LOOKUP_DOMSG */ 3432 kprintf("pf_socket_lookup: tcp packet not on the " 3433 "correct cpu %d, cur cpu %d\n", 3434 pi_cpu, mycpuid); 3435 print_backtrace(-1); 3436 return -1; 3437 #endif /* PF_SOCKET_LOOKUP_DOMSG */ 3438 } 3439 break; 3440 case IPPROTO_UDP: 3441 if (pd->hdr.udp == NULL) 3442 return (-1); 3443 sport = pd->hdr.udp->uh_sport; 3444 dport = pd->hdr.udp->uh_dport; 3445 pi = &udbinfo[mycpuid]; 3446 break; 3447 default: 3448 return (-1); 3449 } 3450 if (direction != PF_IN) { 3451 u_int16_t p; 3452 3453 p = sport; 3454 sport = dport; 3455 dport = p; 3456 } 3457 switch (pd->af) { 3458 #ifdef INET6 3459 case AF_INET6: 3460 /* 3461 * Query other CPU, second part 3462 * 3463 * msg only gets initialized when: 3464 * 1) packet is TCP 3465 * 2) the info belongs to another CPU 3466 * 3467 * Use some switch/case magic to avoid code duplication. 
3468 */ 3469 if (msg == NULL) { 3470 inp = in6_pcblookup_hash(pi, &saddr->v6, sport, 3471 &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); 3472 3473 if (inp == NULL) 3474 return (-1); 3475 break; 3476 } 3477 /* FALLTHROUGH if SMP and on other CPU */ 3478 #endif /* INET6 */ 3479 case AF_INET: 3480 if (msg != NULL) { 3481 lwkt_domsg(netisr_cpuport(pi_cpu), 3482 &msg->base.lmsg, 0); 3483 } else 3484 { 3485 inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, 3486 dport, INPLOOKUP_WILDCARD, NULL); 3487 } 3488 if (inp == NULL) 3489 return (-1); 3490 break; 3491 3492 default: 3493 return (-1); 3494 } 3495 pd->lookup.uid = inp->inp_socket->so_cred->cr_uid; 3496 pd->lookup.gid = inp->inp_socket->so_cred->cr_groups[0]; 3497 return (1); 3498 } 3499 3500 u_int8_t 3501 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 3502 { 3503 int hlen; 3504 u_int8_t hdr[60]; 3505 u_int8_t *opt, optlen; 3506 u_int8_t wscale = 0; 3507 3508 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 3509 if (hlen <= sizeof(struct tcphdr)) 3510 return (0); 3511 if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 3512 return (0); 3513 opt = hdr + sizeof(struct tcphdr); 3514 hlen -= sizeof(struct tcphdr); 3515 while (hlen >= 3) { 3516 switch (*opt) { 3517 case TCPOPT_EOL: 3518 case TCPOPT_NOP: 3519 ++opt; 3520 --hlen; 3521 break; 3522 case TCPOPT_WINDOW: 3523 wscale = opt[2]; 3524 if (wscale > TCP_MAX_WINSHIFT) 3525 wscale = TCP_MAX_WINSHIFT; 3526 wscale |= PF_WSCALE_FLAG; 3527 /* FALLTHROUGH */ 3528 default: 3529 optlen = opt[1]; 3530 if (optlen < 2) 3531 optlen = 2; 3532 hlen -= optlen; 3533 opt += optlen; 3534 break; 3535 } 3536 } 3537 return (wscale); 3538 } 3539 3540 u_int16_t 3541 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) 3542 { 3543 int hlen; 3544 u_int8_t hdr[60]; 3545 u_int8_t *opt, optlen; 3546 u_int16_t mss = tcp_mssdflt; 3547 3548 hlen = th_off << 2; /* hlen <= sizeof(hdr) */ 3549 if (hlen <= sizeof(struct tcphdr)) 3550 return (0); 3551 if 
(!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) 3552 return (0); 3553 opt = hdr + sizeof(struct tcphdr); 3554 hlen -= sizeof(struct tcphdr); 3555 while (hlen >= TCPOLEN_MAXSEG) { 3556 switch (*opt) { 3557 case TCPOPT_EOL: 3558 case TCPOPT_NOP: 3559 ++opt; 3560 --hlen; 3561 break; 3562 case TCPOPT_MAXSEG: 3563 bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); 3564 /* FALLTHROUGH */ 3565 default: 3566 optlen = opt[1]; 3567 if (optlen < 2) 3568 optlen = 2; 3569 hlen -= optlen; 3570 opt += optlen; 3571 break; 3572 } 3573 } 3574 return (mss); 3575 } 3576 3577 u_int16_t 3578 pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) 3579 { 3580 #ifdef INET 3581 struct sockaddr_in *dst; 3582 struct route ro; 3583 #endif /* INET */ 3584 #ifdef INET6 3585 struct sockaddr_in6 *dst6; 3586 struct route_in6 ro6; 3587 #endif /* INET6 */ 3588 struct rtentry *rt = NULL; 3589 int hlen = 0; 3590 u_int16_t mss = tcp_mssdflt; 3591 3592 switch (af) { 3593 #ifdef INET 3594 case AF_INET: 3595 hlen = sizeof(struct ip); 3596 bzero(&ro, sizeof(ro)); 3597 dst = (struct sockaddr_in *)&ro.ro_dst; 3598 dst->sin_family = AF_INET; 3599 dst->sin_len = sizeof(*dst); 3600 dst->sin_addr = addr->v4; 3601 rtalloc_ign(&ro, (RTF_CLONING | RTF_PRCLONING)); 3602 rt = ro.ro_rt; 3603 break; 3604 #endif /* INET */ 3605 #ifdef INET6 3606 case AF_INET6: 3607 hlen = sizeof(struct ip6_hdr); 3608 bzero(&ro6, sizeof(ro6)); 3609 dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; 3610 dst6->sin6_family = AF_INET6; 3611 dst6->sin6_len = sizeof(*dst6); 3612 dst6->sin6_addr = addr->v6; 3613 rtalloc_ign((struct route *)&ro6, (RTF_CLONING | RTF_PRCLONING)); 3614 rt = ro6.ro_rt; 3615 break; 3616 #endif /* INET6 */ 3617 } 3618 3619 if (rt && rt->rt_ifp) { 3620 mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr); 3621 mss = max(tcp_mssdflt, mss); 3622 RTFREE(rt); 3623 } 3624 mss = min(mss, offer); 3625 mss = max(mss, 64); /* sanity - at least max opt space */ 3626 return (mss); 3627 } 3628 3629 void 3630 
pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) 3631 { 3632 struct pf_rule *r = s->rule.ptr; 3633 3634 s->rt_kif = NULL; 3635 if (!r->rt || r->rt == PF_FASTROUTE) 3636 return; 3637 switch (s->key[PF_SK_WIRE]->af) { 3638 #ifdef INET 3639 case AF_INET: 3640 pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, 3641 &s->nat_src_node); 3642 s->rt_kif = r->rpool.cur->kif; 3643 break; 3644 #endif /* INET */ 3645 #ifdef INET6 3646 case AF_INET6: 3647 pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, 3648 &s->nat_src_node); 3649 s->rt_kif = r->rpool.cur->kif; 3650 break; 3651 #endif /* INET6 */ 3652 } 3653 } 3654 3655 u_int32_t 3656 pf_tcp_iss(struct pf_pdesc *pd) 3657 { 3658 MD5_CTX ctx; 3659 u_int32_t digest[4]; 3660 3661 if (pf_tcp_secret_init == 0) { 3662 lwkt_gettoken(&pf_gtoken); 3663 if (pf_tcp_secret_init == 0) { 3664 karc4rand(pf_tcp_secret, sizeof(pf_tcp_secret)); 3665 MD5Init(&pf_tcp_secret_ctx); 3666 MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3667 sizeof(pf_tcp_secret)); 3668 pf_tcp_secret_init = 1; 3669 } 3670 lwkt_reltoken(&pf_gtoken); 3671 } 3672 ctx = pf_tcp_secret_ctx; 3673 3674 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short)); 3675 MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short)); 3676 if (pd->af == AF_INET6) { 3677 MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr)); 3678 MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr)); 3679 } else { 3680 MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr)); 3681 MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr)); 3682 } 3683 MD5Final((u_char *)digest, &ctx); 3684 pf_tcp_iss_off += 4096; 3685 3686 return (digest[0] + pd->hdr.tcp->th_seq + pf_tcp_iss_off); 3687 } 3688 3689 int 3690 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, 3691 struct pfi_kif *kif, struct mbuf *m, int off, void *h, 3692 struct pf_pdesc *pd, struct pf_rule **am, struct pf_ruleset **rsm, 3693 struct ifqueue *ifq, struct inpcb *inp) 3694 { 3695 
struct pf_rule *nr = NULL; 3696 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 3697 sa_family_t af = pd->af; 3698 struct pf_rule *r, *a = NULL; 3699 struct pf_ruleset *ruleset = NULL; 3700 struct pf_src_node *nsn = NULL; 3701 struct tcphdr *th = pd->hdr.tcp; 3702 struct pf_state_key *skw = NULL, *sks = NULL; 3703 struct pf_state_key *sk = NULL, *nk = NULL; 3704 u_short reason; 3705 int rewrite = 0, hdrlen = 0; 3706 int tag = -1, rtableid = -1; 3707 int asd = 0; 3708 int match = 0; 3709 int state_icmp = 0; 3710 u_int16_t sport = 0, dport = 0; 3711 u_int16_t bproto_sum = 0, bip_sum = 0; 3712 u_int8_t icmptype = 0, icmpcode = 0; 3713 3714 3715 if (direction == PF_IN && pf_check_congestion(ifq)) { 3716 REASON_SET(&reason, PFRES_CONGEST); 3717 return (PF_DROP); 3718 } 3719 3720 if (inp != NULL) 3721 pd->lookup.done = pf_socket_lookup(direction, pd); 3722 else if (debug_pfugidhack) { 3723 DPFPRINTF(PF_DEBUG_MISC, ("pf: unlocked lookup\n")); 3724 pd->lookup.done = pf_socket_lookup(direction, pd); 3725 } 3726 3727 switch (pd->proto) { 3728 case IPPROTO_TCP: 3729 sport = th->th_sport; 3730 dport = th->th_dport; 3731 hdrlen = sizeof(*th); 3732 break; 3733 case IPPROTO_UDP: 3734 sport = pd->hdr.udp->uh_sport; 3735 dport = pd->hdr.udp->uh_dport; 3736 hdrlen = sizeof(*pd->hdr.udp); 3737 break; 3738 #ifdef INET 3739 case IPPROTO_ICMP: 3740 if (pd->af != AF_INET) 3741 break; 3742 sport = dport = pd->hdr.icmp->icmp_id; 3743 hdrlen = sizeof(*pd->hdr.icmp); 3744 icmptype = pd->hdr.icmp->icmp_type; 3745 icmpcode = pd->hdr.icmp->icmp_code; 3746 3747 if (icmptype == ICMP_UNREACH || 3748 icmptype == ICMP_SOURCEQUENCH || 3749 icmptype == ICMP_REDIRECT || 3750 icmptype == ICMP_TIMXCEED || 3751 icmptype == ICMP_PARAMPROB) 3752 state_icmp++; 3753 break; 3754 #endif /* INET */ 3755 #ifdef INET6 3756 case IPPROTO_ICMPV6: 3757 if (af != AF_INET6) 3758 break; 3759 sport = dport = pd->hdr.icmp6->icmp6_id; 3760 hdrlen = sizeof(*pd->hdr.icmp6); 3761 icmptype = pd->hdr.icmp6->icmp6_type; 3762 
icmpcode = pd->hdr.icmp6->icmp6_code; 3763 3764 if (icmptype == ICMP6_DST_UNREACH || 3765 icmptype == ICMP6_PACKET_TOO_BIG || 3766 icmptype == ICMP6_TIME_EXCEEDED || 3767 icmptype == ICMP6_PARAM_PROB) 3768 state_icmp++; 3769 break; 3770 #endif /* INET6 */ 3771 default: 3772 sport = dport = hdrlen = 0; 3773 break; 3774 } 3775 3776 r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); 3777 3778 /* check packet for BINAT/NAT/RDR */ 3779 if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, 3780 &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { 3781 if (nk == NULL || sk == NULL) { 3782 REASON_SET(&reason, PFRES_MEMORY); 3783 goto cleanup; 3784 } 3785 3786 if (pd->ip_sum) 3787 bip_sum = *pd->ip_sum; 3788 3789 m->m_flags &= ~M_HASH; 3790 switch (pd->proto) { 3791 case IPPROTO_TCP: 3792 bproto_sum = th->th_sum; 3793 pd->proto_sum = &th->th_sum; 3794 3795 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3796 nk->port[pd->sidx] != sport) { 3797 pf_change_ap(saddr, &th->th_sport, pd->ip_sum, 3798 &th->th_sum, &nk->addr[pd->sidx], 3799 nk->port[pd->sidx], 0, af); 3800 pd->sport = &th->th_sport; 3801 sport = th->th_sport; 3802 } 3803 3804 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3805 nk->port[pd->didx] != dport) { 3806 pf_change_ap(daddr, &th->th_dport, pd->ip_sum, 3807 &th->th_sum, &nk->addr[pd->didx], 3808 nk->port[pd->didx], 0, af); 3809 dport = th->th_dport; 3810 pd->dport = &th->th_dport; 3811 } 3812 rewrite++; 3813 break; 3814 case IPPROTO_UDP: 3815 bproto_sum = pd->hdr.udp->uh_sum; 3816 pd->proto_sum = &pd->hdr.udp->uh_sum; 3817 3818 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || 3819 nk->port[pd->sidx] != sport) { 3820 pf_change_ap(saddr, &pd->hdr.udp->uh_sport, 3821 pd->ip_sum, &pd->hdr.udp->uh_sum, 3822 &nk->addr[pd->sidx], 3823 nk->port[pd->sidx], 1, af); 3824 sport = pd->hdr.udp->uh_sport; 3825 pd->sport = &pd->hdr.udp->uh_sport; 3826 } 3827 3828 if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || 3829 nk->port[pd->didx] != 
dport) { 3830 pf_change_ap(daddr, &pd->hdr.udp->uh_dport, 3831 pd->ip_sum, &pd->hdr.udp->uh_sum, 3832 &nk->addr[pd->didx], 3833 nk->port[pd->didx], 1, af); 3834 dport = pd->hdr.udp->uh_dport; 3835 pd->dport = &pd->hdr.udp->uh_dport; 3836 } 3837 rewrite++; 3838 break; 3839 #ifdef INET 3840 case IPPROTO_ICMP: 3841 nk->port[0] = nk->port[1]; 3842 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) 3843 pf_change_a(&saddr->v4.s_addr, pd->ip_sum, 3844 nk->addr[pd->sidx].v4.s_addr, 0); 3845 3846 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) 3847 pf_change_a(&daddr->v4.s_addr, pd->ip_sum, 3848 nk->addr[pd->didx].v4.s_addr, 0); 3849 3850 if (nk->port[1] != pd->hdr.icmp->icmp_id) { 3851 pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( 3852 pd->hdr.icmp->icmp_cksum, sport, 3853 nk->port[1], 0); 3854 pd->hdr.icmp->icmp_id = nk->port[1]; 3855 pd->sport = &pd->hdr.icmp->icmp_id; 3856 } 3857 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 3858 break; 3859 #endif /* INET */ 3860 #ifdef INET6 3861 case IPPROTO_ICMPV6: 3862 nk->port[0] = nk->port[1]; 3863 if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) 3864 pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, 3865 &nk->addr[pd->sidx], 0); 3866 3867 if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) 3868 pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, 3869 &nk->addr[pd->didx], 0); 3870 rewrite++; 3871 break; 3872 #endif /* INET */ 3873 default: 3874 switch (af) { 3875 #ifdef INET 3876 case AF_INET: 3877 if (PF_ANEQ(saddr, 3878 &nk->addr[pd->sidx], AF_INET)) 3879 pf_change_a(&saddr->v4.s_addr, 3880 pd->ip_sum, 3881 nk->addr[pd->sidx].v4.s_addr, 0); 3882 3883 if (PF_ANEQ(daddr, 3884 &nk->addr[pd->didx], AF_INET)) 3885 pf_change_a(&daddr->v4.s_addr, 3886 pd->ip_sum, 3887 nk->addr[pd->didx].v4.s_addr, 0); 3888 break; 3889 #endif /* INET */ 3890 #ifdef INET6 3891 case AF_INET6: 3892 if (PF_ANEQ(saddr, 3893 &nk->addr[pd->sidx], AF_INET6)) 3894 PF_ACPY(saddr, &nk->addr[pd->sidx], af); 3895 3896 if (PF_ANEQ(daddr, 3897 
&nk->addr[pd->didx], AF_INET6)) 3898 PF_ACPY(saddr, &nk->addr[pd->didx], af); 3899 break; 3900 #endif /* INET */ 3901 } 3902 break; 3903 } 3904 if (nr->natpass) 3905 r = NULL; 3906 pd->nat_rule = nr; 3907 } 3908 3909 while (r != NULL) { 3910 r->evaluations++; 3911 if (pfi_kif_match(r->kif, kif) == r->ifnot) 3912 r = r->skip[PF_SKIP_IFP].ptr; 3913 else if (r->direction && r->direction != direction) 3914 r = r->skip[PF_SKIP_DIR].ptr; 3915 else if (r->af && r->af != af) 3916 r = r->skip[PF_SKIP_AF].ptr; 3917 else if (r->proto && r->proto != pd->proto) 3918 r = r->skip[PF_SKIP_PROTO].ptr; 3919 else if (PF_MISMATCHAW(&r->src.addr, saddr, af, 3920 r->src.neg, kif)) 3921 r = r->skip[PF_SKIP_SRC_ADDR].ptr; 3922 /* tcp/udp only. port_op always 0 in other cases */ 3923 else if (r->src.port_op && !pf_match_port(r->src.port_op, 3924 r->src.port[0], r->src.port[1], sport)) 3925 r = r->skip[PF_SKIP_SRC_PORT].ptr; 3926 else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, 3927 r->dst.neg, NULL)) 3928 r = r->skip[PF_SKIP_DST_ADDR].ptr; 3929 /* tcp/udp only. port_op always 0 in other cases */ 3930 else if (r->dst.port_op && !pf_match_port(r->dst.port_op, 3931 r->dst.port[0], r->dst.port[1], dport)) 3932 r = r->skip[PF_SKIP_DST_PORT].ptr; 3933 /* icmp only. type always 0 in other cases */ 3934 else if (r->type && r->type != icmptype + 1) 3935 r = TAILQ_NEXT(r, entries); 3936 /* icmp only. type always 0 in other cases */ 3937 else if (r->code && r->code != icmpcode + 1) 3938 r = TAILQ_NEXT(r, entries); 3939 else if (r->tos && !(r->tos == pd->tos)) 3940 r = TAILQ_NEXT(r, entries); 3941 else if (r->rule_flag & PFRULE_FRAGMENT) 3942 r = TAILQ_NEXT(r, entries); 3943 else if (pd->proto == IPPROTO_TCP && 3944 (r->flagset & th->th_flags) != r->flags) 3945 r = TAILQ_NEXT(r, entries); 3946 /* tcp/udp only. 
uid.op always 0 in other cases */ 3947 else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = 3948 pf_socket_lookup(direction, pd), 1)) && 3949 !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], 3950 pd->lookup.uid)) 3951 r = TAILQ_NEXT(r, entries); 3952 /* tcp/udp only. gid.op always 0 in other cases */ 3953 else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = 3954 pf_socket_lookup(direction, pd), 1)) && 3955 !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], 3956 pd->lookup.gid)) 3957 r = TAILQ_NEXT(r, entries); 3958 else if (r->prob && 3959 r->prob <= karc4random()) 3960 r = TAILQ_NEXT(r, entries); 3961 else if (r->match_tag && !pf_match_tag(m, r, &tag)) 3962 r = TAILQ_NEXT(r, entries); 3963 else if (r->os_fingerprint != PF_OSFP_ANY && 3964 (pd->proto != IPPROTO_TCP || !pf_osfp_match( 3965 pf_osfp_fingerprint(pd, m, off, th), 3966 r->os_fingerprint))) 3967 r = TAILQ_NEXT(r, entries); 3968 else { 3969 if (r->tag) 3970 tag = r->tag; 3971 if (r->rtableid >= 0) 3972 rtableid = r->rtableid; 3973 if (r->anchor == NULL) { 3974 match = 1; 3975 *rm = r; 3976 *am = a; 3977 *rsm = ruleset; 3978 if ((*rm)->quick) 3979 break; 3980 r = TAILQ_NEXT(r, entries); 3981 } else 3982 pf_step_into_anchor(&asd, &ruleset, 3983 PF_RULESET_FILTER, &r, &a, &match); 3984 } 3985 if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, 3986 PF_RULESET_FILTER, &r, &a, &match)) 3987 break; 3988 } 3989 r = *rm; 3990 a = *am; 3991 ruleset = *rsm; 3992 3993 REASON_SET(&reason, PFRES_MATCH); 3994 3995 if (r->log || (nr != NULL && nr->log)) { 3996 if (rewrite) 3997 m_copyback(m, off, hdrlen, pd->hdr.any); 3998 PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, 3999 a, ruleset, pd); 4000 } 4001 4002 if ((r->action == PF_DROP) && 4003 ((r->rule_flag & PFRULE_RETURNRST) || 4004 (r->rule_flag & PFRULE_RETURNICMP) || 4005 (r->rule_flag & PFRULE_RETURN))) { 4006 /* undo NAT changes, if they have taken place */ 4007 if (nr != NULL) { 4008 PF_ACPY(saddr, &sk->addr[pd->sidx], af); 4009 PF_ACPY(daddr, &sk->addr[pd->didx], af); 4010 if (pd->sport) 4011 *pd->sport = sk->port[pd->sidx]; 4012 if (pd->dport) 4013 *pd->dport = sk->port[pd->didx]; 4014 if (pd->proto_sum) 4015 *pd->proto_sum = bproto_sum; 4016 if (pd->ip_sum) 4017 *pd->ip_sum = bip_sum; 4018 m_copyback(m, off, hdrlen, pd->hdr.any); 4019 } 4020 if (pd->proto == IPPROTO_TCP && 4021 ((r->rule_flag & PFRULE_RETURNRST) || 4022 (r->rule_flag & PFRULE_RETURN)) && 4023 !(th->th_flags & TH_RST)) { 4024 u_int32_t ack = ntohl(th->th_seq) + pd->p_len; 4025 int len = 0; 4026 struct ip *h4; 4027 #ifdef INET6 4028 struct ip6_hdr *h6; 4029 #endif 4030 switch (af) { 4031 case AF_INET: 4032 h4 = mtod(m, struct ip *); 4033 len = ntohs(h4->ip_len) - off; 4034 break; 4035 #ifdef INET6 4036 case AF_INET6: 4037 h6 = mtod(m, struct ip6_hdr *); 4038 len = h6->ip6_plen - (off - sizeof(*h6)); 4039 break; 4040 #endif 4041 } 4042 4043 if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) 4044 REASON_SET(&reason, PFRES_PROTCKSUM); 4045 else { 4046 if (th->th_flags & TH_SYN) 4047 ack++; 4048 if (th->th_flags & TH_FIN) 4049 ack++; 4050 pf_send_tcp(r, af, pd->dst, 4051 pd->src, th->th_dport, th->th_sport, 4052 ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, 4053 r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); 4054 } 4055 } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && 4056 r->return_icmp) 4057 pf_send_icmp(m, r->return_icmp >> 8, 4058 r->return_icmp & 255, af, r); 4059 else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && 4060 r->return_icmp6) 4061 pf_send_icmp(m, r->return_icmp6 >> 8, 4062 r->return_icmp6 & 255, af, r); 4063 } 4064 4065 if (r->action == PF_DROP) 4066 goto cleanup; 4067 
4068 if (pf_tag_packet(m, tag, rtableid)) { 4069 REASON_SET(&reason, PFRES_MEMORY); 4070 goto cleanup; 4071 } 4072 4073 if (!state_icmp && (r->keep_state || nr != NULL || 4074 (pd->flags & PFDESC_TCP_NORM))) { 4075 int action; 4076 action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, 4077 off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, 4078 bip_sum, hdrlen); 4079 if (action != PF_PASS) 4080 return (action); 4081 } 4082 4083 /* copy back packet headers if we performed NAT operations */ 4084 if (rewrite) 4085 m_copyback(m, off, hdrlen, pd->hdr.any); 4086 4087 return (PF_PASS); 4088 4089 cleanup: 4090 if (sk != NULL) 4091 kfree(sk, M_PFSTATEKEYPL); 4092 if (nk != NULL) 4093 kfree(nk, M_PFSTATEKEYPL); 4094 return (PF_DROP); 4095 } 4096 4097 static __inline int 4098 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, 4099 struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, 4100 struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, 4101 struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, 4102 struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, 4103 u_int16_t bip_sum, int hdrlen) 4104 { 4105 struct pf_state *s = NULL; 4106 struct pf_src_node *sn = NULL; 4107 struct tcphdr *th = pd->hdr.tcp; 4108 u_int16_t mss = tcp_mssdflt; 4109 u_short reason; 4110 int cpu = mycpu->gd_cpuid; 4111 4112 /* check maximums */ 4113 if (r->max_states && (r->states_cur >= r->max_states)) { 4114 PF_INC_LCOUNTER(LCNT_STATES); 4115 REASON_SET(&reason, PFRES_MAXSTATES); 4116 return (PF_DROP); 4117 } 4118 /* src node for filter rule */ 4119 if ((r->rule_flag & PFRULE_SRCTRACK || 4120 r->rpool.opts & PF_POOL_STICKYADDR) && 4121 pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { 4122 REASON_SET(&reason, PFRES_SRCLIMIT); 4123 goto csfailed; 4124 } 4125 /* src node for translation rule */ 4126 if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && 4127 
pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { 4128 REASON_SET(&reason, PFRES_SRCLIMIT); 4129 goto csfailed; 4130 } 4131 s = kmalloc(sizeof(struct pf_state), M_PFSTATEPL, M_NOWAIT|M_ZERO); 4132 if (s == NULL) { 4133 REASON_SET(&reason, PFRES_MEMORY); 4134 goto csfailed; 4135 } 4136 lockinit(&s->lk, "pfstlk", 0, 0); 4137 s->id = 0; /* XXX Do we really need that? not in OpenBSD */ 4138 s->creatorid = 0; 4139 s->rule.ptr = r; 4140 s->nat_rule.ptr = nr; 4141 s->anchor.ptr = a; 4142 s->state_flags = PFSTATE_CREATEINPROG; 4143 STATE_INC_COUNTERS(s); 4144 if (r->allow_opts) 4145 s->state_flags |= PFSTATE_ALLOWOPTS; 4146 if (r->rule_flag & PFRULE_STATESLOPPY) 4147 s->state_flags |= PFSTATE_SLOPPY; 4148 if (pd->not_cpu_localized) 4149 s->state_flags |= PFSTATE_STACK_GLOBAL; 4150 4151 s->log = r->log & PF_LOG_ALL; 4152 if (nr != NULL) 4153 s->log |= nr->log & PF_LOG_ALL; 4154 switch (pd->proto) { 4155 case IPPROTO_TCP: 4156 s->src.seqlo = ntohl(th->th_seq); 4157 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4158 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4159 r->keep_state == PF_STATE_MODULATE) { 4160 /* Generate sequence number modulator */ 4161 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4162 0) 4163 s->src.seqdiff = 1; 4164 pf_change_a(&th->th_seq, &th->th_sum, 4165 htonl(s->src.seqlo + s->src.seqdiff), 0); 4166 *rewrite = 1; 4167 } else 4168 s->src.seqdiff = 0; 4169 if (th->th_flags & TH_SYN) { 4170 s->src.seqhi++; 4171 s->src.wscale = pf_get_wscale(m, off, 4172 th->th_off, pd->af); 4173 } 4174 s->src.max_win = MAX(ntohs(th->th_win), 1); 4175 if (s->src.wscale & PF_WSCALE_MASK) { 4176 /* Remove scale factor from initial window */ 4177 int win = s->src.max_win; 4178 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4179 s->src.max_win = (win - 1) >> 4180 (s->src.wscale & PF_WSCALE_MASK); 4181 } 4182 if (th->th_flags & TH_FIN) 4183 s->src.seqhi++; 4184 s->dst.seqhi = 1; 4185 s->dst.max_win = 1; 4186 s->src.state = TCPS_SYN_SENT; 4187 
s->dst.state = TCPS_CLOSED; 4188 s->timeout = PFTM_TCP_FIRST_PACKET; 4189 break; 4190 case IPPROTO_UDP: 4191 s->src.state = PFUDPS_SINGLE; 4192 s->dst.state = PFUDPS_NO_TRAFFIC; 4193 s->timeout = PFTM_UDP_FIRST_PACKET; 4194 break; 4195 case IPPROTO_ICMP: 4196 #ifdef INET6 4197 case IPPROTO_ICMPV6: 4198 #endif 4199 s->timeout = PFTM_ICMP_FIRST_PACKET; 4200 break; 4201 default: 4202 s->src.state = PFOTHERS_SINGLE; 4203 s->dst.state = PFOTHERS_NO_TRAFFIC; 4204 s->timeout = PFTM_OTHER_FIRST_PACKET; 4205 } 4206 4207 s->creation = time_second; 4208 s->expire = time_second; 4209 4210 if (sn != NULL) { 4211 s->src_node = sn; 4212 s->src_node->states++; 4213 } 4214 if (nsn != NULL) { 4215 /* XXX We only modify one side for now. */ 4216 PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); 4217 s->nat_src_node = nsn; 4218 s->nat_src_node->states++; 4219 } 4220 if (pd->proto == IPPROTO_TCP) { 4221 if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, 4222 off, pd, th, &s->src, &s->dst)) { 4223 REASON_SET(&reason, PFRES_MEMORY); 4224 pf_src_tree_remove_state(s); 4225 STATE_DEC_COUNTERS(s); 4226 kfree(s, M_PFSTATEPL); 4227 return (PF_DROP); 4228 } 4229 if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && 4230 pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, 4231 &s->src, &s->dst, rewrite)) { 4232 /* This really shouldn't happen!!! 
*/ 4233 DPFPRINTF(PF_DEBUG_URGENT, 4234 ("pf_normalize_tcp_stateful failed on first pkt")); 4235 pf_normalize_tcp_cleanup(s); 4236 pf_src_tree_remove_state(s); 4237 STATE_DEC_COUNTERS(s); 4238 kfree(s, M_PFSTATEPL); 4239 return (PF_DROP); 4240 } 4241 } 4242 s->direction = pd->dir; 4243 4244 if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, 4245 pd->src, pd->dst, sport, dport)) { 4246 REASON_SET(&reason, PFRES_MEMORY); 4247 goto csfailed; 4248 } 4249 4250 if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { 4251 if (pd->proto == IPPROTO_TCP) 4252 pf_normalize_tcp_cleanup(s); 4253 REASON_SET(&reason, PFRES_STATEINS); 4254 pf_src_tree_remove_state(s); 4255 STATE_DEC_COUNTERS(s); 4256 kfree(s, M_PFSTATEPL); 4257 return (PF_DROP); 4258 } else 4259 *sm = s; 4260 4261 pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ 4262 if (tag > 0) { 4263 pf_tag_ref(tag); 4264 s->tag = tag; 4265 } 4266 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4267 TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { 4268 s->src.state = PF_TCPS_PROXY_SRC; 4269 /* undo NAT changes, if they have taken place */ 4270 if (nr != NULL) { 4271 struct pf_state_key *skt = s->key[PF_SK_WIRE]; 4272 if (pd->dir == PF_OUT) 4273 skt = s->key[PF_SK_STACK]; 4274 PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); 4275 PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); 4276 if (pd->sport) 4277 *pd->sport = skt->port[pd->sidx]; 4278 if (pd->dport) 4279 *pd->dport = skt->port[pd->didx]; 4280 if (pd->proto_sum) 4281 *pd->proto_sum = bproto_sum; 4282 if (pd->ip_sum) 4283 *pd->ip_sum = bip_sum; 4284 m->m_flags &= ~M_HASH; 4285 m_copyback(m, off, hdrlen, pd->hdr.any); 4286 } 4287 s->src.seqhi = htonl(karc4random()); 4288 /* Find mss option */ 4289 mss = pf_get_mss(m, off, th->th_off, pd->af); 4290 mss = pf_calc_mss(pd->src, pd->af, mss); 4291 mss = pf_calc_mss(pd->dst, pd->af, mss); 4292 s->src.mss = mss; 4293 s->state_flags &= ~PFSTATE_CREATEINPROG; 4294 pf_send_tcp(r, pd->af, 
pd->dst, pd->src, th->th_dport,
		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL);
		REASON_SET(&reason, PFRES_SYNPROXY);
		return (PF_SYNPROXY_DROP);
	}

	s->state_flags &= ~PFSTATE_CREATEINPROG;
	return (PF_PASS);

csfailed:
	if (sk != NULL)
		kfree(sk, M_PFSTATEKEYPL);
	if (nk != NULL)
		kfree(nk, M_PFSTATEKEYPL);

	if (sn != NULL && sn->states == 0 && sn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], sn);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
		atomic_add_int(&pf_status.src_nodes, -1);
		kfree(sn, M_PFSRCTREEPL);
	}
	if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) {
		RB_REMOVE(pf_src_tree, &tree_src_tracking[cpu], nsn);
		PF_INC_SCOUNTER(SCNT_SRC_NODE_REMOVALS);
		atomic_add_int(&pf_status.src_nodes, -1);
		kfree(nsn, M_PFSRCTREEPL);
	}
	if (s) {
		pf_src_tree_remove_state(s);
		STATE_DEC_COUNTERS(s);
		kfree(s, M_PFSTATEPL);
	}

	return (PF_DROP);
}

/*
 * Evaluate the active filter ruleset of pf_main_ruleset against a packet
 * fragment and return the verdict.
 *
 * rm, am, rsm	in/out: matching rule, its anchor rule and its ruleset.
 *		They are only overwritten when a non-anchor rule matches,
 *		but *rm is dereferenced unconditionally after the walk, so
 *		it must point at a valid rule on entry (presumably the
 *		default rule -- confirm against the caller).
 * direction, kif, pd
 *		packet direction, interface and packet descriptor used for
 *		the rule comparisons.
 * m		packet; used for tag matching and tagging.
 * h		only handed to PFLOG_PACKET().
 *
 * Returns PF_PASS when the selected rule's action is PF_PASS and the
 * packet could be tagged, PF_DROP otherwise.  The drop reason is kept in
 * a local variable and is not reported back to the caller.
 */
int
pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
    struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
    struct pf_ruleset **rsm)
{
	struct pf_rule		*r, *a = NULL;
	struct pf_ruleset	*ruleset = NULL;
	sa_family_t		 af = pd->af;
	u_short			 reason;
	int			 tag = -1;
	int			 asd = 0;
	int			 match = 0;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		/*
		 * The first six criteria jump ahead through the rule's
		 * skip[] pointers; every later criterion simply advances
		 * to the next rule in the list.
		 */
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != direction)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->tos && !(r->tos == pd->tos))
			r = TAILQ_NEXT(r, entries);
		/*
		 * Rules carrying an OS fingerprint, port operators, TCP
		 * flags or an ICMP type/code never match here (presumably
		 * because a fragment need not carry the transport header).
		 */
		else if (r->os_fingerprint != PF_OSFP_ANY)
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_UDP &&
		    (r->src.port_op || r->dst.port_op))
			r = TAILQ_NEXT(r, entries);
		else if (pd->proto == IPPROTO_TCP &&
		    (r->src.port_op || r->dst.port_op || r->flagset))
			r = TAILQ_NEXT(r, entries);
		else if ((pd->proto == IPPROTO_ICMP ||
		    pd->proto == IPPROTO_ICMPV6) &&
		    (r->type || r->code))
			r = TAILQ_NEXT(r, entries);
		else if (r->prob && r->prob <= karc4random())
			r = TAILQ_NEXT(r, entries);
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else {
			if (r->anchor == NULL) {
				/* Remember the match; "quick" ends the walk */
				match = 1;
				*rm = r;
				*am = a;
				*rsm = ruleset;
				if ((*rm)->quick)
					break;
				r = TAILQ_NEXT(r, entries);
			} else
				pf_step_into_anchor(&asd, &ruleset,
				    PF_RULESET_FILTER, &r, &a, &match);
		}
		/* End of a (sub)ruleset: try to resume in the parent */
		if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset,
		    PF_RULESET_FILTER, &r, &a, &match))
			break;
	}
	/* Reload the result: the last match, or whatever the caller passed */
	r = *rm;
	a = *am;
	ruleset = *rsm;

	REASON_SET(&reason, PFRES_MATCH);

	if (r->log)
		PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset,
		    pd);

	if (r->action != PF_PASS)
		return (PF_DROP);

	if (pf_tag_packet(m, tag, -1)) {
		REASON_SET(&reason, PFRES_MEMORY);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Full (strict) TCP sequence-window tracking for one packet.
 *
 * src and dst are the two state peers as seen from this packet: src is
 * the side whose sequence space the packet's th_seq belongs to.  The
 * sequence and ack numbers are first (de)modulated with the peers'
 * seqdiff values and then tested, in this order:
 *
 *   1. strict match  - windows, peer states, expire and timeout are
 *			updated;
 *   2. loose match   - only considered while dst has not reached
 *			TCPS_SYN_SENT or either peer is at or beyond
 *			TCPS_FIN_WAIT_2; windows are updated but expire
 *			and timeout are left alone;
 *   3. pickup	      - sequence failures are tolerated depending on
 *			(*state)->pickup_mode; only expire is refreshed;
 *   4. failure	      - PFRES_BADSTATE; a RST is sent back when both
 *			peers are still in TCPS_SYN_SENT and the packet
 *			itself is not a RST.
 *
 * m, off	mbuf and offset handed to the normalizer, the window-scale
 *		parser and the SACK modulator.
 * kif		only used for the outgoing interface of the RST in case 4.
 * reason	set on the drop paths in this function; also handed to
 *		pf_normalize_tcp_stateful().
 * copyback	*copyback is set to 1 when the header copy in pd->hdr.tcp
 *		(or, via pf_modulate_sack(), the options) was rewritten.
 *
 * Returns PF_PASS or PF_DROP.
 *
 * Called with state locked
 */
int
pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
	struct pf_pdesc *pd, u_short *reason, int *copyback)
{
	struct tcphdr		*th = pd->hdr.tcp;
	u_int16_t		 win = ntohs(th->th_win);
	u_int32_t		 ack, end, seq, orig_seq;
	u_int8_t		 sws, dws;
	int			 ackskew;

	/*
	 * Window scaling is applied only when both peers have a wscale
	 * value recorded and the packet is not a SYN.
	 */
	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
		sws = src->wscale & PF_WSCALE_MASK;
		dws = dst->wscale & PF_WSCALE_MASK;
	} else {
		sws = dws = 0;
	}

	/*
	 * Sequence tracking algorithm from Guido van Rooij's paper:
	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
	 *	tcp_filtering.ps
	 */

	orig_seq = seq = ntohl(th->th_seq);
	if (src->seqlo == 0) {
		/* First packet from this end. Set its state */

		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
		    src->scrub == NULL) {
			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
				REASON_SET(reason, PFRES_MEMORY);
				return (PF_DROP);
			}
		}

		/* Deferred generation of sequence number modulator */
		if (dst->seqdiff && !src->seqdiff) {
			/* use random iss for the TCP server */
			while ((src->seqdiff = karc4random() - seq) == 0)
				;
			ack = ntohl(th->th_ack) - dst->seqdiff;
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		} else {
			ack = ntohl(th->th_ack);
		}

		/* end = sequence number following this segment */
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN) {
			end++;
			(*state)->sync_flags |= PFSTATE_GOT_SYN2;
			if (dst->wscale & PF_WSCALE_FLAG) {
				src->wscale = pf_get_wscale(m, off, th->th_off,
				    pd->af);
				if (src->wscale & PF_WSCALE_FLAG) {
					/* Remove scale factor from initial
					 * window */
					sws = src->wscale & PF_WSCALE_MASK;
					win = ((u_int32_t)win + (1 << sws) - 1)
					    >> sws;
					dws = dst->wscale & PF_WSCALE_MASK;
				} else {
					/* fixup other window */
					dst->max_win <<= dst->wscale &
					    PF_WSCALE_MASK;
					/* in case of a retrans SYN|ACK */
					dst->wscale = 0;
				}
			}
		}
		if (th->th_flags & TH_FIN)
			end++;

		src->seqlo = seq;
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;

		/*
		 * May need to slide the window (seqhi may have been set by
		 * the crappy stack check or if we picked up the connection
		 * after establishment)
		 */
		if (src->seqhi == 1 ||
		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
			src->seqhi = end + MAX(1, dst->max_win << dws);
		if (win > src->max_win)
			src->max_win = win;

	} else {
		/* Subsequent packet: undo the peer's modulation of the ack */
		ack = ntohl(th->th_ack) - dst->seqdiff;
		if (src->seqdiff) {
			/* Modulate sequence numbers */
			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
			    src->seqdiff), 0);
			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in an
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no data packets */
		seq = src->seqlo;
		end = seq;
	}

	/* Distance between what the peer has sent and what is being acked */
	ackskew = dst->seqlo - ack;


	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(m, off, pd, th, dst))
			*copyback = 1;
	}


	/*
	 * Note: in the condition below each comment describes the test on
	 * the line directly above it.
	 */
#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
	    (pd->flags & PFDESC_IP_REAS) == 0)) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);


		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				src->state = TCPS_SYN_SENT;
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				dst->state = TCPS_ESTABLISHED;
				/*
				 * Both ends established: apply the source
				 * node connection limit check.
				 */
				if (src->state == TCPS_ESTABLISHED &&
				    (*state)->src_node != NULL &&
				    pf_src_connlimit(*state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				dst->state = TCPS_FIN_WAIT_2;
		}
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* update expire time */
		(*state)->expire = time_second;
		if (src->state >= TCPS_FIN_WAIT_2 &&
		    dst->state >= TCPS_FIN_WAIT_2)
			(*state)->timeout = PFTM_TCP_CLOSED;
		else if (src->state >= TCPS_CLOSING &&
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_FIN_WAIT;
		else if (src->state < TCPS_ESTABLISHED ||
		    dst->state < TCPS_ESTABLISHED)
			(*state)->timeout = PFTM_TCP_OPENING;
		else if (src->state >= TCPS_CLOSING ||
		    dst->state >= TCPS_CLOSING)
			(*state)->timeout = PFTM_TCP_CLOSING;
		else if ((th->th_flags & TH_SYN) &&
			 ((*state)->state_flags & PFSTATE_SLOPPY))
			(*state)->timeout = PFTM_TCP_FIRST_PACKET;
		else
			(*state)->timeout = PFTM_TCP_ESTABLISHED;

		/* Fall through to PASS packet */

	} else if ((dst->state < TCPS_SYN_SENT ||
		dst->state >= TCPS_FIN_WAIT_2 ||
		src->state >= TCPS_FIN_WAIT_2) &&
	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
	    /* Within a window forward of the originating packet */
	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
	    /* Within a window backward of the originating packet */

		/*
		 * This currently handles three situations:
		 *  1) Stupid stacks will shotgun SYNs before their peer
		 *     replies.
		 *  2) When PF catches an already established stream (the
		 *     firewall rebooted, the state table was flushed, routes
		 *     changed...)
		 *  3) Packets get funky immediately after the connection
		 *     closes (this should catch Solaris spurious ACK|FINs
		 *     that web servers like to spew after a close)
		 *
		 * This must be a little more careful than the above code
		 * since packet floods will also be caught here. We don't
		 * update the TTL here to mitigate the damage of a packet
		 * flood and so the same code can handle awkward establishment
		 * and a loosened connection close.
		 * In the establishment case, a correct peer response will
		 * validate the connection, go through the normal state code
		 * and keep updating the state TTL.
		 */

		if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: loose state match: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len,
			    ackskew, (unsigned long long)(*state)->packets[0],
			    (unsigned long long)(*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
		}

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
			    *state, src, dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/*
		 * Cannot set dst->seqhi here since this could be a shotgunned
		 * SYN and not an already established connection.
		 */

		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				src->state = TCPS_CLOSING;
		if (th->th_flags & TH_RST)
			src->state = dst->state = TCPS_TIME_WAIT;

		/* Fall through to PASS packet */

	} else if ((*state)->pickup_mode == PF_PICKUPS_HASHONLY ||
		    ((*state)->pickup_mode == PF_PICKUPS_ENABLED &&
		     ((*state)->sync_flags & PFSTATE_GOT_SYN_MASK) !=
		      PFSTATE_GOT_SYN_MASK)) {
		/*
		 * If pickup mode is hash only, do not fail on sequence checks.
		 *
		 * If pickup mode is enabled and we did not see the SYN in
		 * both directions, do not fail on sequence checks because
		 * we do not have complete information on window scale.
		 *
		 * Adjust expiration and fall through to PASS packet.
		 * XXX Add a FIN check to reduce timeout?
		 */
		(*state)->expire = time_second;
	} else {
		/*
		 * Failure processing
		 */
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->eh, kif->pfik_ifp);
			/* Reset this peer's tracking to its initial values */
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= PF_DEBUG_MISC) {
			kprintf("pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			kprintf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (unsigned long long)(*state)->packets[0],
			    (unsigned long long)(*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
			/*
			 * One column per window test; a digit marks the
			 * test that failed (1-4 strict, 5-6 loose).
			 */
			kprintf("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ': '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

/*
 * Sloppy TCP tracking: advance the two peer state machines from the TCP
 * flags alone, without any sequence-number checks, then refresh the
 * state's expire time and choose its timeout class.
 *
 * Returns PF_DROP with *reason = PFRES_SRCLIMIT when the connection
 * becomes established and pf_src_connlimit() returns non-zero; PF_PASS
 * in every other case.
 *
 * Called with state locked
 */
int
pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
{
	struct tcphdr		*th = pd->hdr.tcp;

	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			src->state = TCPS_SYN_SENT;
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			src->state = TCPS_CLOSING;
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			dst->state = TCPS_ESTABLISHED;
			if (src->state == TCPS_ESTABLISHED &&
			    (*state)->src_node != NULL &&
			    pf_src_connlimit(*state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			dst->state = TCPS_FIN_WAIT_2;
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is an ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
			 */
			dst->state = src->state = TCPS_ESTABLISHED;
			if ((*state)->src_node != NULL &&
			    pf_src_connlimit(*state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (src->state == TCPS_CLOSING &&
		    dst->state == TCPS_ESTABLISHED &&
		    dst->seqlo == 0) {
			/*
			 * Handle the closing of half connections where we
			 * don't see the full bidirectional FIN/ACK+ACK
			 * handshake.
			 */
			dst->state = TCPS_CLOSING;
		}
	}
	if (th->th_flags & TH_RST)
		src->state = dst->state = TCPS_TIME_WAIT;

	/* update expire time */
	(*state)->expire = time_second;
	if (src->state >= TCPS_FIN_WAIT_2 &&
	    dst->state >= TCPS_FIN_WAIT_2)
		(*state)->timeout = PFTM_TCP_CLOSED;
	else if (src->state >= TCPS_CLOSING &&
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_FIN_WAIT;
	else if (src->state < TCPS_ESTABLISHED ||
	    dst->state < TCPS_ESTABLISHED)
		(*state)->timeout = PFTM_TCP_OPENING;
	else if (src->state >= TCPS_CLOSING ||
	    dst->state >= TCPS_CLOSING)
		(*state)->timeout = PFTM_TCP_CLOSING;
	else if ((th->th_flags & TH_SYN) &&
		 ((*state)->state_flags & PFSTATE_SLOPPY))
		(*state)->timeout = PFTM_TCP_FIRST_PACKET;
	else
		(*state)->timeout = PFTM_TCP_ESTABLISHED;

	return (PF_PASS);
}

/*
 * Test TCP connection state.  Caller must hold the state locked.
4851 */ 4852 int 4853 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, 4854 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 4855 u_short *reason) 4856 { 4857 struct pf_state_key_cmp key; 4858 struct tcphdr *th = pd->hdr.tcp; 4859 int copyback = 0; 4860 int error; 4861 struct pf_state_peer *src, *dst; 4862 struct pf_state_key *sk; 4863 4864 bzero(&key, sizeof(key)); 4865 key.af = pd->af; 4866 key.proto = IPPROTO_TCP; 4867 if (direction == PF_IN) { /* wire side, straight */ 4868 PF_ACPY(&key.addr[0], pd->src, key.af); 4869 PF_ACPY(&key.addr[1], pd->dst, key.af); 4870 key.port[0] = th->th_sport; 4871 key.port[1] = th->th_dport; 4872 if (pf_status.debug >= PF_DEBUG_MISC) { 4873 kprintf("test-tcp IN (%08x:%d) -> (%08x:%d)\n", 4874 ntohl(key.addr[0].addr32[0]), 4875 ntohs(key.port[0]), 4876 ntohl(key.addr[1].addr32[0]), 4877 ntohs(key.port[1])); 4878 } 4879 } else { /* stack side, reverse */ 4880 PF_ACPY(&key.addr[1], pd->src, key.af); 4881 PF_ACPY(&key.addr[0], pd->dst, key.af); 4882 key.port[1] = th->th_sport; 4883 key.port[0] = th->th_dport; 4884 if (pf_status.debug >= PF_DEBUG_MISC) { 4885 kprintf("test-tcp OUT (%08x:%d) <- (%08x:%d)\n", 4886 ntohl(key.addr[0].addr32[0]), 4887 ntohs(key.port[0]), 4888 ntohl(key.addr[1].addr32[0]), 4889 ntohs(key.port[1])); 4890 } 4891 } 4892 4893 STATE_LOOKUP(kif, &key, direction, *state, m); 4894 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 4895 4896 if (direction == (*state)->direction) { 4897 src = &(*state)->src; 4898 dst = &(*state)->dst; 4899 } else { 4900 src = &(*state)->dst; 4901 dst = &(*state)->src; 4902 } 4903 4904 sk = (*state)->key[pd->didx]; 4905 4906 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4907 if (direction != (*state)->direction) { 4908 REASON_SET(reason, PFRES_SYNPROXY); 4909 FAIL (PF_SYNPROXY_DROP); 4910 } 4911 if (th->th_flags & TH_SYN) { 4912 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4913 REASON_SET(reason, PFRES_SYNPROXY); 4914 FAIL (PF_DROP); 4915 } 4916 
pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4917 pd->src, th->th_dport, th->th_sport, 4918 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4919 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4920 0, NULL, NULL); 4921 REASON_SET(reason, PFRES_SYNPROXY); 4922 FAIL (PF_SYNPROXY_DROP); 4923 } else if (!(th->th_flags & TH_ACK) || 4924 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4925 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4926 REASON_SET(reason, PFRES_SYNPROXY); 4927 FAIL (PF_DROP); 4928 } else if ((*state)->src_node != NULL && 4929 pf_src_connlimit(*state)) { 4930 REASON_SET(reason, PFRES_SRCLIMIT); 4931 FAIL (PF_DROP); 4932 } else 4933 (*state)->src.state = PF_TCPS_PROXY_DST; 4934 } 4935 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4936 if (direction == (*state)->direction) { 4937 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4938 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4939 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4940 REASON_SET(reason, PFRES_SYNPROXY); 4941 FAIL (PF_DROP); 4942 } 4943 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4944 if ((*state)->dst.seqhi == 1) 4945 (*state)->dst.seqhi = htonl(karc4random()); 4946 pf_send_tcp((*state)->rule.ptr, pd->af, 4947 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4948 sk->port[pd->sidx], sk->port[pd->didx], 4949 (*state)->dst.seqhi, 0, TH_SYN, 0, 4950 (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); 4951 REASON_SET(reason, PFRES_SYNPROXY); 4952 FAIL (PF_SYNPROXY_DROP); 4953 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4954 (TH_SYN|TH_ACK)) || 4955 (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { 4956 REASON_SET(reason, PFRES_SYNPROXY); 4957 FAIL (PF_DROP); 4958 } else { 4959 (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); 4960 (*state)->dst.seqlo = ntohl(th->th_seq); 4961 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4962 pd->src, th->th_dport, th->th_sport, 4963 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 4964 TH_ACK, (*state)->src.max_win, 0, 0, 0, 4965 
(*state)->tag, NULL, NULL); 4966 pf_send_tcp((*state)->rule.ptr, pd->af, 4967 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4968 sk->port[pd->sidx], sk->port[pd->didx], 4969 (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, 4970 TH_ACK, (*state)->dst.max_win, 0, 0, 1, 4971 0, NULL, NULL); 4972 (*state)->src.seqdiff = (*state)->dst.seqhi - 4973 (*state)->src.seqlo; 4974 (*state)->dst.seqdiff = (*state)->src.seqhi - 4975 (*state)->dst.seqlo; 4976 (*state)->src.seqhi = (*state)->src.seqlo + 4977 (*state)->dst.max_win; 4978 (*state)->dst.seqhi = (*state)->dst.seqlo + 4979 (*state)->src.max_win; 4980 (*state)->src.wscale = (*state)->dst.wscale = 0; 4981 (*state)->src.state = (*state)->dst.state = 4982 TCPS_ESTABLISHED; 4983 REASON_SET(reason, PFRES_SYNPROXY); 4984 FAIL (PF_SYNPROXY_DROP); 4985 } 4986 } 4987 4988 /* 4989 * Check for connection (addr+port pair) reuse. We can't actually 4990 * unlink the state if we don't own it. 4991 */ 4992 if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && 4993 dst->state >= TCPS_FIN_WAIT_2 && 4994 src->state >= TCPS_FIN_WAIT_2) { 4995 if (pf_status.debug >= PF_DEBUG_MISC) { 4996 kprintf("pf: state reuse "); 4997 pf_print_state(*state); 4998 pf_print_flags(th->th_flags); 4999 kprintf("\n"); 5000 } 5001 /* XXX make sure it's the same direction ?? 
*/ 5002 (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; 5003 if ((*state)->cpuid == mycpu->gd_cpuid) { 5004 pf_unlink_state(*state); 5005 *state = NULL; 5006 } else { 5007 (*state)->timeout = PFTM_PURGE; 5008 } 5009 FAIL (PF_DROP); 5010 } 5011 5012 if ((*state)->state_flags & PFSTATE_SLOPPY) { 5013 if (pf_tcp_track_sloppy(src, dst, state, pd, 5014 reason) == PF_DROP) { 5015 FAIL (PF_DROP); 5016 } 5017 } else { 5018 if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, 5019 reason, ©back) == PF_DROP) { 5020 FAIL (PF_DROP); 5021 } 5022 } 5023 5024 /* translate source/destination address, if necessary */ 5025 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5026 struct pf_state_key *nk = (*state)->key[pd->didx]; 5027 5028 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5029 nk->port[pd->sidx] != th->th_sport) { 5030 /* 5031 * The translated source address may be completely 5032 * unrelated to the saved link header, make sure 5033 * a bridge doesn't try to use it. 5034 */ 5035 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5036 pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, 5037 &th->th_sum, &nk->addr[pd->sidx], 5038 nk->port[pd->sidx], 0, pd->af); 5039 } 5040 5041 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5042 nk->port[pd->didx] != th->th_dport) { 5043 /* 5044 * If we don't redispatch the packet will go into 5045 * the protocol stack on the wrong cpu for the 5046 * post-translated address. 
5047 */ 5048 pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, 5049 &th->th_sum, &nk->addr[pd->didx], 5050 nk->port[pd->didx], 0, pd->af); 5051 } 5052 copyback = 1; 5053 } 5054 5055 /* Copyback sequence modulation or stateful scrub changes if needed */ 5056 if (copyback) { 5057 m->m_flags &= ~M_HASH; 5058 m_copyback(m, off, sizeof(*th), (caddr_t)th); 5059 } 5060 5061 pfsync_update_state(*state); 5062 error = PF_PASS; 5063 done: 5064 if (*state) 5065 lockmgr(&(*state)->lk, LK_RELEASE); 5066 return (error); 5067 } 5068 5069 /* 5070 * Test UDP connection state. Caller must hold the state locked. 5071 */ 5072 int 5073 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, 5074 struct mbuf *m, int off, void *h, struct pf_pdesc *pd) 5075 { 5076 struct pf_state_peer *src, *dst; 5077 struct pf_state_key_cmp key; 5078 struct udphdr *uh = pd->hdr.udp; 5079 5080 bzero(&key, sizeof(key)); 5081 key.af = pd->af; 5082 key.proto = IPPROTO_UDP; 5083 if (direction == PF_IN) { /* wire side, straight */ 5084 PF_ACPY(&key.addr[0], pd->src, key.af); 5085 PF_ACPY(&key.addr[1], pd->dst, key.af); 5086 key.port[0] = uh->uh_sport; 5087 key.port[1] = uh->uh_dport; 5088 } else { /* stack side, reverse */ 5089 PF_ACPY(&key.addr[1], pd->src, key.af); 5090 PF_ACPY(&key.addr[0], pd->dst, key.af); 5091 key.port[1] = uh->uh_sport; 5092 key.port[0] = uh->uh_dport; 5093 } 5094 5095 STATE_LOOKUP(kif, &key, direction, *state, m); 5096 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5097 5098 if (direction == (*state)->direction) { 5099 src = &(*state)->src; 5100 dst = &(*state)->dst; 5101 } else { 5102 src = &(*state)->dst; 5103 dst = &(*state)->src; 5104 } 5105 5106 /* update states */ 5107 if (src->state < PFUDPS_SINGLE) 5108 src->state = PFUDPS_SINGLE; 5109 if (dst->state == PFUDPS_SINGLE) 5110 dst->state = PFUDPS_MULTIPLE; 5111 5112 /* update expire time */ 5113 (*state)->expire = time_second; 5114 if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) 5115 
(*state)->timeout = PFTM_UDP_MULTIPLE; 5116 else 5117 (*state)->timeout = PFTM_UDP_SINGLE; 5118 5119 /* translate source/destination address, if necessary */ 5120 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5121 struct pf_state_key *nk = (*state)->key[pd->didx]; 5122 5123 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || 5124 nk->port[pd->sidx] != uh->uh_sport) { 5125 /* 5126 * The translated source address may be completely 5127 * unrelated to the saved link header, make sure 5128 * a bridge doesn't try to use it. 5129 */ 5130 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 5131 m->m_flags &= ~M_HASH; 5132 pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, 5133 &uh->uh_sum, &nk->addr[pd->sidx], 5134 nk->port[pd->sidx], 1, pd->af); 5135 } 5136 5137 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || 5138 nk->port[pd->didx] != uh->uh_dport) { 5139 /* 5140 * If we don't redispatch the packet will go into 5141 * the protocol stack on the wrong cpu for the 5142 * post-translated address. 5143 */ 5144 m->m_flags &= ~M_HASH; 5145 pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, 5146 &uh->uh_sum, &nk->addr[pd->didx], 5147 nk->port[pd->didx], 1, pd->af); 5148 } 5149 m_copyback(m, off, sizeof(*uh), (caddr_t)uh); 5150 } 5151 5152 pfsync_update_state(*state); 5153 lockmgr(&(*state)->lk, LK_RELEASE); 5154 return (PF_PASS); 5155 } 5156 5157 /* 5158 * Test ICMP connection state. Caller must hold the state locked. 
5159 */ 5160 int 5161 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, 5162 struct mbuf *m, int off, void *h, struct pf_pdesc *pd, 5163 u_short *reason) 5164 { 5165 struct pf_addr *saddr = pd->src, *daddr = pd->dst; 5166 u_int16_t icmpid = 0, *icmpsum = NULL; 5167 u_int8_t icmptype = 0; 5168 int state_icmp = 0; 5169 int error; 5170 struct pf_state_key_cmp key; 5171 5172 bzero(&key, sizeof(key)); 5173 5174 switch (pd->proto) { 5175 #ifdef INET 5176 case IPPROTO_ICMP: 5177 icmptype = pd->hdr.icmp->icmp_type; 5178 icmpid = pd->hdr.icmp->icmp_id; 5179 icmpsum = &pd->hdr.icmp->icmp_cksum; 5180 5181 if (icmptype == ICMP_UNREACH || 5182 icmptype == ICMP_SOURCEQUENCH || 5183 icmptype == ICMP_REDIRECT || 5184 icmptype == ICMP_TIMXCEED || 5185 icmptype == ICMP_PARAMPROB) 5186 state_icmp++; 5187 break; 5188 #endif /* INET */ 5189 #ifdef INET6 5190 case IPPROTO_ICMPV6: 5191 icmptype = pd->hdr.icmp6->icmp6_type; 5192 icmpid = pd->hdr.icmp6->icmp6_id; 5193 icmpsum = &pd->hdr.icmp6->icmp6_cksum; 5194 5195 if (icmptype == ICMP6_DST_UNREACH || 5196 icmptype == ICMP6_PACKET_TOO_BIG || 5197 icmptype == ICMP6_TIME_EXCEEDED || 5198 icmptype == ICMP6_PARAM_PROB) 5199 state_icmp++; 5200 break; 5201 #endif /* INET6 */ 5202 } 5203 5204 if (!state_icmp) { 5205 5206 /* 5207 * ICMP query/reply message not related to a TCP/UDP packet. 5208 * Search for an ICMP state. 
5209 */ 5210 key.af = pd->af; 5211 key.proto = pd->proto; 5212 key.port[0] = key.port[1] = icmpid; 5213 if (direction == PF_IN) { /* wire side, straight */ 5214 PF_ACPY(&key.addr[0], pd->src, key.af); 5215 PF_ACPY(&key.addr[1], pd->dst, key.af); 5216 } else { /* stack side, reverse */ 5217 PF_ACPY(&key.addr[1], pd->src, key.af); 5218 PF_ACPY(&key.addr[0], pd->dst, key.af); 5219 } 5220 5221 STATE_LOOKUP(kif, &key, direction, *state, m); 5222 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5223 5224 (*state)->expire = time_second; 5225 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5226 5227 /* translate source/destination address, if necessary */ 5228 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5229 struct pf_state_key *nk = (*state)->key[pd->didx]; 5230 5231 switch (pd->af) { 5232 #ifdef INET 5233 case AF_INET: 5234 if (PF_ANEQ(pd->src, 5235 &nk->addr[pd->sidx], AF_INET)) 5236 pf_change_a(&saddr->v4.s_addr, 5237 pd->ip_sum, 5238 nk->addr[pd->sidx].v4.s_addr, 0); 5239 5240 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], 5241 AF_INET)) 5242 pf_change_a(&daddr->v4.s_addr, 5243 pd->ip_sum, 5244 nk->addr[pd->didx].v4.s_addr, 0); 5245 5246 if (nk->port[0] != 5247 pd->hdr.icmp->icmp_id) { 5248 pd->hdr.icmp->icmp_cksum = 5249 pf_cksum_fixup( 5250 pd->hdr.icmp->icmp_cksum, icmpid, 5251 nk->port[pd->sidx], 0); 5252 pd->hdr.icmp->icmp_id = 5253 nk->port[pd->sidx]; 5254 } 5255 5256 m->m_flags &= ~M_HASH; 5257 m_copyback(m, off, ICMP_MINLEN, 5258 (caddr_t)pd->hdr.icmp); 5259 break; 5260 #endif /* INET */ 5261 #ifdef INET6 5262 case AF_INET6: 5263 if (PF_ANEQ(pd->src, 5264 &nk->addr[pd->sidx], AF_INET6)) 5265 pf_change_a6(saddr, 5266 &pd->hdr.icmp6->icmp6_cksum, 5267 &nk->addr[pd->sidx], 0); 5268 5269 if (PF_ANEQ(pd->dst, 5270 &nk->addr[pd->didx], AF_INET6)) 5271 pf_change_a6(daddr, 5272 &pd->hdr.icmp6->icmp6_cksum, 5273 &nk->addr[pd->didx], 0); 5274 5275 m->m_flags &= ~M_HASH; 5276 m_copyback(m, off, 5277 sizeof(struct icmp6_hdr), 5278 (caddr_t)pd->hdr.icmp6); 5279 break; 5280 
#endif /* INET6 */ 5281 } 5282 } 5283 } else { 5284 /* 5285 * ICMP error message in response to a TCP/UDP packet. 5286 * Extract the inner TCP/UDP header and search for that state. 5287 */ 5288 5289 struct pf_pdesc pd2; 5290 #ifdef INET 5291 struct ip h2; 5292 #endif /* INET */ 5293 #ifdef INET6 5294 struct ip6_hdr h2_6; 5295 int terminal = 0; 5296 #endif /* INET6 */ 5297 int ipoff2; 5298 int off2; 5299 5300 pd2.not_cpu_localized = 1; 5301 pd2.af = pd->af; 5302 /* Payload packet is from the opposite direction. */ 5303 pd2.sidx = (direction == PF_IN) ? 1 : 0; 5304 pd2.didx = (direction == PF_IN) ? 0 : 1; 5305 switch (pd->af) { 5306 #ifdef INET 5307 case AF_INET: 5308 /* offset of h2 in mbuf chain */ 5309 ipoff2 = off + ICMP_MINLEN; 5310 5311 if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2), 5312 NULL, reason, pd2.af)) { 5313 DPFPRINTF(PF_DEBUG_MISC, 5314 ("pf: ICMP error message too short " 5315 "(ip)\n")); 5316 FAIL (PF_DROP); 5317 } 5318 /* 5319 * ICMP error messages don't refer to non-first 5320 * fragments 5321 */ 5322 if (h2.ip_off & htons(IP_OFFMASK)) { 5323 REASON_SET(reason, PFRES_FRAG); 5324 FAIL (PF_DROP); 5325 } 5326 5327 /* offset of protocol header that follows h2 */ 5328 off2 = ipoff2 + (h2.ip_hl << 2); 5329 5330 pd2.proto = h2.ip_p; 5331 pd2.src = (struct pf_addr *)&h2.ip_src; 5332 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5333 pd2.ip_sum = &h2.ip_sum; 5334 break; 5335 #endif /* INET */ 5336 #ifdef INET6 5337 case AF_INET6: 5338 ipoff2 = off + sizeof(struct icmp6_hdr); 5339 5340 if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6), 5341 NULL, reason, pd2.af)) { 5342 DPFPRINTF(PF_DEBUG_MISC, 5343 ("pf: ICMP error message too short " 5344 "(ip6)\n")); 5345 FAIL (PF_DROP); 5346 } 5347 pd2.proto = h2_6.ip6_nxt; 5348 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5349 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5350 pd2.ip_sum = NULL; 5351 off2 = ipoff2 + sizeof(h2_6); 5352 do { 5353 switch (pd2.proto) { 5354 case IPPROTO_FRAGMENT: 5355 /* 5356 * ICMPv6 error messages for 
5357 * non-first fragments 5358 */ 5359 REASON_SET(reason, PFRES_FRAG); 5360 FAIL (PF_DROP); 5361 case IPPROTO_AH: 5362 case IPPROTO_HOPOPTS: 5363 case IPPROTO_ROUTING: 5364 case IPPROTO_DSTOPTS: { 5365 /* get next header and header length */ 5366 struct ip6_ext opt6; 5367 5368 if (!pf_pull_hdr(m, off2, &opt6, 5369 sizeof(opt6), NULL, reason, 5370 pd2.af)) { 5371 DPFPRINTF(PF_DEBUG_MISC, 5372 ("pf: ICMPv6 short opt\n")); 5373 FAIL (PF_DROP); 5374 } 5375 if (pd2.proto == IPPROTO_AH) 5376 off2 += (opt6.ip6e_len + 2) * 4; 5377 else 5378 off2 += (opt6.ip6e_len + 1) * 8; 5379 pd2.proto = opt6.ip6e_nxt; 5380 /* goto the next header */ 5381 break; 5382 } 5383 default: 5384 terminal++; 5385 break; 5386 } 5387 } while (!terminal); 5388 break; 5389 #endif /* INET6 */ 5390 default: 5391 DPFPRINTF(PF_DEBUG_MISC, 5392 ("pf: ICMP AF %d unknown (ip6)\n", pd->af)); 5393 FAIL (PF_DROP); 5394 break; 5395 } 5396 5397 switch (pd2.proto) { 5398 case IPPROTO_TCP: { 5399 struct tcphdr th; 5400 u_int32_t seq; 5401 struct pf_state_peer *src, *dst; 5402 u_int8_t dws; 5403 int copyback = 0; 5404 5405 /* 5406 * Only the first 8 bytes of the TCP header can be 5407 * expected. Don't access any TCP header fields after 5408 * th_seq, an ackskew test is not possible. 
5409 */ 5410 if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, 5411 pd2.af)) { 5412 DPFPRINTF(PF_DEBUG_MISC, 5413 ("pf: ICMP error message too short " 5414 "(tcp)\n")); 5415 FAIL (PF_DROP); 5416 } 5417 5418 key.af = pd2.af; 5419 key.proto = IPPROTO_TCP; 5420 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5421 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5422 key.port[pd2.sidx] = th.th_sport; 5423 key.port[pd2.didx] = th.th_dport; 5424 5425 STATE_LOOKUP(kif, &key, direction, *state, m); 5426 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5427 5428 if (direction == (*state)->direction) { 5429 src = &(*state)->dst; 5430 dst = &(*state)->src; 5431 } else { 5432 src = &(*state)->src; 5433 dst = &(*state)->dst; 5434 } 5435 5436 if (src->wscale && dst->wscale) 5437 dws = dst->wscale & PF_WSCALE_MASK; 5438 else 5439 dws = 0; 5440 5441 /* Demodulate sequence number */ 5442 seq = ntohl(th.th_seq) - src->seqdiff; 5443 if (src->seqdiff) { 5444 pf_change_a(&th.th_seq, icmpsum, 5445 htonl(seq), 0); 5446 copyback = 1; 5447 } 5448 5449 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5450 (!SEQ_GEQ(src->seqhi, seq) || 5451 !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { 5452 if (pf_status.debug >= PF_DEBUG_MISC) { 5453 kprintf("pf: BAD ICMP %d:%d ", 5454 icmptype, pd->hdr.icmp->icmp_code); 5455 pf_print_host(pd->src, 0, pd->af); 5456 kprintf(" -> "); 5457 pf_print_host(pd->dst, 0, pd->af); 5458 kprintf(" state: "); 5459 pf_print_state(*state); 5460 kprintf(" seq=%u\n", seq); 5461 } 5462 REASON_SET(reason, PFRES_BADSTATE); 5463 FAIL (PF_DROP); 5464 } else { 5465 if (pf_status.debug >= PF_DEBUG_MISC) { 5466 kprintf("pf: OK ICMP %d:%d ", 5467 icmptype, pd->hdr.icmp->icmp_code); 5468 pf_print_host(pd->src, 0, pd->af); 5469 kprintf(" -> "); 5470 pf_print_host(pd->dst, 0, pd->af); 5471 kprintf(" state: "); 5472 pf_print_state(*state); 5473 kprintf(" seq=%u\n", seq); 5474 } 5475 } 5476 5477 /* translate source/destination address, if necessary */ 5478 if ((*state)->key[PF_SK_WIRE] != 5479 
(*state)->key[PF_SK_STACK]) { 5480 struct pf_state_key *nk = 5481 (*state)->key[pd->didx]; 5482 5483 if (PF_ANEQ(pd2.src, 5484 &nk->addr[pd2.sidx], pd2.af) || 5485 nk->port[pd2.sidx] != th.th_sport) 5486 pf_change_icmp(pd2.src, &th.th_sport, 5487 daddr, &nk->addr[pd2.sidx], 5488 nk->port[pd2.sidx], NULL, 5489 pd2.ip_sum, icmpsum, 5490 pd->ip_sum, 0, pd2.af); 5491 5492 if (PF_ANEQ(pd2.dst, 5493 &nk->addr[pd2.didx], pd2.af) || 5494 nk->port[pd2.didx] != th.th_dport) 5495 pf_change_icmp(pd2.dst, &th.th_dport, 5496 NULL, /* XXX Inbound NAT? */ 5497 &nk->addr[pd2.didx], 5498 nk->port[pd2.didx], NULL, 5499 pd2.ip_sum, icmpsum, 5500 pd->ip_sum, 0, pd2.af); 5501 copyback = 1; 5502 } 5503 5504 if (copyback) { 5505 switch (pd2.af) { 5506 #ifdef INET 5507 case AF_INET: 5508 m_copyback(m, off, ICMP_MINLEN, 5509 (caddr_t)pd->hdr.icmp); 5510 m_copyback(m, ipoff2, sizeof(h2), 5511 (caddr_t)&h2); 5512 break; 5513 #endif /* INET */ 5514 #ifdef INET6 5515 case AF_INET6: 5516 m_copyback(m, off, 5517 sizeof(struct icmp6_hdr), 5518 (caddr_t)pd->hdr.icmp6); 5519 m_copyback(m, ipoff2, sizeof(h2_6), 5520 (caddr_t)&h2_6); 5521 break; 5522 #endif /* INET6 */ 5523 } 5524 m->m_flags &= ~M_HASH; 5525 m_copyback(m, off2, 8, (caddr_t)&th); 5526 } 5527 break; 5528 } 5529 case IPPROTO_UDP: { 5530 struct udphdr uh; 5531 5532 if (!pf_pull_hdr(m, off2, &uh, sizeof(uh), 5533 NULL, reason, pd2.af)) { 5534 DPFPRINTF(PF_DEBUG_MISC, 5535 ("pf: ICMP error message too short " 5536 "(udp)\n")); 5537 return (PF_DROP); 5538 } 5539 5540 key.af = pd2.af; 5541 key.proto = IPPROTO_UDP; 5542 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5543 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5544 key.port[pd2.sidx] = uh.uh_sport; 5545 key.port[pd2.didx] = uh.uh_dport; 5546 5547 STATE_LOOKUP(kif, &key, direction, *state, m); 5548 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5549 5550 /* translate source/destination address, if necessary */ 5551 if ((*state)->key[PF_SK_WIRE] != 5552 (*state)->key[PF_SK_STACK]) { 5553 struct 
pf_state_key *nk = 5554 (*state)->key[pd->didx]; 5555 5556 if (PF_ANEQ(pd2.src, 5557 &nk->addr[pd2.sidx], pd2.af) || 5558 nk->port[pd2.sidx] != uh.uh_sport) 5559 pf_change_icmp(pd2.src, &uh.uh_sport, 5560 daddr, &nk->addr[pd2.sidx], 5561 nk->port[pd2.sidx], &uh.uh_sum, 5562 pd2.ip_sum, icmpsum, 5563 pd->ip_sum, 1, pd2.af); 5564 5565 if (PF_ANEQ(pd2.dst, 5566 &nk->addr[pd2.didx], pd2.af) || 5567 nk->port[pd2.didx] != uh.uh_dport) 5568 pf_change_icmp(pd2.dst, &uh.uh_dport, 5569 NULL, /* XXX Inbound NAT? */ 5570 &nk->addr[pd2.didx], 5571 nk->port[pd2.didx], &uh.uh_sum, 5572 pd2.ip_sum, icmpsum, 5573 pd->ip_sum, 1, pd2.af); 5574 5575 switch (pd2.af) { 5576 #ifdef INET 5577 case AF_INET: 5578 m_copyback(m, off, ICMP_MINLEN, 5579 (caddr_t)pd->hdr.icmp); 5580 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5581 break; 5582 #endif /* INET */ 5583 #ifdef INET6 5584 case AF_INET6: 5585 m_copyback(m, off, 5586 sizeof(struct icmp6_hdr), 5587 (caddr_t)pd->hdr.icmp6); 5588 m_copyback(m, ipoff2, sizeof(h2_6), 5589 (caddr_t)&h2_6); 5590 break; 5591 #endif /* INET6 */ 5592 } 5593 m->m_flags &= ~M_HASH; 5594 m_copyback(m, off2, sizeof(uh), (caddr_t)&uh); 5595 } 5596 break; 5597 } 5598 #ifdef INET 5599 case IPPROTO_ICMP: { 5600 struct icmp iih; 5601 5602 if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, 5603 NULL, reason, pd2.af)) { 5604 DPFPRINTF(PF_DEBUG_MISC, 5605 ("pf: ICMP error message too short i" 5606 "(icmp)\n")); 5607 return (PF_DROP); 5608 } 5609 5610 key.af = pd2.af; 5611 key.proto = IPPROTO_ICMP; 5612 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5613 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5614 key.port[0] = key.port[1] = iih.icmp_id; 5615 5616 STATE_LOOKUP(kif, &key, direction, *state, m); 5617 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5618 5619 /* translate source/destination address, if necessary */ 5620 if ((*state)->key[PF_SK_WIRE] != 5621 (*state)->key[PF_SK_STACK]) { 5622 struct pf_state_key *nk = 5623 (*state)->key[pd->didx]; 5624 5625 if (PF_ANEQ(pd2.src, 5626 
&nk->addr[pd2.sidx], pd2.af) || 5627 nk->port[pd2.sidx] != iih.icmp_id) 5628 pf_change_icmp(pd2.src, &iih.icmp_id, 5629 daddr, &nk->addr[pd2.sidx], 5630 nk->port[pd2.sidx], NULL, 5631 pd2.ip_sum, icmpsum, 5632 pd->ip_sum, 0, AF_INET); 5633 5634 if (PF_ANEQ(pd2.dst, 5635 &nk->addr[pd2.didx], pd2.af) || 5636 nk->port[pd2.didx] != iih.icmp_id) 5637 pf_change_icmp(pd2.dst, &iih.icmp_id, 5638 NULL, /* XXX Inbound NAT? */ 5639 &nk->addr[pd2.didx], 5640 nk->port[pd2.didx], NULL, 5641 pd2.ip_sum, icmpsum, 5642 pd->ip_sum, 0, AF_INET); 5643 5644 m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp); 5645 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5646 m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih); 5647 m->m_flags &= ~M_HASH; 5648 } 5649 break; 5650 } 5651 #endif /* INET */ 5652 #ifdef INET6 5653 case IPPROTO_ICMPV6: { 5654 struct icmp6_hdr iih; 5655 5656 if (!pf_pull_hdr(m, off2, &iih, 5657 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5658 DPFPRINTF(PF_DEBUG_MISC, 5659 ("pf: ICMP error message too short " 5660 "(icmp6)\n")); 5661 FAIL (PF_DROP); 5662 } 5663 5664 key.af = pd2.af; 5665 key.proto = IPPROTO_ICMPV6; 5666 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5667 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5668 key.port[0] = key.port[1] = iih.icmp6_id; 5669 5670 STATE_LOOKUP(kif, &key, direction, *state, m); 5671 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5672 5673 /* translate source/destination address, if necessary */ 5674 if ((*state)->key[PF_SK_WIRE] != 5675 (*state)->key[PF_SK_STACK]) { 5676 struct pf_state_key *nk = 5677 (*state)->key[pd->didx]; 5678 5679 if (PF_ANEQ(pd2.src, 5680 &nk->addr[pd2.sidx], pd2.af) || 5681 nk->port[pd2.sidx] != iih.icmp6_id) 5682 pf_change_icmp(pd2.src, &iih.icmp6_id, 5683 daddr, &nk->addr[pd2.sidx], 5684 nk->port[pd2.sidx], NULL, 5685 pd2.ip_sum, icmpsum, 5686 pd->ip_sum, 0, AF_INET6); 5687 5688 if (PF_ANEQ(pd2.dst, 5689 &nk->addr[pd2.didx], pd2.af) || 5690 nk->port[pd2.didx] != iih.icmp6_id) 5691 pf_change_icmp(pd2.dst, 
&iih.icmp6_id, 5692 NULL, /* XXX Inbound NAT? */ 5693 &nk->addr[pd2.didx], 5694 nk->port[pd2.didx], NULL, 5695 pd2.ip_sum, icmpsum, 5696 pd->ip_sum, 0, AF_INET6); 5697 5698 m_copyback(m, off, sizeof(struct icmp6_hdr), 5699 (caddr_t)pd->hdr.icmp6); 5700 m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6); 5701 m_copyback(m, off2, sizeof(struct icmp6_hdr), 5702 (caddr_t)&iih); 5703 m->m_flags &= ~M_HASH; 5704 } 5705 break; 5706 } 5707 #endif /* INET6 */ 5708 default: { 5709 key.af = pd2.af; 5710 key.proto = pd2.proto; 5711 PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); 5712 PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); 5713 key.port[0] = key.port[1] = 0; 5714 5715 STATE_LOOKUP(kif, &key, direction, *state, m); 5716 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5717 5718 /* translate source/destination address, if necessary */ 5719 if ((*state)->key[PF_SK_WIRE] != 5720 (*state)->key[PF_SK_STACK]) { 5721 struct pf_state_key *nk = 5722 (*state)->key[pd->didx]; 5723 5724 if (PF_ANEQ(pd2.src, 5725 &nk->addr[pd2.sidx], pd2.af)) 5726 pf_change_icmp(pd2.src, NULL, daddr, 5727 &nk->addr[pd2.sidx], 0, NULL, 5728 pd2.ip_sum, icmpsum, 5729 pd->ip_sum, 0, pd2.af); 5730 5731 if (PF_ANEQ(pd2.dst, 5732 &nk->addr[pd2.didx], pd2.af)) 5733 pf_change_icmp(pd2.src, NULL, 5734 NULL, /* XXX Inbound NAT? 
*/ 5735 &nk->addr[pd2.didx], 0, NULL, 5736 pd2.ip_sum, icmpsum, 5737 pd->ip_sum, 0, pd2.af); 5738 5739 switch (pd2.af) { 5740 #ifdef INET 5741 case AF_INET: 5742 m_copyback(m, off, ICMP_MINLEN, 5743 (caddr_t)pd->hdr.icmp); 5744 m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2); 5745 m->m_flags &= ~M_HASH; 5746 break; 5747 #endif /* INET */ 5748 #ifdef INET6 5749 case AF_INET6: 5750 m_copyback(m, off, 5751 sizeof(struct icmp6_hdr), 5752 (caddr_t)pd->hdr.icmp6); 5753 m_copyback(m, ipoff2, sizeof(h2_6), 5754 (caddr_t)&h2_6); 5755 m->m_flags &= ~M_HASH; 5756 break; 5757 #endif /* INET6 */ 5758 } 5759 } 5760 break; 5761 } 5762 } 5763 } 5764 5765 pfsync_update_state(*state); 5766 error = PF_PASS; 5767 done: 5768 if (*state) 5769 lockmgr(&(*state)->lk, LK_RELEASE); 5770 return (error); 5771 } 5772 5773 /* 5774 * Test other connection state. Caller must hold the state locked. 5775 */ 5776 int 5777 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, 5778 struct mbuf *m, struct pf_pdesc *pd) 5779 { 5780 struct pf_state_peer *src, *dst; 5781 struct pf_state_key_cmp key; 5782 5783 bzero(&key, sizeof(key)); 5784 key.af = pd->af; 5785 key.proto = pd->proto; 5786 if (direction == PF_IN) { 5787 PF_ACPY(&key.addr[0], pd->src, key.af); 5788 PF_ACPY(&key.addr[1], pd->dst, key.af); 5789 key.port[0] = key.port[1] = 0; 5790 } else { 5791 PF_ACPY(&key.addr[1], pd->src, key.af); 5792 PF_ACPY(&key.addr[0], pd->dst, key.af); 5793 key.port[1] = key.port[0] = 0; 5794 } 5795 5796 STATE_LOOKUP(kif, &key, direction, *state, m); 5797 lockmgr(&(*state)->lk, LK_EXCLUSIVE); 5798 5799 if (direction == (*state)->direction) { 5800 src = &(*state)->src; 5801 dst = &(*state)->dst; 5802 } else { 5803 src = &(*state)->dst; 5804 dst = &(*state)->src; 5805 } 5806 5807 /* update states */ 5808 if (src->state < PFOTHERS_SINGLE) 5809 src->state = PFOTHERS_SINGLE; 5810 if (dst->state == PFOTHERS_SINGLE) 5811 dst->state = PFOTHERS_MULTIPLE; 5812 5813 /* update expire time */ 5814 
(*state)->expire = time_second; 5815 if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) 5816 (*state)->timeout = PFTM_OTHER_MULTIPLE; 5817 else 5818 (*state)->timeout = PFTM_OTHER_SINGLE; 5819 5820 /* translate source/destination address, if necessary */ 5821 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5822 struct pf_state_key *nk = (*state)->key[pd->didx]; 5823 5824 KKASSERT(nk); 5825 KKASSERT(pd); 5826 KKASSERT(pd->src); 5827 KKASSERT(pd->dst); 5828 switch (pd->af) { 5829 #ifdef INET 5830 case AF_INET: 5831 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5832 pf_change_a(&pd->src->v4.s_addr, 5833 pd->ip_sum, 5834 nk->addr[pd->sidx].v4.s_addr, 5835 0); 5836 5837 5838 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 5839 pf_change_a(&pd->dst->v4.s_addr, 5840 pd->ip_sum, 5841 nk->addr[pd->didx].v4.s_addr, 5842 0); 5843 5844 break; 5845 #endif /* INET */ 5846 #ifdef INET6 5847 case AF_INET6: 5848 if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) 5849 PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); 5850 5851 if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) 5852 PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); 5853 #endif /* INET6 */ 5854 } 5855 } 5856 5857 pfsync_update_state(*state); 5858 lockmgr(&(*state)->lk, LK_RELEASE); 5859 return (PF_PASS); 5860 } 5861 5862 /* 5863 * ipoff and off are measured from the start of the mbuf chain. 5864 * h must be at "ipoff" on the mbuf chain. 
5865 */ 5866 void * 5867 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5868 u_short *actionp, u_short *reasonp, sa_family_t af) 5869 { 5870 switch (af) { 5871 #ifdef INET 5872 case AF_INET: { 5873 struct ip *h = mtod(m, struct ip *); 5874 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5875 5876 if (fragoff) { 5877 if (fragoff >= len) 5878 ACTION_SET(actionp, PF_PASS); 5879 else { 5880 ACTION_SET(actionp, PF_DROP); 5881 REASON_SET(reasonp, PFRES_FRAG); 5882 } 5883 return (NULL); 5884 } 5885 if (m->m_pkthdr.len < off + len || 5886 ntohs(h->ip_len) < off + len) { 5887 ACTION_SET(actionp, PF_DROP); 5888 REASON_SET(reasonp, PFRES_SHORT); 5889 return (NULL); 5890 } 5891 break; 5892 } 5893 #endif /* INET */ 5894 #ifdef INET6 5895 case AF_INET6: { 5896 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5897 5898 if (m->m_pkthdr.len < off + len || 5899 (ntohs(h->ip6_plen) + sizeof(struct ip6_hdr)) < 5900 (unsigned)(off + len)) { 5901 ACTION_SET(actionp, PF_DROP); 5902 REASON_SET(reasonp, PFRES_SHORT); 5903 return (NULL); 5904 } 5905 break; 5906 } 5907 #endif /* INET6 */ 5908 } 5909 m_copydata(m, off, len, p); 5910 return (p); 5911 } 5912 5913 int 5914 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) 5915 { 5916 struct sockaddr_in *dst; 5917 int ret = 1; 5918 int check_mpath; 5919 #ifdef INET6 5920 struct sockaddr_in6 *dst6; 5921 struct route_in6 ro; 5922 #else 5923 struct route ro; 5924 #endif 5925 struct radix_node *rn; 5926 struct rtentry *rt; 5927 struct ifnet *ifp; 5928 5929 check_mpath = 0; 5930 bzero(&ro, sizeof(ro)); 5931 switch (af) { 5932 case AF_INET: 5933 dst = satosin(&ro.ro_dst); 5934 dst->sin_family = AF_INET; 5935 dst->sin_len = sizeof(*dst); 5936 dst->sin_addr = addr->v4; 5937 break; 5938 #ifdef INET6 5939 case AF_INET6: 5940 /* 5941 * Skip check for addresses with embedded interface scope, 5942 * as they would always match anyway. 
5943 */ 5944 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5945 goto out; 5946 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 5947 dst6->sin6_family = AF_INET6; 5948 dst6->sin6_len = sizeof(*dst6); 5949 dst6->sin6_addr = addr->v6; 5950 break; 5951 #endif /* INET6 */ 5952 default: 5953 return (0); 5954 } 5955 5956 /* Skip checks for ipsec interfaces */ 5957 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5958 goto out; 5959 5960 rtalloc_ign((struct route *)&ro, 0); 5961 5962 if (ro.ro_rt != NULL) { 5963 /* No interface given, this is a no-route check */ 5964 if (kif == NULL) 5965 goto out; 5966 5967 if (kif->pfik_ifp == NULL) { 5968 ret = 0; 5969 goto out; 5970 } 5971 5972 /* Perform uRPF check if passed input interface */ 5973 ret = 0; 5974 rn = (struct radix_node *)ro.ro_rt; 5975 do { 5976 rt = (struct rtentry *)rn; 5977 ifp = rt->rt_ifp; 5978 5979 if (kif->pfik_ifp == ifp) 5980 ret = 1; 5981 rn = NULL; 5982 } while (check_mpath == 1 && rn != NULL && ret == 0); 5983 } else 5984 ret = 0; 5985 out: 5986 if (ro.ro_rt != NULL) 5987 RTFREE(ro.ro_rt); 5988 return (ret); 5989 } 5990 5991 int 5992 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) 5993 { 5994 struct sockaddr_in *dst; 5995 #ifdef INET6 5996 struct sockaddr_in6 *dst6; 5997 struct route_in6 ro; 5998 #else 5999 struct route ro; 6000 #endif 6001 int ret = 0; 6002 6003 ASSERT_LWKT_TOKEN_HELD(&pf_token); 6004 6005 bzero(&ro, sizeof(ro)); 6006 switch (af) { 6007 case AF_INET: 6008 dst = satosin(&ro.ro_dst); 6009 dst->sin_family = AF_INET; 6010 dst->sin_len = sizeof(*dst); 6011 dst->sin_addr = addr->v4; 6012 break; 6013 #ifdef INET6 6014 case AF_INET6: 6015 dst6 = (struct sockaddr_in6 *)&ro.ro_dst; 6016 dst6->sin6_family = AF_INET6; 6017 dst6->sin6_len = sizeof(*dst6); 6018 dst6->sin6_addr = addr->v6; 6019 break; 6020 #endif /* INET6 */ 6021 default: 6022 return (0); 6023 } 6024 6025 rtalloc_ign((struct route *)&ro, (RTF_CLONING | RTF_PRCLONING)); 6026 6027 if (ro.ro_rt != NULL) { 6028 
#ifdef INET
/*
 * Apply a route-to / reply-to / dup-to / fastroute rule to an IPv4
 * packet and transmit it on the selected interface.
 *
 *   m    - in/out: packet; set to NULL on return unless the rule is
 *          PF_DUPTO (in which case a duplicate is sent instead)
 *   r    - matching rule carrying the routing option and address pool
 *   dir  - PF_IN or PF_OUT
 *   oifp - interface the packet was originally seen on
 *   s    - state entry, or NULL; supplies rt_addr/rt_kif when present
 *   pd   - packet descriptor (not used by this function)
 *
 * pf_token must be held on entry; it is released around if_output()
 * calls and re-acquired afterwards.
 */
void
pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0, *m1;
	struct route		 iproute;
	struct route		*ro = NULL;
	struct sockaddr_in	*dst;
	struct ip		*ip;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;
	int			 error = 0;
	int sw_csum;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route: invalid parameters");

	/* Loop guard: drop a packet once its routed counter exceeds 3. */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to works on a copy; the original continues untouched */
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
			return;
		}
	} else {
		/*
		 * Return without routing when (rule is reply-to) equals
		 * (rule direction matches dir).
		 */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
			return;
		}
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
		goto bad;
	}

	ip = mtod(m0, struct ip *);

	ro = &iproute;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = satosin(&ro->ro_dst);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof(*dst);
	dst->sin_addr = ip->ip_dst;

	if (r->rt == PF_FASTROUTE) {
		/* Use the regular routing table to pick interface/next hop. */
		rtalloc(ro);
		if (ro->ro_rt == 0) {
			ipstat.ips_noroute++;
			goto bad;
		}

		ifp = ro->ro_rt->rt_ifp;
		ro->ro_rt->rt_use++;

		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
			dst = satosin(ro->ro_rt->rt_gateway);
	} else {
		if (TAILQ_EMPTY(&r->rpool.list)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n"));
			goto bad;
		}
		if (s == NULL) {
			/* No state: pick next hop from the rule's pool. */
			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
			    &naddr, NULL, &sn);
			if (!PF_AZERO(&naddr, AF_INET))
				dst->sin_addr.s_addr = naddr.v4.s_addr;
			ifp = r->rpool.cur->kif ?
			    r->rpool.cur->kif->pfik_ifp : NULL;
		} else {
			/* Take next hop and interface from the state. */
			if (!PF_AZERO(&s->rt_addr, AF_INET))
				dst->sin_addr.s_addr =
				    s->rt_addr.v4.s_addr;
			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
		}
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/* Leaving via a different interface: filter outbound there. */
		if (pf_test(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < sizeof(struct ip)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route: m0->m_len < sizeof(struct ip)\n"));
			goto bad;
		}
		/* pf_test() may have replaced the mbuf; reload the header */
		ip = mtod(m0, struct ip *);
	}

	/* Copied from FreeBSD 5.1-CURRENT ip_output. */
	m0->m_pkthdr.csum_flags |= CSUM_IP;
	/* sw_csum: checksum work the interface cannot offload */
	sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist;
	if (sw_csum & CSUM_DELAY_DATA) {
		in_delayed_cksum(m0);
		sw_csum &= ~CSUM_DELAY_DATA;
	}
	m0->m_pkthdr.csum_flags &= ifp->if_hwassist;
	m0->m_pkthdr.csum_iphlen = (ip->ip_hl << 2);

	/*
	 * WARNING!  We cannot fragment if the packet was modified from an
	 *	     original which expected to be using TSO.  In this
	 *	     situation we pray that the target interface is
	 *	     compatible with the originating interface.
	 */
	if (ntohs(ip->ip_len) <= ifp->if_mtu ||
	    (m0->m_pkthdr.csum_flags & CSUM_TSO) ||
	    ((ifp->if_hwassist & CSUM_FRAGMENT) &&
		(ip->ip_off & htons(IP_DF)) == 0)) {
		ip->ip_sum = 0;
		if (sw_csum & CSUM_DELAY_IP) {
			/* From KAME */
			if (ip->ip_v == IPVERSION &&
			    (ip->ip_hl << 2) == sizeof(*ip)) {
				ip->ip_sum = in_cksum_hdr(ip);
			} else {
				ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
			}
		}
		/* pf_token is not held across the driver output call */
		lwkt_reltoken(&pf_token);
		error = ifp->if_output(ifp, m0, sintosa(dst), ro->ro_rt);
		lwkt_gettoken(&pf_token);
		goto done;
	}

	/*
	 * Too large for interface; fragment if possible.
	 * Must be able to put at least 8 bytes per fragment.
	 */
	if (ip->ip_off & htons(IP_DF)) {
		ipstat.ips_cantfrag++;
		if (r->rt != PF_DUPTO) {
			/* m0 is handed to icmp_error() and not freed here */
			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
				   ifp->if_mtu);
			goto done;
		} else
			goto bad;
	}

	m1 = m0;
	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum);
	if (error) {
		goto bad;
	}

	/* Send each fragment; after the first failure free the rest. */
	for (m0 = m1; m0; m0 = m1) {
		m1 = m0->m_nextpkt;
		m0->m_nextpkt = 0;
		if (error == 0) {
			lwkt_reltoken(&pf_token);
			error = (*ifp->if_output)(ifp, m0, sintosa(dst),
						  NULL);
			lwkt_gettoken(&pf_token);
		} else
			m_freem(m0);
	}

	if (error == 0)
		ipstat.ips_fragmented++;

done:
	/* Unless duplicating, the caller no longer owns the packet. */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	/* ro is still NULL when we get here via the early loop guard */
	if (ro == &iproute && ro->ro_rt)
		RTFREE(ro->ro_rt);
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET */
#ifdef INET6
/*
 * Apply a route-to / reply-to / dup-to / fastroute rule to an IPv6
 * packet and transmit it on the selected interface.
 *
 *   m    - in/out: packet; set to NULL on return unless the rule is
 *          PF_DUPTO (in which case a duplicate is sent instead)
 *   r    - matching rule carrying the routing option and address pool
 *   dir  - PF_IN or PF_OUT
 *   oifp - interface the packet was originally seen on
 *   s    - state entry, or NULL; supplies rt_addr/rt_kif when present
 *   pd   - packet descriptor (not used by this function)
 *
 * Packets larger than the outgoing MTU are not fragmented; an ICMPv6
 * "packet too big" error is generated instead (or the duplicate is
 * dropped for PF_DUPTO).
 */
void
pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
    struct pf_state *s, struct pf_pdesc *pd)
{
	struct mbuf		*m0;
	struct route_in6	 ip6route;
	struct route_in6	*ro;
	struct sockaddr_in6	*dst;
	struct ip6_hdr		*ip6;
	struct ifnet		*ifp = NULL;
	struct pf_addr		 naddr;
	struct pf_src_node	*sn = NULL;

	if (m == NULL || *m == NULL || r == NULL ||
	    (dir != PF_IN && dir != PF_OUT) || oifp == NULL)
		panic("pf_route6: invalid parameters");

	/* Loop guard: drop a packet once its routed counter exceeds 3. */
	if (((*m)->m_pkthdr.fw_flags & PF_MBUF_ROUTED) == 0) {
		(*m)->m_pkthdr.fw_flags |= PF_MBUF_ROUTED;
		(*m)->m_pkthdr.pf.routed = 1;
	} else {
		if ((*m)->m_pkthdr.pf.routed++ > 3) {
			m0 = *m;
			*m = NULL;
			goto bad;
		}
	}

	if (r->rt == PF_DUPTO) {
		/* dup-to works on a copy; the original continues untouched */
		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL)
			return;
	} else {
		/*
		 * Return without routing when (rule is reply-to) equals
		 * (rule direction matches dir).
		 */
		if ((r->rt == PF_REPLYTO) == (r->direction == dir))
			return;
		m0 = *m;
	}

	if (m0->m_len < sizeof(struct ip6_hdr)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
		goto bad;
	}
	ip6 = mtod(m0, struct ip6_hdr *);

	ro = &ip6route;
	bzero((caddr_t)ro, sizeof(*ro));
	dst = (struct sockaddr_in6 *)&ro->ro_dst;
	dst->sin6_family = AF_INET6;
	dst->sin6_len = sizeof(*dst);
	dst->sin6_addr = ip6->ip6_dst;

	/*
	 * DragonFly doesn't zero the auxiliary pkthdr fields, only fw_flags,
	 * so make sure pf.flags is clear.
	 *
	 * Cheat. XXX why only in the v6 case???
	 */
	if (r->rt == PF_FASTROUTE) {
		m0->m_pkthdr.fw_flags |= PF_MBUF_TAGGED;
		m0->m_pkthdr.pf.flags = 0;
		/* XXX Re-Check when Upgrading to > 4.4 */
		m0->m_pkthdr.pf.statekey = NULL;
		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
		/*
		 * m0 is the caller's *m here and has been handed to
		 * ip6_output(); leave through `done` so that *m is
		 * cleared like on every other exit instead of being
		 * left dangling.
		 */
		goto done;
	}

	if (TAILQ_EMPTY(&r->rpool.list)) {
		DPFPRINTF(PF_DEBUG_URGENT,
		    ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n"));
		goto bad;
	}
	if (s == NULL) {
		/* No state: pick next hop from the rule's pool. */
		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
		    &naddr, NULL, &sn);
		if (!PF_AZERO(&naddr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &naddr, AF_INET6);
		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
	} else {
		/* Take next hop and interface from the state. */
		if (!PF_AZERO(&s->rt_addr, AF_INET6))
			PF_ACPY((struct pf_addr *)&dst->sin6_addr,
			    &s->rt_addr, AF_INET6);
		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
	}
	if (ifp == NULL)
		goto bad;

	if (oifp != ifp) {
		/* Leaving via a different interface: filter outbound there. */
		if (pf_test6(PF_OUT, ifp, &m0, NULL, NULL) != PF_PASS) {
			goto bad;
		} else if (m0 == NULL) {
			goto done;
		}
		if (m0->m_len < sizeof(struct ip6_hdr)) {
			DPFPRINTF(PF_DEBUG_URGENT,
			    ("pf_route6: m0->m_len < sizeof(struct ip6_hdr)\n"));
			goto bad;
		}
		/* pf_test6() may have replaced the mbuf; reload the header */
		ip6 = mtod(m0, struct ip6_hdr *);
	}

	/*
	 * If the packet is too large for the outgoing interface,
	 * send back an icmp6 error.
	 */
	if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
		dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) {
		nd6_output(ifp, ifp, m0, dst, NULL);
	} else {
		in6_ifstat_inc(ifp, ifs6_in_toobig);
		if (r->rt != PF_DUPTO)
			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
		else
			goto bad;
	}

done:
	/* Unless duplicating, the caller no longer owns the packet. */
	if (r->rt != PF_DUPTO)
		*m = NULL;
	return;

bad:
	m_freem(m0);
	goto done;
}
#endif /* INET6 */
No connection acceptance from external hosts regardless of rule set.) 6372 */ 6373 int 6374 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, 6375 sa_family_t af) 6376 { 6377 u_int16_t sum = 0; 6378 int hw_assist = 0; 6379 struct ip *ip; 6380 6381 if (off < sizeof(struct ip) || len < sizeof(struct udphdr)) 6382 return (1); 6383 if (m->m_pkthdr.len < off + len) 6384 return (1); 6385 6386 switch (p) { 6387 case IPPROTO_TCP: 6388 case IPPROTO_UDP: 6389 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { 6390 if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { 6391 sum = m->m_pkthdr.csum_data; 6392 } else { 6393 ip = mtod(m, struct ip *); 6394 sum = in_pseudo(ip->ip_src.s_addr, 6395 ip->ip_dst.s_addr, htonl((u_short)len + 6396 m->m_pkthdr.csum_data + p)); 6397 } 6398 sum ^= 0xffff; 6399 ++hw_assist; 6400 } 6401 break; 6402 case IPPROTO_ICMP: 6403 #ifdef INET6 6404 case IPPROTO_ICMPV6: 6405 #endif /* INET6 */ 6406 break; 6407 default: 6408 return (1); 6409 } 6410 6411 if (!hw_assist) { 6412 switch (af) { 6413 case AF_INET: 6414 if (p == IPPROTO_ICMP) { 6415 if (m->m_len < off) 6416 return (1); 6417 m->m_data += off; 6418 m->m_len -= off; 6419 sum = in_cksum(m, len); 6420 m->m_data -= off; 6421 m->m_len += off; 6422 } else { 6423 if (m->m_len < sizeof(struct ip)) 6424 return (1); 6425 sum = in_cksum_range(m, p, off, len); 6426 if (sum == 0) { 6427 m->m_pkthdr.csum_flags |= 6428 (CSUM_DATA_VALID | 6429 CSUM_PSEUDO_HDR); 6430 m->m_pkthdr.csum_data = 0xffff; 6431 } 6432 } 6433 break; 6434 #ifdef INET6 6435 case AF_INET6: 6436 if (m->m_len < sizeof(struct ip6_hdr)) 6437 return (1); 6438 sum = in6_cksum(m, p, off, len); 6439 /* 6440 * XXX 6441 * IPv6 H/W cksum off-load not supported yet! 
6442 * 6443 * if (sum == 0) { 6444 * m->m_pkthdr.csum_flags |= 6445 * (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); 6446 * m->m_pkthdr.csum_data = 0xffff; 6447 *} 6448 */ 6449 break; 6450 #endif /* INET6 */ 6451 default: 6452 return (1); 6453 } 6454 } 6455 if (sum) { 6456 switch (p) { 6457 case IPPROTO_TCP: 6458 tcpstat.tcps_rcvbadsum++; 6459 break; 6460 case IPPROTO_UDP: 6461 udp_stat.udps_badsum++; 6462 break; 6463 case IPPROTO_ICMP: 6464 icmpstat.icps_checksum++; 6465 break; 6466 #ifdef INET6 6467 case IPPROTO_ICMPV6: 6468 icmp6stat.icp6s_checksum++; 6469 break; 6470 #endif /* INET6 */ 6471 } 6472 return (1); 6473 } 6474 return (0); 6475 } 6476 6477 struct pf_divert * 6478 pf_find_divert(struct mbuf *m) 6479 { 6480 struct m_tag *mtag; 6481 6482 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6483 return (NULL); 6484 6485 return ((struct pf_divert *)(mtag + 1)); 6486 } 6487 6488 struct pf_divert * 6489 pf_get_divert(struct mbuf *m) 6490 { 6491 struct m_tag *mtag; 6492 6493 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6494 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6495 M_NOWAIT); 6496 if (mtag == NULL) 6497 return (NULL); 6498 bzero(mtag + 1, sizeof(struct pf_divert)); 6499 m_tag_prepend(m, mtag); 6500 } 6501 6502 return ((struct pf_divert *)(mtag + 1)); 6503 } 6504 6505 #ifdef INET 6506 6507 /* 6508 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE 6509 */ 6510 int 6511 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, 6512 struct ether_header *eh, struct inpcb *inp) 6513 { 6514 struct pfi_kif *kif; 6515 u_short action, reason = 0, log = 0; 6516 struct mbuf *m = *m0; 6517 struct ip *h = NULL; 6518 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 6519 struct pf_state *s = NULL; 6520 struct pf_ruleset *ruleset = NULL; 6521 struct pf_pdesc pd; 6522 int off, dirndx; 6523 #ifdef ALTQ 6524 int pqid = 0; 6525 #endif 6526 6527 if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) { 6528 /* Skip 
us; continue in ipfw. */ 6529 return (PF_PASS); 6530 } 6531 6532 if (!pf_status.running) 6533 return (PF_PASS); 6534 6535 memset(&pd, 0, sizeof(pd)); 6536 #ifdef foo 6537 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6538 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6539 else 6540 #endif 6541 kif = (struct pfi_kif *)ifp->if_pf_kif; 6542 6543 if (kif == NULL) { 6544 DPFPRINTF(PF_DEBUG_URGENT, 6545 ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname)); 6546 return (PF_DROP); 6547 } 6548 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6549 return (PF_PASS); 6550 6551 #ifdef DIAGNOSTIC 6552 if ((m->m_flags & M_PKTHDR) == 0) 6553 panic("non-M_PKTHDR is passed to pf_test"); 6554 #endif /* DIAGNOSTIC */ 6555 6556 if (m->m_pkthdr.len < (int)sizeof(*h)) { 6557 action = PF_DROP; 6558 REASON_SET(&reason, PFRES_SHORT); 6559 log = 1; 6560 goto done; 6561 } 6562 6563 /* 6564 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 6565 * so make sure pf.flags is clear. 6566 */ 6567 if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED) 6568 return (PF_PASS); 6569 m->m_pkthdr.pf.flags = 0; 6570 /* Re-Check when updating to > 4.4 */ 6571 m->m_pkthdr.pf.statekey = NULL; 6572 6573 /* We do IP header normalization and packet reassembly here */ 6574 if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { 6575 action = PF_DROP; 6576 goto done; 6577 } 6578 m = *m0; /* pf_normalize messes with m0 */ 6579 h = mtod(m, struct ip *); 6580 6581 off = h->ip_hl << 2; 6582 if (off < (int)sizeof(*h)) { 6583 action = PF_DROP; 6584 REASON_SET(&reason, PFRES_SHORT); 6585 log = 1; 6586 goto done; 6587 } 6588 6589 pd.src = (struct pf_addr *)&h->ip_src; 6590 pd.dst = (struct pf_addr *)&h->ip_dst; 6591 pd.sport = pd.dport = NULL; 6592 pd.ip_sum = &h->ip_sum; 6593 pd.proto_sum = NULL; 6594 pd.proto = h->ip_p; 6595 pd.dir = dir; 6596 pd.sidx = (dir == PF_IN) ? 0 : 1; 6597 pd.didx = (dir == PF_IN) ? 
1 : 0; 6598 pd.af = AF_INET; 6599 pd.tos = h->ip_tos; 6600 pd.tot_len = ntohs(h->ip_len); 6601 pd.eh = eh; 6602 6603 /* handle fragments that didn't get reassembled by normalization */ 6604 if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { 6605 action = pf_test_fragment(&r, dir, kif, m, h, 6606 &pd, &a, &ruleset); 6607 goto done; 6608 } 6609 6610 switch (h->ip_p) { 6611 6612 case IPPROTO_TCP: { 6613 struct tcphdr th; 6614 6615 pd.hdr.tcp = &th; 6616 if (!pf_pull_hdr(m, off, &th, sizeof(th), 6617 &action, &reason, AF_INET)) { 6618 log = action != PF_PASS; 6619 goto done; 6620 } 6621 pd.p_len = pd.tot_len - off - (th.th_off << 2); 6622 #ifdef ALTQ 6623 if ((th.th_flags & TH_ACK) && pd.p_len == 0) 6624 pqid = 1; 6625 #endif 6626 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 6627 if (action == PF_DROP) 6628 goto done; 6629 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 6630 &reason); 6631 if (action == PF_PASS) { 6632 r = s->rule.ptr; 6633 a = s->anchor.ptr; 6634 log = s->log; 6635 } else if (s == NULL) { 6636 action = pf_test_rule(&r, &s, dir, kif, 6637 m, off, h, &pd, &a, 6638 &ruleset, NULL, inp); 6639 } 6640 break; 6641 } 6642 6643 case IPPROTO_UDP: { 6644 struct udphdr uh; 6645 6646 pd.hdr.udp = &uh; 6647 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 6648 &action, &reason, AF_INET)) { 6649 log = action != PF_PASS; 6650 goto done; 6651 } 6652 if (uh.uh_dport == 0 || 6653 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 6654 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 6655 action = PF_DROP; 6656 REASON_SET(&reason, PFRES_SHORT); 6657 goto done; 6658 } 6659 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 6660 if (action == PF_PASS) { 6661 r = s->rule.ptr; 6662 a = s->anchor.ptr; 6663 log = s->log; 6664 } else if (s == NULL) { 6665 action = pf_test_rule(&r, &s, dir, kif, 6666 m, off, h, &pd, &a, 6667 &ruleset, NULL, inp); 6668 } 6669 break; 6670 } 6671 6672 case IPPROTO_ICMP: { 6673 struct icmp ih; 6674 6675 pd.hdr.icmp = &ih; 6676 if 
(!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, 6677 &action, &reason, AF_INET)) { 6678 log = action != PF_PASS; 6679 goto done; 6680 } 6681 action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, 6682 &reason); 6683 if (action == PF_PASS) { 6684 r = s->rule.ptr; 6685 a = s->anchor.ptr; 6686 log = s->log; 6687 } else if (s == NULL) { 6688 action = pf_test_rule(&r, &s, dir, kif, 6689 m, off, h, &pd, &a, 6690 &ruleset, NULL, inp); 6691 } 6692 break; 6693 } 6694 6695 default: 6696 action = pf_test_state_other(&s, dir, kif, m, &pd); 6697 if (action == PF_PASS) { 6698 r = s->rule.ptr; 6699 a = s->anchor.ptr; 6700 log = s->log; 6701 } else if (s == NULL) { 6702 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 6703 &pd, &a, &ruleset, NULL, inp); 6704 } 6705 break; 6706 } 6707 6708 done: 6709 if (action == PF_PASS && h->ip_hl > 5 && 6710 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 6711 action = PF_DROP; 6712 REASON_SET(&reason, PFRES_IPOPTIONS); 6713 log = 1; 6714 DPFPRINTF(PF_DEBUG_MISC, 6715 ("pf: dropping packet with ip options\n")); 6716 } 6717 6718 if ((s && s->tag) || r->rtableid) 6719 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 6720 6721 #if 0 6722 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 6723 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 6724 #endif 6725 6726 #ifdef ALTQ 6727 /* 6728 * Generate a hash code and qid request for ALTQ. A qid of 0 6729 * is allowed and will cause altq to select the default queue. 
6730 */ 6731 if (action == PF_PASS) { 6732 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 6733 if (pqid || (pd.tos & IPTOS_LOWDELAY)) 6734 m->m_pkthdr.pf.qid = r->pqid; 6735 else 6736 m->m_pkthdr.pf.qid = r->qid; 6737 m->m_pkthdr.pf.ecn_af = AF_INET; 6738 m->m_pkthdr.pf.hdr = h; 6739 /* add connection hash for fairq */ 6740 if (s) { 6741 /* for fairq */ 6742 m->m_pkthdr.pf.state_hash = s->hash; 6743 m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED; 6744 } 6745 } 6746 #endif /* ALTQ */ 6747 6748 /* 6749 * connections redirected to loopback should not match sockets 6750 * bound specifically to loopback due to security implications, 6751 * see tcp_input() and in_pcblookup_listen(). 6752 */ 6753 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 6754 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 6755 (s->nat_rule.ptr->action == PF_RDR || 6756 s->nat_rule.ptr->action == PF_BINAT) && 6757 (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) 6758 { 6759 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 6760 } 6761 6762 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 6763 struct pf_divert *divert; 6764 6765 if ((divert = pf_get_divert(m))) { 6766 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 6767 divert->port = r->divert.port; 6768 divert->addr.ipv4 = r->divert.addr.v4; 6769 } 6770 } 6771 6772 if (log) { 6773 struct pf_rule *lr; 6774 6775 if (s != NULL && s->nat_rule.ptr != NULL && 6776 s->nat_rule.ptr->log & PF_LOG_ALL) 6777 lr = s->nat_rule.ptr; 6778 else 6779 lr = r; 6780 PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, 6781 &pd); 6782 } 6783 6784 kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 6785 kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; 6786 6787 if (action == PF_PASS || r->action == PF_DROP) { 6788 dirndx = (dir == PF_OUT); 6789 r->packets[dirndx]++; 6790 r->bytes[dirndx] += pd.tot_len; 6791 if (a != NULL) { 6792 a->packets[dirndx]++; 6793 a->bytes[dirndx] += 
pd.tot_len; 6794 } 6795 if (s != NULL) { 6796 if (s->nat_rule.ptr != NULL) { 6797 s->nat_rule.ptr->packets[dirndx]++; 6798 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 6799 } 6800 if (s->src_node != NULL) { 6801 s->src_node->packets[dirndx]++; 6802 s->src_node->bytes[dirndx] += pd.tot_len; 6803 } 6804 if (s->nat_src_node != NULL) { 6805 s->nat_src_node->packets[dirndx]++; 6806 s->nat_src_node->bytes[dirndx] += pd.tot_len; 6807 } 6808 dirndx = (dir == s->direction) ? 0 : 1; 6809 s->packets[dirndx]++; 6810 s->bytes[dirndx] += pd.tot_len; 6811 } 6812 tr = r; 6813 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; 6814 if (nr != NULL && r == &pf_default_rule) 6815 tr = nr; 6816 if (tr->src.addr.type == PF_ADDR_TABLE) 6817 pfr_update_stats(tr->src.addr.p.tbl, 6818 (s == NULL) ? pd.src : 6819 &s->key[(s->direction == PF_IN)]-> 6820 addr[(s->direction == PF_OUT)], 6821 pd.af, pd.tot_len, dir == PF_OUT, 6822 r->action == PF_PASS, tr->src.neg); 6823 if (tr->dst.addr.type == PF_ADDR_TABLE) 6824 pfr_update_stats(tr->dst.addr.p.tbl, 6825 (s == NULL) ? 
pd.dst : 6826 &s->key[(s->direction == PF_IN)]-> 6827 addr[(s->direction == PF_IN)], 6828 pd.af, pd.tot_len, dir == PF_OUT, 6829 r->action == PF_PASS, tr->dst.neg); 6830 } 6831 6832 6833 if (action == PF_SYNPROXY_DROP) { 6834 m_freem(*m0); 6835 *m0 = NULL; 6836 action = PF_PASS; 6837 } else if (r->rt) { 6838 /* pf_route can free the mbuf causing *m0 to become NULL */ 6839 pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); 6840 } 6841 6842 return (action); 6843 } 6844 #endif /* INET */ 6845 6846 #ifdef INET6 6847 6848 /* 6849 * WARNING: pf_token held shared on entry, THIS IS CPU LOCALIZED CODE 6850 */ 6851 int 6852 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, 6853 struct ether_header *eh, struct inpcb *inp) 6854 { 6855 struct pfi_kif *kif; 6856 u_short action, reason = 0, log = 0; 6857 struct mbuf *m = *m0, *n = NULL; 6858 struct ip6_hdr *h = NULL; 6859 struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; 6860 struct pf_state *s = NULL; 6861 struct pf_ruleset *ruleset = NULL; 6862 struct pf_pdesc pd; 6863 int off, terminal = 0, dirndx, rh_cnt = 0; 6864 6865 if (!pf_status.running) 6866 return (PF_PASS); 6867 6868 memset(&pd, 0, sizeof(pd)); 6869 #ifdef foo 6870 if (ifp->if_type == IFT_CARP && ifp->if_carpdev) 6871 kif = (struct pfi_kif *)ifp->if_carpdev->if_pf_kif; 6872 else 6873 #endif 6874 kif = (struct pfi_kif *)ifp->if_pf_kif; 6875 6876 if (kif == NULL) { 6877 DPFPRINTF(PF_DEBUG_URGENT, 6878 ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname)); 6879 return (PF_DROP); 6880 } 6881 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6882 return (PF_PASS); 6883 6884 #ifdef DIAGNOSTIC 6885 if ((m->m_flags & M_PKTHDR) == 0) 6886 panic("non-M_PKTHDR is passed to pf_test6"); 6887 #endif /* DIAGNOSTIC */ 6888 6889 if (m->m_pkthdr.len < (int)sizeof(*h)) { 6890 action = PF_DROP; 6891 REASON_SET(&reason, PFRES_SHORT); 6892 log = 1; 6893 goto done; 6894 } 6895 6896 /* 6897 * DragonFly doesn't zero the auxillary pkghdr fields, only fw_flags, 6898 * so make sure pf.flags is 
clear. 6899 */ 6900 if (m->m_pkthdr.fw_flags & PF_MBUF_TAGGED) 6901 return (PF_PASS); 6902 m->m_pkthdr.pf.flags = 0; 6903 /* Re-Check when updating to > 4.4 */ 6904 m->m_pkthdr.pf.statekey = NULL; 6905 6906 /* We do IP header normalization and packet reassembly here */ 6907 if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { 6908 action = PF_DROP; 6909 goto done; 6910 } 6911 m = *m0; /* pf_normalize messes with m0 */ 6912 h = mtod(m, struct ip6_hdr *); 6913 6914 #if 1 6915 /* 6916 * we do not support jumbogram yet. if we keep going, zero ip6_plen 6917 * will do something bad, so drop the packet for now. 6918 */ 6919 if (htons(h->ip6_plen) == 0) { 6920 action = PF_DROP; 6921 REASON_SET(&reason, PFRES_NORM); /*XXX*/ 6922 goto done; 6923 } 6924 #endif 6925 6926 pd.src = (struct pf_addr *)&h->ip6_src; 6927 pd.dst = (struct pf_addr *)&h->ip6_dst; 6928 pd.sport = pd.dport = NULL; 6929 pd.ip_sum = NULL; 6930 pd.proto_sum = NULL; 6931 pd.dir = dir; 6932 pd.sidx = (dir == PF_IN) ? 0 : 1; 6933 pd.didx = (dir == PF_IN) ? 
1 : 0; 6934 pd.af = AF_INET6; 6935 pd.tos = 0; 6936 pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6937 pd.eh = eh; 6938 6939 off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr); 6940 pd.proto = h->ip6_nxt; 6941 do { 6942 switch (pd.proto) { 6943 case IPPROTO_FRAGMENT: 6944 action = pf_test_fragment(&r, dir, kif, m, h, 6945 &pd, &a, &ruleset); 6946 if (action == PF_DROP) 6947 REASON_SET(&reason, PFRES_FRAG); 6948 goto done; 6949 case IPPROTO_ROUTING: { 6950 struct ip6_rthdr rthdr; 6951 6952 if (rh_cnt++) { 6953 DPFPRINTF(PF_DEBUG_MISC, 6954 ("pf: IPv6 more than one rthdr\n")); 6955 action = PF_DROP; 6956 REASON_SET(&reason, PFRES_IPOPTIONS); 6957 log = 1; 6958 goto done; 6959 } 6960 if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL, 6961 &reason, pd.af)) { 6962 DPFPRINTF(PF_DEBUG_MISC, 6963 ("pf: IPv6 short rthdr\n")); 6964 action = PF_DROP; 6965 REASON_SET(&reason, PFRES_SHORT); 6966 log = 1; 6967 goto done; 6968 } 6969 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6970 DPFPRINTF(PF_DEBUG_MISC, 6971 ("pf: IPv6 rthdr0\n")); 6972 action = PF_DROP; 6973 REASON_SET(&reason, PFRES_IPOPTIONS); 6974 log = 1; 6975 goto done; 6976 } 6977 /* FALLTHROUGH */ 6978 } 6979 case IPPROTO_AH: 6980 case IPPROTO_HOPOPTS: 6981 case IPPROTO_DSTOPTS: { 6982 /* get next header and header length */ 6983 struct ip6_ext opt6; 6984 6985 if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6), 6986 NULL, &reason, pd.af)) { 6987 DPFPRINTF(PF_DEBUG_MISC, 6988 ("pf: IPv6 short opt\n")); 6989 action = PF_DROP; 6990 log = 1; 6991 goto done; 6992 } 6993 if (pd.proto == IPPROTO_AH) 6994 off += (opt6.ip6e_len + 2) * 4; 6995 else 6996 off += (opt6.ip6e_len + 1) * 8; 6997 pd.proto = opt6.ip6e_nxt; 6998 /* goto the next header */ 6999 break; 7000 } 7001 default: 7002 terminal++; 7003 break; 7004 } 7005 } while (!terminal); 7006 7007 /* if there's no routing header, use unmodified mbuf for checksumming */ 7008 if (!n) 7009 n = m; 7010 7011 switch (pd.proto) { 7012 7013 case IPPROTO_TCP: { 7014 
struct tcphdr th; 7015 7016 pd.hdr.tcp = &th; 7017 if (!pf_pull_hdr(m, off, &th, sizeof(th), 7018 &action, &reason, AF_INET6)) { 7019 log = action != PF_PASS; 7020 goto done; 7021 } 7022 pd.p_len = pd.tot_len - off - (th.th_off << 2); 7023 action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); 7024 if (action == PF_DROP) 7025 goto done; 7026 action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, 7027 &reason); 7028 if (action == PF_PASS) { 7029 r = s->rule.ptr; 7030 a = s->anchor.ptr; 7031 log = s->log; 7032 } else if (s == NULL) { 7033 action = pf_test_rule(&r, &s, dir, kif, 7034 m, off, h, &pd, &a, 7035 &ruleset, NULL, inp); 7036 } 7037 break; 7038 } 7039 7040 case IPPROTO_UDP: { 7041 struct udphdr uh; 7042 7043 pd.hdr.udp = &uh; 7044 if (!pf_pull_hdr(m, off, &uh, sizeof(uh), 7045 &action, &reason, AF_INET6)) { 7046 log = action != PF_PASS; 7047 goto done; 7048 } 7049 if (uh.uh_dport == 0 || 7050 ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || 7051 ntohs(uh.uh_ulen) < sizeof(struct udphdr)) { 7052 action = PF_DROP; 7053 REASON_SET(&reason, PFRES_SHORT); 7054 goto done; 7055 } 7056 action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); 7057 if (action == PF_PASS) { 7058 r = s->rule.ptr; 7059 a = s->anchor.ptr; 7060 log = s->log; 7061 } else if (s == NULL) { 7062 action = pf_test_rule(&r, &s, dir, kif, 7063 m, off, h, &pd, &a, 7064 &ruleset, NULL, inp); 7065 } 7066 break; 7067 } 7068 7069 case IPPROTO_ICMPV6: { 7070 struct icmp6_hdr ih; 7071 7072 pd.hdr.icmp6 = &ih; 7073 if (!pf_pull_hdr(m, off, &ih, sizeof(ih), 7074 &action, &reason, AF_INET6)) { 7075 log = action != PF_PASS; 7076 goto done; 7077 } 7078 action = pf_test_state_icmp(&s, dir, kif, 7079 m, off, h, &pd, &reason); 7080 if (action == PF_PASS) { 7081 r = s->rule.ptr; 7082 a = s->anchor.ptr; 7083 log = s->log; 7084 } else if (s == NULL) { 7085 action = pf_test_rule(&r, &s, dir, kif, 7086 m, off, h, &pd, &a, 7087 &ruleset, NULL, inp); 7088 } 7089 break; 7090 } 7091 7092 default: 7093 action = 
pf_test_state_other(&s, dir, kif, m, &pd); 7094 if (action == PF_PASS) { 7095 r = s->rule.ptr; 7096 a = s->anchor.ptr; 7097 log = s->log; 7098 } else if (s == NULL) { 7099 action = pf_test_rule(&r, &s, dir, kif, m, off, h, 7100 &pd, &a, &ruleset, NULL, inp); 7101 } 7102 break; 7103 } 7104 7105 done: 7106 if (n != m) { 7107 m_freem(n); 7108 n = NULL; 7109 } 7110 7111 /* handle dangerous IPv6 extension headers. */ 7112 if (action == PF_PASS && rh_cnt && 7113 !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { 7114 action = PF_DROP; 7115 REASON_SET(&reason, PFRES_IPOPTIONS); 7116 log = 1; 7117 DPFPRINTF(PF_DEBUG_MISC, 7118 ("pf: dropping packet with dangerous v6 headers\n")); 7119 } 7120 7121 if ((s && s->tag) || r->rtableid) 7122 pf_tag_packet(m, s ? s->tag : 0, r->rtableid); 7123 7124 #if 0 7125 if (dir == PF_IN && s && s->key[PF_SK_STACK]) 7126 m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; 7127 #endif 7128 7129 #ifdef ALTQ 7130 /* 7131 * Generate a hash code and qid request for ALTQ. A qid of 0 7132 * is allowed and will cause altq to select the default queue. 
7133 */ 7134 if (action == PF_PASS) { 7135 m->m_pkthdr.fw_flags |= PF_MBUF_STRUCTURE; 7136 if (pd.tos & IPTOS_LOWDELAY) 7137 m->m_pkthdr.pf.qid = r->pqid; 7138 else 7139 m->m_pkthdr.pf.qid = r->qid; 7140 m->m_pkthdr.pf.ecn_af = AF_INET6; 7141 m->m_pkthdr.pf.hdr = h; 7142 if (s) { 7143 /* for fairq */ 7144 m->m_pkthdr.pf.state_hash = s->hash; 7145 m->m_pkthdr.pf.flags |= PF_TAG_STATE_HASHED; 7146 } 7147 } 7148 #endif /* ALTQ */ 7149 7150 if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || 7151 pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && 7152 (s->nat_rule.ptr->action == PF_RDR || 7153 s->nat_rule.ptr->action == PF_BINAT) && 7154 IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) 7155 { 7156 m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7157 } 7158 7159 if (dir == PF_IN && action == PF_PASS && r->divert.port) { 7160 struct pf_divert *divert; 7161 7162 if ((divert = pf_get_divert(m))) { 7163 m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7164 divert->port = r->divert.port; 7165 divert->addr.ipv6 = r->divert.addr.v6; 7166 } 7167 } 7168 7169 if (log) { 7170 struct pf_rule *lr; 7171 7172 if (s != NULL && s->nat_rule.ptr != NULL && 7173 s->nat_rule.ptr->log & PF_LOG_ALL) 7174 lr = s->nat_rule.ptr; 7175 else 7176 lr = r; 7177 PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, 7178 &pd); 7179 } 7180 7181 kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; 7182 kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; 7183 7184 if (action == PF_PASS || r->action == PF_DROP) { 7185 dirndx = (dir == PF_OUT); 7186 r->packets[dirndx]++; 7187 r->bytes[dirndx] += pd.tot_len; 7188 if (a != NULL) { 7189 a->packets[dirndx]++; 7190 a->bytes[dirndx] += pd.tot_len; 7191 } 7192 if (s != NULL) { 7193 if (s->nat_rule.ptr != NULL) { 7194 s->nat_rule.ptr->packets[dirndx]++; 7195 s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; 7196 } 7197 if (s->src_node != NULL) { 7198 s->src_node->packets[dirndx]++; 7199 s->src_node->bytes[dirndx] += 
pd.tot_len; 7200 } 7201 if (s->nat_src_node != NULL) { 7202 s->nat_src_node->packets[dirndx]++; 7203 s->nat_src_node->bytes[dirndx] += pd.tot_len; 7204 } 7205 dirndx = (dir == s->direction) ? 0 : 1; 7206 s->packets[dirndx]++; 7207 s->bytes[dirndx] += pd.tot_len; 7208 } 7209 tr = r; 7210 nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; 7211 if (nr != NULL && r == &pf_default_rule) 7212 tr = nr; 7213 if (tr->src.addr.type == PF_ADDR_TABLE) 7214 pfr_update_stats(tr->src.addr.p.tbl, 7215 (s == NULL) ? pd.src : 7216 &s->key[(s->direction == PF_IN)]->addr[0], 7217 pd.af, pd.tot_len, dir == PF_OUT, 7218 r->action == PF_PASS, tr->src.neg); 7219 if (tr->dst.addr.type == PF_ADDR_TABLE) 7220 pfr_update_stats(tr->dst.addr.p.tbl, 7221 (s == NULL) ? pd.dst : 7222 &s->key[(s->direction == PF_IN)]->addr[1], 7223 pd.af, pd.tot_len, dir == PF_OUT, 7224 r->action == PF_PASS, tr->dst.neg); 7225 } 7226 7227 7228 if (action == PF_SYNPROXY_DROP) { 7229 m_freem(*m0); 7230 *m0 = NULL; 7231 action = PF_PASS; 7232 } else if (r->rt) 7233 /* pf_route6 can free the mbuf causing *m0 to become NULL */ 7234 pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); 7235 7236 return (action); 7237 } 7238 #endif /* INET6 */ 7239 7240 int 7241 pf_check_congestion(struct ifqueue *ifq) 7242 { 7243 return (0); 7244 } 7245