/*	$OpenBSD: pf.c,v 1.1122 2021/07/07 18:38:25 sashan Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "bpfilter.h"
#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_divert.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOG > 0
#include <net/if_pflog.h>
#endif /* NPFLOG > 0 */

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#else
struct pfsync_deferral;
#endif /* NPFSYNC > 0 */

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#endif

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;

int			 pf_hdr_limit = 20;  /* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

int		 pf_npurge;
struct task	 pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
struct timeout	 pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	enum pf_test_status	  test_status;
	struct pf_pdesc		 *pd;
	struct pf_rule_actions	  act;
	u_int8_t		  icmpcode;
	u_int8_t		  icmptype;
	int			  icmp_dir;
	int			  state_icmp;
	int			  tag;
	u_short			  reason;
	struct pf_rule_item	 *ri;
	struct pf_src_node	 *sns[PF_SN_MAX];
	struct pf_rule_slist	  rules;
	struct pf_rule		 *nr;
	struct pf_rule		**rm;
	struct pf_rule		 *a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	 *arsm;
	struct pf_ruleset	 *aruleset;
	struct tcphdr		 *th;
	int			  depth;
};

#define	PF_ANCHOR_STACK_MAX	64

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
static __inline void	 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *,
			    struct pfsync_deferral **);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node *[]);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *, int);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    u_int16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
int			 pf_step_into_anchor(struct pf_test_ctx *,
			    struct pf_rule *);
int			 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_inpcb_unlink_state_key(struct inpcb *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif	/* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl,		PFSTATE_HIWAT,		PFSTATE_HIWAT },
	{ &pf_src_tree_pl,	PFSNODE_HIWAT,		PFSNODE_HIWAT },
	{ &pf_frent_pl,		PFFRAG_FRENT_HIWAT,	PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl,	PFR_KTABLE_HIWAT,	PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl,	PFR_KENTRY_HIWAT,	PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl,	PF_PKTDELAY_MAXPKTS,	PF_PKTDELAY_MAXPKTS }
};
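
/*
 * Note on the macro below: a state created by a rule with the if-bound
 * flag set is tied to the interface it was created on, while a floating
 * state (the default) is keyed to the pseudo-interface pfi_all and
 * matches packets arriving on any interface.
 */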
#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *);
static __inline int pf_state_compare_key(struct pf_state_key *,
	struct pf_state_key *);
static __inline int pf_state_compare_id(struct pf_state *,
	struct pf_state *);
#ifdef INET6
static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t);

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key);
RB_GENERATE(pf_state_tree_id, pf_state,
    entry_id, pf_state_compare_id);

SLIST_HEAD(pf_rule_gcl, pf_rule)	pf_rule_gcl =
	SLIST_HEAD_INITIALIZER(pf_rule_gcl);

__inline int
pf_addr_compare(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *s, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		s->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (s->src.state == newstate)
		return;
	if (s->creatorid == pf_status.hostid && s->key[PF_SK_STACK] != NULL &&
	    s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(s->src.state) ||
	    s->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		pf_status.states_halfopen--;

	s->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
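
/*
 * Illustrative walk-through of the threshold helpers above (not from
 * the original source): for a rule with "max-src-conn-rate 10/60",
 * pf_init_threshold() stores limit = 10 * PF_THRESHOLD_MULT and
 * seconds = 60.  Each new connection calls pf_add_threshold(), which
 * first ages the counter linearly over the window (after 30 of the 60
 * seconds, half of the accumulated count is dropped) and then adds
 * PF_THRESHOLD_MULT for the new event.  pf_check_threshold() trips once
 * the decayed count exceeds the scaled limit, i.e. once more than
 * roughly 10 connections arrive per sliding minute.
 */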
void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}
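
/*
 * A minimal reader following the protocol described above (sketch only,
 * mirroring what pf_purge_expired_states() does further down):
 *
 *	struct pf_state *head, *tail, *st;
 *
 *	rw_enter_read(&pf_state_list.pfs_rwl);
 *	mtx_enter(&pf_state_list.pfs_mtx);
 *	head = TAILQ_FIRST(&pf_state_list.pfs_list);
 *	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
 *	mtx_leave(&pf_state_list.pfs_mtx);
 *
 *	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
 *		... inspect st ...
 *		if (st == tail)
 *			break;
 *	}
 *	rw_exit_read(&pf_state_list.pfs_rwl);
 *
 * The read lock keeps pf_state_list_remove() out, so everything between
 * the snapshotted head and tail stays linked while it is walked, and
 * inserts at the tail are harmless because the walk never goes past the
 * snapshot.
 */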
int
pf_src_connlimit(struct pf_state **state)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*state), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*state)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*state)->rule.ptr->max_src_conn &&
	    (*state)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*state)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*state)->rule.ptr->overload_tbl) {
		struct pfr_addr	 p;
		u_int32_t	 killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*state)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*state)->key[PF_SK_WIRE]->af;
		switch ((*state)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*state)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*state)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RB_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source.  (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*state)->key[PF_SK_WIRE]->af &&
				    (((*state)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*state)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*state)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*state)->rule.ptr == st->rule.ptr)) {
					st->timeout = PFTM_PURGE;
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	(*state)->timeout = PFTM_PURGE;
	pf_set_protostate(*state, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}
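
/*
 * The limits enforced above correspond to the pf.conf state options
 * max-src-conn, max-src-conn-rate and "overload <table> flush global",
 * e.g. (illustrative ruleset, not part of this file):
 *
 *	table <bruteforce> persist
 *	block quick from <bruteforce>
 *	pass in proto tcp to port ssh keep state \
 *	    (max-src-conn 10, max-src-conn-rate 5/30, \
 *	    overload <bruteforce> flush global)
 */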
int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *s, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &s->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *s, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&s->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&s->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static __inline int
pf_state_compare_key(struct pf_state_key *a, struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static __inline int
pf_state_compare_id(struct pf_state *a, struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

int
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*olds = NULL;

	KASSERT(s->key[idx] == NULL);
	if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) {
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->states, entry)
			if (si->s->kif == s->kif &&
			    ((si->s->key[PF_SK_WIRE]->af == sk->af &&
			    si->s->direction == s->direction) ||
			    (si->s->key[PF_SK_WIRE]->af !=
			    si->s->key[PF_SK_STACK]->af &&
			    sk->af == si->s->key[PF_SK_STACK]->af &&
			    si->s->direction != s->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    si->s->src.state >= TCPS_FIN_WAIT_2 &&
				    si->s->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    s->kif->pfik_name);
					pf_print_state_parts(s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(si->s,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(si->s, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					olds = si->s;
				} else {
					pool_put(&pf_state_key_pl, sk);
					return (-1);	/* collision! */
				}
			}
		pool_put(&pf_state_key_pl, sk);
		s->key[idx] = cur;
	} else
		s->key[idx] = sk;

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		pf_state_key_detach(s, idx);
		return (-1);
	}
	si->s = s;

	/* list is sorted, if-bound states before floating */
	if (s->kif == pfi_all)
		TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry);
	else
		TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry);

	if (olds)
		pf_remove_state(olds);

	return (0);
}

void
pf_detach_state(struct pf_state *s)
{
	if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK])
		s->key[PF_SK_WIRE] = NULL;

	if (s->key[PF_SK_STACK] != NULL)
		pf_state_key_detach(s, PF_SK_STACK);

	if (s->key[PF_SK_WIRE] != NULL)
		pf_state_key_detach(s, PF_SK_WIRE);
}

void
pf_state_key_detach(struct pf_state *s, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	if (s->key[idx] == NULL)
		return;

	si = TAILQ_FIRST(&s->key[idx]->states);
	while (si && si->s != s)
		si = TAILQ_NEXT(si, entry);

	if (si) {
		TAILQ_REMOVE(&s->key[idx]->states, si, entry);
		pool_put(&pf_state_item_pl, si);
	}

	sk = s->key[idx];
	s->key[idx] = NULL;
	if (TAILQ_EMPTY(&sk->states)) {
		RB_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);
	TAILQ_INIT(&sk->states);

	return (sk);
}
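
/*
 * For ICMPv6 neighbor discovery, the function below builds the state
 * key from the ND target address rather than the packet's own
 * addresses: a neighbor solicitation is typically sent to a
 * solicited-node multicast address while the advertisement comes back
 * unicast from the target, so keying on the target is what lets both
 * directions match the same state.
 */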
static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}
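
/*
 * pf_state_key_setup() below builds one key from the addresses as they
 * appear in the packet (sk1) and, when NAT or NAT64 rewrites anything,
 * a second key from the translated addresses (sk2).  Worked example
 * (illustrative addresses): for an inbound rdr translating
 * 192.0.2.1:80 -> 10.0.0.5:8080, a connection from 198.51.100.7:34567
 * yields
 *
 *	wire key  (PF_SK_WIRE):  198.51.100.7:34567 -> 192.0.2.1:80
 *	stack key (PF_SK_STACK): 198.51.100.7:34567 -> 10.0.0.5:8080
 *
 * The wire key matches the packet as seen on the wire; the stack key
 * matches what the local stack sees after translation.
 */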
int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	PF_REF_INIT(sk1->refcnt);
	sk1->removed = 0;
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pool_put(&pf_state_key_pl, sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		PF_REF_INIT(sk2->refcnt);
		sk2->removed = 0;
	} else
		sk2 = sk1;

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
    struct pf_state_key **sks, struct pf_state *s)
{
	PF_ASSERT_LOCKED();

	s->kif = kif;
	PF_STATE_ENTER_WRITE();
	if (*skw == *sks) {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = *sks = s->key[PF_SK_WIRE];
		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
	} else {
		if (pf_state_key_attach(*skw, s, PF_SK_WIRE)) {
			pool_put(&pf_state_key_pl, *sks);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*skw = s->key[PF_SK_WIRE];
		if (pf_state_key_attach(*sks, s, PF_SK_STACK)) {
			pf_state_key_detach(s, PF_SK_WIRE);
			PF_STATE_EXIT_WRITE();
			return (-1);
		}
		*sks = s->key[PF_SK_STACK];
	}

	if (s->id == 0 && s->creatorid == 0) {
		s->id = htobe64(pf_status.stateid++);
		s->creatorid = pf_status.hostid;
	}
	if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(s->id), ntohl(s->creatorid));
			addlog("\n");
		}
		pf_detach_state(s);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, s);
	pf_status.fcounters[FCNT_STATE_INSERT]++;
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();
#if NPFSYNC > 0
	pfsync_insert_state(s);
#endif	/* NPFSYNC > 0 */
	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}
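
/*
 * pf_find_state() first tries to avoid the tree lookup entirely.  For
 * forwarded packets, the inbound pass leaves the matched state key in
 * m_pkthdr.pf.statekey; on the outbound pass its ->reverse pointer (set
 * up below via pf_state_key_link_reverse() after a successful
 * pf_compare_state_keys()) leads straight to the paired key.  For
 * locally generated packets, the key cached on the inpcb serves the
 * same purpose.  Only if neither shortcut is valid does the code fall
 * back to the RB tree search.
 */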
int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state)
{
	struct pf_state_key	*sk, *pkt_sk, *inp_sk;
	struct pf_state_item	*si;
	struct pf_state		*s = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	inp_sk = NULL;
	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->reverse))
			sk = pkt_sk->reverse;

		if (pkt_sk == NULL) {
			/* here we deal with local outbound packet */
			if (pd->m->m_pkthdr.pf.inp != NULL) {
				inp_sk = pd->m->m_pkthdr.pf.inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk))
					sk = inp_sk;
				else
					pf_inpcb_unlink_state_key(
					    pd->m->m_pkthdr.pf.inp);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RB_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT && pd->m->m_pkthdr.pf.inp &&
		    !pd->m->m_pkthdr.pf.inp->inp_pf_sk && !sk->inp)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->states, entry)
		if ((si->s->kif == pfi_all || si->s->kif == pd->kif) &&
		    ((si->s->key[PF_SK_WIRE]->af == si->s->key[PF_SK_STACK]->af
		    && sk == (pd->dir == PF_IN ? si->s->key[PF_SK_WIRE] :
		    si->s->key[PF_SK_STACK])) ||
		    (si->s->key[PF_SK_WIRE]->af != si->s->key[PF_SK_STACK]->af
		    && pd->dir == PF_IN && (sk == si->s->key[PF_SK_STACK] ||
		    sk == si->s->key[PF_SK_WIRE])))) {
			s = si->s;
			break;
		}

	if (s == NULL || s->timeout == PFTM_PURGE)
		return (PF_DROP);

	if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
		pf_add_threshold(&s->rule.ptr->pktrate);
		if (pf_check_threshold(&s->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*state = s;

	return (PF_MATCH);
}
struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	pf_status.fcounters[FCNT_STATE_SEARCH]++;

	sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->states, entry)
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] :
			    si->s->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (si->s);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
	}
	return (ret ? ret->s : NULL);
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif	/* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}
/* END state table stuff */

void
pf_purge_expired_rules(void)
{
	struct pf_rule	*r;

	PF_ASSERT_LOCKED();

	if (SLIST_EMPTY(&pf_rule_gcl))
		return;

	while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
		SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
		KASSERT(r->rule_flag & PFRULE_EXPIRED);
		pf_purge_rule(r);
	}
}

void
pf_purge_timeout(void *unused)
{
	/* XXX move to systqmp to avoid KERNEL_LOCK */
	task_add(systq, &pf_purge_task);
}

void
pf_purge(void *xnloops)
{
	int *nloops = xnloops;

	/*
	 * process a fraction of the state table every second
	 * Note:
	 * 	we no longer need PF_LOCK() here, because
	 * 	pf_purge_expired_states() uses pf_state_lock to maintain
	 * 	consistency.
	 */
	pf_purge_expired_states(1 + (pf_status.states
	    / pf_default_rule.timeout[PFTM_INTERVAL]));

	NET_LOCK();

	PF_LOCK();
	/* purge other expired types every PFTM_INTERVAL seconds */
	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_src_nodes();
		pf_purge_expired_rules();
	}
	PF_UNLOCK();

	/*
	 * Fragments don't require PF_LOCK(), they use their own lock.
	 */
	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
		pf_purge_expired_fragments();
		*nloops = 0;
	}
	NET_UNLOCK();

	timeout_add_sec(&pf_purge_to, 1);
}

int32_t
pf_state_expires(const struct pf_state *state, uint8_t stimeout)
{
	u_int32_t	timeout;
	u_int32_t	start;
	u_int32_t	end;
	u_int32_t	states;

	/*
	 * pf_state_expires is used by the state purge task to
	 * decide if a state is a candidate for cleanup, and by the
	 * pfsync state export code to populate an expiry time.
	 *
	 * this function may be called by the state purge task while
	 * the state is being modified. avoid inconsistent reads of
	 * state->timeout by having the caller do the read (and any
	 * checks it needs to do on the same variable) and then pass
	 * their view of the timeout in here for this function to use.
	 * the only consequence of using a stale timeout value is
	 * that the state won't be a candidate for purging until the
	 * next pass of the purge task.
	 */

	/* handle all PFTM_* > PFTM_MAX here */
	if (stimeout == PFTM_PURGE)
		return (0);

	KASSERT(stimeout != PFTM_UNLINKED);
	KASSERT(stimeout < PFTM_MAX);

	timeout = state->rule.ptr->timeout[stimeout];
	if (!timeout)
		timeout = pf_default_rule.timeout[stimeout];

	start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
	if (start) {
		end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
		states = state->rule.ptr->states_cur;
	} else {
		start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
		end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
		states = pf_status.states;
	}
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);

		timeout = (u_int64_t)timeout * (end - states) / (end - start);
	}

	return (state->expire + timeout);
}
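
/*
 * Worked example for the adaptive scaling above (illustrative numbers):
 * with adaptive.start 6000, adaptive.end 12000 and a base timeout of
 * 86400 seconds, 9000 tracked states give
 *
 *	timeout = 86400 * (12000 - 9000) / (12000 - 6000) = 43200
 *
 * i.e. timeouts shrink linearly once the state count passes the start
 * threshold and reach zero at the end threshold, where states expire
 * immediately.
 */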
void
pf_purge_expired_src_nodes(void)
{
	struct pf_src_node	*cur, *next;

	PF_ASSERT_LOCKED();

	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);

		if (cur->states == 0 && cur->expire <= getuptime()) {
			next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
			pf_remove_src_node(cur);
		}
	}
}

void
pf_src_tree_remove_state(struct pf_state *s)
{
	u_int32_t		 timeout;
	struct pf_sn_item	*sni;

	while ((sni = SLIST_FIRST(&s->src_nodes)) != NULL) {
		SLIST_REMOVE_HEAD(&s->src_nodes, next);
		if (s->src.tcp_est)
			--sni->sn->conn;
		if (--sni->sn->states == 0) {
			timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
			if (!timeout)
				timeout =
				    pf_default_rule.timeout[PFTM_SRC_NODE];
			sni->sn->expire = getuptime() + timeout;
		}
		pool_put(&pf_sn_item_pl, sni);
	}
}

void
pf_remove_state(struct pf_state *cur)
{
	PF_ASSERT_LOCKED();

	/* handle load balancing related tasks */
	pf_postprocess_addr(cur);

	if (cur->src.state == PF_TCPS_PROXY_DST) {
		pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af,
		    &cur->key[PF_SK_WIRE]->addr[1],
		    &cur->key[PF_SK_WIRE]->addr[0],
		    cur->key[PF_SK_WIRE]->port[1],
		    cur->key[PF_SK_WIRE]->port[0],
		    cur->src.seqhi, cur->src.seqlo + 1,
		    TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag,
		    cur->key[PF_SK_WIRE]->rdomain);
	}
	if (cur->key[PF_SK_STACK]->proto == IPPROTO_TCP)
		pf_set_protostate(cur, PF_PEER_BOTH, TCPS_CLOSED);

	RB_REMOVE(pf_state_tree_id, &tree_id, cur);
#if NPFLOW > 0
	if (cur->state_flags & PFSTATE_PFLOW)
		export_pflow(cur);
#endif	/* NPFLOW > 0 */
#if NPFSYNC > 0
	pfsync_delete_state(cur);
#endif	/* NPFSYNC > 0 */
	cur->timeout = PFTM_UNLINKED;
	pf_src_tree_remove_state(cur);
	pf_detach_state(cur);
}

void
pf_remove_divert_state(struct pf_state_key *sk)
{
	struct pf_state_item	*si;

	PF_ASSERT_UNLOCKED();

	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	TAILQ_FOREACH(si, &sk->states, entry) {
		if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
		    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
		    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
			pf_remove_state(si->s);
			break;
		}
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
}
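
/*
 * State teardown is two-phased: pf_remove_state() above unlinks the
 * state from the id tree and the key table and marks it PFTM_UNLINKED,
 * while pf_free_state() below drops the rule and interface references
 * and returns the memory once nothing (e.g. a pending pfsync update)
 * still uses the state.
 */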
void
pf_free_state(struct pf_state *cur)
{
	struct pf_rule_item *ri;

	PF_ASSERT_LOCKED();

#if NPFSYNC > 0
	if (pfsync_state_in_use(cur))
		return;
#endif	/* NPFSYNC > 0 */
	KASSERT(cur->timeout == PFTM_UNLINKED);
	if (--cur->rule.ptr->states_cur == 0 &&
	    cur->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, cur->rule.ptr);
	if (cur->anchor.ptr != NULL)
		if (--cur->anchor.ptr->states_cur == 0)
			pf_rm_rule(NULL, cur->anchor.ptr);
	while ((ri = SLIST_FIRST(&cur->match_rules))) {
		SLIST_REMOVE_HEAD(&cur->match_rules, entry);
		if (--ri->r->states_cur == 0 &&
		    ri->r->src_nodes == 0)
			pf_rm_rule(NULL, ri->r);
		pool_put(&pf_rule_item_pl, ri);
	}
	pf_normalize_tcp_cleanup(cur);
	pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE);
	pf_state_list_remove(&pf_state_list, cur);
	if (cur->tag)
		pf_tag_unref(cur->tag);
	pf_state_unref(cur);
	pf_status.fcounters[FCNT_STATE_REMOVALS]++;
	pf_status.states--;
}

void
pf_purge_expired_states(u_int32_t maxcheck)
{
	/*
	 * this task/thread/context/whatever is the only thing that
	 * removes states from the pf_state_list, so the cur reference
	 * it holds between calls is guaranteed to still be in the
	 * list.
	 */
	static struct pf_state	*cur = NULL;

	struct pf_state		*head, *tail;
	struct pf_state		*st;
	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
	time_t			 now;

	PF_ASSERT_UNLOCKED();

	rw_enter_read(&pf_state_list.pfs_rwl);

	mtx_enter(&pf_state_list.pfs_mtx);
	head = TAILQ_FIRST(&pf_state_list.pfs_list);
	tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
	mtx_leave(&pf_state_list.pfs_mtx);

	if (head == NULL) {
		/* the list is empty */
		rw_exit_read(&pf_state_list.pfs_rwl);
		return;
	}

	/* (re)start at the front of the list */
	if (cur == NULL)
		cur = head;

	now = getuptime();

	do {
		uint8_t stimeout = cur->timeout;

		if ((stimeout == PFTM_UNLINKED) ||
		    (pf_state_expires(cur, stimeout) <= now)) {
			st = pf_state_ref(cur);
			SLIST_INSERT_HEAD(&gcl, st, gc_list);
		}

		/* don't iterate past the end of our view of the list */
		if (cur == tail) {
			cur = NULL;
			break;
		}

		cur = TAILQ_NEXT(cur, entry_list);
	} while (maxcheck--);

	rw_exit_read(&pf_state_list.pfs_rwl);

	if (SLIST_EMPTY(&gcl))
		return;

	NET_LOCK();
	rw_enter_write(&pf_state_list.pfs_rwl);
	PF_LOCK();
	PF_STATE_ENTER_WRITE();
	SLIST_FOREACH(st, &gcl, gc_list) {
		if (st->timeout != PFTM_UNLINKED)
			pf_remove_state(st);

		pf_free_state(st);
	}
	PF_STATE_EXIT_WRITE();
	PF_UNLOCK();
	rw_exit_write(&pf_state_list.pfs_rwl);
	NET_UNLOCK();

	while ((st = SLIST_FIRST(&gcl)) != NULL) {
		SLIST_REMOVE_HEAD(&gcl, gc_list);
		pf_state_unref(st);
	}
}
int
pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE)
		return (0);
	if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, 1)) == NULL)
		return (1);
	return (0);
}

void
pf_tbladdr_remove(struct pf_addr_wrap *aw)
{
	if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
		return;
	pfr_detach_table(aw->p.tbl);
	aw->p.tbl = NULL;
}

void
pf_tbladdr_copyout(struct pf_addr_wrap *aw)
{
	struct pfr_ktable *kt = aw->p.tbl;

	if (aw->type != PF_ADDR_TABLE || kt == NULL)
		return;
	if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
		kt = kt->pfrkt_root;
	aw->p.tbl = NULL;
	aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
	    kt->pfrkt_cnt : -1;
}

void
pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
{
	switch (af) {
	case AF_INET: {
		u_int32_t a = ntohl(addr->addr32[0]);
		addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
		    (a>>8)&255, a&255);
		if (p) {
			p = ntohs(p);
			addlog(":%u", p);
		}
		break;
	}
#ifdef INET6
	case AF_INET6: {
		u_int16_t b;
		u_int8_t i, curstart, curend, maxstart, maxend;
		curstart = curend = maxstart = maxend = 255;
		for (i = 0; i < 8; i++) {
			if (!addr->addr16[i]) {
				if (curstart == 255)
					curstart = i;
				curend = i;
			} else {
				if ((curend - curstart) >
				    (maxend - maxstart)) {
					maxstart = curstart;
					maxend = curend;
				}
				curstart = curend = 255;
			}
		}
		if ((curend - curstart) >
		    (maxend - maxstart)) {
			maxstart = curstart;
			maxend = curend;
		}
		for (i = 0; i < 8; i++) {
			if (i >= maxstart && i <= maxend) {
				if (i == 0)
					addlog(":");
				if (i == maxend)
					addlog(":");
			} else {
				b = ntohs(addr->addr16[i]);
				addlog("%x", b);
				if (i < 7)
					addlog(":");
			}
		}
		if (p) {
			p = ntohs(p);
			addlog("[%u]", p);
		}
		break;
	}
#endif /* INET6 */
	}
}
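
/*
 * Output format of pf_print_host(), for reference: v4 addresses print
 * as "a.b.c.d:port"; v6 addresses compress their longest run of zero
 * groups and put the port in brackets, so 2001:db8:0:0:0:0:0:1 with
 * port 80 (both in network byte order) prints as "2001:db8::1[80]".
 */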
void
pf_print_state(struct pf_state *s)
{
	pf_print_state_parts(s, NULL, NULL);
}

void
pf_print_state_parts(struct pf_state *s,
    struct pf_state_key *skwp, struct pf_state_key *sksp)
{
	struct pf_state_key *skw, *sks;
	u_int8_t proto, dir;

	/* Do our best to fill these, but they're skipped if NULL */
	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
	proto = skw ? skw->proto : (sks ? sks->proto : 0);
	dir = s ? s->direction : 0;

	switch (proto) {
	case IPPROTO_IPV4:
		addlog("IPv4");
		break;
	case IPPROTO_IPV6:
		addlog("IPv6");
		break;
	case IPPROTO_TCP:
		addlog("TCP");
		break;
	case IPPROTO_UDP:
		addlog("UDP");
		break;
	case IPPROTO_ICMP:
		addlog("ICMP");
		break;
	case IPPROTO_ICMPV6:
		addlog("ICMPv6");
		break;
	default:
		addlog("%u", proto);
		break;
	}
	switch (dir) {
	case PF_IN:
		addlog(" in");
		break;
	case PF_OUT:
		addlog(" out");
		break;
	}
	if (skw) {
		addlog(" wire: (%d) ", skw->rdomain);
		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
		addlog(" ");
		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
	}
	if (sks) {
		addlog(" stack: (%d) ", sks->rdomain);
		if (sks != skw) {
			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
			addlog(" ");
			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
		} else
			addlog("-");
	}
	if (s) {
		if (proto == IPPROTO_TCP) {
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->src.seqlo, s->src.seqhi,
			    s->src.max_win, s->src.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->src.wscale & PF_WSCALE_MASK);
			addlog("]");
			addlog(" [lo=%u high=%u win=%u modulator=%u",
			    s->dst.seqlo, s->dst.seqhi,
			    s->dst.max_win, s->dst.seqdiff);
			if (s->src.wscale && s->dst.wscale)
				addlog(" wscale=%u",
				    s->dst.wscale & PF_WSCALE_MASK);
			addlog("]");
		}
		addlog(" %u:%u", s->src.state, s->dst.state);
		if (s->rule.ptr)
			addlog(" @%d", s->rule.ptr->nr);
	}
}

void
pf_print_flags(u_int8_t f)
{
	if (f)
		addlog(" ");
	if (f & TH_FIN)
		addlog("F");
	if (f & TH_SYN)
		addlog("S");
	if (f & TH_RST)
		addlog("R");
	if (f & TH_PUSH)
		addlog("P");
	if (f & TH_ACK)
		addlog("A");
	if (f & TH_URG)
		addlog("U");
	if (f & TH_ECE)
		addlog("E");
	if (f & TH_CWR)
		addlog("W");
}
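
/*
 * Skip steps are a ruleset evaluation optimisation: for each field in
 * PF_SKIP_COUNT (interface, direction, rdomain, af, proto, addresses,
 * ports), every rule gets a pointer to the next rule that differs in
 * that field.  When a rule fails to match on, say, the interface, the
 * evaluation loop can jump over the whole run of consecutive rules
 * sharing that interface instead of testing each one.  For example, in
 *
 *	pass in on em0 proto tcp to port 22
 *	pass in on em0 proto tcp to port 80
 *	pass in on em1 ...
 *
 * a packet arriving on em1 skips from the first rule straight to the
 * third (illustrative ruleset, not from this file).
 */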
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

void
pf_calc_skip_steps(struct pf_rulequeue *rules)
{
	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
		if (cur->onrdomain != prev->onrdomain ||
		    cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
		if (cur->af != prev->af)
			PF_SET_SKIP_STEPS(PF_SKIP_AF);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
		if (cur->src.neg != prev->src.neg ||
		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
		if (cur->dst.neg != prev->dst.neg ||
		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
		if (cur->src.port[0] != prev->src.port[0] ||
		    cur->src.port[1] != prev->src.port[1] ||
		    cur->src.port_op != prev->src.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
		if (cur->dst.port[0] != prev->dst.port[0] ||
		    cur->dst.port[1] != prev->dst.port[1] ||
		    cur->dst.port_op != prev->dst.port_op)
			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	for (i = 0; i < PF_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}

int
pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
{
	if (aw1->type != aw2->type)
		return (1);
	switch (aw1->type) {
	case PF_ADDR_ADDRMASK:
	case PF_ADDR_RANGE:
		if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
			return (1);
		if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
			return (1);
		return (0);
	case PF_ADDR_DYNIFTL:
		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
	case PF_ADDR_NONE:
	case PF_ADDR_NOROUTE:
	case PF_ADDR_URPFFAILED:
		return (0);
	case PF_ADDR_TABLE:
		return (aw1->p.tbl != aw2->p.tbl);
	case PF_ADDR_RTLABEL:
		return (aw1->v.rtlabel != aw2->v.rtlabel);
	default:
		addlog("invalid address type: %d\n", aw1->type);
		return (1);
	}
}

/* This algorithm computes 'a + b - c' in ones-complement using a trick to
 * emulate at most one ones-complement subtraction. This thereby limits net
 * carries/borrows to at most one, eliminating a reduction step and saving one
 * each of +, >>, & and ~.
 *
 * def. x mod y = x - (x//y)*y for integer x,y
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16) mod 2^16
 *
 * The trick works as follows: subtracting exactly one u_int16_t from the
 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
 * ones-complement borrow:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ assume underflow: accumulator := 2^16 - 1 }
 *	(sum + 2^16 - 1) mod 2^16
 * =	{ mod }
 *	(sum - 1) mod 2^16
 *
 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
 * to zero as that requires subtraction of at least 2^16, which exceeds a
 * single u_int16_t's range.
 *
 * We use the following theorem to derive the implementation:
 *
 * th.	(x + (y mod z)) mod z = (x + y) mod z	(0)
 * proof.
 *	(x + (y mod z)) mod z
 * =	{ def mod }
 *	(x + y - (y//z)*z) mod z
 * =	{ (a + b*c) mod c = a mod c }
 *	(x + y) mod z			[end of proof]
 *
 * ... and thereby obtain:
 *
 *	(sum + accumulator) mod 2^16
 * =	{ def. accumulator, def. sum }
 *	(x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
 * =	{ (0), twice }
 *	(x + (x >> 16)) mod 2^16
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	(x + (x >> 16)) & 0xffff
 *
 * Note: this serves also as a reduction step for at most one add (as the
 * trailing mod 2^16 prevents further reductions by destroying carries).
 */
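/*
 * Worked example (illustrative values): rewriting a 16-bit port from 80
 * (0x0050) to 8080 (0x1f90) with a stored checksum of 0x1234 gives
 *
 *	x = 0x1234 + 0x0050 - 0x1f90 = 0xfffff2f4	(borrow wrapped)
 *	x = (0xfffff2f4 + 0xffff) & 0xffff = 0xf2f3
 *
 * which matches the ones-complement result of adding 0x0050 and ~0x1f90
 * to 0x1234.  The special-casing of 0x0000 below exists because in UDP
 * a zero checksum on the wire means "no checksum at all" (RFC 768), so
 * it must neither be updated nor be produced by an update.
 */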
static __inline void
pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
    u_int8_t proto)
{
	u_int32_t x;
	const int udp = proto == IPPROTO_UDP;

	x = *cksum + was - now;
	x = (x + (x >> 16)) & 0xffff;

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

#ifdef INET6
/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
static __inline void
pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
}

/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
static __inline void
pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
{
	pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
}
#endif /* INET6 */

/* pre: *a is 16-bit aligned within its packet
 *
 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
 * machine by conserving ones-complement's otherwise discarded carries in the
 * upper bits of x. These accumulated carries when added to the lower 16-bits
 * over at least zero 'reduction' steps then complete the ones-complement sum.
 *
 * def. sum = x mod 2^16
 * def. accumulator = (x >> 16)
 *
 * At most two reduction steps
 *
 *	x := sum + accumulator
 * =	{ def sum, def accumulator }
 *	x := x mod 2^16 + (x >> 16)
 * =	{ x mod 2^n = x & (2^n - 1) }
 *	x := (x & 0xffff) + (x >> 16)
 *
 * are necessary to incorporate the accumulated carries (at most one per add)
 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
 *
 * The function is also invariant over the endian of the host. Why?
 *
 * Define the unary transpose operator ~ on a bitstring in python slice
 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
 *
 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
 *
 *	~m +_1 ~n = ~(m +_1 n)    (for all bitstrings m,n of equal length)
 *
 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
 * 'half-adds'. Under ones-complement addition, each half-add carries to the
 * other, so the sum of each half-add is unaffected by their relative
 * order. Therefore (writing s for the sum m +_1 n):
 *
 *	~m +_1 ~n
 * =	{ half-adds invariant under transposition }
 *	~s
 * =	{ substitute }
 *	~(m +_1 n)				[end of proof]
 *
 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
 * with the converse endian does not alter the result.
 *
 * proof.
 *	{ converse machine endian: load/store transposes, P := 8 }
 *	~(~m +_1 ~n)
 * =	{ ~ over +_1 }
 *	~~m +_1 ~~n
 * =	{ ~ is an involution }
 *	m +_1 n				[end of proof]
 *
 */
#define NEG(x) ((u_int16_t)~(x))
void
pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
    const struct pf_addr *an, sa_family_t af, u_int8_t proto)
{
	u_int32_t	 x;
	const u_int16_t	*n = an->addr16;
	const u_int16_t	*o = a->addr16;
	const int	 udp = proto == IPPROTO_UDP;

	switch (af) {
	case AF_INET:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
		break;
#ifdef INET6
	case AF_INET6:
		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\
			     o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\
			     o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\
			     o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}

	x = (x & 0xffff) + (x >> 16);
	x = (x & 0xffff) + (x >> 16);

	/* optimise: eliminate a branch when not udp */
	if (udp && *cksum == 0x0000)
		return;
	if (udp && x == 0x0000)
		x = 0xffff;

	*cksum = (u_int16_t)(x);
}

int
pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
{
	int	rewrite = 0;

	if (*f != v) {
		u_int16_t old = htons(hi ? (*f << 8) : *f);
		u_int16_t new = htons(hi ? ( v << 8) :  v);

		pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}

/* pre: *f is 16-bit aligned within its packet */
int
pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
{
	int	rewrite = 0;

	if (*f != v) {
		pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
		*f = v;
		rewrite = 1;
	}

	return (rewrite);
}
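
/*
 * Typical use (sketch, not a call site from this file): rewriting a TCP
 * destination port during translation while keeping the checksum valid,
 * with pd->pcksum already pointing at the TCP checksum field:
 *
 *	rewrite += pf_patch_16(pd, &pd->hdr.tcp.th_dport, htons(8080));
 *
 * The accumulated return value is nonzero whenever the packet was
 * actually modified, telling callers that the header must be copied
 * back out.
 */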
2002 * { converse machine endian: load/store transposes, P := 8 } 2003 * ~(~m +_1 ~n) 2004 * = { ~ over +_1 } 2005 * ~~m +_1 ~~n 2006 * = { ~ is an involution } 2007 * m +_1 n [end of proof] 2008 * 2009 */ 2010 #define NEG(x) ((u_int16_t)~(x)) 2011 void 2012 pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2013 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2014 { 2015 u_int32_t x; 2016 const u_int16_t *n = an->addr16; 2017 const u_int16_t *o = a->addr16; 2018 const int udp = proto == IPPROTO_UDP; 2019 2020 switch (af) { 2021 case AF_INET: 2022 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2023 break; 2024 #ifdef INET6 2025 case AF_INET6: 2026 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 2027 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 2028 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 2029 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2030 break; 2031 #endif /* INET6 */ 2032 default: 2033 unhandled_af(af); 2034 } 2035 2036 x = (x & 0xffff) + (x >> 16); 2037 x = (x & 0xffff) + (x >> 16); 2038 2039 /* optimise: eliminate a branch when not udp */ 2040 if (udp && *cksum == 0x0000) 2041 return; 2042 if (udp && x == 0x0000) 2043 x = 0xffff; 2044 2045 *cksum = (u_int16_t)(x); 2046 } 2047 2048 int 2049 pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2050 { 2051 int rewrite = 0; 2052 2053 if (*f != v) { 2054 u_int16_t old = htons(hi ? (*f << 8) : *f); 2055 u_int16_t new = htons(hi ? ( v << 8) : v); 2056 2057 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2058 *f = v; 2059 rewrite = 1; 2060 } 2061 2062 return (rewrite); 2063 } 2064 2065 /* pre: *f is 16-bit aligned within its packet */ 2066 int 2067 pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2068 { 2069 int rewrite = 0; 2070 2071 if (*f != v) { 2072 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2073 *f = v; 2074 rewrite = 1; 2075 } 2076 2077 return (rewrite); 2078 } 2079 2080 int 2081 pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2082 { 2083 int rewrite = 0; 2084 u_int8_t *fb = (u_int8_t*)f; 2085 u_int8_t *vb = (u_int8_t*)&v; 2086 2087 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2088 return (pf_patch_16(pd, f, v)); /* optimise */ 2089 } 2090 2091 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2092 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2093 2094 return (rewrite); 2095 } 2096 2097 /* pre: *f is 16-bit aligned within its packet */ 2098 /* pre: pd->proto != IPPROTO_UDP */ 2099 int 2100 pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2101 { 2102 int rewrite = 0; 2103 u_int16_t *pc = pd->pcksum; 2104 u_int8_t proto = pd->proto; 2105 2106 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2107 if (proto == IPPROTO_UDP) 2108 panic("%s: udp", __func__); 2109 2110 /* optimise: skip *f != v guard; true for all use-cases */ 2111 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2112 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2113 2114 *f = v; 2115 rewrite = 1; 2116 2117 return (rewrite); 2118 } 2119 2120 int 2121 pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2122 { 2123 int rewrite = 0; 2124 u_int8_t *fb = (u_int8_t*)f; 2125 u_int8_t *vb = (u_int8_t*)&v; 2126 2127 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2128 return (pf_patch_32(pd, f, v)); /* optimise */ 2129 } 2130 2131 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2132 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2133 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2134 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2135 2136 return (rewrite); 2137 } 2138 
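/*
 * Example (editor's sketch, not compiled into pf): the fixup and patch
 * helpers above perform incremental checksum updates in the style of
 * RFC 1624, so rewriting a field never requires re-summing the whole
 * segment.  A minimal userland equivalent of pf_cksum_fixup() for a
 * non-UDP protocol, with made-up example values:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	static uint16_t
 *	cksum_fixup(uint16_t cksum, uint16_t was, uint16_t now)
 *	{
 *		uint32_t x = (uint32_t)cksum + was - now;
 *
 *		return ((uint16_t)((x + (x >> 16)) & 0xffff));
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		uint16_t cksum = 0x1234;
 *
 *		// rewrite a port field: 0x0050 (80) -> 0x1f90 (8080)
 *		cksum = cksum_fixup(cksum, 0x0050, 0x1f90);
 *		printf("%04x\n", cksum);	// prints f2f3
 *		return (0);
 *	}
 */
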
2139 int 2140 pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2141 u_int16_t *virtual_id, u_int16_t *virtual_type) 2142 { 2143 /* 2144 * ICMP types marked with PF_OUT are typically responses to 2145 * PF_IN, and will match states in the opposite direction. 2146 * PF_IN ICMP types need to match a state with that type. 2147 */ 2148 *icmp_dir = PF_OUT; 2149 2150 /* Queries (and responses) */ 2151 switch (pd->af) { 2152 case AF_INET: 2153 switch (type) { 2154 case ICMP_ECHO: 2155 *icmp_dir = PF_IN; 2156 /* FALLTHROUGH */ 2157 case ICMP_ECHOREPLY: 2158 *virtual_type = ICMP_ECHO; 2159 *virtual_id = pd->hdr.icmp.icmp_id; 2160 break; 2161 2162 case ICMP_TSTAMP: 2163 *icmp_dir = PF_IN; 2164 /* FALLTHROUGH */ 2165 case ICMP_TSTAMPREPLY: 2166 *virtual_type = ICMP_TSTAMP; 2167 *virtual_id = pd->hdr.icmp.icmp_id; 2168 break; 2169 2170 case ICMP_IREQ: 2171 *icmp_dir = PF_IN; 2172 /* FALLTHROUGH */ 2173 case ICMP_IREQREPLY: 2174 *virtual_type = ICMP_IREQ; 2175 *virtual_id = pd->hdr.icmp.icmp_id; 2176 break; 2177 2178 case ICMP_MASKREQ: 2179 *icmp_dir = PF_IN; 2180 /* FALLTHROUGH */ 2181 case ICMP_MASKREPLY: 2182 *virtual_type = ICMP_MASKREQ; 2183 *virtual_id = pd->hdr.icmp.icmp_id; 2184 break; 2185 2186 case ICMP_IPV6_WHEREAREYOU: 2187 *icmp_dir = PF_IN; 2188 /* FALLTHROUGH */ 2189 case ICMP_IPV6_IAMHERE: 2190 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2191 *virtual_id = 0; /* Nothing sane to match on! */ 2192 break; 2193 2194 case ICMP_MOBILE_REGREQUEST: 2195 *icmp_dir = PF_IN; 2196 /* FALLTHROUGH */ 2197 case ICMP_MOBILE_REGREPLY: 2198 *virtual_type = ICMP_MOBILE_REGREQUEST; 2199 *virtual_id = 0; /* Nothing sane to match on! */ 2200 break; 2201 2202 case ICMP_ROUTERSOLICIT: 2203 *icmp_dir = PF_IN; 2204 /* FALLTHROUGH */ 2205 case ICMP_ROUTERADVERT: 2206 *virtual_type = ICMP_ROUTERSOLICIT; 2207 *virtual_id = 0; /* Nothing sane to match on! */ 2208 break; 2209 2210 /* These ICMP types map to other connections */ 2211 case ICMP_UNREACH: 2212 case ICMP_SOURCEQUENCH: 2213 case ICMP_REDIRECT: 2214 case ICMP_TIMXCEED: 2215 case ICMP_PARAMPROB: 2216 /* These will not be used, but set them anyway */ 2217 *icmp_dir = PF_IN; 2218 *virtual_type = htons(type); 2219 *virtual_id = 0; 2220 return (1); /* These types match to another state */ 2221 2222 /* 2223 * All remaining ICMP types get their own states, 2224 * and will only match in one direction. 2225 */ 2226 default: 2227 *icmp_dir = PF_IN; 2228 *virtual_type = type; 2229 *virtual_id = 0; 2230 break; 2231 } 2232 break; 2233 #ifdef INET6 2234 case AF_INET6: 2235 switch (type) { 2236 case ICMP6_ECHO_REQUEST: 2237 *icmp_dir = PF_IN; 2238 /* FALLTHROUGH */ 2239 case ICMP6_ECHO_REPLY: 2240 *virtual_type = ICMP6_ECHO_REQUEST; 2241 *virtual_id = pd->hdr.icmp6.icmp6_id; 2242 break; 2243 2244 case MLD_LISTENER_QUERY: 2245 case MLD_LISTENER_REPORT: { 2246 struct mld_hdr *mld = &pd->hdr.mld; 2247 u_int32_t h; 2248 2249 /* 2250 * Listener Report can be sent by clients 2251 * without an associated Listener Query. 2252 * In addition to that, when Report is sent as a 2253 * reply to a Query its source and destination 2254 * address are different. 
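			 * Hashing the multicast group address into a
			 * fake id below therefore gives Query and Report
			 * a shared, stable state key.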
2255 */ 2256 *icmp_dir = PF_IN; 2257 *virtual_type = MLD_LISTENER_QUERY; 2258 /* generate fake id for these messages */ 2259 h = mld->mld_addr.s6_addr32[0] ^ 2260 mld->mld_addr.s6_addr32[1] ^ 2261 mld->mld_addr.s6_addr32[2] ^ 2262 mld->mld_addr.s6_addr32[3]; 2263 *virtual_id = (h >> 16) ^ (h & 0xffff); 2264 break; 2265 } 2266 2267 /* 2268 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 2269 * ICMP6_WRU 2270 */ 2271 case ICMP6_WRUREQUEST: 2272 *icmp_dir = PF_IN; 2273 /* FALLTHROUGH */ 2274 case ICMP6_WRUREPLY: 2275 *virtual_type = ICMP6_WRUREQUEST; 2276 *virtual_id = 0; /* Nothing sane to match on! */ 2277 break; 2278 2279 case MLD_MTRACE: 2280 *icmp_dir = PF_IN; 2281 /* FALLTHROUGH */ 2282 case MLD_MTRACE_RESP: 2283 *virtual_type = MLD_MTRACE; 2284 *virtual_id = 0; /* Nothing sane to match on! */ 2285 break; 2286 2287 case ND_NEIGHBOR_SOLICIT: 2288 *icmp_dir = PF_IN; 2289 /* FALLTHROUGH */ 2290 case ND_NEIGHBOR_ADVERT: { 2291 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 2292 u_int32_t h; 2293 2294 *virtual_type = ND_NEIGHBOR_SOLICIT; 2295 /* generate fake id for these messages */ 2296 h = nd->nd_ns_target.s6_addr32[0] ^ 2297 nd->nd_ns_target.s6_addr32[1] ^ 2298 nd->nd_ns_target.s6_addr32[2] ^ 2299 nd->nd_ns_target.s6_addr32[3]; 2300 *virtual_id = (h >> 16) ^ (h & 0xffff); 2301 break; 2302 } 2303 2304 /* 2305 * These ICMP types map to other connections. 2306 * ND_REDIRECT can't be in this list because the triggering 2307 * packet header is optional. 2308 */ 2309 case ICMP6_DST_UNREACH: 2310 case ICMP6_PACKET_TOO_BIG: 2311 case ICMP6_TIME_EXCEEDED: 2312 case ICMP6_PARAM_PROB: 2313 /* These will not be used, but set them anyway */ 2314 *icmp_dir = PF_IN; 2315 *virtual_type = htons(type); 2316 *virtual_id = 0; 2317 return (1); /* These types match to another state */ 2318 /* 2319 * All remaining ICMP6 types get their own states, 2320 * and will only match in one direction. 
 */
                default:
                        *icmp_dir = PF_IN;
                        *virtual_type = type;
                        *virtual_id = 0;
                        break;
                }
                break;
#endif /* INET6 */
        }
        *virtual_type = htons(*virtual_type);
        return (0);  /* These types match to their own state */
}

void
pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
    struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
{
        /* note: doesn't bother to fix up quoted checksums, if any */

        /* change quoted protocol port */
        if (qp != NULL)
                pf_patch_16(pd, qp, np);

        /* change quoted ip address */
        pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
        pf_addrcpy(qa, na, pd->af);

        /* change network-header's ip address */
        if (oa)
                pf_translate_a(pd, oa, na);
}

/* pre: *a is 16-bit aligned within its packet */
/* *a is a network header src/dst address */
int
pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
{
        int rewrite = 0;

        /* warning: !PF_ANEQ != PF_AEQ */
        if (!PF_ANEQ(a, an, pd->af))
                return (0);

        /* fixup transport pseudo-header, if any */
        switch (pd->proto) {
        case IPPROTO_TCP:       /* FALLTHROUGH */
        case IPPROTO_UDP:       /* FALLTHROUGH */
        case IPPROTO_ICMPV6:
                pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
                break;
        default:
                break;          /* assume no pseudo-header */
        }

        pf_addrcpy(a, an, pd->af);
        rewrite = 1;

        return (rewrite);
}

#ifdef INET6
/* pf_translate_af() may change pd->m, adjust local copies after calling */
int
pf_translate_af(struct pf_pdesc *pd)
{
        static const struct pf_addr zero;
        struct ip               *ip4;
        struct ip6_hdr          *ip6;
        int                      copyback = 0;
        u_int                    hlen, ohlen, dlen;
        u_int16_t               *pc;
        u_int8_t                 af_proto, naf_proto;

        hlen = (pd->naf == AF_INET) ?
sizeof(*ip4) : sizeof(*ip6); 2396 ohlen = pd->off; 2397 dlen = pd->tot_len - pd->off; 2398 pc = pd->pcksum; 2399 2400 af_proto = naf_proto = pd->proto; 2401 if (naf_proto == IPPROTO_ICMP) 2402 af_proto = IPPROTO_ICMPV6; 2403 if (naf_proto == IPPROTO_ICMPV6) 2404 af_proto = IPPROTO_ICMP; 2405 2406 /* uncover stale pseudo-header */ 2407 switch (af_proto) { 2408 case IPPROTO_ICMPV6: 2409 /* optimise: unchanged for TCP/UDP */ 2410 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 2411 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 2412 /* FALLTHROUGH */ 2413 case IPPROTO_UDP: /* FALLTHROUGH */ 2414 case IPPROTO_TCP: 2415 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 2416 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 2417 copyback = 1; 2418 break; 2419 default: 2420 break; /* assume no pseudo-header */ 2421 } 2422 2423 /* replace the network header */ 2424 m_adj(pd->m, pd->off); 2425 pd->src = NULL; 2426 pd->dst = NULL; 2427 2428 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 2429 pd->m = NULL; 2430 return (-1); 2431 } 2432 2433 pd->off = hlen; 2434 pd->tot_len += hlen - ohlen; 2435 2436 switch (pd->naf) { 2437 case AF_INET: 2438 ip4 = mtod(pd->m, struct ip *); 2439 memset(ip4, 0, hlen); 2440 ip4->ip_v = IPVERSION; 2441 ip4->ip_hl = hlen >> 2; 2442 ip4->ip_tos = pd->tos; 2443 ip4->ip_len = htons(hlen + dlen); 2444 ip4->ip_id = htons(ip_randomid()); 2445 ip4->ip_off = htons(IP_DF); 2446 ip4->ip_ttl = pd->ttl; 2447 ip4->ip_p = pd->proto; 2448 ip4->ip_src = pd->nsaddr.v4; 2449 ip4->ip_dst = pd->ndaddr.v4; 2450 break; 2451 case AF_INET6: 2452 ip6 = mtod(pd->m, struct ip6_hdr *); 2453 memset(ip6, 0, hlen); 2454 ip6->ip6_vfc = IPV6_VERSION; 2455 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 2456 ip6->ip6_plen = htons(dlen); 2457 ip6->ip6_nxt = pd->proto; 2458 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 2459 ip6->ip6_hlim = IPV6_DEFHLIM; 2460 else 2461 ip6->ip6_hlim = pd->ttl; 2462 ip6->ip6_src = pd->nsaddr.v6; 2463 ip6->ip6_dst = pd->ndaddr.v6; 2464 break; 2465 default: 2466 unhandled_af(pd->naf); 2467 } 2468 2469 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 2470 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 2471 pd->naf == AF_INET6) { 2472 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 2473 } 2474 2475 /* cover fresh pseudo-header */ 2476 switch (naf_proto) { 2477 case IPPROTO_ICMPV6: 2478 /* optimise: unchanged for TCP/UDP */ 2479 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 2480 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 2481 /* FALLTHROUGH */ 2482 case IPPROTO_UDP: /* FALLTHROUGH */ 2483 case IPPROTO_TCP: 2484 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 2485 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 2486 copyback = 1; 2487 break; 2488 default: 2489 break; /* assume no pseudo-header */ 2490 } 2491 2492 /* flush pd->pcksum */ 2493 if (copyback) 2494 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 2495 2496 return (0); 2497 } 2498 2499 int 2500 pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 2501 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 2502 sa_family_t af, sa_family_t naf) 2503 { 2504 struct mbuf *n = NULL; 2505 struct ip *ip4; 2506 struct ip6_hdr *ip6; 2507 u_int hlen, ohlen, dlen; 2508 int d; 2509 2510 if (af == naf || (af != AF_INET && af != AF_INET6) || 2511 (naf != AF_INET && naf != AF_INET6)) 2512 return (-1); 2513 2514 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 2515 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 2516 
                return (-1);

        /* new quoted header */
        hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
        /* old quoted header */
        ohlen = pd2->off - ipoff2;

        /* trim old quoted header */
        pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
        m_adj(n, ohlen);

        /* prepend a new, translated, quoted header */
        if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
                return (-1);

        switch (naf) {
        case AF_INET:
                ip4 = mtod(n, struct ip *);
                memset(ip4, 0, sizeof(*ip4));
                ip4->ip_v = IPVERSION;
                ip4->ip_hl = sizeof(*ip4) >> 2;
                ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
                ip4->ip_id = htons(ip_randomid());
                ip4->ip_off = htons(IP_DF);
                ip4->ip_ttl = pd2->ttl;
                if (pd2->proto == IPPROTO_ICMPV6)
                        ip4->ip_p = IPPROTO_ICMP;
                else
                        ip4->ip_p = pd2->proto;
                ip4->ip_src = src->v4;
                ip4->ip_dst = dst->v4;
                ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
                break;
        case AF_INET6:
                ip6 = mtod(n, struct ip6_hdr *);
                memset(ip6, 0, sizeof(*ip6));
                ip6->ip6_vfc = IPV6_VERSION;
                ip6->ip6_plen = htons(pd2->tot_len - ohlen);
                if (pd2->proto == IPPROTO_ICMP)
                        ip6->ip6_nxt = IPPROTO_ICMPV6;
                else
                        ip6->ip6_nxt = pd2->proto;
                if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
                        ip6->ip6_hlim = IPV6_DEFHLIM;
                else
                        ip6->ip6_hlim = pd2->ttl;
                ip6->ip6_src = src->v6;
                ip6->ip6_dst = dst->v6;
                break;
        }

        /* cover new quoted header */
        /* optimise: any new AF_INET header of ours sums to zero */
        if (naf != AF_INET) {
                pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
        }

        /* reattach modified quoted packet to outer header */
        {
                int nlen = n->m_pkthdr.len;
                m_cat(m, n);
                m->m_pkthdr.len += nlen;
        }

        /* account for altered length */
        d = hlen - ohlen;

        if (pd->proto == IPPROTO_ICMPV6) {
                /* fixup pseudo-header */
                dlen = pd->tot_len - pd->off;
                pf_cksum_fixup(pd->pcksum,
                    htons(dlen), htons(dlen + d), pd->proto);
        }

        pd->tot_len += d;
        pd2->tot_len += d;
        pd2->off += d;

        /* note: not bothering to update network headers as
           these are due for rewrite by pf_translate_af() */

        return (0);
}


#define PTR_IP(field)   (offsetof(struct ip, field))
#define PTR_IP6(field)  (offsetof(struct ip6_hdr, field))

int
pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
{
        struct icmp             *icmp4;
        struct icmp6_hdr        *icmp6;
        u_int32_t                mtu;
        int32_t                  ptr = -1;
        u_int8_t                 type;
        u_int8_t                 code;

        switch (af) {
        case AF_INET:
                icmp6 = arg;
                type = icmp6->icmp6_type;
                code = icmp6->icmp6_code;
                mtu = ntohl(icmp6->icmp6_mtu);

                switch (type) {
                case ICMP6_ECHO_REQUEST:
                        type = ICMP_ECHO;
                        break;
                case ICMP6_ECHO_REPLY:
                        type = ICMP_ECHOREPLY;
                        break;
                case ICMP6_DST_UNREACH:
                        type = ICMP_UNREACH;
                        switch (code) {
                        case ICMP6_DST_UNREACH_NOROUTE:
                        case ICMP6_DST_UNREACH_BEYONDSCOPE:
                        case ICMP6_DST_UNREACH_ADDR:
                                code = ICMP_UNREACH_HOST;
                                break;
                        case ICMP6_DST_UNREACH_ADMIN:
                                code = ICMP_UNREACH_HOST_PROHIB;
                                break;
                        case ICMP6_DST_UNREACH_NOPORT:
                                code = ICMP_UNREACH_PORT;
                                break;
                        default:
                                return (-1);
                        }
                        break;
                case ICMP6_PACKET_TOO_BIG:
                        type = ICMP_UNREACH;
                        code = ICMP_UNREACH_NEEDFRAG;
                        /* an IPv4 packet grows by 20 bytes when it is
                         * translated to IPv6, so the usable IPv4 path
                         * MTU is smaller by as much (cf. RFC 7915) */
                        mtu -= 20;
                        break;
                case ICMP6_TIME_EXCEEDED:
                        type = ICMP_TIMXCEED;
                        break;
                case ICMP6_PARAM_PROB:
                        switch (code) {
                        case ICMP6_PARAMPROB_HEADER:
                                type = ICMP_PARAMPROB;
                                code = ICMP_PARAMPROB_ERRATPTR;
                                ptr = ntohl(icmp6->icmp6_pptr);

                                if (ptr == PTR_IP6(ip6_vfc))
                                        ; /* preserve */
                                else if (ptr == PTR_IP6(ip6_vfc) + 1)
                                        ptr = PTR_IP(ip_tos);
                                else if (ptr == PTR_IP6(ip6_plen) ||
                                    ptr == PTR_IP6(ip6_plen) + 1)
                                        ptr = PTR_IP(ip_len);
                                else if (ptr == PTR_IP6(ip6_nxt))
                                        ptr = PTR_IP(ip_p);
                                else if (ptr == PTR_IP6(ip6_hlim))
                                        ptr = PTR_IP(ip_ttl);
                                else if (ptr >= PTR_IP6(ip6_src) &&
                                    ptr < PTR_IP6(ip6_dst))
                                        ptr = PTR_IP(ip_src);
                                else if (ptr >= PTR_IP6(ip6_dst) &&
                                    ptr < sizeof(struct ip6_hdr))
                                        ptr = PTR_IP(ip_dst);
                                else {
                                        return (-1);
                                }
                                break;
                        case ICMP6_PARAMPROB_NEXTHEADER:
                                type = ICMP_UNREACH;
                                code = ICMP_UNREACH_PROTOCOL;
                                break;
                        default:
                                return (-1);
                        }
                        break;
                default:
                        return (-1);
                }

                pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
                pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);

                /* aligns well with an icmpv4 nextmtu */
                pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));

                /* icmpv4 pptr is a single byte, the most significant one */
                if (ptr >= 0)
                        pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
                break;
        case AF_INET6:
                icmp4 = arg;
                type = icmp4->icmp_type;
                code = icmp4->icmp_code;
                mtu = ntohs(icmp4->icmp_nextmtu);

                switch (type) {
                case ICMP_ECHO:
                        type = ICMP6_ECHO_REQUEST;
                        break;
                case ICMP_ECHOREPLY:
                        type = ICMP6_ECHO_REPLY;
                        break;
                case ICMP_UNREACH:
                        type = ICMP6_DST_UNREACH;
                        switch (code) {
                        case ICMP_UNREACH_NET:
                        case ICMP_UNREACH_HOST:
                        case ICMP_UNREACH_NET_UNKNOWN:
                        case ICMP_UNREACH_HOST_UNKNOWN:
                        case ICMP_UNREACH_ISOLATED:
                        case ICMP_UNREACH_TOSNET:
                        case ICMP_UNREACH_TOSHOST:
                                code = ICMP6_DST_UNREACH_NOROUTE;
                                break;
                        case ICMP_UNREACH_PORT:
                                code = ICMP6_DST_UNREACH_NOPORT;
                                break;
                        case ICMP_UNREACH_NET_PROHIB:
                        case ICMP_UNREACH_HOST_PROHIB:
                        case ICMP_UNREACH_FILTER_PROHIB:
                        case ICMP_UNREACH_PRECEDENCE_CUTOFF:
                                code = ICMP6_DST_UNREACH_ADMIN;
                                break;
                        case ICMP_UNREACH_PROTOCOL:
                                type = ICMP6_PARAM_PROB;
                                code = ICMP6_PARAMPROB_NEXTHEADER;
                                ptr = offsetof(struct ip6_hdr, ip6_nxt);
                                break;
                        case ICMP_UNREACH_NEEDFRAG:
                                type = ICMP6_PACKET_TOO_BIG;
                                code = 0;
                                /* an IPv6 packet shrinks by 20 bytes when
                                 * it is translated to IPv4, so the path
                                 * MTU grows by as much (cf. RFC 7915) */
                                mtu += 20;
                                break;
                        default:
                                return (-1);
                        }
                        break;
                case ICMP_TIMXCEED:
                        type = ICMP6_TIME_EXCEEDED;
                        break;
                case ICMP_PARAMPROB:
                        type = ICMP6_PARAM_PROB;
                        switch (code) {
                        case ICMP_PARAMPROB_ERRATPTR:
                                code = ICMP6_PARAMPROB_HEADER;
                                break;
                        case ICMP_PARAMPROB_LENGTH:
                                code = ICMP6_PARAMPROB_HEADER;
                                break;
                        default:
                                return (-1);
                        }

                        ptr = icmp4->icmp_pptr;
                        if (ptr == 0 || ptr == PTR_IP(ip_tos))
                                ; /* preserve */
                        else if (ptr == PTR_IP(ip_len) ||
                            ptr == PTR_IP(ip_len) + 1)
                                ptr = PTR_IP6(ip6_plen);
                        else if (ptr == PTR_IP(ip_ttl))
                                ptr = PTR_IP6(ip6_hlim);
                        else if (ptr == PTR_IP(ip_p))
                                ptr = PTR_IP6(ip6_nxt);
                        else if (ptr >= PTR_IP(ip_src) &&
                            ptr < PTR_IP(ip_dst))
                                ptr = PTR_IP6(ip6_src);
                        else if (ptr >= PTR_IP(ip_dst) &&
                            ptr < sizeof(struct ip))
                                ptr =
PTR_IP6(ip6_dst); 2784 else { 2785 return (-1); 2786 } 2787 break; 2788 default: 2789 return (-1); 2790 } 2791 2792 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 2793 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 2794 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 2795 if (ptr >= 0) 2796 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 2797 break; 2798 } 2799 2800 return (0); 2801 } 2802 #endif /* INET6 */ 2803 2804 /* 2805 * Need to modulate the sequence numbers in the TCP SACK option 2806 * (credits to Krzysztof Pfaff for report and patch) 2807 */ 2808 int 2809 pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 2810 { 2811 struct sackblk sack; 2812 int copyback = 0, i; 2813 int olen, optsoff; 2814 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 2815 2816 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 2817 optsoff = pd->off + sizeof(struct tcphdr); 2818 #define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 2819 if (olen < TCPOLEN_MINSACK || 2820 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, NULL, pd->af)) 2821 return (0); 2822 2823 eoh = opts + olen; 2824 opt = opts; 2825 while ((opt = pf_find_tcpopt(opt, opts, olen, 2826 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 2827 { 2828 size_t safelen = MIN(opt[1], (eoh - opt)); 2829 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 2830 size_t startoff = (opt + i) - opts; 2831 memcpy(&sack, &opt[i], sizeof(sack)); 2832 pf_patch_32_unaligned(pd, &sack.start, 2833 htonl(ntohl(sack.start) - dst->seqdiff), 2834 PF_ALGNMNT(startoff)); 2835 pf_patch_32_unaligned(pd, &sack.end, 2836 htonl(ntohl(sack.end) - dst->seqdiff), 2837 PF_ALGNMNT(startoff + sizeof(sack.start))); 2838 memcpy(&opt[i], &sack, sizeof(sack)); 2839 } 2840 copyback = 1; 2841 opt += opt[1]; 2842 } 2843 2844 if (copyback) 2845 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 2846 return (copyback); 2847 } 2848 2849 struct mbuf * 2850 pf_build_tcp(const struct pf_rule *r, sa_family_t af, 2851 const struct pf_addr *saddr, const struct pf_addr *daddr, 2852 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 2853 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 2854 u_int16_t rtag, u_int sack, u_int rdom) 2855 { 2856 struct mbuf *m; 2857 int len, tlen; 2858 struct ip *h; 2859 #ifdef INET6 2860 struct ip6_hdr *h6; 2861 #endif /* INET6 */ 2862 struct tcphdr *th; 2863 char *opt; 2864 2865 /* maximum segment size tcp option */ 2866 tlen = sizeof(struct tcphdr); 2867 if (mss) 2868 tlen += 4; 2869 if (sack) 2870 tlen += 2; 2871 2872 switch (af) { 2873 case AF_INET: 2874 len = sizeof(struct ip) + tlen; 2875 break; 2876 #ifdef INET6 2877 case AF_INET6: 2878 len = sizeof(struct ip6_hdr) + tlen; 2879 break; 2880 #endif /* INET6 */ 2881 default: 2882 unhandled_af(af); 2883 } 2884 2885 /* create outgoing mbuf */ 2886 m = m_gethdr(M_DONTWAIT, MT_HEADER); 2887 if (m == NULL) 2888 return (NULL); 2889 if (tag) 2890 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 2891 m->m_pkthdr.pf.tag = rtag; 2892 m->m_pkthdr.ph_rtableid = rdom; 2893 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 2894 m->m_pkthdr.pf.prio = r->set_prio[0]; 2895 if (r && r->qid) 2896 m->m_pkthdr.pf.qid = r->qid; 2897 m->m_data += max_linkhdr; 2898 m->m_pkthdr.len = m->m_len = len; 2899 m->m_pkthdr.ph_ifidx = 0; 2900 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 2901 memset(m->m_data, 0, len); 2902 switch (af) { 2903 case AF_INET: 2904 h = mtod(m, struct ip *); 2905 h->ip_p = IPPROTO_TCP; 2906 h->ip_len = htons(tlen); 2907 h->ip_v = 4; 2908 h->ip_hl = sizeof(*h) >> 2; 2909 h->ip_tos = IPTOS_LOWDELAY; 
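                /* final length store: the IPv4 header plus the TCP
                 * header and options (supersedes the provisional
                 * htons(tlen) store above) */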
                h->ip_len = htons(len);
                h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
                h->ip_ttl = ttl ? ttl : ip_defttl;
                h->ip_sum = 0;
                h->ip_src.s_addr = saddr->v4.s_addr;
                h->ip_dst.s_addr = daddr->v4.s_addr;

                th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
                break;
#ifdef INET6
        case AF_INET6:
                h6 = mtod(m, struct ip6_hdr *);
                h6->ip6_nxt = IPPROTO_TCP;
                h6->ip6_plen = htons(tlen);
                h6->ip6_vfc |= IPV6_VERSION;
                h6->ip6_hlim = IPV6_DEFHLIM;
                memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
                memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));

                th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
                break;
#endif /* INET6 */
        default:
                unhandled_af(af);
        }

        /* TCP header */
        th->th_sport = sport;
        th->th_dport = dport;
        th->th_seq = htonl(seq);
        th->th_ack = htonl(ack);
        th->th_off = tlen >> 2;
        th->th_flags = flags;
        th->th_win = htons(win);

        opt = (char *)(th + 1);
        if (mss) {
                opt[0] = TCPOPT_MAXSEG;
                opt[1] = 4;
                mss = htons(mss);
                memcpy((opt + 2), &mss, 2);
                opt += 4;
        }
        if (sack) {
                opt[0] = TCPOPT_SACK_PERMITTED;
                opt[1] = 2;
                opt += 2;
        }

        return (m);
}

void
pf_send_tcp(const struct pf_rule *r, sa_family_t af,
    const struct pf_addr *saddr, const struct pf_addr *daddr,
    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
    u_int16_t rtag, u_int rdom)
{
        struct mbuf     *m;

        if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
            flags, win, mss, ttl, tag, rtag, 0, rdom)) == NULL)
                return;

        switch (af) {
        case AF_INET:
                ip_send(m);
                break;
#ifdef INET6
        case AF_INET6:
                ip6_send(m);
                break;
#endif /* INET6 */
        }
}

static void
pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *s,
    struct pf_state_peer *src, struct pf_state_peer *dst)
{
        /*
         * We are sending a challenge ACK as a response to a SYN packet
         * which matches an existing state (modulo the TCP window check).
         * The packet must therefore be sent on behalf of the destination.
         *
         * We expect the sender to either remain silent or send an RST
         * packet, so that both the firewall and the remote peer can
         * purge the dead state from memory.
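         *
         * (This is challenge-ACK behaviour in the spirit of RFC 5961.)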
         */
        pf_send_tcp(s->rule.ptr, pd->af, pd->dst, pd->src,
            pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
            src->seqlo, TH_ACK, 0, 0, s->rule.ptr->return_ttl, 1, 0,
            pd->rdomain);
}

void
pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
    sa_family_t af, struct pf_rule *r, u_int rdomain)
{
        struct mbuf     *m0;

        if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
                return;

        m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
        m0->m_pkthdr.ph_rtableid = rdomain;
        if (r && (r->scrub_flags & PFSTATE_SETPRIO))
                m0->m_pkthdr.pf.prio = r->set_prio[0];
        if (r && r->qid)
                m0->m_pkthdr.pf.qid = r->qid;

        switch (af) {
        case AF_INET:
                icmp_error(m0, type, code, 0, param);
                break;
#ifdef INET6
        case AF_INET6:
                icmp6_error(m0, type, code, param);
                break;
#endif /* INET6 */
        }
}

/*
 * Return ((n == 0) == (a == b [with mask m]))
 * Note: n != 0 => returns (a != b [with mask m])
 */
int
pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
    struct pf_addr *b, sa_family_t af)
{
        switch (af) {
        case AF_INET:
                if ((a->addr32[0] & m->addr32[0]) ==
                    (b->addr32[0] & m->addr32[0]))
                        return (n == 0);
                break;
#ifdef INET6
        case AF_INET6:
                if (((a->addr32[0] & m->addr32[0]) ==
                     (b->addr32[0] & m->addr32[0])) &&
                    ((a->addr32[1] & m->addr32[1]) ==
                     (b->addr32[1] & m->addr32[1])) &&
                    ((a->addr32[2] & m->addr32[2]) ==
                     (b->addr32[2] & m->addr32[2])) &&
                    ((a->addr32[3] & m->addr32[3]) ==
                     (b->addr32[3] & m->addr32[3])))
                        return (n == 0);
                break;
#endif /* INET6 */
        }

        return (n != 0);
}

/*
 * Return 1 if b <= a <= e, otherwise return 0.
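 * Addresses are compared in host byte order; for AF_INET6 the four
 * 32-bit words are compared most-significant first, so the range test
 * is lexicographic.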
3068 */ 3069 int 3070 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3071 struct pf_addr *a, sa_family_t af) 3072 { 3073 switch (af) { 3074 case AF_INET: 3075 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3076 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3077 return (0); 3078 break; 3079 #ifdef INET6 3080 case AF_INET6: { 3081 int i; 3082 3083 /* check a >= b */ 3084 for (i = 0; i < 4; ++i) 3085 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3086 break; 3087 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3088 return (0); 3089 /* check a <= e */ 3090 for (i = 0; i < 4; ++i) 3091 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3092 break; 3093 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3094 return (0); 3095 break; 3096 } 3097 #endif /* INET6 */ 3098 } 3099 return (1); 3100 } 3101 3102 int 3103 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3104 { 3105 switch (op) { 3106 case PF_OP_IRG: 3107 return ((p > a1) && (p < a2)); 3108 case PF_OP_XRG: 3109 return ((p < a1) || (p > a2)); 3110 case PF_OP_RRG: 3111 return ((p >= a1) && (p <= a2)); 3112 case PF_OP_EQ: 3113 return (p == a1); 3114 case PF_OP_NE: 3115 return (p != a1); 3116 case PF_OP_LT: 3117 return (p < a1); 3118 case PF_OP_LE: 3119 return (p <= a1); 3120 case PF_OP_GT: 3121 return (p > a1); 3122 case PF_OP_GE: 3123 return (p >= a1); 3124 } 3125 return (0); /* never reached */ 3126 } 3127 3128 int 3129 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3130 { 3131 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3132 } 3133 3134 int 3135 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3136 { 3137 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3138 return (0); 3139 return (pf_match(op, a1, a2, u)); 3140 } 3141 3142 int 3143 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3144 { 3145 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3146 return (0); 3147 return (pf_match(op, a1, a2, g)); 3148 } 3149 3150 int 3151 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3152 { 3153 if (*tag == -1) 3154 *tag = m->m_pkthdr.pf.tag; 3155 3156 return ((!r->match_tag_not && r->match_tag == *tag) || 3157 (r->match_tag_not && r->match_tag != *tag)); 3158 } 3159 3160 int 3161 pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3162 { 3163 struct ifnet *ifp; 3164 #if NCARP > 0 3165 struct ifnet *ifp0; 3166 #endif 3167 struct pfi_kif *kif; 3168 3169 ifp = if_get(m->m_pkthdr.ph_ifidx); 3170 if (ifp == NULL) 3171 return (0); 3172 3173 #if NCARP > 0 3174 if (ifp->if_type == IFT_CARP && 3175 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 3176 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3177 if_put(ifp0); 3178 } else 3179 #endif /* NCARP */ 3180 kif = (struct pfi_kif *)ifp->if_pf_kif; 3181 3182 if_put(ifp); 3183 3184 if (kif == NULL) { 3185 DPFPRINTF(LOG_ERR, 3186 "%s: kif == NULL, @%d via %s", __func__, 3187 r->nr, r->rcv_ifname); 3188 return (0); 3189 } 3190 3191 return (pfi_kif_match(r->rcv_kif, kif)); 3192 } 3193 3194 void 3195 pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3196 { 3197 if (tag > 0) 3198 m->m_pkthdr.pf.tag = tag; 3199 if (rtableid >= 0) 3200 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3201 } 3202 3203 enum pf_test_status 3204 pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_rule *r) 3205 { 3206 int rv; 3207 3208 if (ctx->depth >= PF_ANCHOR_STACK_MAX) { 3209 log(LOG_ERR, "pf_step_into_anchor: stack overflow\n"); 3210 return (PF_TEST_FAIL); 3211 } 3212 3213 ctx->depth++; 3214 3215 if (r->anchor_wildcard) { 3216 struct pf_anchor *child; 3217 rv = 
PF_TEST_OK;
                RB_FOREACH(child, pf_anchor_node, &r->anchor->children) {
                        rv = pf_match_rule(ctx, &child->ruleset);
                        if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) {
                                /*
                                 * we either hit a rule with quick action
                                 * (more likely), or hit some runtime
                                 * error (e.g. pool_get() failure).
                                 */
                                break;
                        }
                }
        } else {
                rv = pf_match_rule(ctx, &r->anchor->ruleset);
                /*
                 * Unless errors occurred, stop iff any rule matched
                 * within quick anchors.
                 */
                if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
                    *ctx->am == r)
                        rv = PF_TEST_QUICK;
        }

        ctx->depth--;

        return (rv);
}

void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
    struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
{
        switch (af) {
        case AF_INET:
                naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
                    ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
                break;
#ifdef INET6
        case AF_INET6:
                naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
                    ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
                naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
                    ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]);
                naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
                    ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]);
                naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
                    ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
                break;
#endif /* INET6 */
        default:
                unhandled_af(af);
        }
}

void
pf_addr_inc(struct pf_addr *addr, sa_family_t af)
{
        switch (af) {
        case AF_INET:
                addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
                break;
#ifdef INET6
        case AF_INET6:
                if (addr->addr32[3] == 0xffffffff) {
                        addr->addr32[3] = 0;
                        if (addr->addr32[2] == 0xffffffff) {
                                addr->addr32[2] = 0;
                                if (addr->addr32[1] == 0xffffffff) {
                                        addr->addr32[1] = 0;
                                        addr->addr32[0] =
                                            htonl(ntohl(addr->addr32[0]) + 1);
                                } else
                                        addr->addr32[1] =
                                            htonl(ntohl(addr->addr32[1]) + 1);
                        } else
                                addr->addr32[2] =
                                    htonl(ntohl(addr->addr32[2]) + 1);
                } else
                        addr->addr32[3] =
                            htonl(ntohl(addr->addr32[3]) + 1);
                break;
#endif /* INET6 */
        default:
                unhandled_af(af);
        }
}

int
pf_socket_lookup(struct pf_pdesc *pd)
{
        struct pf_addr          *saddr, *daddr;
        u_int16_t                sport, dport;
        struct inpcbtable       *tb;
        struct inpcb            *inp;

        pd->lookup.uid = -1;
        pd->lookup.gid = -1;
        pd->lookup.pid = NO_PID;
        switch (pd->virtual_proto) {
        case IPPROTO_TCP:
                sport = pd->hdr.tcp.th_sport;
                dport = pd->hdr.tcp.th_dport;
                PF_ASSERT_LOCKED();
                NET_ASSERT_LOCKED();
                tb = &tcbtable;
                break;
        case IPPROTO_UDP:
                sport = pd->hdr.udp.uh_sport;
                dport = pd->hdr.udp.uh_dport;
                PF_ASSERT_LOCKED();
                NET_ASSERT_LOCKED();
                tb = &udbtable;
                break;
        default:
                return (-1);
        }
        if (pd->dir == PF_IN) {
                saddr = pd->src;
                daddr = pd->dst;
        } else {
                u_int16_t p;

                p = sport;
                sport = dport;
                dport = p;
                saddr = pd->dst;
                daddr = pd->src;
        }
        switch (pd->af) {
        case AF_INET:
                /*
                 * Fails when rtable is changed while evaluating the ruleset.
                 * The socket looked up will not match the one hit in the end.
3350 */ 3351 inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport, 3352 pd->rdomain); 3353 if (inp == NULL) { 3354 inp = in_pcblookup_listen(tb, daddr->v4, dport, 3355 NULL, pd->rdomain); 3356 if (inp == NULL) 3357 return (-1); 3358 } 3359 break; 3360 #ifdef INET6 3361 case AF_INET6: 3362 inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, 3363 dport, pd->rdomain); 3364 if (inp == NULL) { 3365 inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 3366 NULL, pd->rdomain); 3367 if (inp == NULL) 3368 return (-1); 3369 } 3370 break; 3371 #endif /* INET6 */ 3372 default: 3373 unhandled_af(pd->af); 3374 } 3375 pd->lookup.uid = inp->inp_socket->so_euid; 3376 pd->lookup.gid = inp->inp_socket->so_egid; 3377 pd->lookup.pid = inp->inp_socket->so_cpid; 3378 return (1); 3379 } 3380 3381 /* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 3382 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 3383 * 3384 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 3385 */ 3386 u_int8_t* 3387 pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 3388 u_int8_t min_typelen) 3389 { 3390 u_int8_t *eoh = opts + hlen; 3391 3392 if (min_typelen < 2) 3393 return (NULL); 3394 3395 while ((eoh - opt) >= min_typelen) { 3396 switch (*opt) { 3397 case TCPOPT_EOL: 3398 /* FALLTHROUGH - Workaround the failure of some 3399 systems to NOP-pad their bzero'd option buffers, 3400 producing spurious EOLs */ 3401 case TCPOPT_NOP: 3402 opt++; 3403 continue; 3404 default: 3405 if (opt[0] == type && 3406 opt[1] >= min_typelen) 3407 return (opt); 3408 } 3409 3410 opt += MAX(opt[1], 2); /* evade infinite loops */ 3411 } 3412 3413 return (NULL); 3414 } 3415 3416 u_int8_t 3417 pf_get_wscale(struct pf_pdesc *pd) 3418 { 3419 int olen; 3420 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3421 u_int8_t wscale = 0; 3422 3423 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3424 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 3425 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3426 return (0); 3427 3428 opt = opts; 3429 while ((opt = pf_find_tcpopt(opt, opts, olen, 3430 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 3431 wscale = opt[2]; 3432 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 3433 wscale |= PF_WSCALE_FLAG; 3434 3435 opt += opt[1]; 3436 } 3437 3438 return (wscale); 3439 } 3440 3441 u_int16_t 3442 pf_get_mss(struct pf_pdesc *pd) 3443 { 3444 int olen; 3445 u_int8_t opts[MAX_TCPOPTLEN], *opt; 3446 u_int16_t mss = tcp_mssdflt; 3447 3448 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3449 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 3450 pd->off + sizeof(struct tcphdr), opts, olen, NULL, NULL, pd->af)) 3451 return (0); 3452 3453 opt = opts; 3454 while ((opt = pf_find_tcpopt(opt, opts, olen, 3455 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 3456 memcpy(&mss, (opt + 2), 2); 3457 mss = ntohs(mss); 3458 3459 opt += opt[1]; 3460 } 3461 return (mss); 3462 } 3463 3464 u_int16_t 3465 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer) 3466 { 3467 struct ifnet *ifp; 3468 struct sockaddr_in *dst; 3469 #ifdef INET6 3470 struct sockaddr_in6 *dst6; 3471 #endif /* INET6 */ 3472 struct rtentry *rt = NULL; 3473 struct sockaddr_storage ss; 3474 int hlen; 3475 u_int16_t mss = tcp_mssdflt; 3476 3477 memset(&ss, 0, sizeof(ss)); 3478 3479 switch (af) { 3480 case AF_INET: 3481 hlen = sizeof(struct ip); 3482 dst = (struct sockaddr_in *)&ss; 3483 dst->sin_family = AF_INET; 3484 dst->sin_len = sizeof(*dst); 3485 dst->sin_addr = addr->v4; 3486 rt = 
rtalloc(sintosa(dst), 0, rtableid); 3487 break; 3488 #ifdef INET6 3489 case AF_INET6: 3490 hlen = sizeof(struct ip6_hdr); 3491 dst6 = (struct sockaddr_in6 *)&ss; 3492 dst6->sin6_family = AF_INET6; 3493 dst6->sin6_len = sizeof(*dst6); 3494 dst6->sin6_addr = addr->v6; 3495 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 3496 break; 3497 #endif /* INET6 */ 3498 } 3499 3500 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 3501 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 3502 mss = max(tcp_mssdflt, mss); 3503 if_put(ifp); 3504 } 3505 rtfree(rt); 3506 mss = min(mss, offer); 3507 mss = max(mss, 64); /* sanity - at least max opt space */ 3508 return (mss); 3509 } 3510 3511 static __inline int 3512 pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr, sa_family_t af, 3513 struct pf_src_node **sns) 3514 { 3515 struct pf_rule *r = s->rule.ptr; 3516 int rv; 3517 3518 if (!r->rt) 3519 return (0); 3520 3521 rv = pf_map_addr(af, r, saddr, &s->rt_addr, NULL, sns, 3522 &r->route, PF_SN_ROUTE); 3523 if (rv == 0) 3524 s->rt = r->rt; 3525 3526 return (rv); 3527 } 3528 3529 u_int32_t 3530 pf_tcp_iss(struct pf_pdesc *pd) 3531 { 3532 SHA2_CTX ctx; 3533 union { 3534 uint8_t bytes[SHA512_DIGEST_LENGTH]; 3535 uint32_t words[1]; 3536 } digest; 3537 3538 if (pf_tcp_secret_init == 0) { 3539 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 3540 SHA512Init(&pf_tcp_secret_ctx); 3541 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 3542 sizeof(pf_tcp_secret)); 3543 pf_tcp_secret_init = 1; 3544 } 3545 ctx = pf_tcp_secret_ctx; 3546 3547 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 3548 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 3549 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 3550 switch (pd->af) { 3551 case AF_INET: 3552 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 3553 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 3554 break; 3555 #ifdef INET6 3556 case AF_INET6: 3557 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 3558 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 3559 break; 3560 #endif /* INET6 */ 3561 } 3562 SHA512Final(digest.bytes, &ctx); 3563 pf_tcp_iss_off += 4096; 3564 return (digest.words[0] + tcp_iss + pf_tcp_iss_off); 3565 } 3566 3567 void 3568 pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 3569 { 3570 if (r->qid) 3571 a->qid = r->qid; 3572 if (r->pqid) 3573 a->pqid = r->pqid; 3574 if (r->rtableid >= 0) 3575 a->rtableid = r->rtableid; 3576 #if NPFLOG > 0 3577 a->log |= r->log; 3578 #endif /* NPFLOG > 0 */ 3579 if (r->scrub_flags & PFSTATE_SETTOS) 3580 a->set_tos = r->set_tos; 3581 if (r->min_ttl) 3582 a->min_ttl = r->min_ttl; 3583 if (r->max_mss) 3584 a->max_mss = r->max_mss; 3585 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 3586 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 3587 if (r->scrub_flags & PFSTATE_SETPRIO) { 3588 a->set_prio[0] = r->set_prio[0]; 3589 a->set_prio[1] = r->set_prio[1]; 3590 } 3591 if (r->rule_flag & PFRULE_SETDELAY) 3592 a->delay = r->delay; 3593 } 3594 3595 #define PF_TEST_ATTRIB(t, a) \ 3596 if (t) { \ 3597 r = a; \ 3598 continue; \ 3599 } else do { \ 3600 } while (0) 3601 3602 enum pf_test_status 3603 pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 3604 { 3605 struct pf_rule *r; 3606 struct pf_rule *save_a; 3607 struct pf_ruleset *save_aruleset; 3608 3609 r = TAILQ_FIRST(ruleset->rules.active.ptr); 3610 while (r != NULL) { 3611 r->evaluations++; 3612 PF_TEST_ATTRIB( 3613 (pfi_kif_match(r->kif, ctx->pd->kif) == 
r->ifnot), 3614 r->skip[PF_SKIP_IFP].ptr); 3615 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 3616 r->skip[PF_SKIP_DIR].ptr); 3617 PF_TEST_ATTRIB((r->onrdomain >= 0 && 3618 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 3619 r->skip[PF_SKIP_RDOM].ptr); 3620 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 3621 r->skip[PF_SKIP_AF].ptr); 3622 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 3623 r->skip[PF_SKIP_PROTO].ptr); 3624 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 3625 ctx->pd->naf, r->src.neg, ctx->pd->kif, 3626 ctx->act.rtableid)), 3627 r->skip[PF_SKIP_SRC_ADDR].ptr); 3628 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 3629 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 3630 r->skip[PF_SKIP_DST_ADDR].ptr); 3631 3632 switch (ctx->pd->virtual_proto) { 3633 case PF_VPROTO_FRAGMENT: 3634 /* tcp/udp only. port_op always 0 in other cases */ 3635 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 3636 TAILQ_NEXT(r, entries)); 3637 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 3638 r->flagset), 3639 TAILQ_NEXT(r, entries)); 3640 /* icmp only. type/code always 0 in other cases */ 3641 PF_TEST_ATTRIB((r->type || r->code), 3642 TAILQ_NEXT(r, entries)); 3643 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 3644 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 3645 TAILQ_NEXT(r, entries)); 3646 break; 3647 3648 case IPPROTO_TCP: 3649 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 3650 r->flags), 3651 TAILQ_NEXT(r, entries)); 3652 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 3653 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 3654 r->os_fingerprint)), 3655 TAILQ_NEXT(r, entries)); 3656 /* FALLTHROUGH */ 3657 3658 case IPPROTO_UDP: 3659 /* tcp/udp only. port_op always 0 in other cases */ 3660 PF_TEST_ATTRIB((r->src.port_op && 3661 !pf_match_port(r->src.port_op, r->src.port[0], 3662 r->src.port[1], ctx->pd->nsport)), 3663 r->skip[PF_SKIP_SRC_PORT].ptr); 3664 PF_TEST_ATTRIB((r->dst.port_op && 3665 !pf_match_port(r->dst.port_op, r->dst.port[0], 3666 r->dst.port[1], ctx->pd->ndport)), 3667 r->skip[PF_SKIP_DST_PORT].ptr); 3668 /* tcp/udp only. uid.op always 0 in other cases */ 3669 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 3670 (ctx->pd->lookup.done = 3671 pf_socket_lookup(ctx->pd), 1)) && 3672 !pf_match_uid(r->uid.op, r->uid.uid[0], 3673 r->uid.uid[1], ctx->pd->lookup.uid)), 3674 TAILQ_NEXT(r, entries)); 3675 /* tcp/udp only. gid.op always 0 in other cases */ 3676 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 3677 (ctx->pd->lookup.done = 3678 pf_socket_lookup(ctx->pd), 1)) && 3679 !pf_match_gid(r->gid.op, r->gid.gid[0], 3680 r->gid.gid[1], ctx->pd->lookup.gid)), 3681 TAILQ_NEXT(r, entries)); 3682 break; 3683 3684 case IPPROTO_ICMP: 3685 case IPPROTO_ICMPV6: 3686 /* icmp only. type always 0 in other cases */ 3687 PF_TEST_ATTRIB((r->type && 3688 r->type != ctx->icmptype + 1), 3689 TAILQ_NEXT(r, entries)); 3690 /* icmp only. type always 0 in other cases */ 3691 PF_TEST_ATTRIB((r->code && 3692 r->code != ctx->icmpcode + 1), 3693 TAILQ_NEXT(r, entries)); 3694 /* icmp only. 
don't create states on replies */
                        PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
                            (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
                            ctx->icmp_dir != PF_IN),
                                TAILQ_NEXT(r, entries));
                        break;

                default:
                        break;
                }

                PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
                    ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
                        TAILQ_NEXT(r, entries));
                PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
                        TAILQ_NEXT(r, entries));
                PF_TEST_ATTRIB((r->prob &&
                    r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
                        TAILQ_NEXT(r, entries));
                PF_TEST_ATTRIB((r->match_tag &&
                    !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
                        TAILQ_NEXT(r, entries));
                PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
                    r->rcvifnot),
                        TAILQ_NEXT(r, entries));
                PF_TEST_ATTRIB((r->prio &&
                    (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
                    ctx->pd->m->m_pkthdr.pf.prio),
                        TAILQ_NEXT(r, entries));

                /* must be last! */
                if (r->pktrate.limit) {
                        pf_add_threshold(&r->pktrate);
                        PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
                                TAILQ_NEXT(r, entries));
                }

                /* all tests above passed: the packet matches this rule */
                if (r->tag)
                        ctx->tag = r->tag;
                if (r->anchor == NULL) {
                        if (r->action == PF_MATCH) {
                                if ((ctx->ri = pool_get(&pf_rule_item_pl,
                                    PR_NOWAIT)) == NULL) {
                                        REASON_SET(&ctx->reason, PFRES_MEMORY);
                                        ctx->test_status = PF_TEST_FAIL;
                                        break;
                                }
                                ctx->ri->r = r;
                                /* order is irrelevant */
                                SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
                                ctx->ri = NULL;
                                pf_rule_to_actions(r, &ctx->act);
                                if (r->rule_flag & PFRULE_AFTO)
                                        ctx->pd->naf = r->naf;
                                if (pf_get_transaddr(r, ctx->pd, ctx->sns,
                                    &ctx->nr) == -1) {
                                        REASON_SET(&ctx->reason,
                                            PFRES_TRANSLATE);
                                        ctx->test_status = PF_TEST_FAIL;
                                        break;
                                }
#if NPFLOG > 0
                                if (r->log) {
                                        REASON_SET(&ctx->reason, PFRES_MATCH);
                                        pflog_packet(ctx->pd, ctx->reason, r,
                                            ctx->a, ruleset, NULL);
                                }
#endif /* NPFLOG > 0 */
                        } else {
                                /* found matching r */
                                *ctx->rm = r;
                                /* anchor rule that contains r, if any */
                                *ctx->am = ctx->a;
                                /* ruleset that r belongs to */
                                *ctx->rsm = ruleset;
                                /* ruleset that the anchor rule belongs to */
                                ctx->arsm = ctx->aruleset;
                        }

#if NPFLOG > 0
                        if (ctx->act.log & PF_LOG_MATCHES)
                                pf_log_matches(ctx->pd, r, ctx->a, ruleset,
                                    &ctx->rules);
#endif /* NPFLOG > 0 */

                        if (r->quick) {
                                ctx->test_status = PF_TEST_QUICK;
                                break;
                        }
                } else {
                        save_a = ctx->a;
                        save_aruleset = ctx->aruleset;
                        ctx->a = r;             /* remember anchor */
                        ctx->aruleset = ruleset;        /* and its ruleset */
                        /*
                         * Note: we don't need to restore if we are not going
                         * to continue with ruleset evaluation.
3800 */ 3801 if (pf_step_into_anchor(ctx, r) != PF_TEST_OK) 3802 break; 3803 ctx->a = save_a; 3804 ctx->aruleset = save_aruleset; 3805 } 3806 r = TAILQ_NEXT(r, entries); 3807 } 3808 3809 return (ctx->test_status); 3810 } 3811 3812 int 3813 pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 3814 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason, 3815 struct pfsync_deferral **pdeferral) 3816 { 3817 struct pf_rule *r = NULL; 3818 struct pf_rule *a = NULL; 3819 struct pf_ruleset *ruleset = NULL; 3820 struct pf_state_key *skw = NULL, *sks = NULL; 3821 int rewrite = 0; 3822 u_int16_t virtual_type, virtual_id; 3823 int action = PF_DROP; 3824 struct pf_test_ctx ctx; 3825 int rv; 3826 3827 memset(&ctx, 0, sizeof(ctx)); 3828 ctx.pd = pd; 3829 ctx.rm = rm; 3830 ctx.am = am; 3831 ctx.rsm = rsm; 3832 ctx.th = &pd->hdr.tcp; 3833 ctx.act.rtableid = pd->rdomain; 3834 ctx.tag = -1; 3835 SLIST_INIT(&ctx.rules); 3836 3837 if (pd->dir == PF_IN && if_congested()) { 3838 REASON_SET(&ctx.reason, PFRES_CONGEST); 3839 return (PF_DROP); 3840 } 3841 3842 switch (pd->virtual_proto) { 3843 case IPPROTO_ICMP: 3844 ctx.icmptype = pd->hdr.icmp.icmp_type; 3845 ctx.icmpcode = pd->hdr.icmp.icmp_code; 3846 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3847 &ctx.icmp_dir, &virtual_id, &virtual_type); 3848 if (ctx.icmp_dir == PF_IN) { 3849 pd->osport = pd->nsport = virtual_id; 3850 pd->odport = pd->ndport = virtual_type; 3851 } else { 3852 pd->osport = pd->nsport = virtual_type; 3853 pd->odport = pd->ndport = virtual_id; 3854 } 3855 break; 3856 #ifdef INET6 3857 case IPPROTO_ICMPV6: 3858 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 3859 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 3860 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 3861 &ctx.icmp_dir, &virtual_id, &virtual_type); 3862 if (ctx.icmp_dir == PF_IN) { 3863 pd->osport = pd->nsport = virtual_id; 3864 pd->odport = pd->ndport = virtual_type; 3865 } else { 3866 pd->osport = pd->nsport = virtual_type; 3867 pd->odport = pd->ndport = virtual_id; 3868 } 3869 break; 3870 #endif /* INET6 */ 3871 } 3872 3873 ruleset = &pf_main_ruleset; 3874 rv = pf_match_rule(&ctx, ruleset); 3875 if (rv == PF_TEST_FAIL) { 3876 /* 3877 * Reason has been set in pf_match_rule() already. 
3878 */ 3879 goto cleanup; 3880 } 3881 3882 r = *ctx.rm; /* matching rule */ 3883 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 3884 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 3885 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 3886 3887 /* apply actions for last matching pass/block rule */ 3888 pf_rule_to_actions(r, &ctx.act); 3889 if (r->rule_flag & PFRULE_AFTO) 3890 pd->naf = r->naf; 3891 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 3892 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 3893 goto cleanup; 3894 } 3895 REASON_SET(&ctx.reason, PFRES_MATCH); 3896 3897 #if NPFLOG > 0 3898 if (r->log) 3899 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 3900 if (ctx.act.log & PF_LOG_MATCHES) 3901 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 3902 #endif /* NPFLOG > 0 */ 3903 3904 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 3905 (r->action == PF_DROP) && 3906 ((r->rule_flag & PFRULE_RETURNRST) || 3907 (r->rule_flag & PFRULE_RETURNICMP) || 3908 (r->rule_flag & PFRULE_RETURN))) { 3909 if (pd->proto == IPPROTO_TCP && 3910 ((r->rule_flag & PFRULE_RETURNRST) || 3911 (r->rule_flag & PFRULE_RETURN)) && 3912 !(ctx.th->th_flags & TH_RST)) { 3913 u_int32_t ack = 3914 ntohl(ctx.th->th_seq) + pd->p_len; 3915 3916 if (pf_check_tcp_cksum(pd->m, pd->off, 3917 pd->tot_len - pd->off, pd->af)) 3918 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 3919 else { 3920 if (ctx.th->th_flags & TH_SYN) 3921 ack++; 3922 if (ctx.th->th_flags & TH_FIN) 3923 ack++; 3924 pf_send_tcp(r, pd->af, pd->dst, 3925 pd->src, ctx.th->th_dport, 3926 ctx.th->th_sport, ntohl(ctx.th->th_ack), 3927 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 3928 1, 0, pd->rdomain); 3929 } 3930 } else if ((pd->proto != IPPROTO_ICMP || 3931 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 3932 r->return_icmp) 3933 pf_send_icmp(pd->m, r->return_icmp >> 8, 3934 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 3935 else if ((pd->proto != IPPROTO_ICMPV6 || 3936 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 3937 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 3938 r->return_icmp6) 3939 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 3940 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 3941 } 3942 3943 if (r->action == PF_DROP) 3944 goto cleanup; 3945 3946 /* 3947 * If an expired "once" rule has not been purged, drop any new matching 3948 * packets. 3949 */ 3950 if (r->rule_flag & PFRULE_EXPIRED) 3951 goto cleanup; 3952 3953 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 3954 if (ctx.act.rtableid >= 0 && 3955 rtable_l2(ctx.act.rtableid) != pd->rdomain) 3956 pd->destchg = 1; 3957 3958 if (r->action == PF_PASS && pd->badopts && ! 
r->allow_opts) {
                REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
                pd->pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
                DPFPRINTF(LOG_NOTICE, "dropping packet with "
                    "ip/ipv6 options in pf_test_rule()");
                goto cleanup;
        }

        action = PF_PASS;

        if (pd->virtual_proto != PF_VPROTO_FRAGMENT
            && !ctx.state_icmp && r->keep_state) {

                if (r->rule_flag & PFRULE_SRCTRACK &&
                    pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
                    pd->af, pd->src, NULL, NULL) != 0) {
                        REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
                        goto cleanup;
                }

                if (r->max_states && (r->states_cur >= r->max_states)) {
                        pf_status.lcounters[LCNT_STATES]++;
                        REASON_SET(&ctx.reason, PFRES_MAXSTATES);
                        goto cleanup;
                }

                action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
                    &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);

                if (action != PF_PASS)
                        goto cleanup;
                if (sks != skw) {
                        struct pf_state_key     *sk;

                        if (pd->dir == PF_IN)
                                sk = sks;
                        else
                                sk = skw;
                        rewrite += pf_translate(pd,
                            &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
                            sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
                            &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
                            sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
                            virtual_type, ctx.icmp_dir);
                }

#ifdef INET6
                if (rewrite && skw->af != sks->af)
                        action = PF_AFRT;
#endif /* INET6 */

        } else {
                while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
                        SLIST_REMOVE_HEAD(&ctx.rules, entry);
                        pool_put(&pf_rule_item_pl, ctx.ri);
                }
        }

        /* copy back packet headers if needed */
        if (rewrite && pd->hdrlen) {
                m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
        }

        if (r->rule_flag & PFRULE_ONCE) {
                u_int32_t       rule_flag;

                /*
                 * Use atomic_cas() to determine a clear winner, which will
                 * insert the expired rule into the garbage collection list
                 * (pf_rule_gcl).
                 */
                rule_flag = r->rule_flag;
                if (((rule_flag & PFRULE_EXPIRED) == 0) &&
                    atomic_cas_uint(&r->rule_flag, rule_flag,
                    rule_flag | PFRULE_EXPIRED) == rule_flag) {
                        r->exptime = gettime();
                        SLIST_INSERT_HEAD(&pf_rule_gcl, r, gcle);
                }
        }

#if NPFSYNC > 0
        if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
            pd->dir == PF_OUT && pfsync_up()) {
                /*
                 * We want the state created, but we don't want to send
                 * the packet yet, in case a partner firewall has to know
                 * about the state first to allow replies through it.
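                 *
                 * pfsync_defer() holds on to the packet until the peers
                 * have been told about the new state, or a short timeout
                 * fires.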
                 */
                if (pfsync_defer(*sm, pd->m, pdeferral))
                        return (PF_DEFER);
        }
#endif /* NPFSYNC > 0 */

        return (action);

cleanup:
        while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
                SLIST_REMOVE_HEAD(&ctx.rules, entry);
                pool_put(&pf_rule_item_pl, ctx.ri);
        }

        return (action);
}

static __inline int
pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
    struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
    int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
    struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX])
{
        struct pf_state         *s = NULL;
        struct tcphdr           *th = &pd->hdr.tcp;
        u_int16_t                mss = tcp_mssdflt;
        u_short                  reason;
        u_int                    i;

        s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
        if (s == NULL) {
                REASON_SET(&reason, PFRES_MEMORY);
                goto csfailed;
        }
        s->rule.ptr = r;
        s->anchor.ptr = a;
        s->natrule.ptr = nr;
        if (r->allow_opts)
                s->state_flags |= PFSTATE_ALLOWOPTS;
        if (r->rule_flag & PFRULE_STATESLOPPY)
                s->state_flags |= PFSTATE_SLOPPY;
        if (r->rule_flag & PFRULE_PFLOW)
                s->state_flags |= PFSTATE_PFLOW;
#if NPFLOG > 0
        s->log = act->log & PF_LOG_ALL;
#endif /* NPFLOG > 0 */
        s->qid = act->qid;
        s->pqid = act->pqid;
        s->rtableid[pd->didx] = act->rtableid;
        s->rtableid[pd->sidx] = -1;     /* return traffic is routed normally */
        s->min_ttl = act->min_ttl;
        s->set_tos = act->set_tos;
        s->max_mss = act->max_mss;
        s->state_flags |= act->flags;
#if NPFSYNC > 0
        s->sync_state = PFSYNC_S_NONE;
#endif /* NPFSYNC > 0 */
        s->set_prio[0] = act->set_prio[0];
        s->set_prio[1] = act->set_prio[1];
        s->delay = act->delay;
        SLIST_INIT(&s->src_nodes);
        /*
         * must initialize refcnt before pf_state_insert() gets called.
         * pf_state_insert() grabs a reference for pfsync!
4111 */ 4112 refcnt_init(&s->refcnt); 4113 4114 switch (pd->proto) { 4115 case IPPROTO_TCP: 4116 s->src.seqlo = ntohl(th->th_seq); 4117 s->src.seqhi = s->src.seqlo + pd->p_len + 1; 4118 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 4119 r->keep_state == PF_STATE_MODULATE) { 4120 /* Generate sequence number modulator */ 4121 if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == 4122 0) 4123 s->src.seqdiff = 1; 4124 pf_patch_32(pd, 4125 &th->th_seq, htonl(s->src.seqlo + s->src.seqdiff)); 4126 *rewrite = 1; 4127 } else 4128 s->src.seqdiff = 0; 4129 if (th->th_flags & TH_SYN) { 4130 s->src.seqhi++; 4131 s->src.wscale = pf_get_wscale(pd); 4132 } 4133 s->src.max_win = MAX(ntohs(th->th_win), 1); 4134 if (s->src.wscale & PF_WSCALE_MASK) { 4135 /* Remove scale factor from initial window */ 4136 int win = s->src.max_win; 4137 win += 1 << (s->src.wscale & PF_WSCALE_MASK); 4138 s->src.max_win = (win - 1) >> 4139 (s->src.wscale & PF_WSCALE_MASK); 4140 } 4141 if (th->th_flags & TH_FIN) 4142 s->src.seqhi++; 4143 s->dst.seqhi = 1; 4144 s->dst.max_win = 1; 4145 pf_set_protostate(s, PF_PEER_SRC, TCPS_SYN_SENT); 4146 pf_set_protostate(s, PF_PEER_DST, TCPS_CLOSED); 4147 s->timeout = PFTM_TCP_FIRST_PACKET; 4148 pf_status.states_halfopen++; 4149 break; 4150 case IPPROTO_UDP: 4151 pf_set_protostate(s, PF_PEER_SRC, PFUDPS_SINGLE); 4152 pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 4153 s->timeout = PFTM_UDP_FIRST_PACKET; 4154 break; 4155 case IPPROTO_ICMP: 4156 #ifdef INET6 4157 case IPPROTO_ICMPV6: 4158 #endif /* INET6 */ 4159 s->timeout = PFTM_ICMP_FIRST_PACKET; 4160 break; 4161 default: 4162 pf_set_protostate(s, PF_PEER_SRC, PFOTHERS_SINGLE); 4163 pf_set_protostate(s, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 4164 s->timeout = PFTM_OTHER_FIRST_PACKET; 4165 } 4166 4167 s->creation = getuptime(); 4168 s->expire = getuptime(); 4169 4170 if (pd->proto == IPPROTO_TCP) { 4171 if (s->state_flags & PFSTATE_SCRUB_TCP && 4172 pf_normalize_tcp_init(pd, &s->src)) { 4173 REASON_SET(&reason, PFRES_MEMORY); 4174 goto csfailed; 4175 } 4176 if (s->state_flags & PFSTATE_SCRUB_TCP && s->src.scrub && 4177 pf_normalize_tcp_stateful(pd, &reason, s, &s->src, &s->dst, 4178 rewrite)) { 4179 /* This really shouldn't happen!!! */ 4180 DPFPRINTF(LOG_ERR, 4181 "%s: tcp normalize failed on first pkt", __func__); 4182 goto csfailed; 4183 } 4184 } 4185 s->direction = pd->dir; 4186 4187 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 4188 REASON_SET(&reason, PFRES_MEMORY); 4189 goto csfailed; 4190 } 4191 4192 if (pf_set_rt_ifp(s, pd->src, (*skw)->af, sns) != 0) { 4193 REASON_SET(&reason, PFRES_NOROUTE); 4194 goto csfailed; 4195 } 4196 4197 for (i = 0; i < PF_SN_MAX; i++) 4198 if (sns[i] != NULL) { 4199 struct pf_sn_item *sni; 4200 4201 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 4202 if (sni == NULL) { 4203 REASON_SET(&reason, PFRES_MEMORY); 4204 goto csfailed; 4205 } 4206 sni->sn = sns[i]; 4207 SLIST_INSERT_HEAD(&s->src_nodes, sni, next); 4208 sni->sn->states++; 4209 } 4210 4211 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, s)) { 4212 pf_detach_state(s); 4213 *sks = *skw = NULL; 4214 REASON_SET(&reason, PFRES_STATEINS); 4215 goto csfailed; 4216 } else 4217 *sm = s; 4218 4219 /* 4220 * Make state responsible for rules it binds here. 
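	 *
	 * Concretely, as the two lines below show: the SLIST head of
	 * matched rule items is copied into s->match_rules and the
	 * caller's list head is zeroed, leaving exactly one owner so the
	 * items cannot be freed twice.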
4221 */ 4222 memcpy(&s->match_rules, rules, sizeof(s->match_rules)); 4223 memset(rules, 0, sizeof(*rules)); 4224 STATE_INC_COUNTERS(s); 4225 4226 if (tag > 0) { 4227 pf_tag_ref(tag); 4228 s->tag = tag; 4229 } 4230 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 4231 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 4232 int rtid = pd->rdomain; 4233 if (act->rtableid >= 0) 4234 rtid = act->rtableid; 4235 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 4236 s->src.seqhi = arc4random(); 4237 /* Find mss option */ 4238 mss = pf_get_mss(pd); 4239 mss = pf_calc_mss(pd->src, pd->af, rtid, mss); 4240 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); 4241 s->src.mss = mss; 4242 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 4243 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 4244 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); 4245 REASON_SET(&reason, PFRES_SYNPROXY); 4246 return (PF_SYNPROXY_DROP); 4247 } 4248 4249 return (PF_PASS); 4250 4251 csfailed: 4252 if (s) { 4253 pf_normalize_tcp_cleanup(s); /* safe even w/o init */ 4254 pf_src_tree_remove_state(s); 4255 pool_put(&pf_state_pl, s); 4256 } 4257 4258 for (i = 0; i < PF_SN_MAX; i++) 4259 if (sns[i] != NULL) 4260 pf_remove_src_node(sns[i]); 4261 4262 return (PF_DROP); 4263 } 4264 4265 int 4266 pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 4267 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 4268 int icmp_dir) 4269 { 4270 int rewrite = 0; 4271 int afto = pd->af != pd->naf; 4272 4273 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 4274 pd->destchg = 1; 4275 4276 switch (pd->proto) { 4277 case IPPROTO_TCP: /* FALLTHROUGH */ 4278 case IPPROTO_UDP: 4279 rewrite += pf_patch_16(pd, pd->sport, sport); 4280 rewrite += pf_patch_16(pd, pd->dport, dport); 4281 break; 4282 4283 case IPPROTO_ICMP: 4284 if (pd->af != AF_INET) 4285 return (0); 4286 4287 #ifdef INET6 4288 if (afto) { 4289 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 4290 return (0); 4291 pd->proto = IPPROTO_ICMPV6; 4292 rewrite = 1; 4293 } 4294 #endif /* INET6 */ 4295 if (virtual_type == htons(ICMP_ECHO)) { 4296 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 4297 rewrite += pf_patch_16(pd, 4298 &pd->hdr.icmp.icmp_id, icmpid); 4299 } 4300 break; 4301 4302 #ifdef INET6 4303 case IPPROTO_ICMPV6: 4304 if (pd->af != AF_INET6) 4305 return (0); 4306 4307 if (afto) { 4308 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 4309 return (0); 4310 pd->proto = IPPROTO_ICMP; 4311 rewrite = 1; 4312 } 4313 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 4314 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 4315 rewrite += pf_patch_16(pd, 4316 &pd->hdr.icmp6.icmp6_id, icmpid); 4317 } 4318 break; 4319 #endif /* INET6 */ 4320 } 4321 4322 if (!afto) { 4323 rewrite += pf_translate_a(pd, pd->src, saddr); 4324 rewrite += pf_translate_a(pd, pd->dst, daddr); 4325 } 4326 4327 return (rewrite); 4328 } 4329 4330 int 4331 pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **state, u_short *reason, 4332 int *copyback, int reverse) 4333 { 4334 struct tcphdr *th = &pd->hdr.tcp; 4335 struct pf_state_peer *src, *dst; 4336 u_int16_t win = ntohs(th->th_win); 4337 u_int32_t ack, end, data_end, seq, orig_seq; 4338 u_int8_t sws, dws, psrc, pdst; 4339 int ackskew; 4340 4341 if ((pd->dir == (*state)->direction && !reverse) || 4342 (pd->dir != (*state)->direction && reverse)) { 4343 src = &(*state)->src; 4344 dst = &(*state)->dst; 4345 psrc = PF_PEER_SRC; 4346 pdst = PF_PEER_DST; 4347 } else { 4348 src = &(*state)->dst; 4349 dst = &(*state)->src; 4350 psrc = PF_PEER_DST; 4351 pdst = PF_PEER_SRC; 4352 } 4353 4354 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 4355 sws = src->wscale & PF_WSCALE_MASK; 4356 dws = dst->wscale & PF_WSCALE_MASK; 4357 } else 4358 sws = dws = 0; 4359 4360 /* 4361 * Sequence tracking algorithm from Guido van Rooij's paper: 4362 * http://www.madison-gurkha.com/publications/tcp_filtering/ 4363 * tcp_filtering.ps 4364 */ 4365 4366 orig_seq = seq = ntohl(th->th_seq); 4367 if (src->seqlo == 0) { 4368 /* First packet from this end. Set its state */ 4369 4370 if (((*state)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 4371 src->scrub == NULL) { 4372 if (pf_normalize_tcp_init(pd, src)) { 4373 REASON_SET(reason, PFRES_MEMORY); 4374 return (PF_DROP); 4375 } 4376 } 4377 4378 /* Deferred generation of sequence number modulator */ 4379 if (dst->seqdiff && !src->seqdiff) { 4380 /* use random iss for the TCP server */ 4381 while ((src->seqdiff = arc4random() - seq) == 0) 4382 continue; 4383 ack = ntohl(th->th_ack) - dst->seqdiff; 4384 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4385 pf_patch_32(pd, &th->th_ack, htonl(ack)); 4386 *copyback = 1; 4387 } else { 4388 ack = ntohl(th->th_ack); 4389 } 4390 4391 end = seq + pd->p_len; 4392 if (th->th_flags & TH_SYN) { 4393 end++; 4394 if (dst->wscale & PF_WSCALE_FLAG) { 4395 src->wscale = pf_get_wscale(pd); 4396 if (src->wscale & PF_WSCALE_FLAG) { 4397 /* Remove scale factor from initial 4398 * window */ 4399 sws = src->wscale & PF_WSCALE_MASK; 4400 win = ((u_int32_t)win + (1 << sws) - 1) 4401 >> sws; 4402 dws = dst->wscale & PF_WSCALE_MASK; 4403 } else { 4404 /* fixup other window */ 4405 dst->max_win = MIN(TCP_MAXWIN, 4406 (u_int32_t)dst->max_win << 4407 (dst->wscale & PF_WSCALE_MASK)); 4408 /* in case of a retrans SYN|ACK */ 4409 dst->wscale = 0; 4410 } 4411 } 4412 } 4413 data_end = end; 4414 if (th->th_flags & TH_FIN) 4415 end++; 4416 4417 src->seqlo = seq; 4418 if (src->state < TCPS_SYN_SENT) 4419 pf_set_protostate(*state, psrc, TCPS_SYN_SENT); 4420 4421 /* 4422 * May need to slide the window (seqhi may have been set by 4423 * the crappy stack check or if we picked up the connection 4424 * after establishment) 4425 */ 4426 if (src->seqhi == 1 || 4427 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 4428 src->seqhi = end + MAX(1, dst->max_win << dws); 4429 if (win > src->max_win) 4430 src->max_win = win; 4431 4432 } else { 4433 ack = ntohl(th->th_ack) - dst->seqdiff; 4434 if (src->seqdiff) { 4435 /* Modulate sequence numbers */ 4436 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 4437 
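			/*
			 * Illustrative, made-up numbers: with seq = 1000 and
			 * src->seqdiff = 5000 the sequence number goes back
			 * on the wire as 6000, while the ACK value, already
			 * demodulated by dst->seqdiff above, is patched back
			 * so the receiving host sees the numbers it expects.
			 */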
pf_patch_32(pd, &th->th_ack, htonl(ack));
			*copyback = 1;
		}
		end = seq + pd->p_len;
		if (th->th_flags & TH_SYN)
			end++;
		data_end = end;
		if (th->th_flags & TH_FIN)
			end++;
	}

	if ((th->th_flags & TH_ACK) == 0) {
		/* Let it pass through the ack skew check */
		ack = dst->seqlo;
	} else if ((ack == 0 &&
	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
	    /* broken tcp stacks do not set ack */
	    (dst->state < TCPS_SYN_SENT)) {
		/*
		 * Many stacks (ours included) will set the ACK number in a
		 * FIN|ACK if the SYN times out -- no sequence to ACK.
		 */
		ack = dst->seqlo;
	}

	if (seq == end) {
		/* Ease sequencing restrictions on no-data packets */
		seq = src->seqlo;
		data_end = end = seq;
	}

	ackskew = dst->seqlo - ack;

	/*
	 * Need to demodulate the sequence numbers in any TCP SACK options
	 * (Selective ACK). We could optionally validate the SACK values
	 * against the current ACK window, either forwards or backwards, but
	 * I'm not confident that SACK has been implemented properly
	 * everywhere. It wouldn't surprise me if several stacks accidentally
	 * SACK too far backwards of previously ACKed data. There really aren't
	 * any security implications of bad SACKing unless the target stack
	 * doesn't validate the option length correctly. Someone trying to
	 * spoof into a TCP connection won't bother blindly sending SACK
	 * options anyway.
	 */
	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
		if (pf_modulate_sack(pd, dst))
			*copyback = 1;
	}

#define MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
	if (SEQ_GEQ(src->seqhi, data_end) &&
	    /* Last octet inside other's window space */
	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
	    /* Retrans: not more than one window back */
	    (ackskew >= -MAXACKWINDOW) &&
	    /* Acking not more than one reassembled fragment backwards */
	    (ackskew <= (MAXACKWINDOW << sws)) &&
	    /* Acking not more than one window forward */
	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo))) {
		/* Require an exact/+1 sequence match on resets when possible */

		if (dst->scrub || src->scrub) {
			if (pf_normalize_tcp_stateful(pd, reason, *state, src,
			    dst, copyback))
				return (PF_DROP);
		}

		/* update max window */
		if (src->max_win < win)
			src->max_win = win;
		/* synchronize sequencing */
		if (SEQ_GT(end, src->seqlo))
			src->seqlo = end;
		/* slide the window of what the other end can send */
		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
			dst->seqhi = ack + MAX((win << sws), 1);

		/* update states */
		if (th->th_flags & TH_SYN)
			if (src->state < TCPS_SYN_SENT)
				pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(*state, psrc, TCPS_CLOSING);
		if (th->th_flags & TH_ACK) {
			if (dst->state == TCPS_SYN_SENT) {
				pf_set_protostate(*state, pdst,
				    TCPS_ESTABLISHED);
				if (src->state == TCPS_ESTABLISHED &&
				    !SLIST_EMPTY(&(*state)->src_nodes) &&
				    pf_src_connlimit(state)) {
					REASON_SET(reason, PFRES_SRCLIMIT);
					return (PF_DROP);
				}
			} else if (dst->state == TCPS_CLOSING)
				pf_set_protostate(*state, pdst,
				    TCPS_FIN_WAIT_2);
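			/*
			 * In short: the first ACK seen toward a peer in
			 * SYN_SENT completes the handshake for that peer
			 * (and is where the source-node connection limit is
			 * enforced), while an ACK toward a peer in CLOSING
			 * confirms its FIN and advances it to FIN_WAIT_2.
			 */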
4538 } 4539 if (th->th_flags & TH_RST) 4540 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4541 4542 /* update expire time */ 4543 (*state)->expire = getuptime(); 4544 if (src->state >= TCPS_FIN_WAIT_2 && 4545 dst->state >= TCPS_FIN_WAIT_2) 4546 (*state)->timeout = PFTM_TCP_CLOSED; 4547 else if (src->state >= TCPS_CLOSING && 4548 dst->state >= TCPS_CLOSING) 4549 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4550 else if (src->state < TCPS_ESTABLISHED || 4551 dst->state < TCPS_ESTABLISHED) 4552 (*state)->timeout = PFTM_TCP_OPENING; 4553 else if (src->state >= TCPS_CLOSING || 4554 dst->state >= TCPS_CLOSING) 4555 (*state)->timeout = PFTM_TCP_CLOSING; 4556 else 4557 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4558 4559 /* Fall through to PASS packet */ 4560 } else if ((dst->state < TCPS_SYN_SENT || 4561 dst->state >= TCPS_FIN_WAIT_2 || 4562 src->state >= TCPS_FIN_WAIT_2) && 4563 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 4564 /* Within a window forward of the originating packet */ 4565 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 4566 /* Within a window backward of the originating packet */ 4567 4568 /* 4569 * This currently handles three situations: 4570 * 1) Stupid stacks will shotgun SYNs before their peer 4571 * replies. 4572 * 2) When PF catches an already established stream (the 4573 * firewall rebooted, the state table was flushed, routes 4574 * changed...) 4575 * 3) Packets get funky immediately after the connection 4576 * closes (this should catch Solaris spurious ACK|FINs 4577 * that web servers like to spew after a close) 4578 * 4579 * This must be a little more careful than the above code 4580 * since packet floods will also be caught here. We don't 4581 * update the TTL here to mitigate the damage of a packet 4582 * flood and so the same code can handle awkward establishment 4583 * and a loosened connection close. 4584 * In the establishment case, a correct peer response will 4585 * validate the connection, go through the normal state code 4586 * and keep updating the state TTL. 4587 */ 4588 4589 if (pf_status.debug >= LOG_NOTICE) { 4590 log(LOG_NOTICE, "pf: loose state match: "); 4591 pf_print_state(*state); 4592 pf_print_flags(th->th_flags); 4593 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 4594 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 4595 pd->p_len, ackskew, (*state)->packets[0], 4596 (*state)->packets[1], 4597 pd->dir == PF_IN ? "in" : "out", 4598 pd->dir == (*state)->direction ? "fwd" : "rev"); 4599 } 4600 4601 if (dst->scrub || src->scrub) { 4602 if (pf_normalize_tcp_stateful(pd, reason, *state, src, 4603 dst, copyback)) 4604 return (PF_DROP); 4605 } 4606 4607 /* update max window */ 4608 if (src->max_win < win) 4609 src->max_win = win; 4610 /* synchronize sequencing */ 4611 if (SEQ_GT(end, src->seqlo)) 4612 src->seqlo = end; 4613 /* slide the window of what the other end can send */ 4614 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 4615 dst->seqhi = ack + MAX((win << sws), 1); 4616 4617 /* 4618 * Cannot set dst->seqhi here since this could be a shotgunned 4619 * SYN and not an already established connection. 
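		 *
		 * (With a shotgunned SYN the peer has not answered yet, so
		 * the ACK field of this packet carries no usable sequence
		 * information; trusting it to size dst's window here could
		 * poison the state for the real handshake.)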
		 */
		if (th->th_flags & TH_FIN)
			if (src->state < TCPS_CLOSING)
				pf_set_protostate(*state, psrc, TCPS_CLOSING);
		if (th->th_flags & TH_RST)
			pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT);

		/* Fall through to PASS packet */
	} else {
		if ((*state)->dst.state == TCPS_SYN_SENT &&
		    (*state)->src.state == TCPS_SYN_SENT) {
			/* Send RST for state mismatches during handshake */
			if (!(th->th_flags & TH_RST))
				pf_send_tcp((*state)->rule.ptr, pd->af,
				    pd->dst, pd->src, th->th_dport,
				    th->th_sport, ntohl(th->th_ack), 0,
				    TH_RST, 0, 0,
				    (*state)->rule.ptr->return_ttl, 1, 0,
				    pd->rdomain);
			src->seqlo = 0;
			src->seqhi = 1;
			src->max_win = 1;
		} else if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: BAD state: ");
			pf_print_state(*state);
			pf_print_flags(th->th_flags);
			addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
			    "pkts=%llu:%llu dir=%s,%s\n",
			    seq, orig_seq, ack, pd->p_len, ackskew,
			    (*state)->packets[0], (*state)->packets[1],
			    pd->dir == PF_IN ? "in" : "out",
			    pd->dir == (*state)->direction ? "fwd" : "rev");
			addlog("pf: State failure on: %c %c %c %c | %c %c\n",
			    SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
			    ' ' : '2',
			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
			    ' ' : '5',
			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?
			    ' ' : '6');
		}
		REASON_SET(reason, PFRES_BADSTATE);
		return (PF_DROP);
	}

	return (PF_PASS);
}

int
pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	struct tcphdr		*th = &pd->hdr.tcp;
	struct pf_state_peer	*src, *dst;
	u_int8_t		 psrc, pdst;

	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	if (th->th_flags & TH_SYN)
		if (src->state < TCPS_SYN_SENT)
			pf_set_protostate(*state, psrc, TCPS_SYN_SENT);
	if (th->th_flags & TH_FIN)
		if (src->state < TCPS_CLOSING)
			pf_set_protostate(*state, psrc, TCPS_CLOSING);
	if (th->th_flags & TH_ACK) {
		if (dst->state == TCPS_SYN_SENT) {
			pf_set_protostate(*state, pdst, TCPS_ESTABLISHED);
			if (src->state == TCPS_ESTABLISHED &&
			    !SLIST_EMPTY(&(*state)->src_nodes) &&
			    pf_src_connlimit(state)) {
				REASON_SET(reason, PFRES_SRCLIMIT);
				return (PF_DROP);
			}
		} else if (dst->state == TCPS_CLOSING) {
			pf_set_protostate(*state, pdst, TCPS_FIN_WAIT_2);
		} else if (src->state == TCPS_SYN_SENT &&
		    dst->state < TCPS_SYN_SENT) {
			/*
			 * Handle a special sloppy case where we only see one
			 * half of the connection. If there is an ACK after
			 * the initial SYN without ever seeing a packet from
			 * the destination, set the connection to established.
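			 *
			 * A typical, illustrative trigger is asymmetric
			 * routing where only one direction of the flow
			 * crosses this firewall: a SYN followed by an ACK
			 * from the same side is then the only evidence that
			 * the three-way handshake completed.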
4713 */ 4714 pf_set_protostate(*state, PF_PEER_BOTH, 4715 TCPS_ESTABLISHED); 4716 if (!SLIST_EMPTY(&(*state)->src_nodes) && 4717 pf_src_connlimit(state)) { 4718 REASON_SET(reason, PFRES_SRCLIMIT); 4719 return (PF_DROP); 4720 } 4721 } else if (src->state == TCPS_CLOSING && 4722 dst->state == TCPS_ESTABLISHED && 4723 dst->seqlo == 0) { 4724 /* 4725 * Handle the closing of half connections where we 4726 * don't see the full bidirectional FIN/ACK+ACK 4727 * handshake. 4728 */ 4729 pf_set_protostate(*state, pdst, TCPS_CLOSING); 4730 } 4731 } 4732 if (th->th_flags & TH_RST) 4733 pf_set_protostate(*state, PF_PEER_BOTH, TCPS_TIME_WAIT); 4734 4735 /* update expire time */ 4736 (*state)->expire = getuptime(); 4737 if (src->state >= TCPS_FIN_WAIT_2 && 4738 dst->state >= TCPS_FIN_WAIT_2) 4739 (*state)->timeout = PFTM_TCP_CLOSED; 4740 else if (src->state >= TCPS_CLOSING && 4741 dst->state >= TCPS_CLOSING) 4742 (*state)->timeout = PFTM_TCP_FIN_WAIT; 4743 else if (src->state < TCPS_ESTABLISHED || 4744 dst->state < TCPS_ESTABLISHED) 4745 (*state)->timeout = PFTM_TCP_OPENING; 4746 else if (src->state >= TCPS_CLOSING || 4747 dst->state >= TCPS_CLOSING) 4748 (*state)->timeout = PFTM_TCP_CLOSING; 4749 else 4750 (*state)->timeout = PFTM_TCP_ESTABLISHED; 4751 4752 return (PF_PASS); 4753 } 4754 4755 static __inline int 4756 pf_synproxy(struct pf_pdesc *pd, struct pf_state **state, u_short *reason) 4757 { 4758 struct pf_state_key *sk = (*state)->key[pd->didx]; 4759 4760 if ((*state)->src.state == PF_TCPS_PROXY_SRC) { 4761 struct tcphdr *th = &pd->hdr.tcp; 4762 4763 if (pd->dir != (*state)->direction) { 4764 REASON_SET(reason, PFRES_SYNPROXY); 4765 return (PF_SYNPROXY_DROP); 4766 } 4767 if (th->th_flags & TH_SYN) { 4768 if (ntohl(th->th_seq) != (*state)->src.seqlo) { 4769 REASON_SET(reason, PFRES_SYNPROXY); 4770 return (PF_DROP); 4771 } 4772 pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, 4773 pd->src, th->th_dport, th->th_sport, 4774 (*state)->src.seqhi, ntohl(th->th_seq) + 1, 4775 TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 4776 0, pd->rdomain); 4777 REASON_SET(reason, PFRES_SYNPROXY); 4778 return (PF_SYNPROXY_DROP); 4779 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 4780 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4781 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4782 REASON_SET(reason, PFRES_SYNPROXY); 4783 return (PF_DROP); 4784 } else if (!SLIST_EMPTY(&(*state)->src_nodes) && 4785 pf_src_connlimit(state)) { 4786 REASON_SET(reason, PFRES_SRCLIMIT); 4787 return (PF_DROP); 4788 } else 4789 pf_set_protostate(*state, PF_PEER_SRC, 4790 PF_TCPS_PROXY_DST); 4791 } 4792 if ((*state)->src.state == PF_TCPS_PROXY_DST) { 4793 struct tcphdr *th = &pd->hdr.tcp; 4794 4795 if (pd->dir == (*state)->direction) { 4796 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 4797 (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || 4798 (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { 4799 REASON_SET(reason, PFRES_SYNPROXY); 4800 return (PF_DROP); 4801 } 4802 (*state)->src.max_win = MAX(ntohs(th->th_win), 1); 4803 if ((*state)->dst.seqhi == 1) 4804 (*state)->dst.seqhi = arc4random(); 4805 pf_send_tcp((*state)->rule.ptr, pd->af, 4806 &sk->addr[pd->sidx], &sk->addr[pd->didx], 4807 sk->port[pd->sidx], sk->port[pd->didx], 4808 (*state)->dst.seqhi, 0, TH_SYN, 0, 4809 (*state)->src.mss, 0, 0, (*state)->tag, 4810 sk->rdomain); 4811 REASON_SET(reason, PFRES_SYNPROXY); 4812 return (PF_SYNPROXY_DROP); 4813 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 4814 (TH_SYN|TH_ACK)) || 4815 (ntohl(th->th_ack) != 
(*state)->dst.seqhi + 1)) {
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_DROP);
		} else {
			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
			(*state)->dst.seqlo = ntohl(th->th_seq);
			pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst,
			    pd->src, th->th_dport, th->th_sport,
			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
			    (*state)->tag, pd->rdomain);
			pf_send_tcp((*state)->rule.ptr, pd->af,
			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
			    sk->port[pd->sidx], sk->port[pd->didx],
			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
			    TH_ACK, (*state)->dst.max_win, 0, 0, 1,
			    0, sk->rdomain);
			(*state)->src.seqdiff = (*state)->dst.seqhi -
			    (*state)->src.seqlo;
			(*state)->dst.seqdiff = (*state)->src.seqhi -
			    (*state)->dst.seqlo;
			(*state)->src.seqhi = (*state)->src.seqlo +
			    (*state)->dst.max_win;
			(*state)->dst.seqhi = (*state)->dst.seqlo +
			    (*state)->src.max_win;
			(*state)->src.wscale = (*state)->dst.wscale = 0;
			pf_set_protostate(*state, PF_PEER_BOTH,
			    TCPS_ESTABLISHED);
			REASON_SET(reason, PFRES_SYNPROXY);
			return (PF_SYNPROXY_DROP);
		}
	}
	return (PF_PASS);
}

int
pf_test_state(struct pf_pdesc *pd, struct pf_state **state, u_short *reason,
    int syncookie)
{
	struct pf_state_key_cmp	 key;
	int			 copyback = 0;
	struct pf_state_peer	*src, *dst;
	int			 action;
	struct inpcb		*inp;
	u_int8_t		 psrc, pdst;

	key.af = pd->af;
	key.proto = pd->virtual_proto;
	key.rdomain = pd->rdomain;
	pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
	pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
	key.port[pd->sidx] = pd->osport;
	key.port[pd->didx] = pd->odport;
	inp = pd->m->m_pkthdr.pf.inp;

	action = pf_find_state(pd, &key, state);
	if (action != PF_MATCH)
		return (action);

	action = PF_PASS;
	if (pd->dir == (*state)->direction) {
		src = &(*state)->src;
		dst = &(*state)->dst;
		psrc = PF_PEER_SRC;
		pdst = PF_PEER_DST;
	} else {
		src = &(*state)->dst;
		dst = &(*state)->src;
		psrc = PF_PEER_DST;
		pdst = PF_PEER_SRC;
	}

	switch (pd->virtual_proto) {
	case IPPROTO_TCP:
		if (syncookie) {
			pf_set_protostate(*state, PF_PEER_SRC,
			    PF_TCPS_PROXY_DST);
			(*state)->dst.seqhi = ntohl(pd->hdr.tcp.th_ack) - 1;
		}
		if ((action = pf_synproxy(pd, state, reason)) != PF_PASS)
			return (action);
		if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {

			if (dst->state >= TCPS_FIN_WAIT_2 &&
			    src->state >= TCPS_FIN_WAIT_2) {
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE, "pf: state reuse ");
					pf_print_state(*state);
					pf_print_flags(pd->hdr.tcp.th_flags);
					addlog("\n");
				}
				/* XXX make sure it's the same direction ?? */
				(*state)->timeout = PFTM_PURGE;
				*state = NULL;
				pf_mbuf_link_inpcb(pd->m, inp);
				return (PF_DROP);
			} else if (dst->state >= TCPS_ESTABLISHED &&
			    src->state >= TCPS_ESTABLISHED) {
				/*
				 * SYN matches existing state???
				 * Typically happens when sender boots up
				 * after sudden panic. Certain protocols
				 * (NFSv3) always use the same port numbers.
				 * A challenge ACK enables all parties
				 * (firewall and peers) to get in sync again.
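				 *
				 * Roughly, as a sketch of the exchange: the
				 * challenge ACK echoes the old sequence
				 * numbers back to the sender; a rebooted
				 * sender answers with an RST, which tears
				 * the stale state down so its retransmitted
				 * SYN can set up a fresh one.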
				 */
				pf_send_challenge_ack(pd, *state, src, dst);
				return (PF_DROP);
			}
		}

		if ((*state)->state_flags & PFSTATE_SLOPPY) {
			if (pf_tcp_track_sloppy(pd, state, reason) == PF_DROP)
				return (PF_DROP);
		} else {
			if (pf_tcp_track_full(pd, state, reason, &copyback,
			    PF_REVERSED_KEY((*state)->key, pd->af)) == PF_DROP)
				return (PF_DROP);
		}
		break;
	case IPPROTO_UDP:
		/* update states */
		if (src->state < PFUDPS_SINGLE)
			pf_set_protostate(*state, psrc, PFUDPS_SINGLE);
		if (dst->state == PFUDPS_SINGLE)
			pf_set_protostate(*state, pdst, PFUDPS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFUDPS_MULTIPLE &&
		    dst->state == PFUDPS_MULTIPLE)
			(*state)->timeout = PFTM_UDP_MULTIPLE;
		else
			(*state)->timeout = PFTM_UDP_SINGLE;
		break;
	default:
		/* update states */
		if (src->state < PFOTHERS_SINGLE)
			pf_set_protostate(*state, psrc, PFOTHERS_SINGLE);
		if (dst->state == PFOTHERS_SINGLE)
			pf_set_protostate(*state, pdst, PFOTHERS_MULTIPLE);

		/* update expire time */
		(*state)->expire = getuptime();
		if (src->state == PFOTHERS_MULTIPLE &&
		    dst->state == PFOTHERS_MULTIPLE)
			(*state)->timeout = PFTM_OTHER_MULTIPLE;
		else
			(*state)->timeout = PFTM_OTHER_SINGLE;
		break;
	}

	/* translate source/destination address, if necessary */
	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
		struct pf_state_key	*nk;
		int			 afto, sidx, didx;

		if (PF_REVERSED_KEY((*state)->key, pd->af))
			nk = (*state)->key[pd->sidx];
		else
			nk = (*state)->key[pd->didx];

		afto = pd->af != nk->af;
		sidx = afto ? pd->didx : pd->sidx;
		didx = afto ?
pd->sidx : pd->didx;

#ifdef INET6
		if (afto) {
			pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
			pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
			pd->naf = nk->af;
			action = PF_AFRT;
		}
#endif /* INET6 */

		if (!afto)
			pf_translate_a(pd, pd->src, &nk->addr[sidx]);

		if (pd->sport != NULL)
			pf_patch_16(pd, pd->sport, nk->port[sidx]);

		if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
		    pd->rdomain != nk->rdomain)
			pd->destchg = 1;

		if (!afto)
			pf_translate_a(pd, pd->dst, &nk->addr[didx]);

		if (pd->dport != NULL)
			pf_patch_16(pd, pd->dport, nk->port[didx]);

		pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
		copyback = 1;
	}

	if (copyback && pd->hdrlen > 0) {
		m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
	}

	return (action);
}

int
pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **state, u_int16_t icmpid, u_int16_t type,
    int icmp_dir, int *iidx, int multi, int inner)
{
	int direction, action;

	key->af = pd->af;
	key->proto = pd->proto;
	key->rdomain = pd->rdomain;
	if (icmp_dir == PF_IN) {
		*iidx = pd->sidx;
		key->port[pd->sidx] = icmpid;
		key->port[pd->didx] = type;
	} else {
		*iidx = pd->didx;
		key->port[pd->sidx] = type;
		key->port[pd->didx] = icmpid;
	}

	if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
	    pd->dst, pd->af, multi))
		return (PF_DROP);

	action = pf_find_state(pd, key, state);
	if (action != PF_MATCH)
		return (action);

	if ((*state)->state_flags & PFSTATE_SLOPPY)
		return (-1);

	/* Is this ICMP message flowing in the right direction? */
	if ((*state)->key[PF_SK_WIRE]->af != (*state)->key[PF_SK_STACK]->af)
		direction = (pd->af == (*state)->key[PF_SK_WIRE]->af) ?
		    PF_IN : PF_OUT;
	else
		direction = (*state)->direction;
	if ((((!inner && direction == pd->dir) ||
	    (inner && direction != pd->dir)) ?
	    PF_IN : PF_OUT) != icmp_dir) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: icmp type %d in wrong direction (%d): ",
			    ntohs(type), icmp_dir);
			pf_print_state(*state);
			addlog("\n");
		}
		return (PF_DROP);
	}
	return (-1);
}

int
pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **state,
    u_short *reason)
{
	u_int16_t	 virtual_id, virtual_type;
	u_int8_t	 icmptype, icmpcode;
	int		 icmp_dir, iidx, ret, copyback = 0;

	struct pf_state_key_cmp key;

	switch (pd->proto) {
	case IPPROTO_ICMP:
		icmptype = pd->hdr.icmp.icmp_type;
		icmpcode = pd->hdr.icmp.icmp_code;
		break;
#ifdef INET6
	case IPPROTO_ICMPV6:
		icmptype = pd->hdr.icmp6.icmp6_type;
		icmpcode = pd->hdr.icmp6.icmp6_code;
		break;
#endif /* INET6 */
	default:
		panic("unhandled proto %d", pd->proto);
	}

	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
	    &virtual_type) == 0) {
		/*
		 * ICMP query/reply message not related to a TCP/UDP packet.
		 * Search for an ICMP state.
		 */
		ret = pf_icmp_state_lookup(pd, &key, state,
		    virtual_id, virtual_type, icmp_dir, &iidx,
		    0, 0);
		/* IPv6?
try matching a multicast address */ 5104 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 5105 ret = pf_icmp_state_lookup(pd, &key, state, virtual_id, 5106 virtual_type, icmp_dir, &iidx, 1, 0); 5107 if (ret >= 0) 5108 return (ret); 5109 5110 (*state)->expire = getuptime(); 5111 (*state)->timeout = PFTM_ICMP_ERROR_REPLY; 5112 5113 /* translate source/destination address, if necessary */ 5114 if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { 5115 struct pf_state_key *nk; 5116 int afto, sidx, didx; 5117 5118 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5119 nk = (*state)->key[pd->sidx]; 5120 else 5121 nk = (*state)->key[pd->didx]; 5122 5123 afto = pd->af != nk->af; 5124 sidx = afto ? pd->didx : pd->sidx; 5125 didx = afto ? pd->sidx : pd->didx; 5126 iidx = afto ? !iidx : iidx; 5127 #ifdef INET6 5128 if (afto) { 5129 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 5130 nk->af); 5131 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 5132 nk->af); 5133 pd->naf = nk->af; 5134 } 5135 #endif /* INET6 */ 5136 if (!afto) { 5137 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 5138 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 5139 } 5140 5141 if (pd->rdomain != nk->rdomain) 5142 pd->destchg = 1; 5143 if (!afto && PF_ANEQ(pd->dst, 5144 &nk->addr[didx], pd->af)) 5145 pd->destchg = 1; 5146 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5147 5148 switch (pd->af) { 5149 case AF_INET: 5150 #ifdef INET6 5151 if (afto) { 5152 if (pf_translate_icmp_af(pd, AF_INET6, 5153 &pd->hdr.icmp)) 5154 return (PF_DROP); 5155 pd->proto = IPPROTO_ICMPV6; 5156 } 5157 #endif /* INET6 */ 5158 pf_patch_16(pd, 5159 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 5160 5161 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5162 &pd->hdr.icmp, M_NOWAIT); 5163 copyback = 1; 5164 break; 5165 #ifdef INET6 5166 case AF_INET6: 5167 if (afto) { 5168 if (pf_translate_icmp_af(pd, AF_INET, 5169 &pd->hdr.icmp6)) 5170 return (PF_DROP); 5171 pd->proto = IPPROTO_ICMP; 5172 } 5173 5174 pf_patch_16(pd, 5175 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 5176 5177 m_copyback(pd->m, pd->off, 5178 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5179 M_NOWAIT); 5180 copyback = 1; 5181 break; 5182 #endif /* INET6 */ 5183 } 5184 #ifdef INET6 5185 if (afto) 5186 return (PF_AFRT); 5187 #endif /* INET6 */ 5188 } 5189 } else { 5190 /* 5191 * ICMP error message in response to a TCP/UDP packet. 5192 * Extract the inner TCP/UDP header and search for that state. 5193 */ 5194 struct pf_pdesc pd2; 5195 struct ip h2; 5196 #ifdef INET6 5197 struct ip6_hdr h2_6; 5198 #endif /* INET6 */ 5199 int ipoff2; 5200 5201 /* Initialize pd2 fields valid for both packets with pd. */ 5202 memset(&pd2, 0, sizeof(pd2)); 5203 pd2.af = pd->af; 5204 pd2.dir = pd->dir; 5205 pd2.kif = pd->kif; 5206 pd2.m = pd->m; 5207 pd2.rdomain = pd->rdomain; 5208 /* Payload packet is from the opposite direction. */ 5209 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 5210 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 5211 switch (pd->af) { 5212 case AF_INET: 5213 /* offset of h2 in mbuf chain */ 5214 ipoff2 = pd->off + ICMP_MINLEN; 5215 5216 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 5217 NULL, reason, pd2.af)) { 5218 DPFPRINTF(LOG_NOTICE, 5219 "ICMP error message too short (ip)"); 5220 return (PF_DROP); 5221 } 5222 /* 5223 * ICMP error messages don't refer to non-first 5224 * fragments 5225 */ 5226 if (h2.ip_off & htons(IP_OFFMASK)) { 5227 REASON_SET(reason, PFRES_FRAG); 5228 return (PF_DROP); 5229 } 5230 5231 /* offset of protocol header that follows h2 */ 5232 pd2.off = ipoff2; 5233 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 5234 return (PF_DROP); 5235 5236 pd2.tot_len = ntohs(h2.ip_len); 5237 pd2.src = (struct pf_addr *)&h2.ip_src; 5238 pd2.dst = (struct pf_addr *)&h2.ip_dst; 5239 break; 5240 #ifdef INET6 5241 case AF_INET6: 5242 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 5243 5244 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 5245 NULL, reason, pd2.af)) { 5246 DPFPRINTF(LOG_NOTICE, 5247 "ICMP error message too short (ip6)"); 5248 return (PF_DROP); 5249 } 5250 5251 pd2.off = ipoff2; 5252 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 5253 return (PF_DROP); 5254 5255 pd2.tot_len = ntohs(h2_6.ip6_plen) + 5256 sizeof(struct ip6_hdr); 5257 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 5258 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 5259 break; 5260 #endif /* INET6 */ 5261 default: 5262 unhandled_af(pd->af); 5263 } 5264 5265 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 5266 if (pf_status.debug >= LOG_NOTICE) { 5267 log(LOG_NOTICE, 5268 "pf: BAD ICMP %d:%d outer dst: ", 5269 icmptype, icmpcode); 5270 pf_print_host(pd->src, 0, pd->af); 5271 addlog(" -> "); 5272 pf_print_host(pd->dst, 0, pd->af); 5273 addlog(" inner src: "); 5274 pf_print_host(pd2.src, 0, pd2.af); 5275 addlog(" -> "); 5276 pf_print_host(pd2.dst, 0, pd2.af); 5277 addlog("\n"); 5278 } 5279 REASON_SET(reason, PFRES_BADSTATE); 5280 return (PF_DROP); 5281 } 5282 5283 switch (pd2.proto) { 5284 case IPPROTO_TCP: { 5285 struct tcphdr *th = &pd2.hdr.tcp; 5286 u_int32_t seq; 5287 struct pf_state_peer *src, *dst; 5288 u_int8_t dws; 5289 int action; 5290 5291 /* 5292 * Only the first 8 bytes of the TCP header can be 5293 * expected. Don't access any TCP header fields after 5294 * th_seq, an ackskew test is not possible. 
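			 *
			 * (RFC 792 only obliges the sender of an ICMP error
			 * to quote the offending IP header plus 8 octets of
			 * its payload; for TCP those 8 octets cover just
			 * the ports and th_seq.)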
5295 */ 5296 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, NULL, reason, 5297 pd2.af)) { 5298 DPFPRINTF(LOG_NOTICE, 5299 "ICMP error message too short (tcp)"); 5300 return (PF_DROP); 5301 } 5302 5303 key.af = pd2.af; 5304 key.proto = IPPROTO_TCP; 5305 key.rdomain = pd2.rdomain; 5306 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5307 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5308 key.port[pd2.sidx] = th->th_sport; 5309 key.port[pd2.didx] = th->th_dport; 5310 5311 action = pf_find_state(&pd2, &key, state); 5312 if (action != PF_MATCH) 5313 return (action); 5314 5315 if (pd2.dir == (*state)->direction) { 5316 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5317 src = &(*state)->src; 5318 dst = &(*state)->dst; 5319 } else { 5320 src = &(*state)->dst; 5321 dst = &(*state)->src; 5322 } 5323 } else { 5324 if (PF_REVERSED_KEY((*state)->key, pd->af)) { 5325 src = &(*state)->dst; 5326 dst = &(*state)->src; 5327 } else { 5328 src = &(*state)->src; 5329 dst = &(*state)->dst; 5330 } 5331 } 5332 5333 if (src->wscale && dst->wscale) 5334 dws = dst->wscale & PF_WSCALE_MASK; 5335 else 5336 dws = 0; 5337 5338 /* Demodulate sequence number */ 5339 seq = ntohl(th->th_seq) - src->seqdiff; 5340 if (src->seqdiff) { 5341 pf_patch_32(pd, &th->th_seq, htonl(seq)); 5342 copyback = 1; 5343 } 5344 5345 if (!((*state)->state_flags & PFSTATE_SLOPPY) && 5346 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 5347 src->seqlo - (dst->max_win << dws)))) { 5348 if (pf_status.debug >= LOG_NOTICE) { 5349 log(LOG_NOTICE, 5350 "pf: BAD ICMP %d:%d ", 5351 icmptype, icmpcode); 5352 pf_print_host(pd->src, 0, pd->af); 5353 addlog(" -> "); 5354 pf_print_host(pd->dst, 0, pd->af); 5355 addlog(" state: "); 5356 pf_print_state(*state); 5357 addlog(" seq=%u\n", seq); 5358 } 5359 REASON_SET(reason, PFRES_BADSTATE); 5360 return (PF_DROP); 5361 } else { 5362 if (pf_status.debug >= LOG_DEBUG) { 5363 log(LOG_DEBUG, 5364 "pf: OK ICMP %d:%d ", 5365 icmptype, icmpcode); 5366 pf_print_host(pd->src, 0, pd->af); 5367 addlog(" -> "); 5368 pf_print_host(pd->dst, 0, pd->af); 5369 addlog(" state: "); 5370 pf_print_state(*state); 5371 addlog(" seq=%u\n", seq); 5372 } 5373 } 5374 5375 /* translate source/destination address, if necessary */ 5376 if ((*state)->key[PF_SK_WIRE] != 5377 (*state)->key[PF_SK_STACK]) { 5378 struct pf_state_key *nk; 5379 int afto, sidx, didx; 5380 5381 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5382 nk = (*state)->key[pd->sidx]; 5383 else 5384 nk = (*state)->key[pd->didx]; 5385 5386 afto = pd->af != nk->af; 5387 sidx = afto ? pd2.didx : pd2.sidx; 5388 didx = afto ? 
pd2.sidx : pd2.didx; 5389 5390 #ifdef INET6 5391 if (afto) { 5392 if (pf_translate_icmp_af(pd, nk->af, 5393 &pd->hdr.icmp)) 5394 return (PF_DROP); 5395 m_copyback(pd->m, pd->off, 5396 sizeof(struct icmp6_hdr), 5397 &pd->hdr.icmp6, M_NOWAIT); 5398 if (pf_change_icmp_af(pd->m, ipoff2, 5399 pd, &pd2, &nk->addr[sidx], 5400 &nk->addr[didx], pd->af, nk->af)) 5401 return (PF_DROP); 5402 if (nk->af == AF_INET) 5403 pd->proto = IPPROTO_ICMP; 5404 else 5405 pd->proto = IPPROTO_ICMPV6; 5406 pd->m->m_pkthdr.ph_rtableid = 5407 nk->rdomain; 5408 pd->destchg = 1; 5409 pf_addrcpy(&pd->nsaddr, 5410 &nk->addr[pd2.sidx], nk->af); 5411 pf_addrcpy(&pd->ndaddr, 5412 &nk->addr[pd2.didx], nk->af); 5413 pd->naf = nk->af; 5414 5415 pf_patch_16(pd, 5416 &th->th_sport, nk->port[sidx]); 5417 pf_patch_16(pd, 5418 &th->th_dport, nk->port[didx]); 5419 5420 m_copyback(pd2.m, pd2.off, 8, th, 5421 M_NOWAIT); 5422 return (PF_AFRT); 5423 } 5424 #endif /* INET6 */ 5425 if (PF_ANEQ(pd2.src, 5426 &nk->addr[pd2.sidx], pd2.af) || 5427 nk->port[pd2.sidx] != th->th_sport) 5428 pf_translate_icmp(pd, pd2.src, 5429 &th->th_sport, pd->dst, 5430 &nk->addr[pd2.sidx], 5431 nk->port[pd2.sidx]); 5432 5433 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5434 pd2.af) || pd2.rdomain != nk->rdomain) 5435 pd->destchg = 1; 5436 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5437 5438 if (PF_ANEQ(pd2.dst, 5439 &nk->addr[pd2.didx], pd2.af) || 5440 nk->port[pd2.didx] != th->th_dport) 5441 pf_translate_icmp(pd, pd2.dst, 5442 &th->th_dport, pd->src, 5443 &nk->addr[pd2.didx], 5444 nk->port[pd2.didx]); 5445 copyback = 1; 5446 } 5447 5448 if (copyback) { 5449 switch (pd2.af) { 5450 case AF_INET: 5451 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5452 &pd->hdr.icmp, M_NOWAIT); 5453 m_copyback(pd2.m, ipoff2, sizeof(h2), 5454 &h2, M_NOWAIT); 5455 break; 5456 #ifdef INET6 5457 case AF_INET6: 5458 m_copyback(pd->m, pd->off, 5459 sizeof(struct icmp6_hdr), 5460 &pd->hdr.icmp6, M_NOWAIT); 5461 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5462 &h2_6, M_NOWAIT); 5463 break; 5464 #endif /* INET6 */ 5465 } 5466 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 5467 } 5468 break; 5469 } 5470 case IPPROTO_UDP: { 5471 struct udphdr *uh = &pd2.hdr.udp; 5472 int action; 5473 5474 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 5475 NULL, reason, pd2.af)) { 5476 DPFPRINTF(LOG_NOTICE, 5477 "ICMP error message too short (udp)"); 5478 return (PF_DROP); 5479 } 5480 5481 key.af = pd2.af; 5482 key.proto = IPPROTO_UDP; 5483 key.rdomain = pd2.rdomain; 5484 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5485 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5486 key.port[pd2.sidx] = uh->uh_sport; 5487 key.port[pd2.didx] = uh->uh_dport; 5488 5489 action = pf_find_state(&pd2, &key, state); 5490 if (action != PF_MATCH) 5491 return (action); 5492 5493 /* translate source/destination address, if necessary */ 5494 if ((*state)->key[PF_SK_WIRE] != 5495 (*state)->key[PF_SK_STACK]) { 5496 struct pf_state_key *nk; 5497 int afto, sidx, didx; 5498 5499 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5500 nk = (*state)->key[pd->sidx]; 5501 else 5502 nk = (*state)->key[pd->didx]; 5503 5504 afto = pd->af != nk->af; 5505 sidx = afto ? pd2.didx : pd2.sidx; 5506 didx = afto ? 
pd2.sidx : pd2.didx; 5507 5508 #ifdef INET6 5509 if (afto) { 5510 if (pf_translate_icmp_af(pd, nk->af, 5511 &pd->hdr.icmp)) 5512 return (PF_DROP); 5513 m_copyback(pd->m, pd->off, 5514 sizeof(struct icmp6_hdr), 5515 &pd->hdr.icmp6, M_NOWAIT); 5516 if (pf_change_icmp_af(pd->m, ipoff2, 5517 pd, &pd2, &nk->addr[sidx], 5518 &nk->addr[didx], pd->af, nk->af)) 5519 return (PF_DROP); 5520 if (nk->af == AF_INET) 5521 pd->proto = IPPROTO_ICMP; 5522 else 5523 pd->proto = IPPROTO_ICMPV6; 5524 pd->m->m_pkthdr.ph_rtableid = 5525 nk->rdomain; 5526 pd->destchg = 1; 5527 pf_addrcpy(&pd->nsaddr, 5528 &nk->addr[pd2.sidx], nk->af); 5529 pf_addrcpy(&pd->ndaddr, 5530 &nk->addr[pd2.didx], nk->af); 5531 pd->naf = nk->af; 5532 5533 pf_patch_16(pd, 5534 &uh->uh_sport, nk->port[sidx]); 5535 pf_patch_16(pd, 5536 &uh->uh_dport, nk->port[didx]); 5537 5538 m_copyback(pd2.m, pd2.off, sizeof(*uh), 5539 uh, M_NOWAIT); 5540 return (PF_AFRT); 5541 } 5542 #endif /* INET6 */ 5543 5544 if (PF_ANEQ(pd2.src, 5545 &nk->addr[pd2.sidx], pd2.af) || 5546 nk->port[pd2.sidx] != uh->uh_sport) 5547 pf_translate_icmp(pd, pd2.src, 5548 &uh->uh_sport, pd->dst, 5549 &nk->addr[pd2.sidx], 5550 nk->port[pd2.sidx]); 5551 5552 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5553 pd2.af) || pd2.rdomain != nk->rdomain) 5554 pd->destchg = 1; 5555 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5556 5557 if (PF_ANEQ(pd2.dst, 5558 &nk->addr[pd2.didx], pd2.af) || 5559 nk->port[pd2.didx] != uh->uh_dport) 5560 pf_translate_icmp(pd, pd2.dst, 5561 &uh->uh_dport, pd->src, 5562 &nk->addr[pd2.didx], 5563 nk->port[pd2.didx]); 5564 5565 switch (pd2.af) { 5566 case AF_INET: 5567 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5568 &pd->hdr.icmp, M_NOWAIT); 5569 m_copyback(pd2.m, ipoff2, sizeof(h2), 5570 &h2, M_NOWAIT); 5571 break; 5572 #ifdef INET6 5573 case AF_INET6: 5574 m_copyback(pd->m, pd->off, 5575 sizeof(struct icmp6_hdr), 5576 &pd->hdr.icmp6, M_NOWAIT); 5577 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5578 &h2_6, M_NOWAIT); 5579 break; 5580 #endif /* INET6 */ 5581 } 5582 /* Avoid recomputing quoted UDP checksum. 5583 * note: udp6 0 csum invalid per rfc2460 p27. 5584 * but presumed nothing cares in this context */ 5585 pf_patch_16(pd, &uh->uh_sum, 0); 5586 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 5587 M_NOWAIT); 5588 copyback = 1; 5589 } 5590 break; 5591 } 5592 case IPPROTO_ICMP: { 5593 struct icmp *iih = &pd2.hdr.icmp; 5594 5595 if (pd2.af != AF_INET) { 5596 REASON_SET(reason, PFRES_NORM); 5597 return (PF_DROP); 5598 } 5599 5600 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 5601 NULL, reason, pd2.af)) { 5602 DPFPRINTF(LOG_NOTICE, 5603 "ICMP error message too short (icmp)"); 5604 return (PF_DROP); 5605 } 5606 5607 pf_icmp_mapping(&pd2, iih->icmp_type, 5608 &icmp_dir, &virtual_id, &virtual_type); 5609 5610 ret = pf_icmp_state_lookup(&pd2, &key, state, 5611 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5612 if (ret >= 0) 5613 return (ret); 5614 5615 /* translate source/destination address, if necessary */ 5616 if ((*state)->key[PF_SK_WIRE] != 5617 (*state)->key[PF_SK_STACK]) { 5618 struct pf_state_key *nk; 5619 int afto, sidx, didx; 5620 5621 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5622 nk = (*state)->key[pd->sidx]; 5623 else 5624 nk = (*state)->key[pd->didx]; 5625 5626 afto = pd->af != nk->af; 5627 sidx = afto ? pd2.didx : pd2.sidx; 5628 didx = afto ? pd2.sidx : pd2.didx; 5629 iidx = afto ? 
!iidx : iidx; 5630 5631 #ifdef INET6 5632 if (afto) { 5633 if (nk->af != AF_INET6) 5634 return (PF_DROP); 5635 if (pf_translate_icmp_af(pd, nk->af, 5636 &pd->hdr.icmp)) 5637 return (PF_DROP); 5638 m_copyback(pd->m, pd->off, 5639 sizeof(struct icmp6_hdr), 5640 &pd->hdr.icmp6, M_NOWAIT); 5641 if (pf_change_icmp_af(pd->m, ipoff2, 5642 pd, &pd2, &nk->addr[sidx], 5643 &nk->addr[didx], pd->af, nk->af)) 5644 return (PF_DROP); 5645 pd->proto = IPPROTO_ICMPV6; 5646 if (pf_translate_icmp_af(pd, 5647 nk->af, iih)) 5648 return (PF_DROP); 5649 if (virtual_type == htons(ICMP_ECHO)) 5650 pf_patch_16(pd, &iih->icmp_id, 5651 nk->port[iidx]); 5652 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 5653 iih, M_NOWAIT); 5654 pd->m->m_pkthdr.ph_rtableid = 5655 nk->rdomain; 5656 pd->destchg = 1; 5657 pf_addrcpy(&pd->nsaddr, 5658 &nk->addr[pd2.sidx], nk->af); 5659 pf_addrcpy(&pd->ndaddr, 5660 &nk->addr[pd2.didx], nk->af); 5661 pd->naf = nk->af; 5662 return (PF_AFRT); 5663 } 5664 #endif /* INET6 */ 5665 5666 if (PF_ANEQ(pd2.src, 5667 &nk->addr[pd2.sidx], pd2.af) || 5668 (virtual_type == htons(ICMP_ECHO) && 5669 nk->port[iidx] != iih->icmp_id)) 5670 pf_translate_icmp(pd, pd2.src, 5671 (virtual_type == htons(ICMP_ECHO)) ? 5672 &iih->icmp_id : NULL, 5673 pd->dst, &nk->addr[pd2.sidx], 5674 (virtual_type == htons(ICMP_ECHO)) ? 5675 nk->port[iidx] : 0); 5676 5677 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5678 pd2.af) || pd2.rdomain != nk->rdomain) 5679 pd->destchg = 1; 5680 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5681 5682 if (PF_ANEQ(pd2.dst, 5683 &nk->addr[pd2.didx], pd2.af)) 5684 pf_translate_icmp(pd, pd2.dst, NULL, 5685 pd->src, &nk->addr[pd2.didx], 0); 5686 5687 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5688 &pd->hdr.icmp, M_NOWAIT); 5689 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 5690 M_NOWAIT); 5691 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 5692 M_NOWAIT); 5693 copyback = 1; 5694 } 5695 break; 5696 } 5697 #ifdef INET6 5698 case IPPROTO_ICMPV6: { 5699 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 5700 5701 if (pd2.af != AF_INET6) { 5702 REASON_SET(reason, PFRES_NORM); 5703 return (PF_DROP); 5704 } 5705 5706 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 5707 sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) { 5708 DPFPRINTF(LOG_NOTICE, 5709 "ICMP error message too short (icmp6)"); 5710 return (PF_DROP); 5711 } 5712 5713 pf_icmp_mapping(&pd2, iih->icmp6_type, 5714 &icmp_dir, &virtual_id, &virtual_type); 5715 ret = pf_icmp_state_lookup(&pd2, &key, state, 5716 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 5717 /* IPv6? try matching a multicast address */ 5718 if (ret == PF_DROP && pd2.af == AF_INET6 && 5719 icmp_dir == PF_OUT) 5720 ret = pf_icmp_state_lookup(&pd2, &key, state, 5721 virtual_id, virtual_type, icmp_dir, &iidx, 5722 1, 1); 5723 if (ret >= 0) 5724 return (ret); 5725 5726 /* translate source/destination address, if necessary */ 5727 if ((*state)->key[PF_SK_WIRE] != 5728 (*state)->key[PF_SK_STACK]) { 5729 struct pf_state_key *nk; 5730 int afto, sidx, didx; 5731 5732 if (PF_REVERSED_KEY((*state)->key, pd->af)) 5733 nk = (*state)->key[pd->sidx]; 5734 else 5735 nk = (*state)->key[pd->didx]; 5736 5737 afto = pd->af != nk->af; 5738 sidx = afto ? pd2.didx : pd2.sidx; 5739 didx = afto ? pd2.sidx : pd2.didx; 5740 iidx = afto ? 
!iidx : iidx; 5741 5742 if (afto) { 5743 if (nk->af != AF_INET) 5744 return (PF_DROP); 5745 if (pf_translate_icmp_af(pd, nk->af, 5746 &pd->hdr.icmp)) 5747 return (PF_DROP); 5748 m_copyback(pd->m, pd->off, 5749 sizeof(struct icmp6_hdr), 5750 &pd->hdr.icmp6, M_NOWAIT); 5751 if (pf_change_icmp_af(pd->m, ipoff2, 5752 pd, &pd2, &nk->addr[sidx], 5753 &nk->addr[didx], pd->af, nk->af)) 5754 return (PF_DROP); 5755 pd->proto = IPPROTO_ICMP; 5756 if (pf_translate_icmp_af(pd, 5757 nk->af, iih)) 5758 return (PF_DROP); 5759 if (virtual_type == 5760 htons(ICMP6_ECHO_REQUEST)) 5761 pf_patch_16(pd, &iih->icmp6_id, 5762 nk->port[iidx]); 5763 m_copyback(pd2.m, pd2.off, 5764 sizeof(struct icmp6_hdr), iih, 5765 M_NOWAIT); 5766 pd->m->m_pkthdr.ph_rtableid = 5767 nk->rdomain; 5768 pd->destchg = 1; 5769 pf_addrcpy(&pd->nsaddr, 5770 &nk->addr[pd2.sidx], nk->af); 5771 pf_addrcpy(&pd->ndaddr, 5772 &nk->addr[pd2.didx], nk->af); 5773 pd->naf = nk->af; 5774 return (PF_AFRT); 5775 } 5776 5777 if (PF_ANEQ(pd2.src, 5778 &nk->addr[pd2.sidx], pd2.af) || 5779 ((virtual_type == 5780 htons(ICMP6_ECHO_REQUEST)) && 5781 nk->port[pd2.sidx] != iih->icmp6_id)) 5782 pf_translate_icmp(pd, pd2.src, 5783 (virtual_type == 5784 htons(ICMP6_ECHO_REQUEST)) 5785 ? &iih->icmp6_id : NULL, 5786 pd->dst, &nk->addr[pd2.sidx], 5787 (virtual_type == 5788 htons(ICMP6_ECHO_REQUEST)) 5789 ? nk->port[iidx] : 0); 5790 5791 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5792 pd2.af) || pd2.rdomain != nk->rdomain) 5793 pd->destchg = 1; 5794 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5795 5796 if (PF_ANEQ(pd2.dst, 5797 &nk->addr[pd2.didx], pd2.af)) 5798 pf_translate_icmp(pd, pd2.dst, NULL, 5799 pd->src, &nk->addr[pd2.didx], 0); 5800 5801 m_copyback(pd->m, pd->off, 5802 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 5803 M_NOWAIT); 5804 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 5805 M_NOWAIT); 5806 m_copyback(pd2.m, pd2.off, 5807 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 5808 copyback = 1; 5809 } 5810 break; 5811 } 5812 #endif /* INET6 */ 5813 default: { 5814 int action; 5815 5816 key.af = pd2.af; 5817 key.proto = pd2.proto; 5818 key.rdomain = pd2.rdomain; 5819 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 5820 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 5821 key.port[0] = key.port[1] = 0; 5822 5823 action = pf_find_state(&pd2, &key, state); 5824 if (action != PF_MATCH) 5825 return (action); 5826 5827 /* translate source/destination address, if necessary */ 5828 if ((*state)->key[PF_SK_WIRE] != 5829 (*state)->key[PF_SK_STACK]) { 5830 struct pf_state_key *nk = 5831 (*state)->key[pd->didx]; 5832 5833 if (PF_ANEQ(pd2.src, 5834 &nk->addr[pd2.sidx], pd2.af)) 5835 pf_translate_icmp(pd, pd2.src, NULL, 5836 pd->dst, &nk->addr[pd2.sidx], 0); 5837 5838 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 5839 pd2.af) || pd2.rdomain != nk->rdomain) 5840 pd->destchg = 1; 5841 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 5842 5843 if (PF_ANEQ(pd2.dst, 5844 &nk->addr[pd2.didx], pd2.af)) 5845 pf_translate_icmp(pd, pd2.dst, NULL, 5846 pd->src, &nk->addr[pd2.didx], 0); 5847 5848 switch (pd2.af) { 5849 case AF_INET: 5850 m_copyback(pd->m, pd->off, ICMP_MINLEN, 5851 &pd->hdr.icmp, M_NOWAIT); 5852 m_copyback(pd2.m, ipoff2, sizeof(h2), 5853 &h2, M_NOWAIT); 5854 break; 5855 #ifdef INET6 5856 case AF_INET6: 5857 m_copyback(pd->m, pd->off, 5858 sizeof(struct icmp6_hdr), 5859 &pd->hdr.icmp6, M_NOWAIT); 5860 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 5861 &h2_6, M_NOWAIT); 5862 break; 5863 #endif /* INET6 */ 5864 } 5865 copyback = 1; 5866 } 5867 break; 5868 } 5869 } 5870 } 5871 if (copyback) 
{ 5872 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5873 } 5874 5875 return (PF_PASS); 5876 } 5877 5878 /* 5879 * ipoff and off are measured from the start of the mbuf chain. 5880 * h must be at "ipoff" on the mbuf chain. 5881 */ 5882 void * 5883 pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 5884 u_short *actionp, u_short *reasonp, sa_family_t af) 5885 { 5886 int iplen = 0; 5887 5888 switch (af) { 5889 case AF_INET: { 5890 struct ip *h = mtod(m, struct ip *); 5891 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 5892 5893 if (fragoff) { 5894 if (fragoff >= len) 5895 ACTION_SET(actionp, PF_PASS); 5896 else { 5897 ACTION_SET(actionp, PF_DROP); 5898 REASON_SET(reasonp, PFRES_FRAG); 5899 } 5900 return (NULL); 5901 } 5902 iplen = ntohs(h->ip_len); 5903 break; 5904 } 5905 #ifdef INET6 5906 case AF_INET6: { 5907 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 5908 5909 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 5910 break; 5911 } 5912 #endif /* INET6 */ 5913 } 5914 if (m->m_pkthdr.len < off + len || iplen < off + len) { 5915 ACTION_SET(actionp, PF_DROP); 5916 REASON_SET(reasonp, PFRES_SHORT); 5917 return (NULL); 5918 } 5919 m_copydata(m, off, len, p); 5920 return (p); 5921 } 5922 5923 int 5924 pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 5925 int rtableid) 5926 { 5927 struct sockaddr_storage ss; 5928 struct sockaddr_in *dst; 5929 int ret = 1; 5930 int check_mpath; 5931 #ifdef INET6 5932 struct sockaddr_in6 *dst6; 5933 #endif /* INET6 */ 5934 struct rtentry *rt = NULL; 5935 5936 check_mpath = 0; 5937 memset(&ss, 0, sizeof(ss)); 5938 switch (af) { 5939 case AF_INET: 5940 dst = (struct sockaddr_in *)&ss; 5941 dst->sin_family = AF_INET; 5942 dst->sin_len = sizeof(*dst); 5943 dst->sin_addr = addr->v4; 5944 if (ipmultipath) 5945 check_mpath = 1; 5946 break; 5947 #ifdef INET6 5948 case AF_INET6: 5949 /* 5950 * Skip check for addresses with embedded interface scope, 5951 * as they would always match anyway. 
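		 *
		 * (Illustrative example: for a link-local source such as
		 * fe80::1 the stack embeds the interface index in the
		 * address itself, so a route lookup keyed on it would
		 * trivially point back at that interface.)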
5952 */ 5953 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 5954 goto out; 5955 dst6 = (struct sockaddr_in6 *)&ss; 5956 dst6->sin6_family = AF_INET6; 5957 dst6->sin6_len = sizeof(*dst6); 5958 dst6->sin6_addr = addr->v6; 5959 if (ip6_multipath) 5960 check_mpath = 1; 5961 break; 5962 #endif /* INET6 */ 5963 } 5964 5965 /* Skip checks for ipsec interfaces */ 5966 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 5967 goto out; 5968 5969 rt = rtalloc(sstosa(&ss), 0, rtableid); 5970 if (rt != NULL) { 5971 /* No interface given, this is a no-route check */ 5972 if (kif == NULL) 5973 goto out; 5974 5975 if (kif->pfik_ifp == NULL) { 5976 ret = 0; 5977 goto out; 5978 } 5979 5980 /* Perform uRPF check if passed input interface */ 5981 ret = 0; 5982 do { 5983 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 5984 ret = 1; 5985 #if NCARP > 0 5986 } else { 5987 struct ifnet *ifp; 5988 5989 ifp = if_get(rt->rt_ifidx); 5990 if (ifp != NULL && ifp->if_type == IFT_CARP && 5991 ifp->if_carpdevidx == 5992 kif->pfik_ifp->if_index) 5993 ret = 1; 5994 if_put(ifp); 5995 #endif /* NCARP */ 5996 } 5997 5998 rt = rtable_iterate(rt); 5999 } while (check_mpath == 1 && rt != NULL && ret == 0); 6000 } else 6001 ret = 0; 6002 out: 6003 rtfree(rt); 6004 return (ret); 6005 } 6006 6007 int 6008 pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 6009 int rtableid) 6010 { 6011 struct sockaddr_storage ss; 6012 struct sockaddr_in *dst; 6013 #ifdef INET6 6014 struct sockaddr_in6 *dst6; 6015 #endif /* INET6 */ 6016 struct rtentry *rt; 6017 int ret = 0; 6018 6019 memset(&ss, 0, sizeof(ss)); 6020 switch (af) { 6021 case AF_INET: 6022 dst = (struct sockaddr_in *)&ss; 6023 dst->sin_family = AF_INET; 6024 dst->sin_len = sizeof(*dst); 6025 dst->sin_addr = addr->v4; 6026 break; 6027 #ifdef INET6 6028 case AF_INET6: 6029 dst6 = (struct sockaddr_in6 *)&ss; 6030 dst6->sin6_family = AF_INET6; 6031 dst6->sin6_len = sizeof(*dst6); 6032 dst6->sin6_addr = addr->v6; 6033 break; 6034 #endif /* INET6 */ 6035 } 6036 6037 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 6038 if (rt != NULL) { 6039 if (rt->rt_labelid == aw->v.rtlabel) 6040 ret = 1; 6041 rtfree(rt); 6042 } 6043 6044 return (ret); 6045 } 6046 6047 /* pf_route() may change pd->m, adjust local copies after calling */ 6048 void 6049 pf_route(struct pf_pdesc *pd, struct pf_state *s) 6050 { 6051 struct mbuf *m0; 6052 struct mbuf_list fml; 6053 struct sockaddr_in *dst, sin; 6054 struct rtentry *rt = NULL; 6055 struct ip *ip; 6056 struct ifnet *ifp = NULL; 6057 int error = 0; 6058 unsigned int rtableid; 6059 6060 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6061 m_freem(pd->m); 6062 pd->m = NULL; 6063 return; 6064 } 6065 6066 if (s->rt == PF_DUPTO) { 6067 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6068 return; 6069 } else { 6070 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir)) 6071 return; 6072 m0 = pd->m; 6073 pd->m = NULL; 6074 } 6075 6076 if (m0->m_len < sizeof(struct ip)) { 6077 DPFPRINTF(LOG_ERR, 6078 "%s: m0->m_len < sizeof(struct ip)", __func__); 6079 goto bad; 6080 } 6081 6082 ip = mtod(m0, struct ip *); 6083 6084 if (pd->dir == PF_IN) { 6085 if (ip->ip_ttl <= IPTTLDEC) { 6086 if (s->rt != PF_DUPTO) { 6087 pf_send_icmp(m0, ICMP_TIMXCEED, 6088 ICMP_TIMXCEED_INTRANS, 0, 6089 pd->af, s->rule.ptr, pd->rdomain); 6090 } 6091 goto bad; 6092 } 6093 ip->ip_ttl -= IPTTLDEC; 6094 } 6095 6096 memset(&sin, 0, sizeof(sin)); 6097 dst = &sin; 6098 dst->sin_family = AF_INET; 6099 dst->sin_len = sizeof(*dst); 6100 dst->sin_addr = s->rt_addr.v4; 6101 rtableid = 
m0->m_pkthdr.ph_rtableid; 6102 6103 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid); 6104 if (!rtisvalid(rt)) { 6105 if (s->rt != PF_DUPTO) { 6106 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, 6107 0, pd->af, s->rule.ptr, pd->rdomain); 6108 } 6109 ipstat_inc(ips_noroute); 6110 goto bad; 6111 } 6112 6113 ifp = if_get(rt->rt_ifidx); 6114 if (ifp == NULL) 6115 goto bad; 6116 6117 /* A locally generated packet may have invalid source address. */ 6118 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 6119 (ifp->if_flags & IFF_LOOPBACK) == 0) 6120 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 6121 6122 if (s->rt != PF_DUPTO && pd->dir == PF_IN) { 6123 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 6124 goto bad; 6125 else if (m0 == NULL) 6126 goto done; 6127 if (m0->m_len < sizeof(struct ip)) { 6128 DPFPRINTF(LOG_ERR, 6129 "%s: m0->m_len < sizeof(struct ip)", __func__); 6130 goto bad; 6131 } 6132 ip = mtod(m0, struct ip *); 6133 } 6134 6135 in_proto_cksum_out(m0, ifp); 6136 6137 if (ntohs(ip->ip_len) <= ifp->if_mtu) { 6138 ip->ip_sum = 0; 6139 if (ifp->if_capabilities & IFCAP_CSUM_IPv4) 6140 m0->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 6141 else { 6142 ipstat_inc(ips_outswcsum); 6143 ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); 6144 } 6145 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6146 goto done; 6147 } 6148 6149 /* 6150 * Too large for interface; fragment if possible. 6151 * Must be able to put at least 8 bytes per fragment. 6152 */ 6153 if (ip->ip_off & htons(IP_DF)) { 6154 ipstat_inc(ips_cantfrag); 6155 if (s->rt != PF_DUPTO) 6156 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 6157 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain); 6158 goto bad; 6159 } 6160 6161 error = ip_fragment(m0, &fml, ifp, ifp->if_mtu); 6162 if (error) 6163 goto done; 6164 6165 while ((m0 = ml_dequeue(&fml)) != NULL) { 6166 error = ifp->if_output(ifp, m0, sintosa(dst), rt); 6167 if (error) 6168 break; 6169 } 6170 if (error) 6171 ml_purge(&fml); 6172 else 6173 ipstat_inc(ips_fragmented); 6174 6175 done: 6176 if_put(ifp); 6177 rtfree(rt); 6178 return; 6179 6180 bad: 6181 m_freem(m0); 6182 goto done; 6183 } 6184 6185 #ifdef INET6 6186 /* pf_route6() may change pd->m, adjust local copies after calling */ 6187 void 6188 pf_route6(struct pf_pdesc *pd, struct pf_state *s) 6189 { 6190 struct mbuf *m0; 6191 struct sockaddr_in6 *dst, sin6; 6192 struct rtentry *rt = NULL; 6193 struct ip6_hdr *ip6; 6194 struct ifnet *ifp = NULL; 6195 struct m_tag *mtag; 6196 unsigned int rtableid; 6197 6198 if (pd->m->m_pkthdr.pf.routed++ > 3) { 6199 m_freem(pd->m); 6200 pd->m = NULL; 6201 return; 6202 } 6203 6204 if (s->rt == PF_DUPTO) { 6205 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 6206 return; 6207 } else { 6208 if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir)) 6209 return; 6210 m0 = pd->m; 6211 pd->m = NULL; 6212 } 6213 6214 if (m0->m_len < sizeof(struct ip6_hdr)) { 6215 DPFPRINTF(LOG_ERR, 6216 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6217 goto bad; 6218 } 6219 ip6 = mtod(m0, struct ip6_hdr *); 6220 6221 if (pd->dir == PF_IN) { 6222 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 6223 if (s->rt != PF_DUPTO) { 6224 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 6225 ICMP6_TIME_EXCEED_TRANSIT, 0, 6226 pd->af, s->rule.ptr, pd->rdomain); 6227 } 6228 goto bad; 6229 } 6230 ip6->ip6_hlim -= IPV6_HLIMDEC; 6231 } 6232 6233 memset(&sin6, 0, sizeof(sin6)); 6234 dst = &sin6; 6235 dst->sin6_family = AF_INET6; 6236 dst->sin6_len = sizeof(*dst); 6237 dst->sin6_addr = 
s->rt_addr.v6; 6238 rtableid = m0->m_pkthdr.ph_rtableid; 6239 6240 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], 6241 rtableid); 6242 if (!rtisvalid(rt)) { 6243 if (s->rt != PF_DUPTO) { 6244 pf_send_icmp(m0, ICMP6_DST_UNREACH, 6245 ICMP6_DST_UNREACH_NOROUTE, 0, 6246 pd->af, s->rule.ptr, pd->rdomain); 6247 } 6248 ip6stat_inc(ip6s_noroute); 6249 goto bad; 6250 } 6251 6252 ifp = if_get(rt->rt_ifidx); 6253 if (ifp == NULL) 6254 goto bad; 6255 6256 /* A locally generated packet may have invalid source address. */ 6257 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 6258 (ifp->if_flags & IFF_LOOPBACK) == 0) 6259 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 6260 6261 if (s->rt != PF_DUPTO && pd->dir == PF_IN) { 6262 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 6263 goto bad; 6264 else if (m0 == NULL) 6265 goto done; 6266 if (m0->m_len < sizeof(struct ip6_hdr)) { 6267 DPFPRINTF(LOG_ERR, 6268 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 6269 goto bad; 6270 } 6271 } 6272 6273 in6_proto_cksum_out(m0, ifp); 6274 6275 /* 6276 * If packet has been reassembled by PF earlier, we have to 6277 * use pf_refragment6() here to turn it back to fragments. 6278 */ 6279 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 6280 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 6281 } else if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { 6282 ifp->if_output(ifp, m0, sin6tosa(dst), rt); 6283 } else { 6284 ip6stat_inc(ip6s_cantfrag); 6285 if (s->rt != PF_DUPTO) 6286 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 6287 ifp->if_mtu, pd->af, s->rule.ptr, pd->rdomain); 6288 goto bad; 6289 } 6290 6291 done: 6292 if_put(ifp); 6293 rtfree(rt); 6294 return; 6295 6296 bad: 6297 m_freem(m0); 6298 goto done; 6299 } 6300 #endif /* INET6 */ 6301 6302 /* 6303 * check TCP checksum and set mbuf flag 6304 * off is the offset where the protocol header starts 6305 * len is the total length of protocol header plus payload 6306 * returns 0 when the checksum is valid, otherwise returns 1. 
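 * An illustrative call, under the (hypothetical) assumption that the
 * caller wants to drop the segment on a bad checksum:
 *
 *	if (pf_check_tcp_cksum(pd->m, pd->off, pd->tot_len - pd->off,
 *	    pd->af)) {
 *		REASON_SET(reason, PFRES_PROTCKSUM);
 *		return (PF_DROP);
 *	}
 *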
6307 * if the _OUT flag is set the checksum isn't done yet, consider these ok 6308 */ 6309 int 6310 pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 6311 { 6312 u_int16_t sum; 6313 6314 if (m->m_pkthdr.csum_flags & 6315 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 6316 return (0); 6317 } 6318 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 6319 off < sizeof(struct ip) || 6320 m->m_pkthdr.len < off + len) { 6321 return (1); 6322 } 6323 6324 /* need to do it in software */ 6325 tcpstat_inc(tcps_inswcsum); 6326 6327 switch (af) { 6328 case AF_INET: 6329 if (m->m_len < sizeof(struct ip)) 6330 return (1); 6331 6332 sum = in4_cksum(m, IPPROTO_TCP, off, len); 6333 break; 6334 #ifdef INET6 6335 case AF_INET6: 6336 if (m->m_len < sizeof(struct ip6_hdr)) 6337 return (1); 6338 6339 sum = in6_cksum(m, IPPROTO_TCP, off, len); 6340 break; 6341 #endif /* INET6 */ 6342 default: 6343 unhandled_af(af); 6344 } 6345 if (sum) { 6346 tcpstat_inc(tcps_rcvbadsum); 6347 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 6348 return (1); 6349 } 6350 6351 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 6352 return (0); 6353 } 6354 6355 struct pf_divert * 6356 pf_find_divert(struct mbuf *m) 6357 { 6358 struct m_tag *mtag; 6359 6360 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 6361 return (NULL); 6362 6363 return ((struct pf_divert *)(mtag + 1)); 6364 } 6365 6366 struct pf_divert * 6367 pf_get_divert(struct mbuf *m) 6368 { 6369 struct m_tag *mtag; 6370 6371 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 6372 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 6373 M_NOWAIT); 6374 if (mtag == NULL) 6375 return (NULL); 6376 memset(mtag + 1, 0, sizeof(struct pf_divert)); 6377 m_tag_prepend(m, mtag); 6378 } 6379 6380 return ((struct pf_divert *)(mtag + 1)); 6381 } 6382 6383 int 6384 pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 6385 { 6386 struct ip6_ext ext; 6387 u_int32_t hlen, end; 6388 int hdr_cnt; 6389 6390 hlen = h->ip_hl << 2; 6391 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 6392 REASON_SET(reason, PFRES_SHORT); 6393 return (PF_DROP); 6394 } 6395 if (hlen != sizeof(struct ip)) 6396 pd->badopts++; 6397 end = pd->off + ntohs(h->ip_len); 6398 pd->off += hlen; 6399 pd->proto = h->ip_p; 6400 /* stop walking over non initial fragments */ 6401 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 6402 return (PF_PASS); 6403 6404 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6405 switch (pd->proto) { 6406 case IPPROTO_AH: 6407 /* fragments may be short */ 6408 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 6409 end < pd->off + sizeof(ext)) 6410 return (PF_PASS); 6411 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6412 NULL, reason, AF_INET)) { 6413 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 6414 return (PF_DROP); 6415 } 6416 pd->off += (ext.ip6e_len + 2) * 4; 6417 pd->proto = ext.ip6e_nxt; 6418 break; 6419 default: 6420 return (PF_PASS); 6421 } 6422 } 6423 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 6424 REASON_SET(reason, PFRES_IPOPTIONS); 6425 return (PF_DROP); 6426 } 6427 6428 #ifdef INET6 6429 int 6430 pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 6431 u_short *reason) 6432 { 6433 struct ip6_opt opt; 6434 struct ip6_opt_jumbo jumbo; 6435 6436 while (off < end) { 6437 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 6438 sizeof(opt.ip6o_type), NULL, reason, AF_INET6)) { 6439 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 6440 return (PF_DROP); 6441 } 6442 if (opt.ip6o_type == 
IP6OPT_PAD1) { 6443 off++; 6444 continue; 6445 } 6446 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 6447 NULL, reason, AF_INET6)) { 6448 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 6449 return (PF_DROP); 6450 } 6451 if (off + sizeof(opt) + opt.ip6o_len > end) { 6452 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 6453 REASON_SET(reason, PFRES_IPOPTIONS); 6454 return (PF_DROP); 6455 } 6456 switch (opt.ip6o_type) { 6457 case IP6OPT_JUMBO: 6458 if (pd->jumbolen != 0) { 6459 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 6460 REASON_SET(reason, PFRES_IPOPTIONS); 6461 return (PF_DROP); 6462 } 6463 if (ntohs(h->ip6_plen) != 0) { 6464 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 6465 REASON_SET(reason, PFRES_IPOPTIONS); 6466 return (PF_DROP); 6467 } 6468 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 6469 NULL, reason, AF_INET6)) { 6470 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 6471 return (PF_DROP); 6472 } 6473 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 6474 sizeof(pd->jumbolen)); 6475 pd->jumbolen = ntohl(pd->jumbolen); 6476 if (pd->jumbolen < IPV6_MAXPACKET) { 6477 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 6478 REASON_SET(reason, PFRES_IPOPTIONS); 6479 return (PF_DROP); 6480 } 6481 break; 6482 default: 6483 break; 6484 } 6485 off += sizeof(opt) + opt.ip6o_len; 6486 } 6487 6488 return (PF_PASS); 6489 } 6490 6491 int 6492 pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 6493 { 6494 struct ip6_frag frag; 6495 struct ip6_ext ext; 6496 struct ip6_rthdr rthdr; 6497 u_int32_t end; 6498 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 6499 6500 pd->off += sizeof(struct ip6_hdr); 6501 end = pd->off + ntohs(h->ip6_plen); 6502 pd->fragoff = pd->extoff = pd->jumbolen = 0; 6503 pd->proto = h->ip6_nxt; 6504 6505 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 6506 switch (pd->proto) { 6507 case IPPROTO_ROUTING: 6508 case IPPROTO_HOPOPTS: 6509 case IPPROTO_DSTOPTS: 6510 pd->badopts++; 6511 break; 6512 } 6513 switch (pd->proto) { 6514 case IPPROTO_FRAGMENT: 6515 if (fraghdr_cnt++) { 6516 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 6517 REASON_SET(reason, PFRES_FRAG); 6518 return (PF_DROP); 6519 } 6520 /* jumbo payload packets cannot be fragmented */ 6521 if (pd->jumbolen != 0) { 6522 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 6523 REASON_SET(reason, PFRES_FRAG); 6524 return (PF_DROP); 6525 } 6526 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 6527 NULL, reason, AF_INET6)) { 6528 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 6529 return (PF_DROP); 6530 } 6531 /* stop walking over non initial fragments */ 6532 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 6533 pd->fragoff = pd->off; 6534 return (PF_PASS); 6535 } 6536 /* RFC6946: reassemble only non atomic fragments */ 6537 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 6538 pd->fragoff = pd->off; 6539 pd->off += sizeof(frag); 6540 pd->proto = frag.ip6f_nxt; 6541 break; 6542 case IPPROTO_ROUTING: 6543 if (rthdr_cnt++) { 6544 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 6545 REASON_SET(reason, PFRES_IPOPTIONS); 6546 return (PF_DROP); 6547 } 6548 /* fragments may be short */ 6549 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 6550 pd->off = pd->fragoff; 6551 pd->proto = IPPROTO_FRAGMENT; 6552 return (PF_PASS); 6553 } 6554 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 6555 NULL, reason, AF_INET6)) { 6556 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 6557 return (PF_DROP); 6558 } 6559 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 6560 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 6561 REASON_SET(reason, 
PFRES_IPOPTIONS); 6562 return (PF_DROP); 6563 } 6564 /* FALLTHROUGH */ 6565 case IPPROTO_HOPOPTS: 6566 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 6567 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 6568 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 6569 REASON_SET(reason, PFRES_IPOPTIONS); 6570 return (PF_DROP); 6571 } 6572 /* FALLTHROUGH */ 6573 case IPPROTO_AH: 6574 case IPPROTO_DSTOPTS: 6575 /* fragments may be short */ 6576 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 6577 pd->off = pd->fragoff; 6578 pd->proto = IPPROTO_FRAGMENT; 6579 return (PF_PASS); 6580 } 6581 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 6582 NULL, reason, AF_INET6)) { 6583 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 6584 return (PF_DROP); 6585 } 6586 /* reassembly needs the ext header before the frag */ 6587 if (pd->fragoff == 0) 6588 pd->extoff = pd->off; 6589 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0) { 6590 if (pf_walk_option6(pd, h, 6591 pd->off + sizeof(ext), 6592 pd->off + (ext.ip6e_len + 1) * 8, reason) 6593 != PF_PASS) 6594 return (PF_DROP); 6595 if (ntohs(h->ip6_plen) == 0 && 6596 pd->jumbolen != 0) { 6597 DPFPRINTF(LOG_NOTICE, 6598 "IPv6 missing jumbo"); 6599 REASON_SET(reason, PFRES_IPOPTIONS); 6600 return (PF_DROP); 6601 } 6602 } 6603 if (pd->proto == IPPROTO_AH) 6604 pd->off += (ext.ip6e_len + 2) * 4; 6605 else 6606 pd->off += (ext.ip6e_len + 1) * 8; 6607 pd->proto = ext.ip6e_nxt; 6608 break; 6609 case IPPROTO_TCP: 6610 case IPPROTO_UDP: 6611 case IPPROTO_ICMPV6: 6612 /* fragments may be short, ignore inner header then */ 6613 if (pd->fragoff != 0 && end < pd->off + 6614 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 6615 pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) : 6616 sizeof(struct icmp6_hdr))) { 6617 pd->off = pd->fragoff; 6618 pd->proto = IPPROTO_FRAGMENT; 6619 } 6620 /* FALLTHROUGH */ 6621 default: 6622 return (PF_PASS); 6623 } 6624 } 6625 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 6626 REASON_SET(reason, PFRES_IPOPTIONS); 6627 return (PF_DROP); 6628 } 6629 #endif /* INET6 */ 6630 6631 int 6632 pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 6633 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 6634 { 6635 memset(pd, 0, sizeof(*pd)); 6636 pd->dir = dir; 6637 pd->kif = kif; /* kif is NULL when called by pflog */ 6638 pd->m = m; 6639 pd->sidx = (dir == PF_IN) ? 0 : 1; 6640 pd->didx = (dir == PF_IN) ? 1 : 0; 6641 pd->af = pd->naf = af; 6642 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 6643 6644 switch (pd->af) { 6645 case AF_INET: { 6646 struct ip *h; 6647 6648 /* Check for illegal packets */ 6649 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 6650 REASON_SET(reason, PFRES_SHORT); 6651 return (PF_DROP); 6652 } 6653 6654 h = mtod(pd->m, struct ip *); 6655 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 6656 REASON_SET(reason, PFRES_SHORT); 6657 return (PF_DROP); 6658 } 6659 6660 if (pf_walk_header(pd, h, reason) != PF_PASS) 6661 return (PF_DROP); 6662 6663 pd->src = (struct pf_addr *)&h->ip_src; 6664 pd->dst = (struct pf_addr *)&h->ip_dst; 6665 pd->tot_len = ntohs(h->ip_len); 6666 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 6667 pd->ttl = h->ip_ttl; 6668 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 
6669 PF_VPROTO_FRAGMENT : pd->proto; 6670 6671 break; 6672 } 6673 #ifdef INET6 6674 case AF_INET6: { 6675 struct ip6_hdr *h; 6676 6677 /* Check for illegal packets */ 6678 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 6679 REASON_SET(reason, PFRES_SHORT); 6680 return (PF_DROP); 6681 } 6682 6683 h = mtod(pd->m, struct ip6_hdr *); 6684 if (pd->m->m_pkthdr.len < 6685 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 6686 REASON_SET(reason, PFRES_SHORT); 6687 return (PF_DROP); 6688 } 6689 6690 if (pf_walk_header6(pd, h, reason) != PF_PASS) 6691 return (PF_DROP); 6692 6693 #if 1 6694 /* 6695 * we do not support jumbogram yet. if we keep going, zero 6696 * ip6_plen will do something bad, so drop the packet for now. 6697 */ 6698 if (pd->jumbolen != 0) { 6699 REASON_SET(reason, PFRES_NORM); 6700 return (PF_DROP); 6701 } 6702 #endif /* 1 */ 6703 6704 pd->src = (struct pf_addr *)&h->ip6_src; 6705 pd->dst = (struct pf_addr *)&h->ip6_dst; 6706 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 6707 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 6708 pd->ttl = h->ip6_hlim; 6709 pd->virtual_proto = (pd->fragoff != 0) ? 6710 PF_VPROTO_FRAGMENT : pd->proto; 6711 6712 break; 6713 } 6714 #endif /* INET6 */ 6715 default: 6716 panic("pf_setup_pdesc called with illegal af %u", pd->af); 6717 6718 } 6719 6720 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 6721 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 6722 6723 switch (pd->virtual_proto) { 6724 case IPPROTO_TCP: { 6725 struct tcphdr *th = &pd->hdr.tcp; 6726 6727 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 6728 NULL, reason, pd->af)) 6729 return (PF_DROP); 6730 pd->hdrlen = sizeof(*th); 6731 if (pd->off + (th->th_off << 2) > pd->tot_len || 6732 (th->th_off << 2) < sizeof(struct tcphdr)) { 6733 REASON_SET(reason, PFRES_SHORT); 6734 return (PF_DROP); 6735 } 6736 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 6737 pd->sport = &th->th_sport; 6738 pd->dport = &th->th_dport; 6739 pd->pcksum = &th->th_sum; 6740 break; 6741 } 6742 case IPPROTO_UDP: { 6743 struct udphdr *uh = &pd->hdr.udp; 6744 6745 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 6746 NULL, reason, pd->af)) 6747 return (PF_DROP); 6748 pd->hdrlen = sizeof(*uh); 6749 if (uh->uh_dport == 0 || 6750 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 6751 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 6752 REASON_SET(reason, PFRES_SHORT); 6753 return (PF_DROP); 6754 } 6755 pd->sport = &uh->uh_sport; 6756 pd->dport = &uh->uh_dport; 6757 pd->pcksum = &uh->uh_sum; 6758 break; 6759 } 6760 case IPPROTO_ICMP: { 6761 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 6762 NULL, reason, pd->af)) 6763 return (PF_DROP); 6764 pd->hdrlen = ICMP_MINLEN; 6765 if (pd->off + pd->hdrlen > pd->tot_len) { 6766 REASON_SET(reason, PFRES_SHORT); 6767 return (PF_DROP); 6768 } 6769 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 6770 break; 6771 } 6772 #ifdef INET6 6773 case IPPROTO_ICMPV6: { 6774 size_t icmp_hlen = sizeof(struct icmp6_hdr); 6775 6776 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6777 NULL, reason, pd->af)) 6778 return (PF_DROP); 6779 /* ICMP headers we look further into to match state */ 6780 switch (pd->hdr.icmp6.icmp6_type) { 6781 case MLD_LISTENER_QUERY: 6782 case MLD_LISTENER_REPORT: 6783 icmp_hlen = sizeof(struct mld_hdr); 6784 break; 6785 case ND_NEIGHBOR_SOLICIT: 6786 case ND_NEIGHBOR_ADVERT: 6787 icmp_hlen = sizeof(struct nd_neighbor_solicit); 6788 /* FALLTHROUGH */ 6789 case ND_ROUTER_SOLICIT: 6790 case ND_ROUTER_ADVERT: 6791 case ND_REDIRECT: 6792 if (pd->ttl != 255) 
{ 6793 REASON_SET(reason, PFRES_NORM); 6794 return (PF_DROP); 6795 } 6796 break; 6797 } 6798 if (icmp_hlen > sizeof(struct icmp6_hdr) && 6799 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 6800 NULL, reason, pd->af)) 6801 return (PF_DROP); 6802 pd->hdrlen = icmp_hlen; 6803 if (pd->off + pd->hdrlen > pd->tot_len) { 6804 REASON_SET(reason, PFRES_SHORT); 6805 return (PF_DROP); 6806 } 6807 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 6808 break; 6809 } 6810 #endif /* INET6 */ 6811 } 6812 6813 if (pd->sport) 6814 pd->osport = pd->nsport = *pd->sport; 6815 if (pd->dport) 6816 pd->odport = pd->ndport = *pd->dport; 6817 6818 return (PF_PASS); 6819 } 6820 6821 void 6822 pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *s, 6823 struct pf_rule *r, struct pf_rule *a) 6824 { 6825 int dirndx; 6826 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 6827 [action != PF_PASS] += pd->tot_len; 6828 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 6829 [action != PF_PASS]++; 6830 6831 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) { 6832 dirndx = (pd->dir == PF_OUT); 6833 r->packets[dirndx]++; 6834 r->bytes[dirndx] += pd->tot_len; 6835 if (a != NULL) { 6836 a->packets[dirndx]++; 6837 a->bytes[dirndx] += pd->tot_len; 6838 } 6839 if (s != NULL) { 6840 struct pf_rule_item *ri; 6841 struct pf_sn_item *sni; 6842 6843 SLIST_FOREACH(sni, &s->src_nodes, next) { 6844 sni->sn->packets[dirndx]++; 6845 sni->sn->bytes[dirndx] += pd->tot_len; 6846 } 6847 dirndx = (pd->dir == s->direction) ? 0 : 1; 6848 s->packets[dirndx]++; 6849 s->bytes[dirndx] += pd->tot_len; 6850 6851 SLIST_FOREACH(ri, &s->match_rules, entry) { 6852 ri->r->packets[dirndx]++; 6853 ri->r->bytes[dirndx] += pd->tot_len; 6854 6855 if (ri->r->src.addr.type == PF_ADDR_TABLE) 6856 pfr_update_stats(ri->r->src.addr.p.tbl, 6857 &s->key[(s->direction == PF_IN)]-> 6858 addr[(s->direction == PF_OUT)], 6859 pd, ri->r->action, ri->r->src.neg); 6860 if (ri->r->dst.addr.type == PF_ADDR_TABLE) 6861 pfr_update_stats(ri->r->dst.addr.p.tbl, 6862 &s->key[(s->direction == PF_IN)]-> 6863 addr[(s->direction == PF_IN)], 6864 pd, ri->r->action, ri->r->dst.neg); 6865 } 6866 } 6867 if (r->src.addr.type == PF_ADDR_TABLE) 6868 pfr_update_stats(r->src.addr.p.tbl, 6869 (s == NULL) ? pd->src : 6870 &s->key[(s->direction == PF_IN)]-> 6871 addr[(s->direction == PF_OUT)], 6872 pd, r->action, r->src.neg); 6873 if (r->dst.addr.type == PF_ADDR_TABLE) 6874 pfr_update_stats(r->dst.addr.p.tbl, 6875 (s == NULL) ? pd->dst : 6876 &s->key[(s->direction == PF_IN)]-> 6877 addr[(s->direction == PF_IN)], 6878 pd, r->action, r->dst.neg); 6879 } 6880 } 6881 6882 int 6883 pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 6884 { 6885 #if NCARP > 0 6886 struct ifnet *ifp0; 6887 #endif 6888 struct pfi_kif *kif; 6889 u_short action, reason = 0; 6890 struct pf_rule *a = NULL, *r = &pf_default_rule; 6891 struct pf_state *s = NULL; 6892 struct pf_ruleset *ruleset = NULL; 6893 struct pf_pdesc pd; 6894 int dir = (fwdir == PF_FWD) ? 
PF_OUT : fwdir; 6895 u_int32_t qid, pqid = 0; 6896 int have_pf_lock = 0; 6897 struct pfsync_deferral *deferral = NULL; 6898 6899 if (!pf_status.running) 6900 return (PF_PASS); 6901 6902 #if NCARP > 0 6903 if (ifp->if_type == IFT_CARP && 6904 (ifp0 = if_get(ifp->if_carpdevidx)) != NULL) { 6905 kif = (struct pfi_kif *)ifp0->if_pf_kif; 6906 if_put(ifp0); 6907 } else 6908 #endif /* NCARP */ 6909 kif = (struct pfi_kif *)ifp->if_pf_kif; 6910 6911 if (kif == NULL) { 6912 DPFPRINTF(LOG_ERR, 6913 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname); 6914 return (PF_DROP); 6915 } 6916 if (kif->pfik_flags & PFI_IFLAG_SKIP) 6917 return (PF_PASS); 6918 6919 #ifdef DIAGNOSTIC 6920 if (((*m0)->m_flags & M_PKTHDR) == 0) 6921 panic("non-M_PKTHDR is passed to pf_test"); 6922 #endif /* DIAGNOSTIC */ 6923 6924 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED) 6925 return (PF_PASS); 6926 6927 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) { 6928 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET; 6929 return (PF_PASS); 6930 } 6931 6932 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) { 6933 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED; 6934 return (PF_PASS); 6935 } 6936 6937 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason); 6938 if (action != PF_PASS) { 6939 #if NPFLOG > 0 6940 pd.pflog |= PF_LOG_FORCE; 6941 #endif /* NPFLOG > 0 */ 6942 goto done; 6943 } 6944 6945 /* packet normalization and reassembly */ 6946 switch (pd.af) { 6947 case AF_INET: 6948 action = pf_normalize_ip(&pd, &reason); 6949 break; 6950 #ifdef INET6 6951 case AF_INET6: 6952 action = pf_normalize_ip6(&pd, &reason); 6953 break; 6954 #endif /* INET6 */ 6955 } 6956 *m0 = pd.m; 6957 /* if packet sits in reassembly queue, return without error */ 6958 if (pd.m == NULL) 6959 return PF_PASS; 6960 6961 if (action != PF_PASS) { 6962 #if NPFLOG > 0 6963 pd.pflog |= PF_LOG_FORCE; 6964 #endif /* NPFLOG > 0 */ 6965 goto done; 6966 } 6967 6968 /* if packet has been reassembled, update packet description */ 6969 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) { 6970 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason); 6971 if (action != PF_PASS) { 6972 #if NPFLOG > 0 6973 pd.pflog |= PF_LOG_FORCE; 6974 #endif /* NPFLOG > 0 */ 6975 goto done; 6976 } 6977 } 6978 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; 6979 6980 /* 6981 * Avoid pcb-lookups from the forwarding path. They should never 6982 * match and would cause MP locking problems. 
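 * (The sentinel values set below mark the lookup as already done,
 * so rules matching on user or group see no socket and, in general,
 * do not match forwarded packets.)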
6983 */ 6984 if (fwdir == PF_FWD) { 6985 pd.lookup.done = -1; 6986 pd.lookup.uid = -1; 6987 pd.lookup.gid = -1; 6988 pd.lookup.pid = NO_PID; 6989 } 6990 6991 switch (pd.virtual_proto) { 6992 6993 case PF_VPROTO_FRAGMENT: { 6994 /* 6995 * handle fragments that aren't reassembled by 6996 * normalization 6997 */ 6998 PF_LOCK(); 6999 have_pf_lock = 1; 7000 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, &reason, 7001 &deferral); 7002 s = pf_state_ref(s); 7003 if (action != PF_PASS) 7004 REASON_SET(&reason, PFRES_FRAG); 7005 break; 7006 } 7007 7008 case IPPROTO_ICMP: { 7009 if (pd.af != AF_INET) { 7010 action = PF_DROP; 7011 REASON_SET(&reason, PFRES_NORM); 7012 DPFPRINTF(LOG_NOTICE, 7013 "dropping IPv6 packet with ICMPv4 payload"); 7014 break; 7015 } 7016 PF_STATE_ENTER_READ(); 7017 action = pf_test_state_icmp(&pd, &s, &reason); 7018 s = pf_state_ref(s); 7019 PF_STATE_EXIT_READ(); 7020 if (action == PF_PASS || action == PF_AFRT) { 7021 #if NPFSYNC > 0 7022 pfsync_update_state(s); 7023 #endif /* NPFSYNC > 0 */ 7024 r = s->rule.ptr; 7025 a = s->anchor.ptr; 7026 #if NPFLOG > 0 7027 pd.pflog |= s->log; 7028 #endif /* NPFLOG > 0 */ 7029 } else if (s == NULL) { 7030 PF_LOCK(); 7031 have_pf_lock = 1; 7032 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7033 &reason, &deferral); 7034 s = pf_state_ref(s); 7035 } 7036 break; 7037 } 7038 7039 #ifdef INET6 7040 case IPPROTO_ICMPV6: { 7041 if (pd.af != AF_INET6) { 7042 action = PF_DROP; 7043 REASON_SET(&reason, PFRES_NORM); 7044 DPFPRINTF(LOG_NOTICE, 7045 "dropping IPv4 packet with ICMPv6 payload"); 7046 break; 7047 } 7048 PF_STATE_ENTER_READ(); 7049 action = pf_test_state_icmp(&pd, &s, &reason); 7050 s = pf_state_ref(s); 7051 PF_STATE_EXIT_READ(); 7052 if (action == PF_PASS || action == PF_AFRT) { 7053 #if NPFSYNC > 0 7054 pfsync_update_state(s); 7055 #endif /* NPFSYNC > 0 */ 7056 r = s->rule.ptr; 7057 a = s->anchor.ptr; 7058 #if NPFLOG > 0 7059 pd.pflog |= s->log; 7060 #endif /* NPFLOG > 0 */ 7061 } else if (s == NULL) { 7062 PF_LOCK(); 7063 have_pf_lock = 1; 7064 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7065 &reason, &deferral); 7066 s = pf_state_ref(s); 7067 } 7068 break; 7069 } 7070 #endif /* INET6 */ 7071 7072 default: 7073 if (pd.virtual_proto == IPPROTO_TCP) { 7074 if (pd.dir == PF_IN && (pd.hdr.tcp.th_flags & 7075 (TH_SYN|TH_ACK)) == TH_SYN && 7076 pf_synflood_check(&pd)) { 7077 PF_LOCK(); 7078 have_pf_lock = 1; 7079 pf_syncookie_send(&pd); 7080 action = PF_DROP; 7081 break; 7082 } 7083 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 7084 pqid = 1; 7085 action = pf_normalize_tcp(&pd); 7086 if (action == PF_DROP) 7087 break; 7088 } 7089 PF_STATE_ENTER_READ(); 7090 action = pf_test_state(&pd, &s, &reason, 0); 7091 s = pf_state_ref(s); 7092 PF_STATE_EXIT_READ(); 7093 if (s == NULL && action != PF_PASS && action != PF_AFRT && 7094 pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 7095 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 7096 pf_syncookie_validate(&pd)) { 7097 struct mbuf *msyn; 7098 msyn = pf_syncookie_recreate_syn(&pd); 7099 if (msyn) { 7100 action = pf_test(af, fwdir, ifp, &msyn); 7101 m_freem(msyn); 7102 if (action == PF_PASS || action == PF_AFRT) { 7103 PF_STATE_ENTER_READ(); 7104 pf_test_state(&pd, &s, &reason, 1); 7105 s = pf_state_ref(s); 7106 PF_STATE_EXIT_READ(); 7107 if (s == NULL) 7108 return (PF_DROP); 7109 s->src.seqhi = 7110 ntohl(pd.hdr.tcp.th_ack) - 1; 7111 s->src.seqlo = 7112 ntohl(pd.hdr.tcp.th_seq) - 1; 7113 pf_set_protostate(s, PF_PEER_SRC, 7114 PF_TCPS_PROXY_DST); 7115 PF_LOCK(); 7116 
have_pf_lock = 1; 7117 action = pf_synproxy(&pd, &s, &reason); 7118 if (action != PF_PASS) { 7119 PF_UNLOCK(); 7120 pf_state_unref(s); 7121 return (action); 7122 } 7123 } 7124 } else 7125 action = PF_DROP; 7126 } 7127 7128 if (action == PF_PASS || action == PF_AFRT) { 7129 #if NPFSYNC > 0 7130 pfsync_update_state(s); 7131 #endif /* NPFSYNC > 0 */ 7132 r = s->rule.ptr; 7133 a = s->anchor.ptr; 7134 #if NPFLOG > 0 7135 pd.pflog |= s->log; 7136 #endif /* NPFLOG > 0 */ 7137 } else if (s == NULL) { 7138 PF_LOCK(); 7139 have_pf_lock = 1; 7140 action = pf_test_rule(&pd, &r, &s, &a, &ruleset, 7141 &reason, &deferral); 7142 s = pf_state_ref(s); 7143 } 7144 7145 if (pd.virtual_proto == IPPROTO_TCP) { 7146 if (s) { 7147 if (s->max_mss) 7148 pf_normalize_mss(&pd, s->max_mss); 7149 } else if (r->max_mss) 7150 pf_normalize_mss(&pd, r->max_mss); 7151 } 7152 7153 break; 7154 } 7155 7156 if (have_pf_lock != 0) 7157 PF_UNLOCK(); 7158 7159 /* 7160 * At the moment, we rely on NET_LOCK() to prevent removal of items 7161 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have 7162 * to be refcounted when NET_LOCK() is gone. 7163 */ 7164 7165 done: 7166 if (action != PF_DROP) { 7167 if (s) { 7168 /* The non-state case is handled in pf_test_rule() */ 7169 if (action == PF_PASS && pd.badopts && 7170 !(s->state_flags & PFSTATE_ALLOWOPTS)) { 7171 action = PF_DROP; 7172 REASON_SET(&reason, PFRES_IPOPTIONS); 7173 #if NPFLOG > 0 7174 pd.pflog |= PF_LOG_FORCE; 7175 #endif /* NPFLOG > 0 */ 7176 DPFPRINTF(LOG_NOTICE, "dropping packet with " 7177 "ip/ipv6 options in pf_test()"); 7178 } 7179 7180 pf_scrub(pd.m, s->state_flags, pd.af, s->min_ttl, 7181 s->set_tos); 7182 pf_tag_packet(pd.m, s->tag, s->rtableid[pd.didx]); 7183 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7184 qid = s->pqid; 7185 if (s->state_flags & PFSTATE_SETPRIO) 7186 pd.m->m_pkthdr.pf.prio = s->set_prio[1]; 7187 } else { 7188 qid = s->qid; 7189 if (s->state_flags & PFSTATE_SETPRIO) 7190 pd.m->m_pkthdr.pf.prio = s->set_prio[0]; 7191 } 7192 pd.m->m_pkthdr.pf.delay = s->delay; 7193 } else { 7194 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl, 7195 r->set_tos); 7196 if (pqid || (pd.tos & IPTOS_LOWDELAY)) { 7197 qid = r->pqid; 7198 if (r->scrub_flags & PFSTATE_SETPRIO) 7199 pd.m->m_pkthdr.pf.prio = r->set_prio[1]; 7200 } else { 7201 qid = r->qid; 7202 if (r->scrub_flags & PFSTATE_SETPRIO) 7203 pd.m->m_pkthdr.pf.prio = r->set_prio[0]; 7204 } 7205 pd.m->m_pkthdr.pf.delay = r->delay; 7206 } 7207 } 7208 7209 if (action == PF_PASS && qid) 7210 pd.m->m_pkthdr.pf.qid = qid; 7211 if (pd.dir == PF_IN && s && s->key[PF_SK_STACK]) 7212 pf_mbuf_link_state_key(pd.m, s->key[PF_SK_STACK]); 7213 if (pd.dir == PF_OUT && 7214 pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk && 7215 s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) 7216 pf_state_key_link_inpcb(s->key[PF_SK_STACK], 7217 pd.m->m_pkthdr.pf.inp); 7218 7219 if (s != NULL && !ISSET(pd.m->m_pkthdr.csum_flags, M_FLOWID)) { 7220 pd.m->m_pkthdr.ph_flowid = bemtoh64(&s->id); 7221 SET(pd.m->m_pkthdr.csum_flags, M_FLOWID); 7222 } 7223 7224 /* 7225 * connections redirected to loopback should not match sockets 7226 * bound specifically to loopback due to security implications, 7227 * see in_pcblookup_listen(). 
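 * (The PF_TAG_TRANSLATE_LOCALHOST flag set below is the hint
 * in_pcblookup_listen() uses to detect this case.)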
7228 */ 7229 if (pd.destchg) 7230 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >> 7231 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) || 7232 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))) 7233 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; 7234 /* We need to redo the route lookup on outgoing routes. */ 7235 if (pd.destchg && pd.dir == PF_OUT) 7236 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE; 7237 7238 if (pd.dir == PF_IN && action == PF_PASS && 7239 (r->divert.type == PF_DIVERT_TO || 7240 r->divert.type == PF_DIVERT_REPLY)) { 7241 struct pf_divert *divert; 7242 7243 if ((divert = pf_get_divert(pd.m))) { 7244 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; 7245 divert->addr = r->divert.addr; 7246 divert->port = r->divert.port; 7247 divert->rdomain = pd.rdomain; 7248 divert->type = r->divert.type; 7249 } 7250 } 7251 7252 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET) 7253 action = PF_DIVERT; 7254 7255 #if NPFLOG > 0 7256 if (pd.pflog) { 7257 struct pf_rule_item *ri; 7258 7259 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL) 7260 pflog_packet(&pd, reason, r, a, ruleset, NULL); 7261 if (s) { 7262 SLIST_FOREACH(ri, &s->match_rules, entry) 7263 if (ri->r->log & PF_LOG_ALL) 7264 pflog_packet(&pd, reason, ri->r, a, 7265 ruleset, NULL); 7266 } 7267 } 7268 #endif /* NPFLOG > 0 */ 7269 7270 pf_counters_inc(action, &pd, s, r, a); 7271 7272 switch (action) { 7273 case PF_SYNPROXY_DROP: 7274 m_freem(pd.m); 7275 /* FALLTHROUGH */ 7276 case PF_DEFER: 7277 #if NPFSYNC > 0 7278 /* 7279 * We no longer hold PF_LOCK() here, so we can dispatch 7280 * deferral if we are asked to do so. 7281 */ 7282 if (deferral != NULL) 7283 pfsync_undefer(deferral, 0); 7284 #endif /* NPFSYNC > 0 */ 7285 pd.m = NULL; 7286 action = PF_PASS; 7287 break; 7288 case PF_DIVERT: 7289 switch (pd.af) { 7290 case AF_INET: 7291 if (!divert_packet(pd.m, pd.dir, r->divert.port)) 7292 pd.m = NULL; 7293 break; 7294 #ifdef INET6 7295 case AF_INET6: 7296 if (!divert6_packet(pd.m, pd.dir, r->divert.port)) 7297 pd.m = NULL; 7298 break; 7299 #endif /* INET6 */ 7300 } 7301 action = PF_PASS; 7302 break; 7303 #ifdef INET6 7304 case PF_AFRT: 7305 if (pf_translate_af(&pd)) { 7306 action = PF_DROP; 7307 break; 7308 } 7309 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 7310 switch (pd.naf) { 7311 case AF_INET: 7312 if (pd.dir == PF_IN) { 7313 if (ipforwarding == 0) { 7314 ipstat_inc(ips_cantforward); 7315 action = PF_DROP; 7316 break; 7317 } 7318 ip_forward(pd.m, ifp, NULL, 1); 7319 } else 7320 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0); 7321 break; 7322 case AF_INET6: 7323 if (pd.dir == PF_IN) { 7324 if (ip6_forwarding == 0) { 7325 ip6stat_inc(ip6s_cantforward); 7326 action = PF_DROP; 7327 break; 7328 } 7329 ip6_forward(pd.m, NULL, 1); 7330 } else 7331 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL); 7332 break; 7333 } 7334 if (action != PF_DROP) { 7335 pd.m = NULL; 7336 action = PF_PASS; 7337 } 7338 break; 7339 #endif /* INET6 */ 7340 case PF_DROP: 7341 m_freem(pd.m); 7342 pd.m = NULL; 7343 break; 7344 default: 7345 if (s && s->rt) { 7346 switch (pd.af) { 7347 case AF_INET: 7348 pf_route(&pd, s); 7349 break; 7350 #ifdef INET6 7351 case AF_INET6: 7352 pf_route6(&pd, s); 7353 break; 7354 #endif /* INET6 */ 7355 } 7356 } 7357 break; 7358 } 7359 7360 #ifdef INET6 7361 /* if reassembled packet passed, create new fragments */ 7362 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && 7363 pd.af == AF_INET6) { 7364 struct m_tag *mtag; 7365 7366 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) 7367 action 
= pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); 7368 } 7369 #endif /* INET6 */ 7370 if (s && action != PF_DROP) { 7371 if (!s->if_index_in && dir == PF_IN) 7372 s->if_index_in = ifp->if_index; 7373 else if (!s->if_index_out && dir == PF_OUT) 7374 s->if_index_out = ifp->if_index; 7375 } 7376 7377 *m0 = pd.m; 7378 7379 pf_state_unref(s); 7380 7381 return (action); 7382 } 7383 7384 int 7385 pf_ouraddr(struct mbuf *m) 7386 { 7387 struct pf_state_key *sk; 7388 7389 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) 7390 return (1); 7391 7392 sk = m->m_pkthdr.pf.statekey; 7393 if (sk != NULL) { 7394 if (sk->inp != NULL) 7395 return (1); 7396 } 7397 7398 return (-1); 7399 } 7400 7401 /* 7402 * must be called whenever any addressing information such as 7403 * address, port, protocol has changed 7404 */ 7405 void 7406 pf_pkt_addr_changed(struct mbuf *m) 7407 { 7408 pf_mbuf_unlink_state_key(m); 7409 pf_mbuf_unlink_inpcb(m); 7410 } 7411 7412 struct inpcb * 7413 pf_inp_lookup(struct mbuf *m) 7414 { 7415 struct inpcb *inp = NULL; 7416 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7417 7418 if (!pf_state_key_isvalid(sk)) 7419 pf_mbuf_unlink_state_key(m); 7420 else 7421 inp = m->m_pkthdr.pf.statekey->inp; 7422 7423 if (inp && inp->inp_pf_sk) 7424 KASSERT(m->m_pkthdr.pf.statekey == inp->inp_pf_sk); 7425 7426 return (inp); 7427 } 7428 7429 void 7430 pf_inp_link(struct mbuf *m, struct inpcb *inp) 7431 { 7432 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7433 7434 if (!pf_state_key_isvalid(sk)) { 7435 pf_mbuf_unlink_state_key(m); 7436 return; 7437 } 7438 7439 /* 7440 * we don't need to grab PF-lock here. At worst case we link inp to 7441 * state, which might be just being marked as deleted by another 7442 * thread. 7443 */ 7444 if (inp && !sk->inp && !inp->inp_pf_sk) 7445 pf_state_key_link_inpcb(sk, inp); 7446 7447 /* The statekey has finished finding the inp, it is no longer needed. */ 7448 pf_mbuf_unlink_state_key(m); 7449 } 7450 7451 void 7452 pf_inp_unlink(struct inpcb *inp) 7453 { 7454 pf_inpcb_unlink_state_key(inp); 7455 } 7456 7457 void 7458 pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev) 7459 { 7460 struct pf_state_key *old_reverse; 7461 7462 old_reverse = atomic_cas_ptr(&sk->reverse, NULL, skrev); 7463 if (old_reverse != NULL) 7464 KASSERT(old_reverse == skrev); 7465 else { 7466 pf_state_key_ref(skrev); 7467 7468 /* 7469 * NOTE: if sk == skrev, then KASSERT() below holds true, we 7470 * still want to grab a reference in such case, because 7471 * pf_state_key_unlink_reverse() does not check whether keys 7472 * are identical or not. 
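 * (In that case the atomic_cas_ptr() above has linked the key to
 * itself, the atomic_cas_ptr() below fails but satisfies the
 * KASSERT(), and two references end up being held.)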
7473 */ 7474 old_reverse = atomic_cas_ptr(&skrev->reverse, NULL, sk); 7475 if (old_reverse != NULL) 7476 KASSERT(old_reverse == sk); 7477 7478 pf_state_key_ref(sk); 7479 } 7480 } 7481 7482 #if NPFLOG > 0 7483 void 7484 pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am, 7485 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules) 7486 { 7487 struct pf_rule_item *ri; 7488 7489 /* if this is the log(matches) rule, packet has been logged already */ 7490 if (rm->log & PF_LOG_MATCHES) 7491 return; 7492 7493 SLIST_FOREACH(ri, matchrules, entry) 7494 if (ri->r->log & PF_LOG_MATCHES) 7495 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r); 7496 } 7497 #endif /* NPFLOG > 0 */ 7498 7499 struct pf_state_key * 7500 pf_state_key_ref(struct pf_state_key *sk) 7501 { 7502 if (sk != NULL) 7503 PF_REF_TAKE(sk->refcnt); 7504 7505 return (sk); 7506 } 7507 7508 void 7509 pf_state_key_unref(struct pf_state_key *sk) 7510 { 7511 if (PF_REF_RELE(sk->refcnt)) { 7512 /* state key must be removed from tree */ 7513 KASSERT(!pf_state_key_isvalid(sk)); 7514 /* state key must be unlinked from reverse key */ 7515 KASSERT(sk->reverse == NULL); 7516 /* state key must be unlinked from socket */ 7517 KASSERT(sk->inp == NULL); 7518 pool_put(&pf_state_key_pl, sk); 7519 } 7520 } 7521 7522 int 7523 pf_state_key_isvalid(struct pf_state_key *sk) 7524 { 7525 return ((sk != NULL) && (sk->removed == 0)); 7526 } 7527 7528 void 7529 pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk) 7530 { 7531 KASSERT(m->m_pkthdr.pf.statekey == NULL); 7532 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk); 7533 } 7534 7535 void 7536 pf_mbuf_unlink_state_key(struct mbuf *m) 7537 { 7538 struct pf_state_key *sk = m->m_pkthdr.pf.statekey; 7539 7540 if (sk != NULL) { 7541 m->m_pkthdr.pf.statekey = NULL; 7542 pf_state_key_unref(sk); 7543 } 7544 } 7545 7546 void 7547 pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp) 7548 { 7549 KASSERT(m->m_pkthdr.pf.inp == NULL); 7550 m->m_pkthdr.pf.inp = in_pcbref(inp); 7551 } 7552 7553 void 7554 pf_mbuf_unlink_inpcb(struct mbuf *m) 7555 { 7556 struct inpcb *inp = m->m_pkthdr.pf.inp; 7557 7558 if (inp != NULL) { 7559 m->m_pkthdr.pf.inp = NULL; 7560 in_pcbunref(inp); 7561 } 7562 } 7563 7564 void 7565 pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp) 7566 { 7567 KASSERT(sk->inp == NULL); 7568 sk->inp = in_pcbref(inp); 7569 KASSERT(inp->inp_pf_sk == NULL); 7570 inp->inp_pf_sk = pf_state_key_ref(sk); 7571 } 7572 7573 void 7574 pf_inpcb_unlink_state_key(struct inpcb *inp) 7575 { 7576 struct pf_state_key *sk = inp->inp_pf_sk; 7577 7578 if (sk != NULL) { 7579 KASSERT(sk->inp == inp); 7580 sk->inp = NULL; 7581 inp->inp_pf_sk = NULL; 7582 pf_state_key_unref(sk); 7583 in_pcbunref(inp); 7584 } 7585 } 7586 7587 void 7588 pf_state_key_unlink_inpcb(struct pf_state_key *sk) 7589 { 7590 struct inpcb *inp = sk->inp; 7591 7592 if (inp != NULL) { 7593 KASSERT(inp->inp_pf_sk == sk); 7594 sk->inp = NULL; 7595 inp->inp_pf_sk = NULL; 7596 pf_state_key_unref(sk); 7597 in_pcbunref(inp); 7598 } 7599 } 7600 7601 void 7602 pf_state_key_unlink_reverse(struct pf_state_key *sk) 7603 { 7604 struct pf_state_key *skrev = sk->reverse; 7605 7606 /* Note that sk and skrev may be equal, then we unref twice. 
*/ 7607 if (skrev != NULL) { 7608 KASSERT(skrev->reverse == sk); 7609 sk->reverse = NULL; 7610 skrev->reverse = NULL; 7611 pf_state_key_unref(skrev); 7612 pf_state_key_unref(sk); 7613 } 7614 } 7615 7616 struct pf_state * 7617 pf_state_ref(struct pf_state *s) 7618 { 7619 if (s != NULL) 7620 PF_REF_TAKE(s->refcnt); 7621 return (s); 7622 } 7623 7624 void 7625 pf_state_unref(struct pf_state *s) 7626 { 7627 if ((s != NULL) && PF_REF_RELE(s->refcnt)) { 7628 /* never inserted or removed */ 7629 #if NPFSYNC > 0 7630 KASSERT((TAILQ_NEXT(s, sync_list) == NULL) || 7631 ((TAILQ_NEXT(s, sync_list) == _Q_INVALID) && 7632 (s->sync_state == PFSYNC_S_NONE))); 7633 #endif /* NPFSYNC */ 7634 KASSERT((TAILQ_NEXT(s, entry_list) == NULL) || 7635 (TAILQ_NEXT(s, entry_list) == _Q_INVALID)); 7636 KASSERT((s->key[PF_SK_WIRE] == NULL) && 7637 (s->key[PF_SK_STACK] == NULL)); 7638 7639 pool_put(&pf_state_pl, s); 7640 } 7641 } 7642 7643 int 7644 pf_delay_pkt(struct mbuf *m, u_int ifidx) 7645 { 7646 struct pf_pktdelay *pdy; 7647 7648 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) { 7649 m_freem(m); 7650 return (ENOBUFS); 7651 } 7652 pdy->ifidx = ifidx; 7653 pdy->m = m; 7654 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy); 7655 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay); 7656 m->m_pkthdr.pf.delay = 0; 7657 return (0); 7658 } 7659 7660 void 7661 pf_pktenqueue_delayed(void *arg) 7662 { 7663 struct pf_pktdelay *pdy = arg; 7664 struct ifnet *ifp; 7665 7666 ifp = if_get(pdy->ifidx); 7667 if (ifp != NULL) { 7668 if_enqueue(ifp, pdy->m); 7669 if_put(ifp); 7670 } else 7671 m_freem(pdy->m); 7672 7673 pool_put(&pf_pktdelay_pl, pdy); 7674 } 7675
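/*
 * Illustrative sketch (not compiled): how the delay machinery above is
 * typically driven.  The output path checks the delay value that
 * pf_test() copied into the packet header and, when it is set, parks
 * the mbuf on a timeout via pf_delay_pkt() instead of transmitting it;
 * pf_pktenqueue_delayed() finishes the transmission when the timeout
 * fires.  The wrapper function here is hypothetical.
 */
#if 0
int
example_output(struct ifnet *ifp, struct mbuf *m)
{
	/* pf_delay_pkt() consumes the mbuf on both success and error. */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
	return (if_enqueue(ifp, m));
}
#endif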