1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 */ 27 28 /* 29 * Implement IP packet firewall (new version) 30 */ 31 32 #include "opt_ipfw.h" 33 #include "opt_inet.h" 34 #ifndef INET 35 #error IPFIREWALL requires INET. 36 #endif /* INET */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/in_cksum.h> 50 #include <sys/limits.h> 51 #include <sys/lock.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <net/pfil.h> 57 #include <net/dummynet/ip_dummynet.h> 58 59 #include <sys/thread2.h> 60 #include <net/netmsg2.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/in_var.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/ip_icmp.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/tcp_timer.h> 72 #include <netinet/tcp_var.h> 73 #include <netinet/tcpip.h> 74 #include <netinet/udp.h> 75 #include <netinet/udp_var.h> 76 #include <netinet/ip_divert.h> 77 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 78 79 #include <net/ipfw/ip_fw2.h> 80 81 #ifdef IPFIREWALL_DEBUG 82 #define DPRINTF(fmt, ...) \ 83 do { \ 84 if (fw_debug > 0) \ 85 kprintf(fmt, __VA_ARGS__); \ 86 } while (0) 87 #else 88 #define DPRINTF(fmt, ...) ((void)0) 89 #endif 90 91 /* 92 * Description of per-CPU rule duplication: 93 * 94 * Module loading/unloading and all ioctl operations are serialized 95 * by netisr0, so we don't have any ordering or locking problems. 96 * 97 * The following graph shows how operations on the per-CPU rule 98 * lists are performed [2 CPU case]: 99 * 100 * CPU0 CPU1 101 * 102 * netisr0 <------------------------------------+ 103 * domsg | 104 * : | 105 * :(delete/add...) | 106 * : | 107 * : netmsg | netmsg 108 * forwardmsg---------->netisr1 | 109 * : | 110 * :(delete/add...) 
| 111 * : | 112 * : | 113 * replymsg--------------+ 114 * 115 * 116 * 117 * Rule structure [2 CPU case] 118 * 119 * CPU0 CPU1 120 * 121 * layer3_chain layer3_chain 122 * | | 123 * V V 124 * +-------+ sibling +-------+ sibling 125 * | rule1 |--------->| rule1 |--------->NULL 126 * +-------+ +-------+ 127 * | | 128 * |next |next 129 * V V 130 * +-------+ sibling +-------+ sibling 131 * | rule2 |--------->| rule2 |--------->NULL 132 * +-------+ +-------+ 133 * 134 * ip_fw.sibling: 135 * 1) Ease statistics calculation during IP_FW_GET. We only need to 136 * iterate layer3_chain in netisr0; the current rule's duplicates 137 * on the other CPUs can safely be accessed read-only through 138 * ip_fw.sibling. 139 * 2) Accelerate rule insertion and deletion, e.g. rule insertion: 140 * a) In netisr0 rule3 is determined to be inserted between rule1 141 * and rule2. To make this decision we need to iterate the 142 * layer3_chain in netisr0. The netmsg, which is used to insert 143 * the rule, will contain rule1 in netisr0 as prev_rule and rule2 144 * in netisr0 as next_rule. 145 * b) After the insertion in netisr0 is done, we will move on to 146 * netisr1. But instead of recomputing rule3's position in 147 * netisr1 by iterating the layer3_chain there, we set the 148 * netmsg's prev_rule to rule1->sibling and next_rule to 149 * rule2->sibling before the netmsg is forwarded to netisr1 from 150 * netisr0. 151 */ 152 153 /* 154 * Description of states and tracks. 155 * 156 * Both states and tracks are stored in per-cpu RB trees instead of 157 * per-cpu hash tables to avoid the worst case hash degeneration. 158 * 159 * The lifetimes of states and tracks are regulated by dyn_*_lifetime, 160 * measured in seconds and depending on the flags. 161 * 162 * When a packet is received, its address fields are first masked with 163 * the mask defined for the rule, then matched against the entries in 164 * the per-cpu state RB tree. States are generated by the 'keep-state' 165 * and 'limit' options. 166 * 167 * The max number of states is ipfw_state_max. When we reach the 168 * maximum number of states we stop creating new ones, to avoid 169 * consuming too much memory, but also too much time when searching 170 * on each packet. 171 * 172 * Each state holds a pointer to the parent ipfw rule of the current 173 * CPU so we know what action to perform. States are removed when the 174 * parent rule is deleted. XXX we should make them survive. 175 * 176 * There are some limitations with states -- we do not obey the 177 * 'randomized match', and we do not do multiple passes through the 178 * firewall. XXX check the latter!!! 179 * 180 * States grow independently on each CPU, e.g. 2 CPU case: 181 * 182 * CPU0 CPU1 183 * ................... ................... 184 * : state RB tree : : state RB tree : 185 * : : : : 186 * : state1 state2 : : state3 : 187 * : | | : : | : 188 * :.....|....|......: :........|........: 189 * | | | 190 * | | |st_rule 191 * | | | 192 * V V V 193 * +-------+ +-------+ 194 * | rule1 | | rule1 | 195 * +-------+ +-------+ 196 * 197 * Tracks are used to enforce limits on the number of sessions. Tracks 198 * are generated by the 'limit' option. 199 * 200 * The max number of tracks is ipfw_track_max. When we reach the 201 * maximum number of tracks we stop creating new ones, to avoid 202 * consuming too much memory. 203 * 204 * Tracks are organized into two layers: the track counter RB tree is 205 * shared between CPUs, while the track RB tree is per-cpu. 
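 * Each per-cpu track points at the shared counter of its track
 * counter entry through t_count, so the 'limit' is enforced globally
 * while track lookups stay per-cpu (see the diagram below).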
States generated by 206 * 'limit' option are linked to the track in addition to the per-cpu 207 * state RB tree; mainly to ease expiration. e.g. 2 CPU case: 208 * 209 * .............................. 210 * : track counter RB tree : 211 * : : 212 * : +-----------+ : 213 * : | trkcnt1 | : 214 * : | | : 215 * : +--->counter<----+ : 216 * : | | | | : 217 * : | +-----------+ | : 218 * :......|................|....: 219 * | | 220 * CPU0 | | CPU1 221 * ................. |t_count | ................. 222 * : track RB tree : | | : track RB tree : 223 * : : | | : : 224 * : +-->track1-------+ +--------track2 : 225 * : | A : : : 226 * : | | : : : 227 * :.|.....|.......: :...............: 228 * | +----------------+ 229 * | .................... | 230 * | : state RB tree : |st_track 231 * | : : | 232 * +---state1 state2---+ 233 * : | | : 234 * :.....|.......|....: 235 * | | 236 * | |st_rule 237 * V V 238 * +----------+ 239 * | rule1 | 240 * +----------+ 241 */ 242 243 #define IPFW_AUTOINC_STEP_MIN 1 244 #define IPFW_AUTOINC_STEP_MAX 1000 245 #define IPFW_AUTOINC_STEP_DEF 100 246 247 #define IPFW_TABLE_MAX_DEF 64 248 249 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ 250 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ 251 252 #define MATCH_REVERSE 0 253 #define MATCH_FORWARD 1 254 #define MATCH_NONE 2 255 #define MATCH_UNKNOWN 3 256 257 #define TIME_LEQ(a, b) ((a) - (b) <= 0) 258 259 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST) 260 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \ 261 (IPFW_STATE_TCPFLAGS << 8)) 262 263 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 264 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 265 #define BOTH_RST (TH_RST | (TH_RST << 8)) 266 /* TH_ACK here means FIN was ACKed. */ 267 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8)) 268 269 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \ 270 (((s)->st_state & BOTH_RST) || \ 271 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK)) 272 273 #define O_ANCHOR O_NOP 274 275 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT) 276 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \ 277 ((struct ipfw_xlat *)(s))->xlat_invalid) 278 279 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1 280 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2 281 282 #define IPFW_XLATE_INSERT 0x0001 283 #define IPFW_XLATE_FORWARD 0x0002 284 #define IPFW_XLATE_OUTPUT 0x0004 285 286 struct netmsg_ipfw { 287 struct netmsg_base base; 288 const struct ipfw_ioc_rule *ioc_rule; 289 struct ip_fw *next_rule; 290 struct ip_fw *prev_rule; 291 struct ip_fw *sibling; 292 uint32_t rule_flags; 293 struct ip_fw **cross_rules; 294 }; 295 296 struct netmsg_del { 297 struct netmsg_base base; 298 struct ip_fw *start_rule; 299 struct ip_fw *prev_rule; 300 uint16_t rulenum; 301 uint8_t from_set; 302 uint8_t to_set; 303 }; 304 305 struct netmsg_zent { 306 struct netmsg_base base; 307 struct ip_fw *start_rule; 308 uint16_t rulenum; 309 uint16_t log_only; 310 }; 311 312 struct netmsg_cpstate { 313 struct netmsg_base base; 314 struct ipfw_ioc_state *ioc_state; 315 int state_cntmax; 316 int state_cnt; 317 }; 318 319 struct netmsg_tblent { 320 struct netmsg_base base; 321 struct sockaddr *key; 322 struct sockaddr *netmask; 323 struct ipfw_tblent *sibling; 324 int tableid; 325 }; 326 327 struct netmsg_tblflush { 328 struct netmsg_base base; 329 int tableid; 330 int destroy; 331 }; 332 333 struct netmsg_tblexp { 334 struct netmsg_base base; 335 time_t expire; 336 int tableid; 337 int cnt; 338 int expcnt; 339 struct radix_node_head *rnh; 340 }; 341 342 
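/*
 * Illustrative sketch, not part of the original comments: how an
 * ioctl-initiated rule operation travels along the netisrs, per the
 * per-CPU rule duplication description near the top of this file.
 * The message setup follows the netmsg usage elsewhere in this file;
 * ipfw_add_rule_dispatch is a hypothetical dispatch function name.
 *
 *	struct netmsg_ipfw fwmsg;
 *
 *	netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, 0,
 *	    ipfw_add_rule_dispatch);
 *	fwmsg.ioc_rule = ioc_rule;
 *	netisr_domsg(&fwmsg.base, 0);	(runs on netisr0 first)
 *
 * Each cpu's dispatch duplicates the rule, hooks the duplicate into
 * the previous cpu's ip_fw.sibling, rewrites prev_rule/next_rule to
 * their ->sibling counterparts, and forwards the message to the next
 * netisr; the last cpu replies, unblocking netisr_domsg() above.
 */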
struct ipfw_table_cp { 343 struct ipfw_ioc_tblent *te; 344 int te_idx; 345 int te_cnt; 346 }; 347 348 struct ip_fw_local { 349 /* 350 * offset The offset of a fragment. offset != 0 means that 351 * we have a fragment at this offset of an IPv4 packet. 352 * offset == 0 means that (if this is an IPv4 packet) 353 * this is the first or only fragment. 354 */ 355 u_short offset; 356 357 /* 358 * Local copies of addresses. They are only valid if we have 359 * an IP packet. 360 * 361 * proto The protocol. Set to 0 for non-ip packets, 362 * or to the protocol read from the packet otherwise. 363 * proto != 0 means that we have an IPv4 packet. 364 * 365 * src_port, dst_port port numbers, in HOST format. Only 366 * valid for TCP and UDP packets. 367 * 368 * src_ip, dst_ip ip addresses, in NETWORK format. 369 * Only valid for IPv4 packets. 370 */ 371 uint8_t proto; 372 uint16_t src_port; /* NOTE: host format */ 373 uint16_t dst_port; /* NOTE: host format */ 374 struct in_addr src_ip; /* NOTE: network format */ 375 struct in_addr dst_ip; /* NOTE: network format */ 376 uint16_t ip_len; 377 struct tcphdr *tcp; 378 }; 379 380 struct ipfw_addrs { 381 uint32_t addr1; /* host byte order */ 382 uint32_t addr2; /* host byte order */ 383 }; 384 385 struct ipfw_ports { 386 uint16_t port1; /* host byte order */ 387 uint16_t port2; /* host byte order */ 388 }; 389 390 struct ipfw_key { 391 union { 392 struct ipfw_addrs addrs; 393 uint64_t value; 394 } addr_u; 395 union { 396 struct ipfw_ports ports; 397 uint32_t value; 398 } port_u; 399 uint8_t proto; 400 uint8_t swap; /* IPFW_KEY_SWAP_ */ 401 uint16_t rsvd2; 402 }; 403 404 #define IPFW_KEY_SWAP_ADDRS 0x1 405 #define IPFW_KEY_SWAP_PORTS 0x2 406 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS) 407 408 struct ipfw_trkcnt { 409 RB_ENTRY(ipfw_trkcnt) tc_rblink; 410 struct ipfw_key tc_key; 411 uintptr_t tc_ruleid; 412 int tc_refs; 413 int tc_count; 414 time_t tc_expire; /* userland get-only */ 415 uint16_t tc_rulenum; /* userland get-only */ 416 } __cachealign; 417 418 #define tc_addrs tc_key.addr_u.value 419 #define tc_ports tc_key.port_u.value 420 #define tc_proto tc_key.proto 421 #define tc_saddr tc_key.addr_u.addrs.addr1 422 #define tc_daddr tc_key.addr_u.addrs.addr2 423 #define tc_sport tc_key.port_u.ports.port1 424 #define tc_dport tc_key.port_u.ports.port2 425 426 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt); 427 428 struct ipfw_state; 429 430 struct ipfw_track { 431 RB_ENTRY(ipfw_track) t_rblink; 432 struct ipfw_key t_key; 433 struct ip_fw *t_rule; 434 time_t t_lastexp; 435 LIST_HEAD(, ipfw_state) t_state_list; 436 time_t t_expire; 437 volatile int *t_count; 438 struct ipfw_trkcnt *t_trkcnt; 439 TAILQ_ENTRY(ipfw_track) t_link; 440 }; 441 442 #define t_addrs t_key.addr_u.value 443 #define t_ports t_key.port_u.value 444 #define t_proto t_key.proto 445 #define t_saddr t_key.addr_u.addrs.addr1 446 #define t_daddr t_key.addr_u.addrs.addr2 447 #define t_sport t_key.port_u.ports.port1 448 #define t_dport t_key.port_u.ports.port2 449 450 RB_HEAD(ipfw_track_tree, ipfw_track); 451 TAILQ_HEAD(ipfw_track_list, ipfw_track); 452 453 struct ipfw_state { 454 RB_ENTRY(ipfw_state) st_rblink; 455 struct ipfw_key st_key; 456 457 time_t st_expire; /* expire time */ 458 struct ip_fw *st_rule; 459 460 uint64_t st_pcnt; /* packets */ 461 uint64_t st_bcnt; /* bytes */ 462 463 /* 464 * st_state: 465 * State of this rule, typically a combination of TCP flags. 466 * 467 * st_ack_fwd/st_ack_rev: 468 * Most recent ACKs in forward and reverse direction. 
They 469 * are used to generate keepalives. 470 */ 471 uint32_t st_state; 472 uint32_t st_ack_fwd; /* host byte order */ 473 uint32_t st_seq_fwd; /* host byte order */ 474 uint32_t st_ack_rev; /* host byte order */ 475 uint32_t st_seq_rev; /* host byte order */ 476 477 uint16_t st_flags; /* IPFW_STATE_F_ */ 478 uint16_t st_type; /* KEEP_STATE/LIMIT/RDR */ 479 struct ipfw_track *st_track; 480 481 LIST_ENTRY(ipfw_state) st_trklink; 482 TAILQ_ENTRY(ipfw_state) st_link; 483 }; 484 485 #define st_addrs st_key.addr_u.value 486 #define st_ports st_key.port_u.value 487 #define st_proto st_key.proto 488 #define st_swap st_key.swap 489 490 #define IPFW_STATE_F_ACKFWD 0x0001 491 #define IPFW_STATE_F_SEQFWD 0x0002 492 #define IPFW_STATE_F_ACKREV 0x0004 493 #define IPFW_STATE_F_SEQREV 0x0008 494 #define IPFW_STATE_F_XLATSRC 0x0010 495 #define IPFW_STATE_F_XLATSLAVE 0x0020 496 #define IPFW_STATE_F_LINKED 0x0040 497 498 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \ 499 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE)) 500 501 /* Expired or being deleted. */ 502 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \ 503 IPFW_XLAT_INVALID((s))) 504 505 TAILQ_HEAD(ipfw_state_list, ipfw_state); 506 RB_HEAD(ipfw_state_tree, ipfw_state); 507 508 struct ipfw_xlat { 509 struct ipfw_state xlat_st; /* MUST be the first field */ 510 uint32_t xlat_addr; /* network byte order */ 511 uint16_t xlat_port; /* network byte order */ 512 uint16_t xlat_dir; /* MATCH_ */ 513 struct ifnet *xlat_ifp; /* matching ifnet */ 514 struct ipfw_xlat *xlat_pair; /* paired state */ 515 int xlat_pcpu; /* paired cpu */ 516 volatile int xlat_invalid; /* invalid, but not dtor yet */ 517 volatile uint64_t xlat_crefs; /* cross references */ 518 struct netmsg_base xlat_freenm; /* for remote free */ 519 }; 520 521 #define xlat_type xlat_st.st_type 522 #define xlat_flags xlat_st.st_flags 523 #define xlat_rule xlat_st.st_rule 524 #define xlat_bcnt xlat_st.st_bcnt 525 #define xlat_pcnt xlat_st.st_pcnt 526 527 struct ipfw_tblent { 528 struct radix_node te_nodes[2]; 529 struct sockaddr_in te_key; 530 u_long te_use; 531 time_t te_lastuse; 532 struct ipfw_tblent *te_sibling; 533 volatile int te_expired; 534 }; 535 536 struct ipfw_context { 537 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */ 538 struct ip_fw *ipfw_default_rule; /* default rule */ 539 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat */ 540 541 /* 542 * ipfw_set_disable contains one bit per set value (0..31). 543 * If the bit is set, all rules with the corresponding set 544 * are disabled. Set IPFW_DEFAULT_SET is reserved for the 545 * default rule and CANNOT be disabled.
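 *
 * For example (a sketch; the actual test lives in the packet
 * inspection path), a rule in set N is skipped when its bit is set:
 *
 *	if (ctx->ipfw_set_disable & (1 << rule->set))
 *		continue;	(rule is disabled, skip it)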
546 */ 547 uint32_t ipfw_set_disable; 548 549 uint8_t ipfw_flags; /* IPFW_FLAG_ */ 550 551 struct ip_fw *ipfw_cont_rule; 552 struct ipfw_xlat *ipfw_cont_xlat; 553 554 struct ipfw_state_tree ipfw_state_tree; 555 struct ipfw_state_list ipfw_state_list; 556 int ipfw_state_loosecnt; 557 int ipfw_state_cnt; 558 559 union { 560 struct ipfw_state state; 561 struct ipfw_track track; 562 struct ipfw_trkcnt trkcnt; 563 } ipfw_tmpkey; 564 565 struct ipfw_track_tree ipfw_track_tree; 566 struct ipfw_track_list ipfw_track_list; 567 struct ipfw_trkcnt *ipfw_trkcnt_spare; 568 569 struct callout ipfw_stateto_ch; 570 time_t ipfw_state_lastexp; 571 struct netmsg_base ipfw_stateexp_nm; 572 struct netmsg_base ipfw_stateexp_more; 573 struct ipfw_state ipfw_stateexp_anch; 574 575 struct callout ipfw_trackto_ch; 576 time_t ipfw_track_lastexp; 577 struct netmsg_base ipfw_trackexp_nm; 578 struct netmsg_base ipfw_trackexp_more; 579 struct ipfw_track ipfw_trackexp_anch; 580 581 struct callout ipfw_keepalive_ch; 582 struct netmsg_base ipfw_keepalive_nm; 583 struct netmsg_base ipfw_keepalive_more; 584 struct ipfw_state ipfw_keepalive_anch; 585 586 struct callout ipfw_xlatreap_ch; 587 struct netmsg_base ipfw_xlatreap_nm; 588 struct ipfw_state_list ipfw_xlatreap; 589 590 /* 591 * Statistics 592 */ 593 u_long ipfw_sts_reap; 594 u_long ipfw_sts_reapfailed; 595 u_long ipfw_sts_overflow; 596 u_long ipfw_sts_nomem; 597 u_long ipfw_sts_tcprecycled; 598 599 u_long ipfw_tks_nomem; 600 u_long ipfw_tks_reap; 601 u_long ipfw_tks_reapfailed; 602 u_long ipfw_tks_overflow; 603 u_long ipfw_tks_cntnomem; 604 605 u_long ipfw_frags; 606 u_long ipfw_defraged; 607 u_long ipfw_defrag_remote; 608 609 u_long ipfw_xlated; 610 u_long ipfw_xlate_split; 611 u_long ipfw_xlate_conflicts; 612 u_long ipfw_xlate_cresolved; 613 614 /* Last field */ 615 struct radix_node_head *ipfw_tables[]; 616 }; 617 618 #define IPFW_FLAG_KEEPALIVE 0x01 619 #define IPFW_FLAG_STATEEXP 0x02 620 #define IPFW_FLAG_TRACKEXP 0x04 621 #define IPFW_FLAG_STATEREAP 0x08 622 #define IPFW_FLAG_TRACKREAP 0x10 623 624 #define ipfw_state_tmpkey ipfw_tmpkey.state 625 #define ipfw_track_tmpkey ipfw_tmpkey.track 626 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt 627 628 struct ipfw_global { 629 int ipfw_state_loosecnt; /* cache aligned */ 630 time_t ipfw_state_globexp __cachealign; 631 632 struct lwkt_token ipfw_trkcnt_token __cachealign; 633 struct ipfw_trkcnt_tree ipfw_trkcnt_tree; 634 int ipfw_trkcnt_cnt; 635 time_t ipfw_track_globexp; 636 637 /* Accessed in netisr0. */ 638 struct ip_fw *ipfw_crossref_free __cachealign; 639 struct callout ipfw_crossref_ch; 640 struct netmsg_base ipfw_crossref_nm; 641 642 #ifdef KLD_MODULE 643 /* 644 * The module cannot be unloaded if there are references to 645 * certain rules of ipfw(4), e.g. from dummynet(4). 646 */ 647 int ipfw_refcnt __cachealign; 648 #endif 649 } __cachealign; 650 651 static struct ipfw_context *ipfw_ctx[MAXCPU]; 652 653 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 654 655 /* 656 * The following two global variables are accessed and updated only 657 * in netisr0. 658 */ 659 static uint32_t static_count; /* # of static rules */ 660 static uint32_t static_ioc_len; /* bytes of static rules */ 661 662 /* 663 * If 1, ipfw static rules are being flushed and ipfw_chk() 664 * will skip to the default rule.
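 *
 * A sketch of the intended effect (assumed shape of the check in
 * ipfw_chk()):
 *
 *	if (ipfw_flushing)
 *		f = ctx->ipfw_default_rule;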
665 */ 666 static int ipfw_flushing; 667 668 static int fw_verbose; 669 static int verbose_limit; 670 671 static int fw_debug; 672 static int autoinc_step = IPFW_AUTOINC_STEP_DEF; 673 674 static int ipfw_table_max = IPFW_TABLE_MAX_DEF; 675 676 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); 677 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); 678 679 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max); 680 681 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 682 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0, 683 "Firewall statistics"); 684 685 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, 686 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw"); 687 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW, 688 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I", 689 "Rule number autoincrement step"); 690 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_RW, 691 &fw_one_pass, 0, 692 "Only do a single pass through ipfw when using dummynet(4)"); 693 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 694 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 695 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, 696 &fw_verbose, 0, "Log matches to ipfw rules"); 697 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 698 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 699 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD, 700 &ipfw_table_max, 0, "Max # of tables"); 701 702 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS); 703 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS); 704 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS); 705 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS); 706 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS); 707 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS); 708 709 /* 710 * Timeouts for various events in handling states. 711 * 712 * NOTE: 713 * 1 == 0~1 second. 714 * 2 == 1~2 second(s). 715 * 716 * We use 2 seconds for FIN lifetime, so that the states will not be 717 * reaped prematurely. 718 */ 719 static uint32_t dyn_ack_lifetime = 300; 720 static uint32_t dyn_syn_lifetime = 20; 721 static uint32_t dyn_finwait_lifetime = 20; 722 static uint32_t dyn_fin_lifetime = 2; 723 static uint32_t dyn_rst_lifetime = 2; 724 static uint32_t dyn_udp_lifetime = 10; 725 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */ 726 727 /* 728 * Keepalives are sent if dyn_keepalive is set. They are sent every 729 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 730 * seconds of lifetime of a rule.
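 *
 * For example, with the defaults (dyn_keepalive_period = 5 and
 * dyn_keepalive_interval = 20) a state that would expire at time T
 * is probed at roughly T-20, T-15, T-10 and T-5; any response
 * refreshes the state and pushes T forward again.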
731 */ 732 static uint32_t dyn_keepalive_interval = 20; 733 static uint32_t dyn_keepalive_period = 5; 734 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 735 736 static struct ipfw_global ipfw_gd; 737 static int ipfw_state_loosecnt_updthr; 738 static int ipfw_state_max = 4096; /* max # of states */ 739 static int ipfw_track_max = 4096; /* max # of tracks */ 740 741 static int ipfw_state_headroom; /* setup at module load time */ 742 static int ipfw_state_reap_min = 8; 743 static int ipfw_state_expire_max = 32; 744 static int ipfw_state_scan_max = 256; 745 static int ipfw_keepalive_max = 8; 746 static int ipfw_track_reap_max = 4; 747 static int ipfw_track_expire_max = 16; 748 static int ipfw_track_scan_max = 128; 749 750 static eventhandler_tag ipfw_ifaddr_event; 751 752 /* Compat */ 753 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, 754 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I", 755 "Number of states and tracks"); 756 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 757 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I", 758 "Max number of states and tracks"); 759 760 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt, 761 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I", 762 "Number of states"); 763 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max, 764 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I", 765 "Max number of states"); 766 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW, 767 &ipfw_state_headroom, 0, "headroom for state reap"); 768 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD, 769 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks"); 770 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW, 771 &ipfw_track_max, 0, "Max number of tracks"); 772 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 773 &static_count, 0, "Number of static rules"); 774 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 775 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 776 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 777 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 778 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 779 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 780 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW, 781 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait"); 782 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 783 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 784 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 785 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 786 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 787 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 788 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 789 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 790 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max, 791 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt, 792 "I", "# of states to scan for each expire iteration"); 793 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max, 794 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt, 795 "I", "# of states to expire for each expire iteration"); 796 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max, 797 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt, 798 "I", "# of states to expire for each expire iteration"); 799 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min, 800 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt, 801 "I", "# of states to reap for state shortage"); 802 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max, 803 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt, 804 "I", "# of tracks to scan for each expire iteration"); 805 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max, 806 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt, 807 "I", "# of tracks to expire for each expire iteration"); 808 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max, 809 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt, 810 "I", "# of tracks to reap for track shortage"); 811 812 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap, 813 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 814 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat, 815 "LU", "# of state reaps due to states shortage"); 816 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed, 817 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 818 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat, 819 "LU", "# of state reap failure"); 820 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow, 821 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 822 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat, 823 "LU", "# of state overflow"); 824 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem, 825 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 826 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat, 827 "LU", "# of state allocation failure"); 828 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled, 829 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 830 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat, 831 "LU", "# of state deleted due to fast TCP port recycling"); 832 833 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem, 834 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 835 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat, 836 "LU", "# of track allocation failure"); 837 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap, 838 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 839 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat, 840 "LU", "# of track reap due to tracks shortage"); 841 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed, 842 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 843 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat, 844 "LU", "# of track reap failure"); 845 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow, 846 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 847 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat, 848 "LU", "# of track overflow"); 849 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem, 850 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 851 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat, 852 "LU", "# of track 
counter allocation failures"); 853 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags, 854 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 855 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat, 856 "LU", "# of IP fragments defragged"); 857 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged, 858 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 859 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat, 860 "LU", "# of IP packets after defrag"); 861 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote, 862 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 863 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat, 864 "LU", "# of IP packets after defrag dispatched to remote cpus"); 865 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated, 866 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 867 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat, 868 "LU", "# of address/port translations"); 869 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split, 870 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 871 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat, 872 "LU", "# of address/port translations split between different cpus"); 873 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts, 874 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 875 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat, 876 "LU", "# of address/port translation conflicts on remote cpu"); 877 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved, 878 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 879 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat, 880 "LU", "# of address/port translation conflicts resolved on remote cpu"); 881 882 static int ipfw_state_cmp(struct ipfw_state *, 883 struct ipfw_state *); 884 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *, 885 struct ipfw_trkcnt *); 886 static int ipfw_track_cmp(struct ipfw_track *, 887 struct ipfw_track *); 888 889 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 890 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 891 892 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 893 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 894 895 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 896 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 897 898 static int ipfw_chk(struct ip_fw_args *); 899 static void ipfw_track_expire_ipifunc(void *); 900 static void ipfw_state_expire_ipifunc(void *); 901 static void ipfw_keepalive(void *); 902 static int ipfw_state_expire_start(struct ipfw_context *, 903 int, int); 904 static void ipfw_crossref_timeo(void *); 905 static void ipfw_state_remove(struct ipfw_context *, 906 struct ipfw_state *); 907 static void ipfw_xlat_reap_timeo(void *); 908 static void ipfw_defrag_redispatch(struct mbuf *, int, 909 struct ip_fw *); 910 911 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token) 912 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token) 913 #define IPFW_TRKCNT_TOKINIT \ 914 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); 915 916 static void 917 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 918 const struct sockaddr *netmask) 919 { 920 const u_char *cp1 = (const u_char *)src; 921 u_char *cp2 = (u_char *)dst; 922 const u_char *cp3 = (const u_char *)netmask; 923 u_char *cplim = cp2 + *cp3; 924 u_char *cplim2 = cp2 + *cp1; 925 926 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 927 cp3 += 2; 928 if (cplim > cplim2) 929 cplim = 
cplim2; 930 while (cp2 < cplim) 931 *cp2++ = *cp1++ & *cp3++; 932 if (cp2 < cplim2) 933 bzero(cp2, cplim2 - cp2); 934 } 935 936 static __inline uint16_t 937 pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp) 938 { 939 uint32_t l; 940 941 if (udp && !cksum) 942 return (0x0000); 943 l = cksum + old - new; 944 l = (l >> 16) + (l & 65535); 945 l = l & 65535; 946 if (udp && !l) 947 return (0xFFFF); 948 return (l); 949 } 950 951 static __inline void 952 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport, 953 in_addr_t daddr, uint16_t dport, uint8_t proto) 954 { 955 956 key->proto = proto; 957 key->swap = 0; 958 959 if (saddr < daddr) { 960 key->addr_u.addrs.addr1 = daddr; 961 key->addr_u.addrs.addr2 = saddr; 962 key->swap |= IPFW_KEY_SWAP_ADDRS; 963 } else { 964 key->addr_u.addrs.addr1 = saddr; 965 key->addr_u.addrs.addr2 = daddr; 966 } 967 968 if (sport < dport) { 969 key->port_u.ports.port1 = dport; 970 key->port_u.ports.port2 = sport; 971 key->swap |= IPFW_KEY_SWAP_PORTS; 972 } else { 973 key->port_u.ports.port1 = sport; 974 key->port_u.ports.port2 = dport; 975 } 976 977 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS)) 978 key->swap |= IPFW_KEY_SWAP_PORTS; 979 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS)) 980 key->swap |= IPFW_KEY_SWAP_ADDRS; 981 } 982 983 static __inline void 984 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport, 985 in_addr_t *daddr, uint16_t *dport) 986 { 987 988 if (key->swap & IPFW_KEY_SWAP_ADDRS) { 989 *saddr = key->addr_u.addrs.addr2; 990 *daddr = key->addr_u.addrs.addr1; 991 } else { 992 *saddr = key->addr_u.addrs.addr1; 993 *daddr = key->addr_u.addrs.addr2; 994 } 995 996 if (key->swap & IPFW_KEY_SWAP_PORTS) { 997 *sport = key->port_u.ports.port2; 998 *dport = key->port_u.ports.port1; 999 } else { 1000 *sport = key->port_u.ports.port1; 1001 *dport = key->port_u.ports.port2; 1002 } 1003 } 1004 1005 static int 1006 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2) 1007 { 1008 1009 if (s1->st_proto > s2->st_proto) 1010 return (1); 1011 if (s1->st_proto < s2->st_proto) 1012 return (-1); 1013 1014 if (s1->st_addrs > s2->st_addrs) 1015 return (1); 1016 if (s1->st_addrs < s2->st_addrs) 1017 return (-1); 1018 1019 if (s1->st_ports > s2->st_ports) 1020 return (1); 1021 if (s1->st_ports < s2->st_ports) 1022 return (-1); 1023 1024 if (s1->st_swap == s2->st_swap || 1025 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL) 1026 return (0); 1027 1028 if (s1->st_swap > s2->st_swap) 1029 return (1); 1030 else 1031 return (-1); 1032 } 1033 1034 static int 1035 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2) 1036 { 1037 1038 if (t1->tc_proto > t2->tc_proto) 1039 return (1); 1040 if (t1->tc_proto < t2->tc_proto) 1041 return (-1); 1042 1043 if (t1->tc_addrs > t2->tc_addrs) 1044 return (1); 1045 if (t1->tc_addrs < t2->tc_addrs) 1046 return (-1); 1047 1048 if (t1->tc_ports > t2->tc_ports) 1049 return (1); 1050 if (t1->tc_ports < t2->tc_ports) 1051 return (-1); 1052 1053 if (t1->tc_ruleid > t2->tc_ruleid) 1054 return (1); 1055 if (t1->tc_ruleid < t2->tc_ruleid) 1056 return (-1); 1057 1058 return (0); 1059 } 1060 1061 static int 1062 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2) 1063 { 1064 1065 if (t1->t_proto > t2->t_proto) 1066 return (1); 1067 if (t1->t_proto < t2->t_proto) 1068 return (-1); 1069 1070 if (t1->t_addrs > t2->t_addrs) 1071 return (1); 1072 if (t1->t_addrs < t2->t_addrs) 1073 return (-1); 1074 1075 if (t1->t_ports > t2->t_ports) 1076 return (1); 1077 
if (t1->t_ports < t2->t_ports) 1078 return (-1); 1079 1080 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule) 1081 return (1); 1082 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule) 1083 return (-1); 1084 1085 return (0); 1086 } 1087 1088 static __inline struct ipfw_state * 1089 ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s) 1090 { 1091 struct ipfw_state *dup; 1092 1093 KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0, 1094 ("state %p was linked", s)); 1095 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1096 if (dup == NULL) { 1097 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link); 1098 s->st_flags |= IPFW_STATE_F_LINKED; 1099 } 1100 return (dup); 1101 } 1102 1103 static __inline void 1104 ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s) 1105 { 1106 1107 KASSERT(s->st_flags & IPFW_STATE_F_LINKED, 1108 ("state %p was not linked", s)); 1109 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1110 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link); 1111 s->st_flags &= ~IPFW_STATE_F_LINKED; 1112 } 1113 1114 static void 1115 ipfw_state_max_set(int state_max) 1116 { 1117 1118 ipfw_state_max = state_max; 1119 /* Allow 5% states over-allocation. */ 1120 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus; 1121 } 1122 1123 static __inline int 1124 ipfw_state_cntcoll(void) 1125 { 1126 int cpu, state_cnt = 0; 1127 1128 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 1129 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt; 1130 return (state_cnt); 1131 } 1132 1133 static __inline int 1134 ipfw_state_cntsync(void) 1135 { 1136 int state_cnt; 1137 1138 state_cnt = ipfw_state_cntcoll(); 1139 ipfw_gd.ipfw_state_loosecnt = state_cnt; 1140 return (state_cnt); 1141 } 1142 1143 static __inline int 1144 ipfw_free_rule(struct ip_fw *rule) 1145 { 1146 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid)); 1147 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt)); 1148 rule->refcnt--; 1149 if (rule->refcnt == 0) { 1150 if (rule->cross_rules != NULL) 1151 kfree(rule->cross_rules, M_IPFW); 1152 kfree(rule, M_IPFW); 1153 return 1; 1154 } 1155 return 0; 1156 } 1157 1158 static void 1159 ipfw_unref_rule(void *priv) 1160 { 1161 ipfw_free_rule(priv); 1162 #ifdef KLD_MODULE 1163 KASSERT(ipfw_gd.ipfw_refcnt > 0, 1164 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt)); 1165 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1); 1166 #endif 1167 } 1168 1169 static __inline void 1170 ipfw_ref_rule(struct ip_fw *rule) 1171 { 1172 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid)); 1173 #ifdef KLD_MODULE 1174 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 1175 #endif 1176 rule->refcnt++; 1177 } 1178 1179 /* 1180 * This macro maps an ip pointer into a layer3 header pointer of type T 1181 */ 1182 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 1183 1184 static __inline int 1185 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 1186 { 1187 int type = L3HDR(struct icmp,ip)->icmp_type; 1188 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn); 1189 int idx = type / 32; 1190 1191 if (idx >= idx_max) 1192 return (0); 1193 return (cmd->d[idx] & (1 << (type % 32))); 1194 } 1195 1196 static __inline int 1197 icmpcode_match(struct ip *ip, ipfw_insn_u32 *cmd) 1198 { 1199 int code = L3HDR(struct icmp,ip)->icmp_code; 1200 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn); 1201 int idx = code / 32; 1202 1203 if (idx >= idx_max) 1204 return (0); 1205 return (cmd->d[idx] & (1 << (code % 32))); 1206 } 1207 1208 #define TT ((1 << ICMP_ECHO) | \ 1209 (1 << 
ICMP_ROUTERSOLICIT) | \ 1210 (1 << ICMP_TSTAMP) | \ 1211 (1 << ICMP_IREQ) | \ 1212 (1 << ICMP_MASKREQ)) 1213 1214 static int 1215 is_icmp_query(struct ip *ip) 1216 { 1217 int type = L3HDR(struct icmp, ip)->icmp_type; 1218 1219 return (type < 32 && (TT & (1 << type))); 1220 } 1221 1222 #undef TT 1223 1224 /* 1225 * The following checks use two arrays of 8 or 16 bits to store the 1226 * bits that we want set or clear, respectively. They are in the 1227 * low and high half of cmd->arg1 or cmd->d[0]. 1228 * 1229 * We scan options and store the bits we find set. We succeed if 1230 * 1231 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 1232 * 1233 * The code is sometimes optimized not to store additional variables. 1234 */ 1235 static int 1236 flags_match(ipfw_insn *cmd, uint8_t bits) 1237 { 1238 u_char want_clear; 1239 bits = ~bits; 1240 1241 if (((cmd->arg1 & 0xff) & bits) != 0) 1242 return 0; /* some bits we want set were clear */ 1243 1244 want_clear = (cmd->arg1 >> 8) & 0xff; 1245 if ((want_clear & bits) != want_clear) 1246 return 0; /* some bits we want clear were set */ 1247 return 1; 1248 } 1249 1250 static int 1251 ipopts_match(struct ip *ip, ipfw_insn *cmd) 1252 { 1253 int optlen, bits = 0; 1254 u_char *cp = (u_char *)(ip + 1); 1255 int x = (ip->ip_hl << 2) - sizeof(struct ip); 1256 1257 for (; x > 0; x -= optlen, cp += optlen) { 1258 int opt = cp[IPOPT_OPTVAL]; 1259 1260 if (opt == IPOPT_EOL) 1261 break; 1262 1263 if (opt == IPOPT_NOP) { 1264 optlen = 1; 1265 } else { 1266 optlen = cp[IPOPT_OLEN]; 1267 if (optlen <= 0 || optlen > x) 1268 return 0; /* invalid or truncated */ 1269 } 1270 1271 switch (opt) { 1272 case IPOPT_LSRR: 1273 bits |= IP_FW_IPOPT_LSRR; 1274 break; 1275 1276 case IPOPT_SSRR: 1277 bits |= IP_FW_IPOPT_SSRR; 1278 break; 1279 1280 case IPOPT_RR: 1281 bits |= IP_FW_IPOPT_RR; 1282 break; 1283 1284 case IPOPT_TS: 1285 bits |= IP_FW_IPOPT_TS; 1286 break; 1287 1288 default: 1289 break; 1290 } 1291 } 1292 return (flags_match(cmd, bits)); 1293 } 1294 1295 static int 1296 tcpopts_match(struct ip *ip, ipfw_insn *cmd) 1297 { 1298 int optlen, bits = 0; 1299 struct tcphdr *tcp = L3HDR(struct tcphdr,ip); 1300 u_char *cp = (u_char *)(tcp + 1); 1301 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 1302 1303 for (; x > 0; x -= optlen, cp += optlen) { 1304 int opt = cp[0]; 1305 1306 if (opt == TCPOPT_EOL) 1307 break; 1308 1309 if (opt == TCPOPT_NOP) { 1310 optlen = 1; 1311 } else { 1312 optlen = cp[1]; 1313 if (optlen <= 0) 1314 break; 1315 } 1316 1317 switch (opt) { 1318 case TCPOPT_MAXSEG: 1319 bits |= IP_FW_TCPOPT_MSS; 1320 break; 1321 1322 case TCPOPT_WINDOW: 1323 bits |= IP_FW_TCPOPT_WINDOW; 1324 break; 1325 1326 case TCPOPT_SACK_PERMITTED: 1327 case TCPOPT_SACK: 1328 bits |= IP_FW_TCPOPT_SACK; 1329 break; 1330 1331 case TCPOPT_TIMESTAMP: 1332 bits |= IP_FW_TCPOPT_TS; 1333 break; 1334 1335 case TCPOPT_CC: 1336 case TCPOPT_CCNEW: 1337 case TCPOPT_CCECHO: 1338 bits |= IP_FW_TCPOPT_CC; 1339 break; 1340 1341 default: 1342 break; 1343 } 1344 } 1345 return (flags_match(cmd, bits)); 1346 } 1347 1348 static int 1349 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 1350 { 1351 if (ifp == NULL) /* no iface with this packet, match fails */ 1352 return 0; 1353 1354 /* Check by name or by IP address */ 1355 if (cmd->name[0] != '\0') { /* match by name */ 1356 /* Check name */ 1357 if (cmd->p.glob) { 1358 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) 1359 return(1); 1360 } else { 1361 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 1362 return(1); 1363 } 1364 } else 
{ 1365 struct ifaddr_container *ifac; 1366 1367 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1368 struct ifaddr *ia = ifac->ifa; 1369 1370 if (ia->ifa_addr == NULL) 1371 continue; 1372 if (ia->ifa_addr->sa_family != AF_INET) 1373 continue; 1374 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 1375 (ia->ifa_addr))->sin_addr.s_addr) 1376 return(1); /* match */ 1377 } 1378 } 1379 return(0); /* no match, fail ... */ 1380 } 1381 1382 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 1383 1384 /* 1385 * We enter here when we have a rule with O_LOG. 1386 * XXX this function alone takes about 2Kbytes of code! 1387 */ 1388 static void 1389 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen, 1390 struct ether_header *eh, struct mbuf *m, struct ifnet *oif) 1391 { 1392 char *action; 1393 int limit_reached = 0; 1394 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN]; 1395 1396 fragment[0] = '\0'; 1397 proto[0] = '\0'; 1398 1399 if (f == NULL) { /* bogus pkt */ 1400 if (verbose_limit != 0 && 1401 ctx->ipfw_norule_counter >= verbose_limit) 1402 return; 1403 ctx->ipfw_norule_counter++; 1404 if (ctx->ipfw_norule_counter == verbose_limit) 1405 limit_reached = verbose_limit; 1406 action = "Refuse"; 1407 } else { /* O_LOG is the first action, find the real one */ 1408 ipfw_insn *cmd = ACTION_PTR(f); 1409 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 1410 1411 if (l->max_log != 0 && l->log_left == 0) 1412 return; 1413 l->log_left--; 1414 if (l->log_left == 0) 1415 limit_reached = l->max_log; 1416 cmd += F_LEN(cmd); /* point to first action */ 1417 if (cmd->opcode == O_PROB) 1418 cmd += F_LEN(cmd); 1419 1420 action = action2; 1421 switch (cmd->opcode) { 1422 case O_DENY: 1423 action = "Deny"; 1424 break; 1425 1426 case O_REJECT: 1427 if (cmd->arg1==ICMP_REJECT_RST) { 1428 action = "Reset"; 1429 } else if (cmd->arg1==ICMP_UNREACH_HOST) { 1430 action = "Reject"; 1431 } else { 1432 ksnprintf(SNPARGS(action2, 0), "Unreach %d", 1433 cmd->arg1); 1434 } 1435 break; 1436 1437 case O_ACCEPT: 1438 action = "Accept"; 1439 break; 1440 1441 case O_COUNT: 1442 action = "Count"; 1443 break; 1444 1445 case O_DIVERT: 1446 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); 1447 break; 1448 1449 case O_TEE: 1450 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); 1451 break; 1452 1453 case O_SKIPTO: 1454 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); 1455 break; 1456 1457 case O_PIPE: 1458 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); 1459 break; 1460 1461 case O_QUEUE: 1462 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); 1463 break; 1464 1465 case O_FORWARD_IP: 1466 { 1467 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 1468 int len; 1469 1470 len = ksnprintf(SNPARGS(action2, 0), 1471 "Forward to %s", 1472 kinet_ntoa(sa->sa.sin_addr, abuf)); 1473 if (sa->sa.sin_port) { 1474 ksnprintf(SNPARGS(action2, len), ":%d", 1475 sa->sa.sin_port); 1476 } 1477 } 1478 break; 1479 1480 default: 1481 action = "UNKNOWN"; 1482 break; 1483 } 1484 } 1485 1486 if (hlen == 0) { /* non-ip */ 1487 ksnprintf(SNPARGS(proto, 0), "MAC"); 1488 } else { 1489 struct ip *ip = mtod(m, struct ip *); 1490 /* these three are all aliases to the same thing */ 1491 struct icmp *const icmp = L3HDR(struct icmp, ip); 1492 struct tcphdr *const tcp = (struct tcphdr *)icmp; 1493 struct udphdr *const udp = (struct udphdr *)icmp; 1494 1495 int ip_off, offset, ip_len; 1496 int len; 1497 1498 if (eh != NULL) { /* layer 2 packets are as on the wire */ 1499 ip_off = ntohs(ip->ip_off); 1500 
ip_len = ntohs(ip->ip_len); 1501 } else { 1502 ip_off = ip->ip_off; 1503 ip_len = ip->ip_len; 1504 } 1505 offset = ip_off & IP_OFFMASK; 1506 switch (ip->ip_p) { 1507 case IPPROTO_TCP: 1508 len = ksnprintf(SNPARGS(proto, 0), "TCP %s", 1509 kinet_ntoa(ip->ip_src, abuf)); 1510 if (offset == 0) { 1511 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1512 ntohs(tcp->th_sport), 1513 kinet_ntoa(ip->ip_dst, abuf), 1514 ntohs(tcp->th_dport)); 1515 } else { 1516 ksnprintf(SNPARGS(proto, len), " %s", 1517 kinet_ntoa(ip->ip_dst, abuf)); 1518 } 1519 break; 1520 1521 case IPPROTO_UDP: 1522 len = ksnprintf(SNPARGS(proto, 0), "UDP %s", 1523 kinet_ntoa(ip->ip_src, abuf)); 1524 if (offset == 0) { 1525 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1526 ntohs(udp->uh_sport), 1527 kinet_ntoa(ip->ip_dst, abuf), 1528 ntohs(udp->uh_dport)); 1529 } else { 1530 ksnprintf(SNPARGS(proto, len), " %s", 1531 kinet_ntoa(ip->ip_dst, abuf)); 1532 } 1533 break; 1534 1535 case IPPROTO_ICMP: 1536 if (offset == 0) { 1537 len = ksnprintf(SNPARGS(proto, 0), 1538 "ICMP:%u.%u ", 1539 icmp->icmp_type, 1540 icmp->icmp_code); 1541 } else { 1542 len = ksnprintf(SNPARGS(proto, 0), "ICMP "); 1543 } 1544 len += ksnprintf(SNPARGS(proto, len), "%s", 1545 kinet_ntoa(ip->ip_src, abuf)); 1546 ksnprintf(SNPARGS(proto, len), " %s", 1547 kinet_ntoa(ip->ip_dst, abuf)); 1548 break; 1549 1550 default: 1551 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 1552 kinet_ntoa(ip->ip_src, abuf)); 1553 ksnprintf(SNPARGS(proto, len), " %s", 1554 kinet_ntoa(ip->ip_dst, abuf)); 1555 break; 1556 } 1557 1558 if (ip_off & (IP_MF | IP_OFFMASK)) { 1559 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 1560 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 1561 offset << 3, (ip_off & IP_MF) ? "+" : ""); 1562 } 1563 } 1564 1565 if (oif || m->m_pkthdr.rcvif) { 1566 log(LOG_SECURITY | LOG_INFO, 1567 "ipfw: %d %s %s %s via %s%s\n", 1568 f ? f->rulenum : -1, 1569 action, proto, oif ? "out" : "in", 1570 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 1571 fragment); 1572 } else { 1573 log(LOG_SECURITY | LOG_INFO, 1574 "ipfw: %d %s %s [no if info]%s\n", 1575 f ? f->rulenum : -1, 1576 action, proto, fragment); 1577 } 1578 1579 if (limit_reached) { 1580 log(LOG_SECURITY | LOG_NOTICE, 1581 "ipfw: limit %d reached on entry %d\n", 1582 limit_reached, f ? f->rulenum : -1); 1583 } 1584 } 1585 1586 #undef SNPARGS 1587 1588 static void 1589 ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x) 1590 { 1591 struct ip_fw *rule = slave_x->xlat_rule; 1592 1593 KKASSERT(rule->cpuid == mycpuid); 1594 1595 /* No more cross references; free this pair now. */ 1596 kfree(x, M_IPFW); 1597 kfree(slave_x, M_IPFW); 1598 1599 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1600 rule->cross_refs--; 1601 } 1602 1603 static void 1604 ipfw_xlat_reap_dispatch(netmsg_t nm) 1605 { 1606 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1607 struct ipfw_state *s, *ns; 1608 1609 ASSERT_NETISR_NCPUS(mycpuid); 1610 1611 crit_enter(); 1612 /* Reply ASAP. 
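 * Replying first marks the message MSGF_DONE, so that
 * ipfw_xlat_reap_timeo() can send it again if this scan leaves
 * entries on the reap list.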
*/ 1613 netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0); 1614 crit_exit(); 1615 1616 /* TODO: limit scanning depth */ 1617 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) { 1618 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 1619 struct ipfw_xlat *slave_x = x->xlat_pair; 1620 uint64_t crefs; 1621 1622 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1623 if (crefs == 0) { 1624 TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1625 ipfw_xlat_reap(x, slave_x); 1626 } 1627 } 1628 if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1629 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1630 &ctx->ipfw_xlatreap_nm); 1631 } 1632 } 1633 1634 static void 1635 ipfw_xlat_reap_timeo(void *xnm) 1636 { 1637 struct netmsg_base *nm = xnm; 1638 1639 KKASSERT(mycpuid < netisr_ncpus); 1640 1641 crit_enter(); 1642 if (nm->lmsg.ms_flags & MSGF_DONE) 1643 netisr_sendmsg_oncpu(nm); 1644 crit_exit(); 1645 } 1646 1647 static void 1648 ipfw_xlat_free_dispatch(netmsg_t nmsg) 1649 { 1650 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1651 struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp; 1652 struct ipfw_xlat *slave_x = x->xlat_pair; 1653 uint64_t crefs; 1654 1655 ASSERT_NETISR_NCPUS(mycpuid); 1656 1657 KKASSERT(slave_x != NULL); 1658 KKASSERT(slave_x->xlat_invalid && x->xlat_invalid); 1659 1660 KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0, 1661 ("master xlat is still linked")); 1662 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1663 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1664 1665 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1666 slave_x->xlat_crefs--; 1667 1668 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1669 if (crefs == 0) { 1670 ipfw_xlat_reap(x, slave_x); 1671 return; 1672 } 1673 1674 if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1675 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1676 &ctx->ipfw_xlatreap_nm); 1677 } 1678 1679 /* 1680 * This pair is still referenced; defer its destruction. 1681 * YYY reuse st_link. 1682 */ 1683 TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1684 } 1685 1686 static __inline void 1687 ipfw_xlat_invalidate(struct ipfw_xlat *x) 1688 { 1689 1690 x->xlat_invalid = 1; 1691 x->xlat_pair->xlat_invalid = 1; 1692 } 1693 1694 static void 1695 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s) 1696 { 1697 struct ipfw_xlat *x, *slave_x; 1698 struct netmsg_base *nm; 1699 1700 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT || 1701 IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type)); 1702 KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0, 1703 ("delete slave xlat")); 1704 1705 KASSERT(ctx->ipfw_state_cnt > 0, 1706 ("invalid state count %d", ctx->ipfw_state_cnt)); 1707 ctx->ipfw_state_cnt--; 1708 if (ctx->ipfw_state_loosecnt > 0) 1709 ctx->ipfw_state_loosecnt--; 1710 1711 /* 1712 * Unhook this state. 1713 */ 1714 if (s->st_track != NULL) { 1715 struct ipfw_track *t = s->st_track; 1716 1717 KASSERT(!LIST_EMPTY(&t->t_state_list), 1718 ("track state list is empty")); 1719 LIST_REMOVE(s, st_trklink); 1720 1721 KASSERT(*t->t_count > 0, 1722 ("invalid track count %d", *t->t_count)); 1723 atomic_subtract_int(t->t_count, 1); 1724 } 1725 ipfw_state_unlink(ctx, s); 1726 1727 /* 1728 * Free this state. Xlats require special processing, since 1729 * xlats are paired states and the pair could be on 1730 * different cpus. 1731 */ 1732 1733 if (!IPFW_ISXLAT(s->st_type)) { 1734 /* Not xlat; free now. */ 1735 kfree(s, M_IPFW); 1736 /* Done! 
*/ 1737 return; 1738 } 1739 x = (struct ipfw_xlat *)s; 1740 1741 if (x->xlat_pair == NULL) { 1742 /* Not setup yet; free now. */ 1743 kfree(x, M_IPFW); 1744 /* Done! */ 1745 return; 1746 } 1747 slave_x = x->xlat_pair; 1748 KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE); 1749 1750 if (x->xlat_pcpu == mycpuid) { 1751 /* 1752 * Paired states are on the same cpu; delete this 1753 * pair now. 1754 */ 1755 KKASSERT(x->xlat_crefs == 0); 1756 KKASSERT(slave_x->xlat_crefs == 0); 1757 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1758 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1759 kfree(x, M_IPFW); 1760 kfree(slave_x, M_IPFW); 1761 return; 1762 } 1763 1764 /* 1765 * Free the paired states on the cpu owning the slave xlat. 1766 */ 1767 1768 /* 1769 * Mark the state pair invalid; completely deleting them 1770 * may take some time. 1771 */ 1772 ipfw_xlat_invalidate(x); 1773 1774 nm = &x->xlat_freenm; 1775 netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY, 1776 ipfw_xlat_free_dispatch); 1777 nm->lmsg.u.ms_resultp = x; 1778 1779 /* See the comment in ipfw_xlate_redispatch(). */ 1780 x->xlat_rule->cross_refs++; 1781 x->xlat_crefs++; 1782 1783 netisr_sendmsg(nm, x->xlat_pcpu); 1784 } 1785 1786 static void 1787 ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s) 1788 { 1789 1790 if (s->st_flags & IPFW_STATE_F_XLATSLAVE) { 1791 KKASSERT(IPFW_ISXLAT(s->st_type)); 1792 ipfw_xlat_invalidate((struct ipfw_xlat *)s); 1793 ipfw_state_unlink(ctx, s); 1794 return; 1795 } 1796 ipfw_state_del(ctx, s); 1797 } 1798 1799 static int 1800 ipfw_state_reap(struct ipfw_context *ctx, int reap_max) 1801 { 1802 struct ipfw_state *s, *anchor; 1803 int expired; 1804 1805 if (reap_max < ipfw_state_reap_min) 1806 reap_max = ipfw_state_reap_min; 1807 1808 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) { 1809 /* 1810 * Kick start state expiring. Ignore scan limit, 1811 * we are short of states. 1812 */ 1813 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP; 1814 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max); 1815 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP; 1816 return (expired); 1817 } 1818 1819 /* 1820 * States are being expired. 1821 */ 1822 1823 if (ctx->ipfw_state_cnt == 0) 1824 return (0); 1825 1826 expired = 0; 1827 anchor = &ctx->ipfw_stateexp_anch; 1828 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1829 /* 1830 * Ignore scan limit; we are short of states. 1831 */ 1832 1833 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1834 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1835 1836 if (IPFW_STATE_SCANSKIP(s)) 1837 continue; 1838 1839 if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) { 1840 ipfw_state_del(ctx, s); 1841 if (++expired >= reap_max) 1842 break; 1843 if ((expired & 0xff) == 0 && 1844 ipfw_state_cntcoll() + ipfw_state_headroom <= 1845 ipfw_state_max) 1846 break; 1847 } 1848 } 1849 /* 1850 * NOTE: 1851 * Leave the anchor on the list, even if the end of the list has 1852 * been reached. ipfw_state_expire_more_dispatch() will handle 1853 * the removal. 
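 *
 * This anchor-based walk is the pattern used throughout this file
 * for incremental list scans: the anchor is re-inserted after each
 * visited state, so the next pass resumes exactly where this one
 * stopped.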
1854 */ 1855 return (expired); 1856 } 1857 1858 static void 1859 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule) 1860 { 1861 struct ipfw_state *s, *sn; 1862 1863 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) { 1864 if (IPFW_STATE_SCANSKIP(s)) 1865 continue; 1866 if (rule != NULL && s->st_rule != rule) 1867 continue; 1868 ipfw_state_del(ctx, s); 1869 } 1870 } 1871 1872 static void 1873 ipfw_state_expire_done(struct ipfw_context *ctx) 1874 { 1875 1876 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1877 ("stateexp is not in progress")); 1878 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP; 1879 callout_reset(&ctx->ipfw_stateto_ch, hz, 1880 ipfw_state_expire_ipifunc, NULL); 1881 } 1882 1883 static void 1884 ipfw_state_expire_more(struct ipfw_context *ctx) 1885 { 1886 struct netmsg_base *nm = &ctx->ipfw_stateexp_more; 1887 1888 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1889 ("stateexp is not in progress")); 1890 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 1891 ("stateexp more did not finish")); 1892 netisr_sendmsg_oncpu(nm); 1893 } 1894 1895 static int 1896 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor, 1897 int scan_max, int expire_max) 1898 { 1899 struct ipfw_state *s; 1900 int scanned = 0, expired = 0; 1901 1902 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1903 ("stateexp is not in progress")); 1904 1905 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1906 if (scanned++ >= scan_max) { 1907 ipfw_state_expire_more(ctx); 1908 return (expired); 1909 } 1910 1911 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1912 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1913 1914 if (IPFW_STATE_SCANSKIP(s)) 1915 continue; 1916 1917 if (IPFW_STATE_ISDEAD(s) || 1918 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1919 IPFW_STATE_TCPCLOSED(s))) { 1920 ipfw_state_del(ctx, s); 1921 if (++expired >= expire_max) { 1922 ipfw_state_expire_more(ctx); 1923 return (expired); 1924 } 1925 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1926 (expired & 0xff) == 0 && 1927 ipfw_state_cntcoll() + ipfw_state_headroom <= 1928 ipfw_state_max) { 1929 ipfw_state_expire_more(ctx); 1930 return (expired); 1931 } 1932 } 1933 } 1934 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1935 ipfw_state_expire_done(ctx); 1936 return (expired); 1937 } 1938 1939 static void 1940 ipfw_state_expire_more_dispatch(netmsg_t nm) 1941 { 1942 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1943 struct ipfw_state *anchor; 1944 1945 ASSERT_NETISR_NCPUS(mycpuid); 1946 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1947 ("stateexp is not in progress")); 1948 1949 /* Reply ASAP */ 1950 netisr_replymsg(&nm->base, 0); 1951 1952 anchor = &ctx->ipfw_stateexp_anch; 1953 if (ctx->ipfw_state_cnt == 0) { 1954 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1955 ipfw_state_expire_done(ctx); 1956 return; 1957 } 1958 ipfw_state_expire_loop(ctx, anchor, 1959 ipfw_state_scan_max, ipfw_state_expire_max); 1960 } 1961 1962 static int 1963 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 1964 { 1965 struct ipfw_state *anchor; 1966 1967 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0, 1968 ("stateexp is in progress")); 1969 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP; 1970 1971 if (ctx->ipfw_state_cnt == 0) { 1972 ipfw_state_expire_done(ctx); 1973 return (0); 1974 } 1975 1976 /* 1977 * Do not expire more than once per second; it is useless. 
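 * Expiration times have one-second granularity (st_expire is
 * compared against time_uptime), so a second scan within the same
 * second cannot find anything new to expire.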
1978 */ 1979 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 && 1980 ctx->ipfw_state_lastexp == time_uptime) { 1981 ipfw_state_expire_done(ctx); 1982 return (0); 1983 } 1984 ctx->ipfw_state_lastexp = time_uptime; 1985 1986 anchor = &ctx->ipfw_stateexp_anch; 1987 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 1988 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max)); 1989 } 1990 1991 static void 1992 ipfw_state_expire_dispatch(netmsg_t nm) 1993 { 1994 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1995 1996 ASSERT_NETISR_NCPUS(mycpuid); 1997 1998 /* Reply ASAP */ 1999 crit_enter(); 2000 netisr_replymsg(&nm->base, 0); 2001 crit_exit(); 2002 2003 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) { 2004 /* Running; done. */ 2005 return; 2006 } 2007 ipfw_state_expire_start(ctx, 2008 ipfw_state_scan_max, ipfw_state_expire_max); 2009 } 2010 2011 static void 2012 ipfw_state_expire_ipifunc(void *dummy __unused) 2013 { 2014 struct netmsg_base *msg; 2015 2016 KKASSERT(mycpuid < netisr_ncpus); 2017 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm; 2018 2019 crit_enter(); 2020 if (msg->lmsg.ms_flags & MSGF_DONE) 2021 netisr_sendmsg_oncpu(msg); 2022 crit_exit(); 2023 } 2024 2025 static boolean_t 2026 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp) 2027 { 2028 uint32_t seq = ntohl(tcp->th_seq); 2029 uint32_t ack = ntohl(tcp->th_ack); 2030 2031 if (tcp->th_flags & TH_RST) 2032 return (TRUE); 2033 2034 if (dir == MATCH_FORWARD) { 2035 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) { 2036 s->st_flags |= IPFW_STATE_F_SEQFWD; 2037 s->st_seq_fwd = seq; 2038 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) { 2039 s->st_seq_fwd = seq; 2040 } else { 2041 /* Out-of-sequence; done. */ 2042 return (FALSE); 2043 } 2044 if (tcp->th_flags & TH_ACK) { 2045 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) { 2046 s->st_flags |= IPFW_STATE_F_ACKFWD; 2047 s->st_ack_fwd = ack; 2048 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) { 2049 s->st_ack_fwd = ack; 2050 } else { 2051 /* Out-of-sequence; done. */ 2052 return (FALSE); 2053 } 2054 2055 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) == 2056 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1) 2057 s->st_state |= (TH_ACK << 8); 2058 } 2059 } else { 2060 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) { 2061 s->st_flags |= IPFW_STATE_F_SEQREV; 2062 s->st_seq_rev = seq; 2063 } else if (SEQ_GEQ(seq, s->st_seq_rev)) { 2064 s->st_seq_rev = seq; 2065 } else { 2066 /* Out-of-sequence; done. */ 2067 return (FALSE); 2068 } 2069 if (tcp->th_flags & TH_ACK) { 2070 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) { 2071 s->st_flags |= IPFW_STATE_F_ACKREV; 2072 s->st_ack_rev= ack; 2073 } else if (SEQ_GEQ(ack, s->st_ack_rev)) { 2074 s->st_ack_rev = ack; 2075 } else { 2076 /* Out-of-sequence; done. */ 2077 return (FALSE); 2078 } 2079 2080 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN && 2081 s->st_ack_rev == s->st_seq_fwd + 1) 2082 s->st_state |= TH_ACK; 2083 } 2084 } 2085 return (TRUE); 2086 } 2087 2088 static void 2089 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir, 2090 const struct tcphdr *tcp, struct ipfw_state *s) 2091 { 2092 2093 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 2094 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS; 2095 2096 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp)) 2097 return; 2098 2099 s->st_state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); 2100 switch (s->st_state & IPFW_STATE_TCPSTATES) { 2101 case TH_SYN: /* opening */ 2102 s->st_expire = time_uptime + dyn_syn_lifetime; 2103 break; 2104 2105 case BOTH_SYN: /* move to established */ 2106 case BOTH_SYN | TH_FIN: /* one side tries to close */ 2107 case BOTH_SYN | (TH_FIN << 8): 2108 s->st_expire = time_uptime + dyn_ack_lifetime; 2109 break; 2110 2111 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 2112 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) { 2113 /* And both FINs were ACKed. */ 2114 s->st_expire = time_uptime + dyn_fin_lifetime; 2115 } else { 2116 s->st_expire = time_uptime + 2117 dyn_finwait_lifetime; 2118 } 2119 break; 2120 2121 default: 2122 #if 0 2123 /* 2124 * reset or some invalid combination, but can also 2125 * occur if we use keep-state the wrong way. 2126 */ 2127 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0) 2128 kprintf("invalid state: 0x%x\n", s->st_state); 2129 #endif 2130 s->st_expire = time_uptime + dyn_rst_lifetime; 2131 break; 2132 } 2133 } else if (pkt->proto == IPPROTO_UDP) { 2134 s->st_expire = time_uptime + dyn_udp_lifetime; 2135 } else { 2136 /* other protocols */ 2137 s->st_expire = time_uptime + dyn_short_lifetime; 2138 } 2139 } 2140 2141 /* 2142 * Lookup a state. 2143 */ 2144 static struct ipfw_state * 2145 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt, 2146 int *match_direction, const struct tcphdr *tcp) 2147 { 2148 struct ipfw_state *key, *s; 2149 int dir = MATCH_NONE; 2150 2151 key = &ctx->ipfw_state_tmpkey; 2152 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port, 2153 pkt->dst_ip, pkt->dst_port, pkt->proto); 2154 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key); 2155 if (s == NULL) 2156 goto done; /* not found. */ 2157 if (IPFW_STATE_ISDEAD(s)) { 2158 ipfw_state_remove(ctx, s); 2159 s = NULL; 2160 goto done; 2161 } 2162 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) { 2163 /* TCP ports recycling is too fast. */ 2164 ctx->ipfw_sts_tcprecycled++; 2165 ipfw_state_remove(ctx, s); 2166 s = NULL; 2167 goto done; 2168 } 2169 2170 if (s->st_swap == key->st_swap) { 2171 dir = MATCH_FORWARD; 2172 } else { 2173 KASSERT((s->st_swap & key->st_swap) == 0, 2174 ("found mismatch state")); 2175 dir = MATCH_REVERSE; 2176 } 2177 2178 /* Update this state. */ 2179 ipfw_state_update(pkt, dir, tcp, s); 2180 2181 if (s->st_track != NULL) { 2182 /* This track has been used. 
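		 * Refresh its expire time, so the track outlives its
		 * active states.  For example (hypothetical ruleset),
		 * a rule like
		 *   ipfw add allow tcp from any to me 22 limit src-addr 4
		 * shares one track among the (at most 4) states created
		 * for a given source address.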
*/ 2183 s->st_track->t_expire = time_uptime + dyn_short_lifetime; 2184 } 2185 done: 2186 if (match_direction) 2187 *match_direction = dir; 2188 return (s); 2189 } 2190 2191 static struct ipfw_state * 2192 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2193 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp) 2194 { 2195 struct ipfw_state *s; 2196 size_t sz; 2197 2198 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type), 2199 ("invalid state type %u", type)); 2200 2201 sz = sizeof(struct ipfw_state); 2202 if (IPFW_ISXLAT(type)) 2203 sz = sizeof(struct ipfw_xlat); 2204 2205 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 2206 if (s == NULL) { 2207 ctx->ipfw_sts_nomem++; 2208 return (NULL); 2209 } 2210 2211 ipfw_key_build(&s->st_key, id->src_ip, id->src_port, 2212 id->dst_ip, id->dst_port, id->proto); 2213 2214 s->st_rule = rule; 2215 s->st_type = type; 2216 if (IPFW_ISXLAT(type)) { 2217 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2218 2219 x->xlat_dir = MATCH_NONE; 2220 x->xlat_pcpu = -1; 2221 } 2222 2223 /* 2224 * Update this state: 2225 * Set st_expire and st_state. 2226 */ 2227 ipfw_state_update(id, MATCH_FORWARD, tcp, s); 2228 2229 return (s); 2230 } 2231 2232 static struct ipfw_state * 2233 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2234 uint16_t type, struct ip_fw *rule, struct ipfw_track *t, 2235 const struct tcphdr *tcp) 2236 { 2237 struct ipfw_state *s, *dup; 2238 2239 s = ipfw_state_alloc(ctx, id, type, rule, tcp); 2240 if (s == NULL) 2241 return (NULL); 2242 2243 ctx->ipfw_state_cnt++; 2244 ctx->ipfw_state_loosecnt++; 2245 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) { 2246 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt; 2247 ctx->ipfw_state_loosecnt = 0; 2248 } 2249 2250 dup = ipfw_state_link(ctx, s); 2251 if (dup != NULL) 2252 panic("ipfw: %u state exists %p", type, dup); 2253 2254 if (t != NULL) { 2255 /* Keep the track referenced. */ 2256 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink); 2257 s->st_track = t; 2258 } 2259 return (s); 2260 } 2261 2262 static boolean_t 2263 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t) 2264 { 2265 struct ipfw_trkcnt *trk; 2266 boolean_t trk_freed = FALSE; 2267 2268 KASSERT(t->t_count != NULL, ("track anchor")); 2269 KASSERT(LIST_EMPTY(&t->t_state_list), 2270 ("invalid track is still referenced")); 2271 2272 trk = t->t_trkcnt; 2273 KASSERT(trk != NULL, ("track has no trkcnt")); 2274 2275 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2276 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link); 2277 kfree(t, M_IPFW); 2278 2279 /* 2280 * fdrop() style reference counting. 2281 * See kern/kern_descrip.c fdrop(). 2282 */ 2283 for (;;) { 2284 int refs = trk->tc_refs; 2285 2286 cpu_ccfence(); 2287 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs)); 2288 if (refs == 1) { 2289 IPFW_TRKCNT_TOKGET; 2290 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) { 2291 KASSERT(trk->tc_count == 0, 2292 ("%d states reference this trkcnt", 2293 trk->tc_count)); 2294 RB_REMOVE(ipfw_trkcnt_tree, 2295 &ipfw_gd.ipfw_trkcnt_tree, trk); 2296 2297 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0, 2298 ("invalid trkcnt cnt %d", 2299 ipfw_gd.ipfw_trkcnt_cnt)); 2300 ipfw_gd.ipfw_trkcnt_cnt--; 2301 IPFW_TRKCNT_TOKREL; 2302 2303 if (ctx->ipfw_trkcnt_spare == NULL) 2304 ctx->ipfw_trkcnt_spare = trk; 2305 else 2306 kfree(trk, M_IPFW); 2307 trk_freed = TRUE; 2308 break; /* done! 
*/ 2309 } 2310 IPFW_TRKCNT_TOKREL; 2311 /* retry */ 2312 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) { 2313 break; /* done! */ 2314 } 2315 /* retry */ 2316 } 2317 return (trk_freed); 2318 } 2319 2320 static void 2321 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule) 2322 { 2323 struct ipfw_track *t, *tn; 2324 2325 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) { 2326 if (t->t_count == NULL) /* anchor */ 2327 continue; 2328 if (rule != NULL && t->t_rule != rule) 2329 continue; 2330 ipfw_track_free(ctx, t); 2331 } 2332 } 2333 2334 static boolean_t 2335 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t, 2336 boolean_t reap) 2337 { 2338 struct ipfw_state *s, *sn; 2339 boolean_t ret = FALSE; 2340 2341 KASSERT(t->t_count != NULL, ("track anchor")); 2342 2343 if (LIST_EMPTY(&t->t_state_list)) 2344 return (FALSE); 2345 2346 /* 2347 * Do not expire more than once per second, it is useless. 2348 */ 2349 if (t->t_lastexp == time_uptime) 2350 return (FALSE); 2351 t->t_lastexp = time_uptime; 2352 2353 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) { 2354 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) { 2355 KASSERT(s->st_track == t, 2356 ("state track %p does not match %p", 2357 s->st_track, t)); 2358 ipfw_state_del(ctx, s); 2359 ret = TRUE; 2360 } 2361 } 2362 return (ret); 2363 } 2364 2365 static __inline struct ipfw_trkcnt * 2366 ipfw_trkcnt_alloc(struct ipfw_context *ctx) 2367 { 2368 struct ipfw_trkcnt *trk; 2369 2370 if (ctx->ipfw_trkcnt_spare != NULL) { 2371 trk = ctx->ipfw_trkcnt_spare; 2372 ctx->ipfw_trkcnt_spare = NULL; 2373 } else { 2374 trk = kmalloc(sizeof(*trk), M_IPFW, 2375 M_INTWAIT | M_NULLOK | M_CACHEALIGN); 2376 } 2377 return (trk); 2378 } 2379 2380 static void 2381 ipfw_track_expire_done(struct ipfw_context *ctx) 2382 { 2383 2384 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2385 ("trackexp is not in progress")); 2386 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP; 2387 callout_reset(&ctx->ipfw_trackto_ch, hz, 2388 ipfw_track_expire_ipifunc, NULL); 2389 } 2390 2391 static void 2392 ipfw_track_expire_more(struct ipfw_context *ctx) 2393 { 2394 struct netmsg_base *nm = &ctx->ipfw_trackexp_more; 2395 2396 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2397 ("trackexp is not in progress")); 2398 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 2399 ("trackexp more did not finish")); 2400 netisr_sendmsg_oncpu(nm); 2401 } 2402 2403 static int 2404 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor, 2405 int scan_max, int expire_max) 2406 { 2407 struct ipfw_track *t; 2408 int scanned = 0, expired = 0; 2409 boolean_t reap = FALSE; 2410 2411 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2412 ("trackexp is not in progress")); 2413 2414 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) 2415 reap = TRUE; 2416 2417 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2418 if (scanned++ >= scan_max) { 2419 ipfw_track_expire_more(ctx); 2420 return (expired); 2421 } 2422 2423 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2424 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2425 2426 if (t->t_count == NULL) /* anchor */ 2427 continue; 2428 2429 ipfw_track_state_expire(ctx, t, reap); 2430 if (!LIST_EMPTY(&t->t_state_list)) { 2431 /* There are states referencing this track. */ 2432 continue; 2433 } 2434 2435 if (TIME_LEQ(t->t_expire, time_uptime) || reap) { 2436 /* Expired. 
*/ 2437 if (ipfw_track_free(ctx, t)) { 2438 if (++expired >= expire_max) { 2439 ipfw_track_expire_more(ctx); 2440 return (expired); 2441 } 2442 } 2443 } 2444 } 2445 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2446 ipfw_track_expire_done(ctx); 2447 return (expired); 2448 } 2449 2450 static int 2451 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 2452 { 2453 struct ipfw_track *anchor; 2454 2455 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0, 2456 ("trackexp is in progress")); 2457 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP; 2458 2459 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2460 ipfw_track_expire_done(ctx); 2461 return (0); 2462 } 2463 2464 /* 2465 * Do not expire more than once per second, it is useless. 2466 */ 2467 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 && 2468 ctx->ipfw_track_lastexp == time_uptime) { 2469 ipfw_track_expire_done(ctx); 2470 return (0); 2471 } 2472 ctx->ipfw_track_lastexp = time_uptime; 2473 2474 anchor = &ctx->ipfw_trackexp_anch; 2475 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link); 2476 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max)); 2477 } 2478 2479 static void 2480 ipfw_track_expire_more_dispatch(netmsg_t nm) 2481 { 2482 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2483 struct ipfw_track *anchor; 2484 2485 ASSERT_NETISR_NCPUS(mycpuid); 2486 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2487 ("trackexp is not in progress")); 2488 2489 /* Reply ASAP */ 2490 netisr_replymsg(&nm->base, 0); 2491 2492 anchor = &ctx->ipfw_trackexp_anch; 2493 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2494 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2495 ipfw_track_expire_done(ctx); 2496 return; 2497 } 2498 ipfw_track_expire_loop(ctx, anchor, 2499 ipfw_track_scan_max, ipfw_track_expire_max); 2500 } 2501 2502 static void 2503 ipfw_track_expire_dispatch(netmsg_t nm) 2504 { 2505 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2506 2507 ASSERT_NETISR_NCPUS(mycpuid); 2508 2509 /* Reply ASAP */ 2510 crit_enter(); 2511 netisr_replymsg(&nm->base, 0); 2512 crit_exit(); 2513 2514 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) { 2515 /* Running; done. */ 2516 return; 2517 } 2518 ipfw_track_expire_start(ctx, 2519 ipfw_track_scan_max, ipfw_track_expire_max); 2520 } 2521 2522 static void 2523 ipfw_track_expire_ipifunc(void *dummy __unused) 2524 { 2525 struct netmsg_base *msg; 2526 2527 KKASSERT(mycpuid < netisr_ncpus); 2528 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm; 2529 2530 crit_enter(); 2531 if (msg->lmsg.ms_flags & MSGF_DONE) 2532 netisr_sendmsg_oncpu(msg); 2533 crit_exit(); 2534 } 2535 2536 static int 2537 ipfw_track_reap(struct ipfw_context *ctx) 2538 { 2539 struct ipfw_track *t, *anchor; 2540 int expired; 2541 2542 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) { 2543 /* 2544 * Kick start track expiring. Ignore scan limit, 2545 * we are short of tracks. 2546 */ 2547 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP; 2548 expired = ipfw_track_expire_start(ctx, INT_MAX, 2549 ipfw_track_reap_max); 2550 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP; 2551 return (expired); 2552 } 2553 2554 /* 2555 * Tracks are being expired. 2556 */ 2557 2558 if (RB_EMPTY(&ctx->ipfw_track_tree)) 2559 return (0); 2560 2561 expired = 0; 2562 anchor = &ctx->ipfw_trackexp_anch; 2563 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2564 /* 2565 * Ignore scan limit; we are short of tracks. 
2566 */ 2567 2568 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2569 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2570 2571 if (t->t_count == NULL) /* anchor */ 2572 continue; 2573 2574 ipfw_track_state_expire(ctx, t, TRUE); 2575 if (!LIST_EMPTY(&t->t_state_list)) { 2576 /* There are states referencing this track. */ 2577 continue; 2578 } 2579 2580 if (ipfw_track_free(ctx, t)) { 2581 if (++expired >= ipfw_track_reap_max) { 2582 ipfw_track_expire_more(ctx); 2583 break; 2584 } 2585 } 2586 } 2587 /* 2588 * NOTE: 2589 * Leave the anchor on the list, even if the end of the list has 2590 * been reached. ipfw_track_expire_more_dispatch() will handle 2591 * the removal. 2592 */ 2593 return (expired); 2594 } 2595 2596 static struct ipfw_track * 2597 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2598 uint16_t limit_mask, struct ip_fw *rule) 2599 { 2600 struct ipfw_track *key, *t, *dup; 2601 struct ipfw_trkcnt *trk, *ret; 2602 boolean_t do_expire = FALSE; 2603 2604 KASSERT(rule->track_ruleid != 0, 2605 ("rule %u has no track ruleid", rule->rulenum)); 2606 2607 key = &ctx->ipfw_track_tmpkey; 2608 key->t_proto = id->proto; 2609 key->t_addrs = 0; 2610 key->t_ports = 0; 2611 key->t_rule = rule; 2612 if (limit_mask & DYN_SRC_ADDR) 2613 key->t_saddr = id->src_ip; 2614 if (limit_mask & DYN_DST_ADDR) 2615 key->t_daddr = id->dst_ip; 2616 if (limit_mask & DYN_SRC_PORT) 2617 key->t_sport = id->src_port; 2618 if (limit_mask & DYN_DST_PORT) 2619 key->t_dport = id->dst_port; 2620 2621 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key); 2622 if (t != NULL) 2623 goto done; 2624 2625 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK); 2626 if (t == NULL) { 2627 ctx->ipfw_tks_nomem++; 2628 return (NULL); 2629 } 2630 2631 t->t_key = key->t_key; 2632 t->t_rule = rule; 2633 t->t_lastexp = 0; 2634 LIST_INIT(&t->t_state_list); 2635 2636 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) { 2637 time_t globexp, uptime; 2638 2639 trk = NULL; 2640 do_expire = TRUE; 2641 2642 /* 2643 * Do not expire globally more than once per second, 2644 * it is useless. 2645 */ 2646 uptime = time_uptime; 2647 globexp = ipfw_gd.ipfw_track_globexp; 2648 if (globexp != uptime && 2649 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp, 2650 globexp, uptime)) { 2651 int cpu; 2652 2653 /* Expire tracks on other CPUs. 
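			 * Each IPI kicks a track expiring cycle on its
			 * target cpu; tracks freed there drop their
			 * trkcnt references, which is what is accounted
			 * against ipfw_track_max.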
			 */
2654 			for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2655 				if (cpu == mycpuid)
2656 					continue;
2657 				lwkt_send_ipiq(globaldata_find(cpu),
2658 				    ipfw_track_expire_ipifunc, NULL);
2659 			}
2660 		}
2661 	} else {
2662 		trk = ipfw_trkcnt_alloc(ctx);
2663 	}
2664 	if (trk == NULL) {
2665 		struct ipfw_trkcnt *tkey;
2666 
2667 		tkey = &ctx->ipfw_trkcnt_tmpkey;
2668 		key = NULL; /* tkey overlaps key */
2669 
2670 		tkey->tc_key = t->t_key;
2671 		tkey->tc_ruleid = rule->track_ruleid;
2672 
2673 		IPFW_TRKCNT_TOKGET;
2674 		trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2675 		    tkey);
2676 		if (trk == NULL) {
2677 			IPFW_TRKCNT_TOKREL;
2678 			if (do_expire) {
2679 				ctx->ipfw_tks_reap++;
2680 				if (ipfw_track_reap(ctx) > 0) {
2681 					if (ipfw_gd.ipfw_trkcnt_cnt <
2682 					    ipfw_track_max) {
2683 						trk = ipfw_trkcnt_alloc(ctx);
2684 						if (trk != NULL)
2685 							goto install;
2686 						ctx->ipfw_tks_cntnomem++;
2687 					} else {
2688 						ctx->ipfw_tks_overflow++;
2689 					}
2690 				} else {
2691 					ctx->ipfw_tks_reapfailed++;
2692 					ctx->ipfw_tks_overflow++;
2693 				}
2694 			} else {
2695 				ctx->ipfw_tks_cntnomem++;
2696 			}
2697 			kfree(t, M_IPFW);
2698 			return (NULL);
2699 		}
2700 		KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus,
2701 		    ("invalid trkcnt refs %d", trk->tc_refs));
2702 		atomic_add_int(&trk->tc_refs, 1);
2703 		IPFW_TRKCNT_TOKREL;
2704 	} else {
install:
2706 		trk->tc_key = t->t_key;
2707 		trk->tc_ruleid = rule->track_ruleid;
2708 		trk->tc_refs = 0;
2709 		trk->tc_count = 0;
2710 		trk->tc_expire = 0;
2711 		trk->tc_rulenum = rule->rulenum;
2712 
2713 		IPFW_TRKCNT_TOKGET;
2714 		ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2715 		    trk);
2716 		if (ret != NULL) {
2717 			KASSERT(ret->tc_refs > 0 &&
2718 			    ret->tc_refs < netisr_ncpus,
2719 			    ("invalid trkcnt refs %d", ret->tc_refs));
2720 			KASSERT(ctx->ipfw_trkcnt_spare == NULL,
2721 			    ("trkcnt spare was installed"));
2722 			ctx->ipfw_trkcnt_spare = trk;
2723 			trk = ret;
2724 		} else {
2725 			ipfw_gd.ipfw_trkcnt_cnt++;
2726 		}
2727 		atomic_add_int(&trk->tc_refs, 1);
2728 		IPFW_TRKCNT_TOKREL;
2729 	}
2730 	t->t_count = &trk->tc_count;
2731 	t->t_trkcnt = trk;
2732 
2733 	dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2734 	if (dup != NULL)
2735 		panic("ipfw: track exists");
2736 	TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link);
done:
2738 	t->t_expire = time_uptime + dyn_short_lifetime;
2739 	return (t);
2740 }
2741 
2742 /*
2743  * Install state for rule type cmd->o.opcode
2744  *
2745  * Returns NULL if the state is not installed because of errors or
2746  * because state limits are enforced.
2747  */
2748 static struct ipfw_state *
2749 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule,
2750     ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp)
2751 {
2752 	struct ipfw_state *s;
2753 	struct ipfw_track *t;
2754 	int count, diff;
2755 
2756 	if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max &&
2757 	    (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) {
2758 		boolean_t overflow = TRUE;
2759 
2760 		ctx->ipfw_sts_reap++;
2761 		if (ipfw_state_reap(ctx, diff) == 0)
2762 			ctx->ipfw_sts_reapfailed++;
2763 		if (ipfw_state_cntsync() < ipfw_state_max)
2764 			overflow = FALSE;
2765 
2766 		if (overflow) {
2767 			time_t globexp, uptime;
2768 			int cpu;
2769 
2770 			/*
2771 			 * Do not expire globally more than once per second,
2772 			 * it is useless.
2773 			 */
2774 			uptime = time_uptime;
2775 			globexp = ipfw_gd.ipfw_state_globexp;
2776 			if (globexp == uptime ||
2777 			    !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp,
2778 			    globexp, uptime)) {
2779 				ctx->ipfw_sts_overflow++;
2780 				return (NULL);
2781 			}
2782 
2783 			/* Expire states on other CPUs. */
2784 			for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2785 				if (cpu == mycpuid)
2786 					continue;
2787 				lwkt_send_ipiq(globaldata_find(cpu),
2788 				    ipfw_state_expire_ipifunc, NULL);
2789 			}
2790 			ctx->ipfw_sts_overflow++;
2791 			return (NULL);
2792 		}
2793 	}
2794 
2795 	switch (cmd->o.opcode) {
2796 	case O_KEEP_STATE: /* bidir rule */
2797 	case O_REDIRECT:
2798 		s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL,
2799 		    tcp);
2800 		if (s == NULL)
2801 			return (NULL);
2802 		break;
2803 
2804 	case O_LIMIT: /* limit number of sessions */
2805 		t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule);
2806 		if (t == NULL)
2807 			return (NULL);
2808 
2809 		if (*t->t_count >= cmd->conn_limit) {
2810 			if (!ipfw_track_state_expire(ctx, t, TRUE))
2811 				return (NULL);
2812 		}
2813 		for (;;) {
2814 			count = *t->t_count;
2815 			if (count >= cmd->conn_limit)
2816 				return (NULL);
2817 			if (atomic_cmpset_int(t->t_count, count, count + 1))
2818 				break;
2819 		}
2820 
2821 		s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp);
2822 		if (s == NULL) {
2823 			/* Undo damage. */
2824 			atomic_subtract_int(t->t_count, 1);
2825 			return (NULL);
2826 		}
2827 		break;
2828 
2829 	default:
2830 		panic("unknown state type %u\n", cmd->o.opcode);
2831 	}
2832 
2833 	if (s->st_type == O_REDIRECT) {
2834 		struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2835 		ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd;
2836 
2837 		x->xlat_addr = r->addr.s_addr;
2838 		x->xlat_port = r->port;
2839 		x->xlat_ifp = args->m->m_pkthdr.rcvif;
2840 		x->xlat_dir = MATCH_FORWARD;
2841 		KKASSERT(x->xlat_ifp != NULL);
2842 	}
2843 	return (s);
2844 }
2845 
2846 static int
2847 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid,
2848     const struct in_addr *in)
2849 {
2850 	struct radix_node_head *rnh;
2851 	struct sockaddr_in sin;
2852 	struct ipfw_tblent *te;
2853 
2854 	KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid));
2855 	rnh = ctx->ipfw_tables[tableid];
2856 	if (rnh == NULL)
2857 		return (0); /* no match */
2858 
2859 	memset(&sin, 0, sizeof(sin));
2860 	sin.sin_family = AF_INET;
2861 	sin.sin_len = sizeof(sin);
2862 	sin.sin_addr = *in;
2863 
2864 	te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh);
2865 	if (te == NULL)
2866 		return (0); /* no match */
2867 
2868 	te->te_use++;
2869 	te->te_lastuse = time_second;
2870 	return (1); /* match */
2871 }
2872 
2873 /*
2874  * Transmit a TCP packet, containing either a RST or a keepalive.
2875  * When flags & TH_RST, we are sending a RST packet, because a
2876  * "reset" action matched the packet.
2877  * Otherwise we are sending a keepalive, and flags & TH_SYN determines
2878  * the direction of the keepalive (forward if set, reverse if clear).
2879  * Only {src,dst}_{ip,port} of "id" are used.
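 * NOTE: "seq" and "ack" are expected in host byte order; this
 * function performs the htonl() conversions itself.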
2880 */ 2881 static void 2882 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 2883 { 2884 struct mbuf *m; 2885 struct ip *ip; 2886 struct tcphdr *tcp; 2887 struct route sro; /* fake route */ 2888 2889 MGETHDR(m, M_NOWAIT, MT_HEADER); 2890 if (m == NULL) 2891 return; 2892 m->m_pkthdr.rcvif = NULL; 2893 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 2894 m->m_data += max_linkhdr; 2895 2896 ip = mtod(m, struct ip *); 2897 bzero(ip, m->m_len); 2898 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 2899 ip->ip_p = IPPROTO_TCP; 2900 tcp->th_off = 5; 2901 2902 /* 2903 * Assume we are sending a RST (or a keepalive in the reverse 2904 * direction), swap src and destination addresses and ports. 2905 */ 2906 ip->ip_src.s_addr = htonl(id->dst_ip); 2907 ip->ip_dst.s_addr = htonl(id->src_ip); 2908 tcp->th_sport = htons(id->dst_port); 2909 tcp->th_dport = htons(id->src_port); 2910 if (flags & TH_RST) { /* we are sending a RST */ 2911 if (flags & TH_ACK) { 2912 tcp->th_seq = htonl(ack); 2913 tcp->th_ack = htonl(0); 2914 tcp->th_flags = TH_RST; 2915 } else { 2916 if (flags & TH_SYN) 2917 seq++; 2918 tcp->th_seq = htonl(0); 2919 tcp->th_ack = htonl(seq); 2920 tcp->th_flags = TH_RST | TH_ACK; 2921 } 2922 } else { 2923 /* 2924 * We are sending a keepalive. flags & TH_SYN determines 2925 * the direction, forward if set, reverse if clear. 2926 * NOTE: seq and ack are always assumed to be correct 2927 * as set by the caller. This may be confusing... 2928 */ 2929 if (flags & TH_SYN) { 2930 /* 2931 * we have to rewrite the correct addresses! 2932 */ 2933 ip->ip_dst.s_addr = htonl(id->dst_ip); 2934 ip->ip_src.s_addr = htonl(id->src_ip); 2935 tcp->th_dport = htons(id->dst_port); 2936 tcp->th_sport = htons(id->src_port); 2937 } 2938 tcp->th_seq = htonl(seq); 2939 tcp->th_ack = htonl(ack); 2940 tcp->th_flags = TH_ACK; 2941 } 2942 2943 /* 2944 * set ip_len to the payload size so we can compute 2945 * the tcp checksum on the pseudoheader 2946 * XXX check this, could save a couple of words ? 2947 */ 2948 ip->ip_len = htons(sizeof(struct tcphdr)); 2949 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 2950 2951 /* 2952 * now fill fields left out earlier 2953 */ 2954 ip->ip_ttl = ip_defttl; 2955 ip->ip_len = m->m_pkthdr.len; 2956 2957 bzero(&sro, sizeof(sro)); 2958 ip_rtaddr(ip->ip_dst, &sro); 2959 2960 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 2961 ip_output(m, NULL, &sro, 0, NULL, NULL); 2962 if (sro.ro_rt) 2963 RTFREE(sro.ro_rt); 2964 } 2965 2966 /* 2967 * Send a reject message, consuming the mbuf passed as an argument. 2968 */ 2969 static void 2970 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 2971 { 2972 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 2973 /* We need the IP header in host order for icmp_error(). 
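		 * For layer-2 packets ip_len/ip_off are still in network
		 * byte order (as on the wire), hence the conditional
		 * byte-swapping below.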
*/ 2974 if (args->eh != NULL) { 2975 struct ip *ip = mtod(args->m, struct ip *); 2976 2977 ip->ip_len = ntohs(ip->ip_len); 2978 ip->ip_off = ntohs(ip->ip_off); 2979 } 2980 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 2981 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 2982 struct tcphdr *const tcp = 2983 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 2984 2985 if ((tcp->th_flags & TH_RST) == 0) { 2986 send_pkt(&args->f_id, ntohl(tcp->th_seq), 2987 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 2988 } 2989 m_freem(args->m); 2990 } else { 2991 m_freem(args->m); 2992 } 2993 args->m = NULL; 2994 } 2995 2996 /* 2997 * Given an ip_fw *, lookup_next_rule will return a pointer 2998 * to the next rule, which can be either the jump 2999 * target (for skipto instructions) or the next one in the list (in 3000 * all other cases including a missing jump target). 3001 * The result is also written in the "next_rule" field of the rule. 3002 * Backward jumps are not allowed, so start looking from the next 3003 * rule... 3004 * 3005 * This never returns NULL -- in case we do not have an exact match, 3006 * the next rule is returned. When the ruleset is changed, 3007 * pointers are flushed so we are always correct. 3008 */ 3009 static struct ip_fw * 3010 lookup_next_rule(struct ip_fw *me) 3011 { 3012 struct ip_fw *rule = NULL; 3013 ipfw_insn *cmd; 3014 3015 /* look for action, in case it is a skipto */ 3016 cmd = ACTION_PTR(me); 3017 if (cmd->opcode == O_LOG) 3018 cmd += F_LEN(cmd); 3019 if (cmd->opcode == O_SKIPTO) { 3020 for (rule = me->next; rule; rule = rule->next) { 3021 if (rule->rulenum >= cmd->arg1) 3022 break; 3023 } 3024 } 3025 if (rule == NULL) /* failure or not a skipto */ 3026 rule = me->next; 3027 me->next_rule = rule; 3028 return rule; 3029 } 3030 3031 static int 3032 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 3033 enum ipfw_opcodes opcode, uid_t uid) 3034 { 3035 struct in_addr src_ip, dst_ip; 3036 struct inpcbinfo *pi; 3037 boolean_t wildcard; 3038 struct inpcb *pcb; 3039 3040 if (fid->proto == IPPROTO_TCP) { 3041 wildcard = FALSE; 3042 pi = &tcbinfo[mycpuid]; 3043 } else if (fid->proto == IPPROTO_UDP) { 3044 wildcard = TRUE; 3045 pi = &udbinfo[mycpuid]; 3046 } else { 3047 return 0; 3048 } 3049 3050 /* 3051 * Values in 'fid' are in host byte order 3052 */ 3053 dst_ip.s_addr = htonl(fid->dst_ip); 3054 src_ip.s_addr = htonl(fid->src_ip); 3055 if (oif) { 3056 pcb = in_pcblookup_hash(pi, 3057 dst_ip, htons(fid->dst_port), 3058 src_ip, htons(fid->src_port), 3059 wildcard, oif); 3060 } else { 3061 pcb = in_pcblookup_hash(pi, 3062 src_ip, htons(fid->src_port), 3063 dst_ip, htons(fid->dst_port), 3064 wildcard, NULL); 3065 } 3066 if (pcb == NULL || pcb->inp_socket == NULL) 3067 return 0; 3068 3069 if (opcode == O_UID) { 3070 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 3071 return !socheckuid(pcb->inp_socket, uid); 3072 #undef socheckuid 3073 } else { 3074 return groupmember(uid, pcb->inp_socket->so_cred); 3075 } 3076 } 3077 3078 static int 3079 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip) 3080 { 3081 3082 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) { 3083 struct ifaddr_container *ifac; 3084 struct ifnet *ifp; 3085 3086 ifp = ifunit_netisr(cmd->ifname); 3087 if (ifp == NULL) 3088 return (0); 3089 3090 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 3091 struct ifaddr *ia = ifac->ifa; 3092 3093 if (ia->ifa_addr == NULL) 3094 continue; 3095 if (ia->ifa_addr->sa_family != AF_INET) 3096 continue; 3097 3098 
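			/*
			 * Cache the interface's first IPv4 address and
			 * the derived mask in the instruction itself and
			 * mark it IPFW_IFIP_VALID, so that subsequent
			 * packets skip this address-list walk.
			 */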
cmd->mask.s_addr = INADDR_ANY; 3099 if (cmd->o.arg1 & IPFW_IFIP_NET) { 3100 cmd->mask = ((struct sockaddr_in *) 3101 ia->ifa_netmask)->sin_addr; 3102 } 3103 if (cmd->mask.s_addr == INADDR_ANY) 3104 cmd->mask.s_addr = INADDR_BROADCAST; 3105 3106 cmd->addr = 3107 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr; 3108 cmd->addr.s_addr &= cmd->mask.s_addr; 3109 3110 cmd->o.arg1 |= IPFW_IFIP_VALID; 3111 break; 3112 } 3113 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0) 3114 return (0); 3115 } 3116 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr); 3117 } 3118 3119 static void 3120 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m, 3121 struct in_addr *old_addr, uint16_t *old_port) 3122 { 3123 struct ip *ip = mtod(m, struct ip *); 3124 struct in_addr *addr; 3125 uint16_t *port, *csum, dlen = 0; 3126 uint8_t udp = 0; 3127 boolean_t pseudo = FALSE; 3128 3129 if (x->xlat_flags & IPFW_STATE_F_XLATSRC) { 3130 addr = &ip->ip_src; 3131 switch (ip->ip_p) { 3132 case IPPROTO_TCP: 3133 port = &L3HDR(struct tcphdr, ip)->th_sport; 3134 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3135 break; 3136 case IPPROTO_UDP: 3137 port = &L3HDR(struct udphdr, ip)->uh_sport; 3138 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3139 udp = 1; 3140 break; 3141 default: 3142 panic("ipfw: unsupported src xlate proto %u", ip->ip_p); 3143 } 3144 } else { 3145 addr = &ip->ip_dst; 3146 switch (ip->ip_p) { 3147 case IPPROTO_TCP: 3148 port = &L3HDR(struct tcphdr, ip)->th_dport; 3149 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3150 break; 3151 case IPPROTO_UDP: 3152 port = &L3HDR(struct udphdr, ip)->uh_dport; 3153 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3154 udp = 1; 3155 break; 3156 default: 3157 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p); 3158 } 3159 } 3160 if (old_addr != NULL) 3161 *old_addr = *addr; 3162 if (old_port != NULL) { 3163 if (x->xlat_port != 0) 3164 *old_port = *port; 3165 else 3166 *old_port = 0; 3167 } 3168 3169 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 3170 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) 3171 dlen = ip->ip_len - (ip->ip_hl << 2); 3172 pseudo = TRUE; 3173 } 3174 3175 if (!pseudo) { 3176 const uint16_t *oaddr, *naddr; 3177 3178 oaddr = (const uint16_t *)&addr->s_addr; 3179 naddr = (const uint16_t *)&x->xlat_addr; 3180 3181 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum, 3182 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0); 3183 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum, 3184 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp); 3185 } 3186 addr->s_addr = x->xlat_addr; 3187 3188 if (x->xlat_port != 0) { 3189 if (!pseudo) { 3190 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port, 3191 udp); 3192 } 3193 *port = x->xlat_port; 3194 } 3195 3196 if (pseudo) { 3197 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 3198 htons(dlen + ip->ip_p)); 3199 } 3200 } 3201 3202 static void 3203 ipfw_ip_xlate_dispatch(netmsg_t nmsg) 3204 { 3205 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 3206 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3207 struct mbuf *m = nm->m; 3208 struct ipfw_xlat *x = nm->arg1; 3209 struct ip_fw *rule = x->xlat_rule; 3210 3211 ASSERT_NETISR_NCPUS(mycpuid); 3212 KASSERT(rule->cpuid == mycpuid, 3213 ("rule does not belong to cpu%d", mycpuid)); 3214 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 3215 ("mbuf does not have ipfw continue rule")); 3216 3217 KASSERT(ctx->ipfw_cont_rule == NULL, 3218 ("pending ipfw continue rule")); 3219 KASSERT(ctx->ipfw_cont_xlat == NULL, 3220 ("pending ipfw continue xlat")); 3221 
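	/*
	 * Publish the continuation rule/xlat in the per-cpu context,
	 * to be picked up when the translated packet re-enters
	 * ipfw_chk() through ip_input()/ip_output() below.
	 */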
	ctx->ipfw_cont_rule = rule;
3222 	ctx->ipfw_cont_xlat = x;
3223 
3224 	if (nm->arg2 == 0)
3225 		ip_input(m);
3226 	else
3227 		ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
3228 
3229 	/* May not be cleared if ipfw was unloaded/disabled. */
3230 	ctx->ipfw_cont_rule = NULL;
3231 	ctx->ipfw_cont_xlat = NULL;
3232 
3233 	/*
3234 	 * This state is no longer used; decrement its xlat_crefs,
3235 	 * so this state can be deleted.
3236 	 */
3237 	x->xlat_crefs--;
3238 	/*
3239 	 * This rule is no longer used; decrement its cross_refs,
3240 	 * so this rule can be deleted.
3241 	 *
3242 	 * NOTE:
3243 	 * Decrement cross_refs in the last step of this function,
3244 	 * so that the module can be unloaded safely.
3245 	 */
3246 	rule->cross_refs--;
3247 }
3248 
3249 static void
3250 ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x,
3251     uint32_t flags)
3252 {
3253 	struct netmsg_genpkt *nm;
3254 
3255 	KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d",
3256 	    x->xlat_pcpu, cpuid));
3257 
3258 	/*
3259 	 * Bump cross_refs to prevent this rule and its siblings
3260 	 * from being deleted, while this mbuf is inflight. The
3261 	 * cross_refs of the sibling rule on the target cpu will
3262 	 * be decremented once this mbuf is filtered on the
3263 	 * target cpu.
3264 	 */
3265 	x->xlat_rule->cross_refs++;
3266 	/*
3267 	 * Bump xlat_crefs to prevent this state and its paired
3268 	 * state from being deleted, while this mbuf is inflight.
3269 	 * The xlat_crefs of the paired state on the target cpu
3270 	 * will be decremented once this mbuf is filtered on the
3271 	 * target cpu.
3272 	 */
3273 	x->xlat_crefs++;
3274 
3275 	m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
3276 	if (flags & IPFW_XLATE_INSERT)
3277 		m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS;
3278 	if (flags & IPFW_XLATE_FORWARD)
3279 		m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD;
3280 
3281 	if ((flags & IPFW_XLATE_OUTPUT) == 0) {
3282 		struct ip *ip = mtod(m, struct ip *);
3283 
3284 		/*
3285 		 * NOTE:
3286 		 * ip_input() expects ip_len/ip_off to be in network
3287 		 * byte order.
3288 		 */
3289 		ip->ip_len = htons(ip->ip_len);
3290 		ip->ip_off = htons(ip->ip_off);
3291 	}
3292 
3293 	nm = &m->m_hdr.mh_genmsg;
3294 	netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
3295 	    ipfw_ip_xlate_dispatch);
3296 	nm->m = m;
3297 	nm->arg1 = x->xlat_pair;
3298 	nm->arg2 = 0;
3299 	if (flags & IPFW_XLATE_OUTPUT)
3300 		nm->arg2 = 1;
3301 	netisr_sendmsg(&nm->base, cpuid);
3302 }
3303 
3304 static struct mbuf *
3305 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3306     struct ip_fw_local *local, struct ip **ip0)
3307 {
3308 	struct ip *ip = mtod(m, struct ip *);
3309 	struct tcphdr *tcp;
3310 	struct udphdr *udp;
3311 
3312 	/*
3313 	 * Collect parameters into local variables for faster matching.
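	 * For layer-2 packets ip_len/ip_off are still as on the wire
	 * and need ntohs(); for layer-3 packets ip_input() has already
	 * converted them to host byte order, hence the args->eh test
	 * below.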
3314 */ 3315 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 3316 local->proto = args->f_id.proto = 0; /* mark f_id invalid */ 3317 goto done; 3318 } 3319 3320 local->proto = args->f_id.proto = ip->ip_p; 3321 local->src_ip = ip->ip_src; 3322 local->dst_ip = ip->ip_dst; 3323 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 3324 local->offset = ntohs(ip->ip_off) & IP_OFFMASK; 3325 local->ip_len = ntohs(ip->ip_len); 3326 } else { 3327 local->offset = ip->ip_off & IP_OFFMASK; 3328 local->ip_len = ip->ip_len; 3329 } 3330 3331 #define PULLUP_TO(len) \ 3332 do { \ 3333 if (m->m_len < (len)) { \ 3334 args->m = m = m_pullup(m, (len)); \ 3335 if (m == NULL) { \ 3336 ip = NULL; \ 3337 goto done; \ 3338 } \ 3339 ip = mtod(m, struct ip *); \ 3340 } \ 3341 } while (0) 3342 3343 if (local->offset == 0) { 3344 switch (local->proto) { 3345 case IPPROTO_TCP: 3346 PULLUP_TO(hlen + sizeof(struct tcphdr)); 3347 local->tcp = tcp = L3HDR(struct tcphdr, ip); 3348 local->dst_port = tcp->th_dport; 3349 local->src_port = tcp->th_sport; 3350 args->f_id.flags = tcp->th_flags; 3351 break; 3352 3353 case IPPROTO_UDP: 3354 PULLUP_TO(hlen + sizeof(struct udphdr)); 3355 udp = L3HDR(struct udphdr, ip); 3356 local->dst_port = udp->uh_dport; 3357 local->src_port = udp->uh_sport; 3358 break; 3359 3360 case IPPROTO_ICMP: 3361 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 3362 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 3363 break; 3364 3365 default: 3366 break; 3367 } 3368 } 3369 3370 #undef PULLUP_TO 3371 3372 args->f_id.src_ip = ntohl(local->src_ip.s_addr); 3373 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr); 3374 args->f_id.src_port = local->src_port = ntohs(local->src_port); 3375 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port); 3376 done: 3377 *ip0 = ip; 3378 return (m); 3379 } 3380 3381 static struct mbuf * 3382 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3383 struct ip_fw_local *local, struct ip **ip0) 3384 { 3385 struct ip *ip = mtod(m, struct ip *); 3386 3387 ip->ip_len = htons(ip->ip_len); 3388 ip->ip_off = htons(ip->ip_off); 3389 3390 m->m_flags &= ~M_HASH; 3391 ip_hashfn(&m, 0); 3392 args->m = m; 3393 if (m == NULL) { 3394 *ip0 = NULL; 3395 return (NULL); 3396 } 3397 KASSERT(m->m_flags & M_HASH, ("no hash")); 3398 3399 /* 'm' might be changed by ip_hashfn(). */ 3400 ip = mtod(m, struct ip *); 3401 ip->ip_len = ntohs(ip->ip_len); 3402 ip->ip_off = ntohs(ip->ip_off); 3403 3404 return (ipfw_setup_local(m, hlen, args, local, ip0)); 3405 } 3406 3407 /* 3408 * The main check routine for the firewall. 3409 * 3410 * All arguments are in args so we can modify them and return them 3411 * back to the caller. 3412 * 3413 * Parameters: 3414 * 3415 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 3416 * Starts with the IP header. 3417 * args->eh (in) Mac header if present, or NULL for layer3 packet. 3418 * args->oif Outgoing interface, or NULL if packet is incoming. 3419 * The incoming interface is in the mbuf. (in) 3420 * 3421 * args->rule Pointer to the last matching rule (in/out) 3422 * args->f_id Addresses grabbed from the packet (out) 3423 * 3424 * Return value: 3425 * 3426 * If the packet was denied/rejected and has been dropped, *m is equal 3427 * to NULL upon return. 3428 * 3429 * IP_FW_DENY the packet must be dropped. 3430 * IP_FW_PASS The packet is to be accepted and routed normally. 
3431  * IP_FW_DIVERT	Divert the packet to port (args->cookie)
3432  * IP_FW_TEE	Tee the packet to port (args->cookie)
3433  * IP_FW_DUMMYNET	Send the packet to pipe/queue (args->cookie)
3434  * IP_FW_CONTINUE	Continue processing on another cpu.
3435  */
3436 static int
3437 ipfw_chk(struct ip_fw_args *args)
3438 {
3439 	/*
3440 	 * Local variables hold state during the processing of a packet.
3441 	 *
3442 	 * IMPORTANT NOTE: to speed up the processing of rules, there
3443 	 * are some assumptions about the values of the variables, which
3444 	 * are documented here. Should you change them, please check
3445 	 * the implementation of the various instructions to make sure
3446 	 * that they still work.
3447 	 *
3448 	 * args->eh	The MAC header. It is non-NULL for a layer-2
3449 	 *	packet and NULL for a layer-3 packet.
3450 	 *
3451 	 * m | args->m	Pointer to the mbuf, as received from the caller.
3452 	 *	It may change if ipfw_chk() does an m_pullup, or if it
3453 	 *	consumes the packet because it calls send_reject().
3454 	 *	XXX This has to change, so that ipfw_chk() never modifies
3455 	 *	or consumes the buffer.
3456 	 *	ip is simply an alias of the value of m, and it is kept
3457 	 *	in sync with it (the packet is supposed to start with
3458 	 *	the ip header).
3459 	 */
3460 	struct mbuf *m = args->m;
3461 	struct ip *ip = mtod(m, struct ip *);
3462 
3463 	/*
3464 	 * oif | args->oif	If NULL, ipfw_chk has been called on the
3465 	 *	inbound path (ether_input, ip_input).
3466 	 *	If non-NULL, ipfw_chk has been called on the outbound path
3467 	 *	(ether_output, ip_output).
3468 	 */
3469 	struct ifnet *oif = args->oif;
3470 
3471 	struct ip_fw *f = NULL;		/* matching rule */
3472 	int retval = IP_FW_PASS;
3473 	struct m_tag *mtag;
3474 	struct divert_info *divinfo;
3475 	struct ipfw_state *s;
3476 
3477 	/*
3478 	 * hlen	The length of the IPv4 header.
3479 	 *	hlen > 0 means we have an IPv4 packet.
3480 	 */
3481 	u_int hlen = 0;		/* hlen > 0 means we have an IP pkt */
3482 
3483 	struct ip_fw_local lc;
3484 
3485 	/*
3486 	 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3487 	 *	MATCH_NONE when checked and not matched (dyn_f = NULL),
3488 	 *	MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3489 	 */
3490 	int dyn_dir = MATCH_UNKNOWN;
3491 	struct ip_fw *dyn_f = NULL;
3492 	int cpuid = mycpuid;
3493 	struct ipfw_context *ctx;
3494 
3495 	ASSERT_NETISR_NCPUS(cpuid);
3496 	ctx = ipfw_ctx[cpuid];
3497 
3498 	if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
3499 		return IP_FW_PASS;	/* accept */
3500 
3501 	if (args->eh == NULL ||		/* layer 3 packet */
3502 	    (m->m_pkthdr.len >= sizeof(struct ip) &&
3503 	     ntohs(args->eh->ether_type) == ETHERTYPE_IP))
3504 		hlen = ip->ip_hl << 2;
3505 
3506 	memset(&lc, 0, sizeof(lc));
3507 
3508 	m = ipfw_setup_local(m, hlen, args, &lc, &ip);
3509 	if (m == NULL)
3510 		goto pullup_failed;
3511 
3512 	if (args->rule) {
3513 		/*
3514 		 * Packet has already been tagged. Look for the next rule
3515 		 * to restart processing.
3516 		 *
3517 		 * If fw_one_pass != 0 then just accept it.
3518 		 * XXX should not happen here, but optimized out in
3519 		 * the caller.
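		 * (args->rule is typically set on packets re-injected
		 * by dummynet, or redispatched from another cpu with
		 * IP_FWARG_F_CONT set.)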
3520 */ 3521 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0) 3522 return IP_FW_PASS; 3523 args->flags &= ~IP_FWARG_F_CONT; 3524 3525 /* This rule is being/has been flushed */ 3526 if (ipfw_flushing) 3527 return IP_FW_DENY; 3528 3529 KASSERT(args->rule->cpuid == cpuid, 3530 ("rule used on cpu%d", cpuid)); 3531 3532 /* This rule was deleted */ 3533 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 3534 return IP_FW_DENY; 3535 3536 if (args->xlat != NULL) { 3537 struct ipfw_xlat *x = args->xlat; 3538 3539 /* This xlat is being deleted. */ 3540 if (x->xlat_invalid) 3541 return IP_FW_DENY; 3542 3543 f = args->rule; 3544 3545 dyn_f = f; 3546 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ? 3547 MATCH_FORWARD : MATCH_REVERSE; 3548 3549 if (args->flags & IP_FWARG_F_XLATINS) { 3550 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE, 3551 ("not slave %u state", x->xlat_type)); 3552 s = ipfw_state_link(ctx, &x->xlat_st); 3553 if (s != NULL) { 3554 ctx->ipfw_xlate_conflicts++; 3555 if (IPFW_STATE_ISDEAD(s)) { 3556 ipfw_state_remove(ctx, s); 3557 s = ipfw_state_link(ctx, 3558 &x->xlat_st); 3559 } 3560 if (s != NULL) { 3561 if (bootverbose) { 3562 kprintf("ipfw: " 3563 "slave %u state " 3564 "conflicts %u state\n", 3565 x->xlat_type, 3566 s->st_type); 3567 } 3568 ipfw_xlat_invalidate(x); 3569 return IP_FW_DENY; 3570 } 3571 ctx->ipfw_xlate_cresolved++; 3572 } 3573 } else { 3574 ipfw_state_update(&args->f_id, dyn_dir, 3575 lc.tcp, &x->xlat_st); 3576 } 3577 } else { 3578 /* TODO: setup dyn_f, dyn_dir */ 3579 3580 f = args->rule->next_rule; 3581 if (f == NULL) 3582 f = lookup_next_rule(args->rule); 3583 } 3584 } else { 3585 /* 3586 * Find the starting rule. It can be either the first 3587 * one, or the one after divert_rule if asked so. 3588 */ 3589 int skipto; 3590 3591 KKASSERT((args->flags & 3592 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0); 3593 KKASSERT(args->xlat == NULL); 3594 3595 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 3596 if (mtag != NULL) { 3597 divinfo = m_tag_data(mtag); 3598 skipto = divinfo->skipto; 3599 } else { 3600 skipto = 0; 3601 } 3602 3603 f = ctx->ipfw_layer3_chain; 3604 if (args->eh == NULL && skipto != 0) { 3605 /* No skipto during rule flushing */ 3606 if (ipfw_flushing) 3607 return IP_FW_DENY; 3608 3609 if (skipto >= IPFW_DEFAULT_RULE) 3610 return IP_FW_DENY; /* invalid */ 3611 3612 while (f && f->rulenum <= skipto) 3613 f = f->next; 3614 if (f == NULL) /* drop packet */ 3615 return IP_FW_DENY; 3616 } else if (ipfw_flushing) { 3617 /* Rules are being flushed; skip to default rule */ 3618 f = ctx->ipfw_default_rule; 3619 } 3620 } 3621 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 3622 m_tag_delete(m, mtag); 3623 3624 /* 3625 * Now scan the rules, and parse microinstructions for each rule. 3626 */ 3627 for (; f; f = f->next) { 3628 int l, cmdlen; 3629 ipfw_insn *cmd; 3630 int skip_or; /* skip rest of OR block */ 3631 3632 again: 3633 if (ctx->ipfw_set_disable & (1 << f->set)) { 3634 args->xlat = NULL; 3635 continue; 3636 } 3637 3638 if (args->xlat != NULL) { 3639 args->xlat = NULL; 3640 l = f->cmd_len - f->act_ofs; 3641 cmd = ACTION_PTR(f); 3642 } else { 3643 l = f->cmd_len; 3644 cmd = f->cmd; 3645 } 3646 3647 skip_or = 0; 3648 for (; l > 0; l -= cmdlen, cmd += cmdlen) { 3649 int match; 3650 3651 /* 3652 * check_body is a jump target used when we find a 3653 * CHECK_STATE, and need to jump to the body of 3654 * the target rule. 3655 */ 3656 check_body: 3657 cmdlen = F_LEN(cmd); 3658 /* 3659 * An OR block (insn_1 || .. 
|| insn_n) has the 3660 * F_OR bit set in all but the last instruction. 3661 * The first match will set "skip_or", and cause 3662 * the following instructions to be skipped until 3663 * past the one with the F_OR bit clear. 3664 */ 3665 if (skip_or) { /* skip this instruction */ 3666 if ((cmd->len & F_OR) == 0) 3667 skip_or = 0; /* next one is good */ 3668 continue; 3669 } 3670 match = 0; /* set to 1 if we succeed */ 3671 3672 switch (cmd->opcode) { 3673 /* 3674 * The first set of opcodes compares the packet's 3675 * fields with some pattern, setting 'match' if a 3676 * match is found. At the end of the loop there is 3677 * logic to deal with F_NOT and F_OR flags associated 3678 * with the opcode. 3679 */ 3680 case O_NOP: 3681 match = 1; 3682 break; 3683 3684 case O_FORWARD_MAC: 3685 kprintf("ipfw: opcode %d unimplemented\n", 3686 cmd->opcode); 3687 break; 3688 3689 case O_GID: 3690 case O_UID: 3691 /* 3692 * We only check offset == 0 && proto != 0, 3693 * as this ensures that we have an IPv4 3694 * packet with the ports info. 3695 */ 3696 if (lc.offset!=0) 3697 break; 3698 3699 match = ipfw_match_uid(&args->f_id, oif, 3700 cmd->opcode, 3701 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 3702 break; 3703 3704 case O_RECV: 3705 match = iface_match(m->m_pkthdr.rcvif, 3706 (ipfw_insn_if *)cmd); 3707 break; 3708 3709 case O_XMIT: 3710 match = iface_match(oif, (ipfw_insn_if *)cmd); 3711 break; 3712 3713 case O_VIA: 3714 match = iface_match(oif ? oif : 3715 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 3716 break; 3717 3718 case O_MACADDR2: 3719 if (args->eh != NULL) { /* have MAC header */ 3720 uint32_t *want = (uint32_t *) 3721 ((ipfw_insn_mac *)cmd)->addr; 3722 uint32_t *mask = (uint32_t *) 3723 ((ipfw_insn_mac *)cmd)->mask; 3724 uint32_t *hdr = (uint32_t *)args->eh; 3725 3726 match = 3727 (want[0] == (hdr[0] & mask[0]) && 3728 want[1] == (hdr[1] & mask[1]) && 3729 want[2] == (hdr[2] & mask[2])); 3730 } 3731 break; 3732 3733 case O_MAC_TYPE: 3734 if (args->eh != NULL) { 3735 uint16_t t = 3736 ntohs(args->eh->ether_type); 3737 uint16_t *p = 3738 ((ipfw_insn_u16 *)cmd)->ports; 3739 int i; 3740 3741 /* Special vlan handling */ 3742 if (m->m_flags & M_VLANTAG) 3743 t = ETHERTYPE_VLAN; 3744 3745 for (i = cmdlen - 1; !match && i > 0; 3746 i--, p += 2) { 3747 match = 3748 (t >= p[0] && t <= p[1]); 3749 } 3750 } 3751 break; 3752 3753 case O_FRAG: 3754 match = (hlen > 0 && lc.offset != 0); 3755 break; 3756 3757 case O_IPFRAG: 3758 if (hlen > 0) { 3759 uint16_t off; 3760 3761 if (args->eh != NULL) 3762 off = ntohs(ip->ip_off); 3763 else 3764 off = ip->ip_off; 3765 if (off & (IP_MF | IP_OFFMASK)) 3766 match = 1; 3767 } 3768 break; 3769 3770 case O_IN: /* "out" is "not in" */ 3771 match = (oif == NULL); 3772 break; 3773 3774 case O_LAYER2: 3775 match = (args->eh != NULL); 3776 break; 3777 3778 case O_PROTO: 3779 /* 3780 * We do not allow an arg of 0 so the 3781 * check of "proto" only suffices. 
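				 * (lc.proto is 0 for non-IPv4 packets,
				 * see ipfw_setup_local(), so those can
				 * never match.)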
3782 */ 3783 match = (lc.proto == cmd->arg1); 3784 break; 3785 3786 case O_IP_SRC: 3787 match = (hlen > 0 && 3788 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3789 lc.src_ip.s_addr); 3790 break; 3791 3792 case O_IP_SRC_MASK: 3793 match = (hlen > 0 && 3794 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3795 (lc.src_ip.s_addr & 3796 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3797 break; 3798 3799 case O_IP_SRC_ME: 3800 if (hlen > 0) { 3801 struct ifnet *tif; 3802 3803 tif = INADDR_TO_IFP(&lc.src_ip); 3804 match = (tif != NULL); 3805 } 3806 break; 3807 3808 case O_IP_SRC_TABLE: 3809 match = ipfw_table_lookup(ctx, cmd->arg1, 3810 &lc.src_ip); 3811 break; 3812 3813 case O_IP_SRC_IFIP: 3814 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3815 &lc.src_ip); 3816 break; 3817 3818 case O_IP_DST_SET: 3819 case O_IP_SRC_SET: 3820 if (hlen > 0) { 3821 uint32_t *d = (uint32_t *)(cmd + 1); 3822 uint32_t addr = 3823 cmd->opcode == O_IP_DST_SET ? 3824 args->f_id.dst_ip : 3825 args->f_id.src_ip; 3826 3827 if (addr < d[0]) 3828 break; 3829 addr -= d[0]; /* subtract base */ 3830 match = 3831 (addr < cmd->arg1) && 3832 (d[1 + (addr >> 5)] & 3833 (1 << (addr & 0x1f))); 3834 } 3835 break; 3836 3837 case O_IP_DST: 3838 match = (hlen > 0 && 3839 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3840 lc.dst_ip.s_addr); 3841 break; 3842 3843 case O_IP_DST_MASK: 3844 match = (hlen > 0) && 3845 (((ipfw_insn_ip *)cmd)->addr.s_addr == 3846 (lc.dst_ip.s_addr & 3847 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3848 break; 3849 3850 case O_IP_DST_ME: 3851 if (hlen > 0) { 3852 struct ifnet *tif; 3853 3854 tif = INADDR_TO_IFP(&lc.dst_ip); 3855 match = (tif != NULL); 3856 } 3857 break; 3858 3859 case O_IP_DST_TABLE: 3860 match = ipfw_table_lookup(ctx, cmd->arg1, 3861 &lc.dst_ip); 3862 break; 3863 3864 case O_IP_DST_IFIP: 3865 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3866 &lc.dst_ip); 3867 break; 3868 3869 case O_IP_SRCPORT: 3870 case O_IP_DSTPORT: 3871 /* 3872 * offset == 0 && proto != 0 is enough 3873 * to guarantee that we have an IPv4 3874 * packet with port info. 3875 */ 3876 if ((lc.proto==IPPROTO_UDP || 3877 lc.proto==IPPROTO_TCP) 3878 && lc.offset == 0) { 3879 uint16_t x = 3880 (cmd->opcode == O_IP_SRCPORT) ? 
3881 lc.src_port : lc.dst_port; 3882 uint16_t *p = 3883 ((ipfw_insn_u16 *)cmd)->ports; 3884 int i; 3885 3886 for (i = cmdlen - 1; !match && i > 0; 3887 i--, p += 2) { 3888 match = 3889 (x >= p[0] && x <= p[1]); 3890 } 3891 } 3892 break; 3893 3894 case O_ICMPCODE: 3895 match = (lc.offset == 0 && 3896 lc.proto==IPPROTO_ICMP && 3897 icmpcode_match(ip, (ipfw_insn_u32 *)cmd)); 3898 break; 3899 3900 case O_ICMPTYPE: 3901 match = (lc.offset == 0 && 3902 lc.proto==IPPROTO_ICMP && 3903 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 3904 break; 3905 3906 case O_IPOPT: 3907 match = (hlen > 0 && ipopts_match(ip, cmd)); 3908 break; 3909 3910 case O_IPVER: 3911 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 3912 break; 3913 3914 case O_IPTTL: 3915 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 3916 break; 3917 3918 case O_IPID: 3919 match = (hlen > 0 && 3920 cmd->arg1 == ntohs(ip->ip_id)); 3921 break; 3922 3923 case O_IPLEN: 3924 match = (hlen > 0 && cmd->arg1 == lc.ip_len); 3925 break; 3926 3927 case O_IPPRECEDENCE: 3928 match = (hlen > 0 && 3929 (cmd->arg1 == (ip->ip_tos & 0xe0))); 3930 break; 3931 3932 case O_IPTOS: 3933 match = (hlen > 0 && 3934 flags_match(cmd, ip->ip_tos)); 3935 break; 3936 3937 case O_TCPFLAGS: 3938 match = (lc.proto == IPPROTO_TCP && 3939 lc.offset == 0 && 3940 flags_match(cmd, 3941 L3HDR(struct tcphdr,ip)->th_flags)); 3942 break; 3943 3944 case O_TCPOPTS: 3945 match = (lc.proto == IPPROTO_TCP && 3946 lc.offset == 0 && tcpopts_match(ip, cmd)); 3947 break; 3948 3949 case O_TCPSEQ: 3950 match = (lc.proto == IPPROTO_TCP && 3951 lc.offset == 0 && 3952 ((ipfw_insn_u32 *)cmd)->d[0] == 3953 L3HDR(struct tcphdr,ip)->th_seq); 3954 break; 3955 3956 case O_TCPACK: 3957 match = (lc.proto == IPPROTO_TCP && 3958 lc.offset == 0 && 3959 ((ipfw_insn_u32 *)cmd)->d[0] == 3960 L3HDR(struct tcphdr,ip)->th_ack); 3961 break; 3962 3963 case O_TCPWIN: 3964 match = (lc.proto == IPPROTO_TCP && 3965 lc.offset == 0 && 3966 cmd->arg1 == 3967 L3HDR(struct tcphdr,ip)->th_win); 3968 break; 3969 3970 case O_ESTAB: 3971 /* reject packets which have SYN only */ 3972 /* XXX should i also check for TH_ACK ? */ 3973 match = (lc.proto == IPPROTO_TCP && 3974 lc.offset == 0 && 3975 (L3HDR(struct tcphdr,ip)->th_flags & 3976 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 3977 break; 3978 3979 case O_LOG: 3980 if (fw_verbose) { 3981 ipfw_log(ctx, f, hlen, args->eh, m, 3982 oif); 3983 } 3984 match = 1; 3985 break; 3986 3987 case O_PROB: 3988 match = (krandom() < 3989 ((ipfw_insn_u32 *)cmd)->d[0]); 3990 break; 3991 3992 /* 3993 * The second set of opcodes represents 'actions', 3994 * i.e. the terminal part of a rule once the packet 3995 * matches all previous patterns. 3996 * Typically there is only one action for each rule, 3997 * and the opcode is stored at the end of the rule 3998 * (but there are exceptions -- see below). 3999 * 4000 * In general, here we set retval and terminate the 4001 * outer loop (would be a 'break 3' in some language, 4002 * but we need to do a 'goto done'). 4003 * 4004 * Exceptions: 4005 * O_COUNT and O_SKIPTO actions: 4006 * instead of terminating, we jump to the next rule 4007 * ('goto next_rule', equivalent to a 'break 2'), 4008 * or to the SKIPTO target ('goto again' after 4009 * having set f, cmd and l), respectively. 4010 * 4011 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes 4012 * are not real 'actions', and are stored right 4013 * before the 'action' part of the rule. 
4014 			 * These opcodes try to install an entry in the
4015 			 * state tables; if successful, we continue with
4016 			 * the next opcode (match=1; break;), otherwise
4017 			 * the packet must be dropped ('goto done' after
4018 			 * setting retval). If static rules are changed
4019 			 * during the state installation, the packet will
4020 			 * be dropped and the rule's stats will not be updated
4021 			 * ('return IP_FW_DENY').
4022 			 *
4023 			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
4024 			 * cause a lookup of the state table, and a jump
4025 			 * to the 'action' part of the parent rule
4026 			 * ('goto check_body') if an entry is found, or
4027 			 * (CHECK_STATE only) a jump to the next rule if
4028 			 * the entry is not found ('goto next_rule').
4029 			 * The result of the lookup is cached so that
4030 			 * further instances of these opcodes become
4031 			 * effectively NOPs. If static rules are changed
4032 			 * during the state lookup, the packet will
4033 			 * be dropped and the rule's stats will not be updated
4034 			 * ('return IP_FW_DENY').
4035 			 */
4036 			case O_REDIRECT:
4037 				if (f->cross_rules == NULL) {
4038 					/*
4039 					 * This rule was not completely setup;
4040 					 * move on to the next rule.
4041 					 */
4042 					goto next_rule;
4043 				}
4044 				/*
4045 				 * Apply redirect only on input path and
4046 				 * only to non-fragment TCP segments or
4047 				 * UDP datagrams.
4048 				 *
4049 				 * Does _not_ work with layer2 filtering.
4050 				 */
4051 				if (oif != NULL || args->eh != NULL ||
4052 				    (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4053 				    (lc.proto != IPPROTO_TCP &&
4054 				     lc.proto != IPPROTO_UDP))
4055 					break;
4056 				/* FALL THROUGH */
4057 			case O_LIMIT:
4058 			case O_KEEP_STATE:
4059 				if (hlen == 0)
4060 					break;
4061 				s = ipfw_state_install(ctx, f,
4062 				    (ipfw_insn_limit *)cmd, args, lc.tcp);
4063 				if (s == NULL) {
4064 					retval = IP_FW_DENY;
4065 					goto done; /* error/limit violation */
4066 				}
4067 				s->st_pcnt++;
4068 				s->st_bcnt += lc.ip_len;
4069 
4070 				if (s->st_type == O_REDIRECT) {
4071 					struct in_addr oaddr;
4072 					uint16_t oport;
4073 					struct ipfw_xlat *slave_x, *x;
4074 					struct ipfw_state *dup;
4075 
4076 					x = (struct ipfw_xlat *)s;
4077 					ipfw_xlate(x, m, &oaddr, &oport);
4078 					m = ipfw_rehashm(m, hlen, args, &lc,
4079 					    &ip);
4080 					if (m == NULL) {
4081 						ipfw_state_del(ctx, s);
4082 						goto pullup_failed;
4083 					}
4084 
4085 					cpuid = netisr_hashcpu(
4086 					    m->m_pkthdr.hash);
4087 
4088 					slave_x = (struct ipfw_xlat *)
4089 					    ipfw_state_alloc(ctx, &args->f_id,
4090 					    O_REDIRECT, f->cross_rules[cpuid],
4091 					    lc.tcp);
4092 					if (slave_x == NULL) {
4093 						ipfw_state_del(ctx, s);
4094 						retval = IP_FW_DENY;
4095 						goto done;
4096 					}
4097 					slave_x->xlat_addr = oaddr.s_addr;
4098 					slave_x->xlat_port = oport;
4099 					slave_x->xlat_dir = MATCH_REVERSE;
4100 					slave_x->xlat_flags |=
4101 					    IPFW_STATE_F_XLATSRC |
4102 					    IPFW_STATE_F_XLATSLAVE;
4103 
4104 					slave_x->xlat_pair = x;
4105 					slave_x->xlat_pcpu = mycpuid;
4106 					x->xlat_pair = slave_x;
4107 					x->xlat_pcpu = cpuid;
4108 
4109 					ctx->ipfw_xlated++;
4110 					if (cpuid != mycpuid) {
4111 						ctx->ipfw_xlate_split++;
4112 						ipfw_xlate_redispatch(
4113 						    m, cpuid, x,
4114 						    IPFW_XLATE_INSERT |
4115 						    IPFW_XLATE_FORWARD);
4116 						args->m = NULL;
4117 						return (IP_FW_REDISPATCH);
4118 					}
4119 
4120 					dup = ipfw_state_link(ctx,
4121 					    &slave_x->xlat_st);
4122 					if (dup != NULL) {
4123 						ctx->ipfw_xlate_conflicts++;
4124 						if (IPFW_STATE_ISDEAD(dup)) {
4125 							ipfw_state_remove(ctx,
4126 							    dup);
4127 							dup = ipfw_state_link(
4128 							    ctx, &slave_x->xlat_st);
4129 						}
4130 						if (dup != NULL) {
4131 							if (bootverbose) {
4132 								kprintf("ipfw: "
4133 								    "slave %u state "
4134 								    "conflicts "
4135 								    "%u state\n",
4136 								    x->xlat_type,
4137 								    s->st_type);
4138 							}
4139 							ipfw_state_del(ctx, s);
4140 return (IP_FW_DENY);
4141 }
4142 ctx->ipfw_xlate_cresolved++;
4143 }
4144 }
4145 match = 1;
4146 break;
4147
4148 case O_PROBE_STATE:
4149 case O_CHECK_STATE:
4150 /*
4151 * States are checked at the first keep-state or
4152 * check-state occurrence, with the result
4153 * being stored in dyn_dir. The compiler
4154 * introduces a PROBE_STATE instruction for
4155 * us when we have a KEEP_STATE/LIMIT/RDR
4156 * (because PROBE_STATE needs to be run first).
4157 */
4158 s = NULL;
4159 if (dyn_dir == MATCH_UNKNOWN) {
4160 s = ipfw_state_lookup(ctx,
4161 &args->f_id, &dyn_dir, lc.tcp);
4162 }
4163 if (s == NULL ||
4164 (s->st_type == O_REDIRECT &&
4165 (args->eh != NULL ||
4166 (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4167 (lc.proto != IPPROTO_TCP &&
4168 lc.proto != IPPROTO_UDP)))) {
4169 /*
4170 * State not found. If CHECK_STATE,
4171 * skip to the next rule; if PROBE_STATE,
4172 * just ignore and continue with the next
4173 * opcode.
4174 */
4175 if (cmd->opcode == O_CHECK_STATE)
4176 goto next_rule;
4177 match = 1;
4178 break;
4179 }
4180
4181 s->st_pcnt++;
4182 s->st_bcnt += lc.ip_len;
4183
4184 if (s->st_type == O_REDIRECT) {
4185 struct ipfw_xlat *x =
4186 (struct ipfw_xlat *)s;
4187
4188 if (oif != NULL &&
4189 x->xlat_ifp == NULL) {
4190 KASSERT(x->xlat_flags &
4191 IPFW_STATE_F_XLATSLAVE,
4192 ("master rdr state "
4193 "missing ifp"));
4194 x->xlat_ifp = oif;
4195 } else if (
4196 (oif != NULL && x->xlat_ifp != oif) ||
4197 (oif == NULL &&
4198 x->xlat_ifp != m->m_pkthdr.rcvif)) {
4199 retval = IP_FW_DENY;
4200 goto done;
4201 }
4202 if (x->xlat_dir != dyn_dir)
4203 goto skip_xlate;
4204
4205 ipfw_xlate(x, m, NULL, NULL);
4206 m = ipfw_rehashm(m, hlen, args, &lc,
4207 &ip);
4208 if (m == NULL)
4209 goto pullup_failed;
4210
4211 cpuid = netisr_hashcpu(
4212 m->m_pkthdr.hash);
4213 if (cpuid != mycpuid) {
4214 uint32_t xlate = 0;
4215
4216 if (oif != NULL) {
4217 xlate |=
4218 IPFW_XLATE_OUTPUT;
4219 }
4220 if (dyn_dir == MATCH_FORWARD) {
4221 xlate |=
4222 IPFW_XLATE_FORWARD;
4223 }
4224 ipfw_xlate_redispatch(m, cpuid,
4225 x, xlate);
4226 args->m = NULL;
4227 return (IP_FW_REDISPATCH);
4228 }
4229
4230 KKASSERT(x->xlat_pcpu == mycpuid);
4231 ipfw_state_update(&args->f_id, dyn_dir,
4232 lc.tcp, &x->xlat_pair->xlat_st);
4233 }
4234 skip_xlate:
4235 /*
4236 * Found a rule from a state; jump to the
4237 * 'action' part of the rule.
4238 */
4239 f = s->st_rule;
4240 KKASSERT(f->cpuid == mycpuid);
4241
4242 cmd = ACTION_PTR(f);
4243 l = f->cmd_len - f->act_ofs;
4244 dyn_f = f;
4245 goto check_body;
4246
4247 case O_ACCEPT:
4248 retval = IP_FW_PASS; /* accept */
4249 goto done;
4250
4251 case O_DEFRAG:
4252 if (f->cross_rules == NULL) {
4253 /*
4254 * This rule was not completely setup;
4255 * move on to the next rule.
4256 */
4257 goto next_rule;
4258 }
4259
4260 /*
4261 * Don't defrag for l2 packets, output packets
4262 * or non-fragments.
4263 */ 4264 if (oif != NULL || args->eh != NULL || 4265 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0) 4266 goto next_rule; 4267 4268 ctx->ipfw_frags++; 4269 m = ip_reass(m); 4270 args->m = m; 4271 if (m == NULL) { 4272 retval = IP_FW_PASS; 4273 goto done; 4274 } 4275 ctx->ipfw_defraged++; 4276 KASSERT((m->m_flags & M_HASH) == 0, 4277 ("hash not cleared")); 4278 4279 /* Update statistics */ 4280 f->pcnt++; 4281 f->bcnt += lc.ip_len; 4282 f->timestamp = time_second; 4283 4284 ip = mtod(m, struct ip *); 4285 hlen = ip->ip_hl << 2; 4286 ip->ip_len += hlen; 4287 4288 ip->ip_len = htons(ip->ip_len); 4289 ip->ip_off = htons(ip->ip_off); 4290 4291 ip_hashfn(&m, 0); 4292 args->m = m; 4293 if (m == NULL) 4294 goto pullup_failed; 4295 4296 KASSERT(m->m_flags & M_HASH, ("no hash")); 4297 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 4298 if (cpuid != mycpuid) { 4299 /* 4300 * NOTE: 4301 * ip_len/ip_off are in network byte 4302 * order. 4303 */ 4304 ctx->ipfw_defrag_remote++; 4305 ipfw_defrag_redispatch(m, cpuid, f); 4306 args->m = NULL; 4307 return (IP_FW_REDISPATCH); 4308 } 4309 4310 /* 'm' might be changed by ip_hashfn(). */ 4311 ip = mtod(m, struct ip *); 4312 ip->ip_len = ntohs(ip->ip_len); 4313 ip->ip_off = ntohs(ip->ip_off); 4314 4315 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 4316 if (m == NULL) 4317 goto pullup_failed; 4318 4319 /* Move on. */ 4320 goto next_rule; 4321 4322 case O_PIPE: 4323 case O_QUEUE: 4324 args->rule = f; /* report matching rule */ 4325 args->cookie = cmd->arg1; 4326 retval = IP_FW_DUMMYNET; 4327 goto done; 4328 4329 case O_DIVERT: 4330 case O_TEE: 4331 if (args->eh) /* not on layer 2 */ 4332 break; 4333 4334 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 4335 sizeof(*divinfo), M_INTWAIT | M_NULLOK); 4336 if (mtag == NULL) { 4337 retval = IP_FW_DENY; 4338 goto done; 4339 } 4340 divinfo = m_tag_data(mtag); 4341 4342 divinfo->skipto = f->rulenum; 4343 divinfo->port = cmd->arg1; 4344 divinfo->tee = (cmd->opcode == O_TEE); 4345 m_tag_prepend(m, mtag); 4346 4347 args->cookie = cmd->arg1; 4348 retval = (cmd->opcode == O_DIVERT) ? 4349 IP_FW_DIVERT : IP_FW_TEE; 4350 goto done; 4351 4352 case O_COUNT: 4353 case O_SKIPTO: 4354 f->pcnt++; /* update stats */ 4355 f->bcnt += lc.ip_len; 4356 f->timestamp = time_second; 4357 if (cmd->opcode == O_COUNT) 4358 goto next_rule; 4359 /* handle skipto */ 4360 if (f->next_rule == NULL) 4361 lookup_next_rule(f); 4362 f = f->next_rule; 4363 goto again; 4364 4365 case O_REJECT: 4366 /* 4367 * Drop the packet and send a reject notice 4368 * if the packet is not ICMP (or is an ICMP 4369 * query), and it is not multicast/broadcast. 
4370 */ 4371 if (hlen > 0 && 4372 (lc.proto != IPPROTO_ICMP || 4373 is_icmp_query(ip)) && 4374 !(m->m_flags & (M_BCAST|M_MCAST)) && 4375 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) { 4376 send_reject(args, cmd->arg1, 4377 lc.offset, lc.ip_len); 4378 retval = IP_FW_DENY; 4379 goto done; 4380 } 4381 /* FALLTHROUGH */ 4382 case O_DENY: 4383 retval = IP_FW_DENY; 4384 goto done; 4385 4386 case O_FORWARD_IP: 4387 if (args->eh) /* not valid on layer2 pkts */ 4388 break; 4389 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 4390 struct sockaddr_in *sin; 4391 4392 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 4393 sizeof(*sin), M_INTWAIT | M_NULLOK); 4394 if (mtag == NULL) { 4395 retval = IP_FW_DENY; 4396 goto done; 4397 } 4398 sin = m_tag_data(mtag); 4399 4400 /* Structure copy */ 4401 *sin = ((ipfw_insn_sa *)cmd)->sa; 4402 4403 m_tag_prepend(m, mtag); 4404 m->m_pkthdr.fw_flags |= 4405 IPFORWARD_MBUF_TAGGED; 4406 m->m_pkthdr.fw_flags &= 4407 ~BRIDGE_MBUF_TAGGED; 4408 } 4409 retval = IP_FW_PASS; 4410 goto done; 4411 4412 default: 4413 panic("-- unknown opcode %d", cmd->opcode); 4414 } /* end of switch() on opcodes */ 4415 4416 if (cmd->len & F_NOT) 4417 match = !match; 4418 4419 if (match) { 4420 if (cmd->len & F_OR) 4421 skip_or = 1; 4422 } else { 4423 if (!(cmd->len & F_OR)) /* not an OR block, */ 4424 break; /* try next rule */ 4425 } 4426 4427 } /* end of inner for, scan opcodes */ 4428 4429 next_rule:; /* try next rule */ 4430 4431 } /* end of outer for, scan rules */ 4432 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 4433 return IP_FW_DENY; 4434 4435 done: 4436 /* Update statistics */ 4437 f->pcnt++; 4438 f->bcnt += lc.ip_len; 4439 f->timestamp = time_second; 4440 return retval; 4441 4442 pullup_failed: 4443 if (fw_verbose) 4444 kprintf("pullup failed\n"); 4445 return IP_FW_DENY; 4446 } 4447 4448 static struct mbuf * 4449 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 4450 { 4451 struct m_tag *mtag; 4452 struct dn_pkt *pkt; 4453 ipfw_insn *cmd; 4454 const struct ipfw_flow_id *id; 4455 struct dn_flow_id *fid; 4456 4457 M_ASSERTPKTHDR(m); 4458 4459 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), 4460 M_INTWAIT | M_NULLOK); 4461 if (mtag == NULL) { 4462 m_freem(m); 4463 return (NULL); 4464 } 4465 m_tag_prepend(m, mtag); 4466 4467 pkt = m_tag_data(mtag); 4468 bzero(pkt, sizeof(*pkt)); 4469 4470 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 4471 if (cmd->opcode == O_LOG) 4472 cmd += F_LEN(cmd); 4473 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 4474 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode)); 4475 4476 pkt->dn_m = m; 4477 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 4478 pkt->ifp = fwa->oif; 4479 pkt->pipe_nr = pipe_nr; 4480 4481 pkt->cpuid = mycpuid; 4482 pkt->msgport = netisr_curport(); 4483 4484 id = &fwa->f_id; 4485 fid = &pkt->id; 4486 fid->fid_dst_ip = id->dst_ip; 4487 fid->fid_src_ip = id->src_ip; 4488 fid->fid_dst_port = id->dst_port; 4489 fid->fid_src_port = id->src_port; 4490 fid->fid_proto = id->proto; 4491 fid->fid_flags = id->flags; 4492 4493 ipfw_ref_rule(fwa->rule); 4494 pkt->dn_priv = fwa->rule; 4495 pkt->dn_unref_priv = ipfw_unref_rule; 4496 4497 if (cmd->opcode == O_PIPE) 4498 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 4499 4500 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 4501 return (m); 4502 } 4503 4504 /* 4505 * When a rule is added/deleted, clear the next_rule pointers in all rules. 4506 * These will be reconstructed on the fly as packets are matched. 
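 *
 * For example, the O_SKIPTO handler above calls lookup_next_rule()
 * to locate its target rule and caches the result in f->next_rule;
 * clearing the pointers here simply invalidates those cached
 * shortcuts so they are recomputed against the new chain.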
4507 */ 4508 static void 4509 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 4510 { 4511 struct ip_fw *rule; 4512 4513 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 4514 rule->next_rule = NULL; 4515 } 4516 4517 static void 4518 ipfw_inc_static_count(struct ip_fw *rule) 4519 { 4520 /* Static rule's counts are updated only on CPU0 */ 4521 KKASSERT(mycpuid == 0); 4522 4523 static_count++; 4524 static_ioc_len += IOC_RULESIZE(rule); 4525 } 4526 4527 static void 4528 ipfw_dec_static_count(struct ip_fw *rule) 4529 { 4530 int l = IOC_RULESIZE(rule); 4531 4532 /* Static rule's counts are updated only on CPU0 */ 4533 KKASSERT(mycpuid == 0); 4534 4535 KASSERT(static_count > 0, ("invalid static count %u", static_count)); 4536 static_count--; 4537 4538 KASSERT(static_ioc_len >= l, 4539 ("invalid static len %u", static_ioc_len)); 4540 static_ioc_len -= l; 4541 } 4542 4543 static void 4544 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 4545 { 4546 if (fwmsg->sibling != NULL) { 4547 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 4548 fwmsg->sibling->sibling = rule; 4549 } 4550 fwmsg->sibling = rule; 4551 } 4552 4553 static struct ip_fw * 4554 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4555 { 4556 struct ip_fw *rule; 4557 4558 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 4559 4560 rule->act_ofs = ioc_rule->act_ofs; 4561 rule->cmd_len = ioc_rule->cmd_len; 4562 rule->rulenum = ioc_rule->rulenum; 4563 rule->set = ioc_rule->set; 4564 rule->usr_flags = ioc_rule->usr_flags; 4565 4566 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 4567 4568 rule->refcnt = 1; 4569 rule->cpuid = mycpuid; 4570 rule->rule_flags = rule_flags; 4571 4572 return rule; 4573 } 4574 4575 static void 4576 ipfw_add_rule_dispatch(netmsg_t nmsg) 4577 { 4578 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4579 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4580 struct ip_fw *rule; 4581 4582 ASSERT_NETISR_NCPUS(mycpuid); 4583 4584 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags); 4585 4586 /* 4587 * Insert rule into the pre-determined position 4588 */ 4589 if (fwmsg->prev_rule != NULL) { 4590 struct ip_fw *prev, *next; 4591 4592 prev = fwmsg->prev_rule; 4593 KKASSERT(prev->cpuid == mycpuid); 4594 4595 next = fwmsg->next_rule; 4596 KKASSERT(next->cpuid == mycpuid); 4597 4598 rule->next = next; 4599 prev->next = rule; 4600 4601 /* 4602 * Move to the position on the next CPU 4603 * before the msg is forwarded. 4604 */ 4605 fwmsg->prev_rule = prev->sibling; 4606 fwmsg->next_rule = next->sibling; 4607 } else { 4608 KKASSERT(fwmsg->next_rule == NULL); 4609 rule->next = ctx->ipfw_layer3_chain; 4610 ctx->ipfw_layer3_chain = rule; 4611 } 4612 4613 /* Link rule CPU sibling */ 4614 ipfw_link_sibling(fwmsg, rule); 4615 4616 ipfw_flush_rule_ptrs(ctx); 4617 4618 if (mycpuid == 0) { 4619 /* Statistics only need to be updated once */ 4620 ipfw_inc_static_count(rule); 4621 4622 /* Return the rule on CPU0 */ 4623 nmsg->lmsg.u.ms_resultp = rule; 4624 } 4625 4626 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) 4627 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp; 4628 4629 if (fwmsg->cross_rules != NULL) { 4630 /* Save rules for later use. 
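 * Each netisr records its duplicate of the rule in the slot of the
 * shared cross_rules array for its CPU; once the message has
 * visited all CPUs, ipfw_crossref_rule_dispatch() below copies the
 * completed array into every duplicate's rule->cross_rules.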
*/
4631 fwmsg->cross_rules[mycpuid] = rule;
4632 }
4633
4634 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4635 }
4636
4637 static void
4638 ipfw_crossref_rule_dispatch(netmsg_t nmsg)
4639 {
4640 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
4641 struct ip_fw *rule = fwmsg->sibling;
4642 int sz = sizeof(struct ip_fw *) * netisr_ncpus;
4643
4644 ASSERT_NETISR_NCPUS(mycpuid);
4645 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF,
4646 ("not crossref rule"));
4647
4648 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK);
4649 memcpy(rule->cross_rules, fwmsg->cross_rules, sz);
4650
4651 fwmsg->sibling = rule->sibling;
4652 netisr_forwardmsg(&fwmsg->base, mycpuid + 1);
4653 }
4654
4655 /*
4656 * Add a new rule to the list. Copy the rule into a malloc'ed
4657 * area, then possibly assign it a rule number, and insert it at
4658 * the proper position in the sorted list. Update the rulenum in
4659 * the input struct so the caller knows it as well.
4660 */
4661 static void
4662 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
4663 {
4664 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4665 struct netmsg_ipfw fwmsg;
4666 struct ip_fw *f, *prev, *rule;
4667
4668 ASSERT_NETISR0;
4669
4670 /*
4671 * If rulenum is 0, find the highest numbered rule before the
4672 * default rule, and add the auto-increment step to it.
4673 */
4674 if (ioc_rule->rulenum == 0) {
4675 int step = autoinc_step;
4676
4677 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
4678 step <= IPFW_AUTOINC_STEP_MAX);
4679
4680 /*
4681 * Locate the highest numbered rule before default
4682 */
4683 for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
4684 if (f->rulenum == IPFW_DEFAULT_RULE)
4685 break;
4686 ioc_rule->rulenum = f->rulenum;
4687 }
4688 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
4689 ioc_rule->rulenum += step;
4690 }
4691 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
4692 ioc_rule->rulenum != 0,
4693 ("invalid rule num %d", ioc_rule->rulenum));
4694
4695 /*
4696 * Now find the right place for the new rule in the sorted list.
4697 */
4698 for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
4699 prev = f, f = f->next) {
4700 if (f->rulenum > ioc_rule->rulenum) {
4701 /* Found the location */
4702 break;
4703 }
4704 }
4705 KASSERT(f != NULL, ("no default rule?!"));
4706
4707 /*
4708 * Duplicate the rule onto each CPU.
4709 * The rule duplicated on CPU0 will be returned.
4710 */
4711 bzero(&fwmsg, sizeof(fwmsg));
4712 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4713 ipfw_add_rule_dispatch);
4714 fwmsg.ioc_rule = ioc_rule;
4715 fwmsg.prev_rule = prev;
4716 fwmsg.next_rule = prev == NULL ?
NULL : f; 4717 fwmsg.rule_flags = rule_flags; 4718 if (rule_flags & IPFW_RULE_F_CROSSREF) { 4719 fwmsg.cross_rules = kmalloc( 4720 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP, 4721 M_WAITOK | M_ZERO); 4722 } 4723 4724 netisr_domsg_global(&fwmsg.base); 4725 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); 4726 4727 rule = fwmsg.base.lmsg.u.ms_resultp; 4728 KKASSERT(rule != NULL && rule->cpuid == mycpuid); 4729 4730 if (fwmsg.cross_rules != NULL) { 4731 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, 4732 MSGF_PRIORITY, ipfw_crossref_rule_dispatch); 4733 fwmsg.sibling = rule; 4734 netisr_domsg_global(&fwmsg.base); 4735 KKASSERT(fwmsg.sibling == NULL); 4736 4737 kfree(fwmsg.cross_rules, M_TEMP); 4738 4739 #ifdef KLD_MODULE 4740 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 4741 #endif 4742 } 4743 4744 DPRINTF("++ installed rule %d, static count now %d\n", 4745 rule->rulenum, static_count); 4746 } 4747 4748 /* 4749 * Free storage associated with a static rule (including derived 4750 * states/tracks). 4751 * The caller is in charge of clearing rule pointers to avoid 4752 * dangling pointers. 4753 * @return a pointer to the next entry. 4754 * Arguments are not checked, so they better be correct. 4755 */ 4756 static struct ip_fw * 4757 ipfw_delete_rule(struct ipfw_context *ctx, 4758 struct ip_fw *prev, struct ip_fw *rule) 4759 { 4760 struct ip_fw *n; 4761 4762 n = rule->next; 4763 if (prev == NULL) 4764 ctx->ipfw_layer3_chain = n; 4765 else 4766 prev->next = n; 4767 4768 /* Mark the rule as invalid */ 4769 rule->rule_flags |= IPFW_RULE_F_INVALID; 4770 rule->next_rule = NULL; 4771 rule->sibling = NULL; 4772 #ifdef foo 4773 /* Don't reset cpuid here; keep various assertion working */ 4774 rule->cpuid = -1; 4775 #endif 4776 4777 /* Statistics only need to be updated once */ 4778 if (mycpuid == 0) 4779 ipfw_dec_static_count(rule); 4780 4781 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) { 4782 /* Try to free this rule */ 4783 ipfw_free_rule(rule); 4784 } else { 4785 /* TODO: check staging area. */ 4786 if (mycpuid == 0) { 4787 rule->next = ipfw_gd.ipfw_crossref_free; 4788 ipfw_gd.ipfw_crossref_free = rule; 4789 } 4790 } 4791 4792 /* Return the next rule */ 4793 return n; 4794 } 4795 4796 static void 4797 ipfw_flush_dispatch(netmsg_t nmsg) 4798 { 4799 int kill_default = nmsg->lmsg.u.ms_result; 4800 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4801 struct ip_fw *rule; 4802 4803 ASSERT_NETISR_NCPUS(mycpuid); 4804 4805 /* 4806 * Flush states. 4807 */ 4808 ipfw_state_flush(ctx, NULL); 4809 KASSERT(ctx->ipfw_state_cnt == 0, 4810 ("%d pcpu states remain", ctx->ipfw_state_cnt)); 4811 ctx->ipfw_state_loosecnt = 0; 4812 ctx->ipfw_state_lastexp = 0; 4813 4814 /* 4815 * Flush tracks. 4816 */ 4817 ipfw_track_flush(ctx, NULL); 4818 ctx->ipfw_track_lastexp = 0; 4819 if (ctx->ipfw_trkcnt_spare != NULL) { 4820 kfree(ctx->ipfw_trkcnt_spare, M_IPFW); 4821 ctx->ipfw_trkcnt_spare = NULL; 4822 } 4823 4824 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ 4825 4826 while ((rule = ctx->ipfw_layer3_chain) != NULL && 4827 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) 4828 ipfw_delete_rule(ctx, NULL, rule); 4829 4830 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4831 } 4832 4833 /* 4834 * Deletes all rules from a chain (including the default rule 4835 * if the second argument is set). 
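 *
 * Note that the IP_FW_FLUSH sockopt below calls ipfw_flush(0), so
 * the default rule always survives a user-level flush; flushing
 * with a nonzero argument is presumably reserved for module
 * teardown.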
4836 */ 4837 static void 4838 ipfw_flush(int kill_default) 4839 { 4840 struct netmsg_base nmsg; 4841 #ifdef INVARIANTS 4842 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4843 int state_cnt; 4844 #endif 4845 4846 ASSERT_NETISR0; 4847 4848 /* 4849 * If 'kill_default' then caller has done the necessary 4850 * msgport syncing; unnecessary to do it again. 4851 */ 4852 if (!kill_default) { 4853 /* 4854 * Let ipfw_chk() know the rules are going to 4855 * be flushed, so it could jump directly to 4856 * the default rule. 4857 */ 4858 ipfw_flushing = 1; 4859 /* XXX use priority sync */ 4860 netmsg_service_sync(); 4861 } 4862 4863 /* 4864 * Press the 'flush' button 4865 */ 4866 bzero(&nmsg, sizeof(nmsg)); 4867 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4868 ipfw_flush_dispatch); 4869 nmsg.lmsg.u.ms_result = kill_default; 4870 netisr_domsg_global(&nmsg); 4871 ipfw_gd.ipfw_state_loosecnt = 0; 4872 ipfw_gd.ipfw_state_globexp = 0; 4873 ipfw_gd.ipfw_track_globexp = 0; 4874 4875 #ifdef INVARIANTS 4876 state_cnt = ipfw_state_cntcoll(); 4877 KASSERT(state_cnt == 0, ("%d states remain", state_cnt)); 4878 4879 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0, 4880 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt)); 4881 4882 if (kill_default) { 4883 KASSERT(static_count == 0, 4884 ("%u static rules remain", static_count)); 4885 KASSERT(static_ioc_len == 0, 4886 ("%u bytes of static rules remain", static_ioc_len)); 4887 } else { 4888 KASSERT(static_count == 1, 4889 ("%u static rules remain", static_count)); 4890 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 4891 ("%u bytes of static rules remain, should be %lu", 4892 static_ioc_len, 4893 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule))); 4894 } 4895 #endif 4896 4897 /* Flush is done */ 4898 ipfw_flushing = 0; 4899 } 4900 4901 static void 4902 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg) 4903 { 4904 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4905 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4906 struct ip_fw *rule, *prev; 4907 4908 ASSERT_NETISR_NCPUS(mycpuid); 4909 4910 rule = dmsg->start_rule; 4911 KKASSERT(rule->cpuid == mycpuid); 4912 dmsg->start_rule = rule->sibling; 4913 4914 prev = dmsg->prev_rule; 4915 if (prev != NULL) { 4916 KKASSERT(prev->cpuid == mycpuid); 4917 4918 /* 4919 * Move to the position on the next CPU 4920 * before the msg is forwarded. 4921 */ 4922 dmsg->prev_rule = prev->sibling; 4923 } 4924 4925 /* 4926 * flush pointers outside the loop, then delete all matching 4927 * rules. 'prev' remains the same throughout the cycle. 4928 */ 4929 ipfw_flush_rule_ptrs(ctx); 4930 while (rule && rule->rulenum == dmsg->rulenum) { 4931 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4932 /* Flush states generated by this rule. */ 4933 ipfw_state_flush(ctx, rule); 4934 } 4935 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4936 /* Flush tracks generated by this rule. 
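 * Both the states flushed above and these tracks reference the
 * rule being deleted, so they must be reclaimed before the rule
 * itself can be freed.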
*/
4937 ipfw_track_flush(ctx, rule);
4938 }
4939 rule = ipfw_delete_rule(ctx, prev, rule);
4940 }
4941
4942 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
4943 }
4944
4945 static int
4946 ipfw_alt_delete_rule(uint16_t rulenum)
4947 {
4948 struct ip_fw *prev, *rule;
4949 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4950 struct netmsg_del dmsg;
4951
4952 ASSERT_NETISR0;
4953
4954 /*
4955 * Locate the first rule to delete
4956 */
4957 for (prev = NULL, rule = ctx->ipfw_layer3_chain;
4958 rule && rule->rulenum < rulenum;
4959 prev = rule, rule = rule->next)
4960 ; /* EMPTY */
4961 if (rule->rulenum != rulenum)
4962 return EINVAL;
4963
4964 /*
4965 * Get rid of the rule duplications on all CPUs
4966 */
4967 bzero(&dmsg, sizeof(dmsg));
4968 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
4969 ipfw_alt_delete_rule_dispatch);
4970 dmsg.prev_rule = prev;
4971 dmsg.start_rule = rule;
4972 dmsg.rulenum = rulenum;
4973
4974 netisr_domsg_global(&dmsg.base);
4975 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
4976 return 0;
4977 }
4978
4979 static void
4980 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg)
4981 {
4982 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
4983 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
4984 struct ip_fw *prev, *rule;
4985 #ifdef INVARIANTS
4986 int del = 0;
4987 #endif
4988
4989 ASSERT_NETISR_NCPUS(mycpuid);
4990
4991 ipfw_flush_rule_ptrs(ctx);
4992
4993 prev = NULL;
4994 rule = ctx->ipfw_layer3_chain;
4995 while (rule != NULL) {
4996 if (rule->set == dmsg->from_set) {
4997 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
4998 /* Flush states generated by this rule. */
4999 ipfw_state_flush(ctx, rule);
5000 }
5001 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
5002 /* Flush tracks generated by this rule. */
5003 ipfw_track_flush(ctx, rule);
5004 }
5005 rule = ipfw_delete_rule(ctx, prev, rule);
5006 #ifdef INVARIANTS
5007 del = 1;
5008 #endif
5009 } else {
5010 prev = rule;
5011 rule = rule->next;
5012 }
5013 }
5014 KASSERT(del, ("no match set?!"));
5015
5016 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5017 }
5018
5019 static int
5020 ipfw_alt_delete_ruleset(uint8_t set)
5021 {
5022 struct netmsg_del dmsg;
5023 int del;
5024 struct ip_fw *rule;
5025 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5026
5027 ASSERT_NETISR0;
5028
5029 /*
5030 * Check whether the 'set' exists at all. (The dispatch
5031 * routine takes care of flushing any states/tracks the
5032 * set's rules may have generated.)
5033 */
5034 del = 0;
5035 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5036 if (rule->set == set)
5037 del = 1;
5038 }
5039 if (!del)
5040 return 0; /* XXX EINVAL? */
5041
5042 /*
5043 * Delete this set
5044 */
5045 bzero(&dmsg, sizeof(dmsg));
5046 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5047 ipfw_alt_delete_ruleset_dispatch);
5048 dmsg.from_set = set;
5049 netisr_domsg_global(&dmsg.base);
5050
5051 return 0;
5052 }
5053
5054 static void
5055 ipfw_alt_move_rule_dispatch(netmsg_t nmsg)
5056 {
5057 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5058 struct ip_fw *rule;
5059
5060 ASSERT_NETISR_NCPUS(mycpuid);
5061
5062 rule = dmsg->start_rule;
5063 KKASSERT(rule->cpuid == mycpuid);
5064
5065 /*
5066 * Move to the position on the next CPU
5067 * before the msg is forwarded.
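 * rule->sibling points at the duplicate of the same rule on the
 * next CPU (see ipfw_link_sibling()), so advancing start_rule this
 * way positions the message without re-scanning the next chain.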
5068 */
5069 dmsg->start_rule = rule->sibling;
5070
5071 while (rule && rule->rulenum <= dmsg->rulenum) {
5072 if (rule->rulenum == dmsg->rulenum)
5073 rule->set = dmsg->to_set;
5074 rule = rule->next;
5075 }
5076 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5077 }
5078
5079 static int
5080 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set)
5081 {
5082 struct netmsg_del dmsg;
5083 struct netmsg_base *nmsg;
5084 struct ip_fw *rule;
5085 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5086
5087 ASSERT_NETISR0;
5088
5089 /*
5090 * Locate the first rule to move
5091 */
5092 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum;
5093 rule = rule->next) {
5094 if (rule->rulenum == rulenum && rule->set != set)
5095 break;
5096 }
5097 if (rule == NULL || rule->rulenum > rulenum)
5098 return 0; /* XXX error? */
5099
5100 bzero(&dmsg, sizeof(dmsg));
5101 nmsg = &dmsg.base;
5102 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5103 ipfw_alt_move_rule_dispatch);
5104 dmsg.start_rule = rule;
5105 dmsg.rulenum = rulenum;
5106 dmsg.to_set = set;
5107
5108 netisr_domsg_global(nmsg);
5109 KKASSERT(dmsg.start_rule == NULL);
5110 return 0;
5111 }
5112
5113 static void
5114 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg)
5115 {
5116 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5117 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5118 struct ip_fw *rule;
5119
5120 ASSERT_NETISR_NCPUS(mycpuid);
5121
5122 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5123 if (rule->set == dmsg->from_set)
5124 rule->set = dmsg->to_set;
5125 }
5126 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5127 }
5128
5129 static int
5130 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set)
5131 {
5132 struct netmsg_del dmsg;
5133 struct netmsg_base *nmsg;
5134
5135 ASSERT_NETISR0;
5136
5137 bzero(&dmsg, sizeof(dmsg));
5138 nmsg = &dmsg.base;
5139 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5140 ipfw_alt_move_ruleset_dispatch);
5141 dmsg.from_set = from_set;
5142 dmsg.to_set = to_set;
5143
5144 netisr_domsg_global(nmsg);
5145 return 0;
5146 }
5147
5148 static void
5149 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg)
5150 {
5151 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
5152 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5153 struct ip_fw *rule;
5154
5155 ASSERT_NETISR_NCPUS(mycpuid);
5156
5157 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5158 if (rule->set == dmsg->from_set)
5159 rule->set = dmsg->to_set;
5160 else if (rule->set == dmsg->to_set)
5161 rule->set = dmsg->from_set;
5162 }
5163 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5164 }
5165
5166 static int
5167 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2)
5168 {
5169 struct netmsg_del dmsg;
5170 struct netmsg_base *nmsg;
5171
5172 ASSERT_NETISR0;
5173
5174 bzero(&dmsg, sizeof(dmsg));
5175 nmsg = &dmsg.base;
5176 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5177 ipfw_alt_swap_ruleset_dispatch);
5178 dmsg.from_set = set1;
5179 dmsg.to_set = set2;
5180
5181 netisr_domsg_global(nmsg);
5182 return 0;
5183 }
5184
5185 /*
5186 * Remove all rules with given number, and also do set manipulation.
5187 *
5188 * The argument is a uint32_t. The low 16 bits are the rule or set number,
5189 * the next 8 bits are the new set, and the top 8 bits are the command:
5190 *
5191 * 0 delete rules with given number
5192 * 1 delete rules with given set number
5193 * 2 move rules with given number to new set
5194 * 3 move rules with given set number to new set
5195 * 4 swap sets with given numbers
 * e.g. arg = (2 << 24) | (5 << 16) | 100 moves rule 100 into set 5.
5196 */
5197 static int
5198 ipfw_ctl_alter(uint32_t arg)
5199 {
5200 uint16_t rulenum;
5201 uint8_t cmd, new_set;
5202 int error = 0;
5203
5204 ASSERT_NETISR0;
5205
5206 rulenum = arg & 0xffff;
5207 cmd = (arg >> 24) & 0xff;
5208 new_set = (arg >> 16) & 0xff;
5209
5210 if (cmd > 4)
5211 return EINVAL;
5212 if (new_set >= IPFW_DEFAULT_SET)
5213 return EINVAL;
5214 if (cmd == 0 || cmd == 2) {
5215 if (rulenum == IPFW_DEFAULT_RULE)
5216 return EINVAL;
5217 } else {
5218 if (rulenum >= IPFW_DEFAULT_SET)
5219 return EINVAL;
5220 }
5221
5222 switch (cmd) {
5223 case 0: /* delete rules with given number */
5224 error = ipfw_alt_delete_rule(rulenum);
5225 break;
5226
5227 case 1: /* delete all rules with given set number */
5228 error = ipfw_alt_delete_ruleset(rulenum);
5229 break;
5230
5231 case 2: /* move rules with given number to new set */
5232 error = ipfw_alt_move_rule(rulenum, new_set);
5233 break;
5234
5235 case 3: /* move rules with given set number to new set */
5236 error = ipfw_alt_move_ruleset(rulenum, new_set);
5237 break;
5238
5239 case 4: /* swap two sets */
5240 error = ipfw_alt_swap_ruleset(rulenum, new_set);
5241 break;
5242 }
5243 return error;
5244 }
5245
5246 /*
5247 * Clear counters for a specific rule.
5248 */
5249 static void
5250 clear_counters(struct ip_fw *rule, int log_only)
5251 {
5252 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
5253
5254 if (log_only == 0) {
5255 rule->bcnt = rule->pcnt = 0;
5256 rule->timestamp = 0;
5257 }
5258 if (l->o.opcode == O_LOG)
5259 l->log_left = l->max_log;
5260 }
5261
5262 static void
5263 ipfw_zero_entry_dispatch(netmsg_t nmsg)
5264 {
5265 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
5266 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5267 struct ip_fw *rule;
5268
5269 ASSERT_NETISR_NCPUS(mycpuid);
5270
5271 if (zmsg->rulenum == 0) {
5272 KKASSERT(zmsg->start_rule == NULL);
5273
5274 ctx->ipfw_norule_counter = 0;
5275 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5276 clear_counters(rule, zmsg->log_only);
5277 } else {
5278 struct ip_fw *start = zmsg->start_rule;
5279
5280 KKASSERT(start->cpuid == mycpuid);
5281 KKASSERT(start->rulenum == zmsg->rulenum);
5282
5283 /*
5284 * We can have multiple rules with the same number, so we
5285 * need to clear them all.
5286 */
5287 for (rule = start; rule && rule->rulenum == zmsg->rulenum;
5288 rule = rule->next)
5289 clear_counters(rule, zmsg->log_only);
5290
5291 /*
5292 * Move to the position on the next CPU
5293 * before the msg is forwarded.
5294 */
5295 zmsg->start_rule = start->sibling;
5296 }
5297 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5298 }
5299
5300 /*
5301 * Reset some or all counters on firewall rules.
5302 * @arg rulenum is 0 to clear all entries, or contains a specific
5303 * rule number.
5304 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
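 *
 * For example (illustrative calls, not taken from the code):
 * ipfw_ctl_zero_entry(100, 0) clears the packet/byte counters of
 * every rule numbered 100 on all CPUs, while
 * ipfw_ctl_zero_entry(0, 1) only replenishes the logging limit
 * (log_left) of every rule.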
5305 */
5306 static int
5307 ipfw_ctl_zero_entry(int rulenum, int log_only)
5308 {
5309 struct netmsg_zent zmsg;
5310 struct netmsg_base *nmsg;
5311 const char *msg;
5312 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5313
5314 ASSERT_NETISR0;
5315
5316 bzero(&zmsg, sizeof(zmsg));
5317 nmsg = &zmsg.base;
5318 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
5319 ipfw_zero_entry_dispatch);
5320 zmsg.log_only = log_only;
5321
5322 if (rulenum == 0) {
5323 msg = log_only ? "ipfw: All logging counts reset.\n"
5324 : "ipfw: Accounting cleared.\n";
5325 } else {
5326 struct ip_fw *rule;
5327
5328 /*
5329 * Locate the first rule with 'rulenum'
5330 */
5331 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
5332 if (rule->rulenum == rulenum)
5333 break;
5334 }
5335 if (rule == NULL) /* we did not find any matching rules */
5336 return (EINVAL);
5337 zmsg.start_rule = rule;
5338 zmsg.rulenum = rulenum;
5339
5340 msg = log_only ? "ipfw: Entry %d logging count reset.\n"
5341 : "ipfw: Entry %d cleared.\n";
5342 }
5343 netisr_domsg_global(nmsg);
5344 KKASSERT(zmsg.start_rule == NULL);
5345
5346 if (fw_verbose)
5347 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
5348 return (0);
5349 }
5350
5351 /*
5352 * Check the validity of the structure before insert.
5353 * Fortunately rules are simple, so this mostly needs to check rule sizes.
5354 */
5355 static int
5356 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags)
5357 {
5358 int l, cmdlen = 0;
5359 int have_action = 0;
5360 ipfw_insn *cmd;
5361
5362 *rule_flags = 0;
5363
5364 /* Check for valid size */
5365 if (size < sizeof(*rule)) {
5366 kprintf("ipfw: rule too short\n");
5367 return EINVAL;
5368 }
5369 l = IOC_RULESIZE(rule);
5370 if (l != size) {
5371 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l);
5372 return EINVAL;
5373 }
5374
5375 /* Check rule number */
5376 if (rule->rulenum == IPFW_DEFAULT_RULE) {
5377 kprintf("ipfw: invalid rule number\n");
5378 return EINVAL;
5379 }
5380
5381 /*
5382 * Now go for the individual checks. Very simple ones, basically only
5383 * instruction sizes.
5384 */
5385 for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
5386 l -= cmdlen, cmd += cmdlen) {
5387 cmdlen = F_LEN(cmd);
5388 if (cmdlen > l) {
5389 kprintf("ipfw: opcode %d size truncated\n",
5390 cmd->opcode);
5391 return EINVAL;
5392 }
5393
5394 DPRINTF("ipfw: opcode %d\n", cmd->opcode);
5395
5396 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT ||
5397 IPFW_ISXLAT(cmd->opcode)) {
5398 /* This rule will generate states.
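 * The flags computed here drive later rule management: GENSTATE/
 * GENTRACK rules have their states/tracks flushed when the rule
 * is deleted, and CROSSREF rules (defrag/xlat) additionally get a
 * per-CPU cross_rules array.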
*/ 5399 *rule_flags |= IPFW_RULE_F_GENSTATE; 5400 if (cmd->opcode == O_LIMIT) 5401 *rule_flags |= IPFW_RULE_F_GENTRACK; 5402 } 5403 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode)) 5404 *rule_flags |= IPFW_RULE_F_CROSSREF; 5405 if (cmd->opcode == O_IP_SRC_IFIP || 5406 cmd->opcode == O_IP_DST_IFIP) { 5407 *rule_flags |= IPFW_RULE_F_DYNIFADDR; 5408 cmd->arg1 &= IPFW_IFIP_SETTINGS; 5409 } 5410 5411 switch (cmd->opcode) { 5412 case O_NOP: 5413 case O_PROBE_STATE: 5414 case O_KEEP_STATE: 5415 case O_PROTO: 5416 case O_IP_SRC_ME: 5417 case O_IP_DST_ME: 5418 case O_LAYER2: 5419 case O_IN: 5420 case O_FRAG: 5421 case O_IPFRAG: 5422 case O_IPOPT: 5423 case O_IPLEN: 5424 case O_IPID: 5425 case O_IPTOS: 5426 case O_IPPRECEDENCE: 5427 case O_IPTTL: 5428 case O_IPVER: 5429 case O_TCPWIN: 5430 case O_TCPFLAGS: 5431 case O_TCPOPTS: 5432 case O_ESTAB: 5433 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5434 goto bad_size; 5435 break; 5436 5437 case O_IP_SRC_TABLE: 5438 case O_IP_DST_TABLE: 5439 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5440 goto bad_size; 5441 if (cmd->arg1 >= ipfw_table_max) { 5442 kprintf("ipfw: invalid table id %u, max %d\n", 5443 cmd->arg1, ipfw_table_max); 5444 return EINVAL; 5445 } 5446 break; 5447 5448 case O_IP_SRC_IFIP: 5449 case O_IP_DST_IFIP: 5450 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip)) 5451 goto bad_size; 5452 break; 5453 5454 case O_ICMPCODE: 5455 case O_ICMPTYPE: 5456 if (cmdlen < F_INSN_SIZE(ipfw_insn_u32)) 5457 goto bad_size; 5458 break; 5459 5460 case O_UID: 5461 case O_GID: 5462 case O_IP_SRC: 5463 case O_IP_DST: 5464 case O_TCPSEQ: 5465 case O_TCPACK: 5466 case O_PROB: 5467 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 5468 goto bad_size; 5469 break; 5470 5471 case O_LIMIT: 5472 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 5473 goto bad_size; 5474 break; 5475 case O_REDIRECT: 5476 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr)) 5477 goto bad_size; 5478 break; 5479 5480 case O_LOG: 5481 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 5482 goto bad_size; 5483 5484 ((ipfw_insn_log *)cmd)->log_left = 5485 ((ipfw_insn_log *)cmd)->max_log; 5486 5487 break; 5488 5489 case O_IP_SRC_MASK: 5490 case O_IP_DST_MASK: 5491 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 5492 goto bad_size; 5493 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 5494 kprintf("ipfw: opcode %d, useless rule\n", 5495 cmd->opcode); 5496 return EINVAL; 5497 } 5498 break; 5499 5500 case O_IP_SRC_SET: 5501 case O_IP_DST_SET: 5502 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 5503 kprintf("ipfw: invalid set size %d\n", 5504 cmd->arg1); 5505 return EINVAL; 5506 } 5507 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 5508 (cmd->arg1+31)/32 ) 5509 goto bad_size; 5510 break; 5511 5512 case O_MACADDR2: 5513 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 5514 goto bad_size; 5515 break; 5516 5517 case O_MAC_TYPE: 5518 case O_IP_SRCPORT: 5519 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 5520 if (cmdlen < 2 || cmdlen > 31) 5521 goto bad_size; 5522 break; 5523 5524 case O_RECV: 5525 case O_XMIT: 5526 case O_VIA: 5527 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 5528 goto bad_size; 5529 break; 5530 5531 case O_PIPE: 5532 case O_QUEUE: 5533 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 5534 goto bad_size; 5535 goto check_action; 5536 5537 case O_FORWARD_IP: 5538 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 5539 goto bad_size; 5540 } else { 5541 in_addr_t fwd_addr; 5542 5543 fwd_addr = ((ipfw_insn_sa *)cmd)-> 5544 sa.sin_addr.s_addr; 5545 if (IN_MULTICAST(ntohl(fwd_addr))) { 5546 kprintf("ipfw: try forwarding to " 5547 "multicast address\n"); 5548 return 
EINVAL; 5549 } 5550 } 5551 goto check_action; 5552 5553 case O_FORWARD_MAC: /* XXX not implemented yet */ 5554 case O_CHECK_STATE: 5555 case O_COUNT: 5556 case O_ACCEPT: 5557 case O_DENY: 5558 case O_REJECT: 5559 case O_SKIPTO: 5560 case O_DIVERT: 5561 case O_TEE: 5562 case O_DEFRAG: 5563 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5564 goto bad_size; 5565 check_action: 5566 if (have_action) { 5567 kprintf("ipfw: opcode %d, multiple actions" 5568 " not allowed\n", 5569 cmd->opcode); 5570 return EINVAL; 5571 } 5572 have_action = 1; 5573 if (l != cmdlen) { 5574 kprintf("ipfw: opcode %d, action must be" 5575 " last opcode\n", 5576 cmd->opcode); 5577 return EINVAL; 5578 } 5579 break; 5580 default: 5581 kprintf("ipfw: opcode %d, unknown opcode\n", 5582 cmd->opcode); 5583 return EINVAL; 5584 } 5585 } 5586 if (have_action == 0) { 5587 kprintf("ipfw: missing action\n"); 5588 return EINVAL; 5589 } 5590 return 0; 5591 5592 bad_size: 5593 kprintf("ipfw: opcode %d size %d wrong\n", 5594 cmd->opcode, cmdlen); 5595 return EINVAL; 5596 } 5597 5598 static int 5599 ipfw_ctl_add_rule(struct sockopt *sopt) 5600 { 5601 struct ipfw_ioc_rule *ioc_rule; 5602 size_t size; 5603 uint32_t rule_flags; 5604 int error; 5605 5606 ASSERT_NETISR0; 5607 5608 size = sopt->sopt_valsize; 5609 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 5610 size < sizeof(*ioc_rule)) { 5611 return EINVAL; 5612 } 5613 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 5614 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 5615 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 5616 } 5617 ioc_rule = sopt->sopt_val; 5618 5619 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 5620 if (error) 5621 return error; 5622 5623 ipfw_add_rule(ioc_rule, rule_flags); 5624 5625 if (sopt->sopt_dir == SOPT_GET) 5626 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 5627 return 0; 5628 } 5629 5630 static void * 5631 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule, 5632 struct ipfw_ioc_rule *ioc_rule) 5633 { 5634 const struct ip_fw *sibling; 5635 #ifdef INVARIANTS 5636 int i; 5637 #endif 5638 5639 ASSERT_NETISR0; 5640 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0")); 5641 5642 ioc_rule->act_ofs = rule->act_ofs; 5643 ioc_rule->cmd_len = rule->cmd_len; 5644 ioc_rule->rulenum = rule->rulenum; 5645 ioc_rule->set = rule->set; 5646 ioc_rule->usr_flags = rule->usr_flags; 5647 5648 ioc_rule->set_disable = ctx->ipfw_set_disable; 5649 ioc_rule->static_count = static_count; 5650 ioc_rule->static_len = static_ioc_len; 5651 5652 /* 5653 * Visit (read-only) all of the rule's duplications to get 5654 * the necessary statistics 5655 */ 5656 #ifdef INVARIANTS 5657 i = 0; 5658 #endif 5659 ioc_rule->pcnt = 0; 5660 ioc_rule->bcnt = 0; 5661 ioc_rule->timestamp = 0; 5662 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 5663 ioc_rule->pcnt += sibling->pcnt; 5664 ioc_rule->bcnt += sibling->bcnt; 5665 if (sibling->timestamp > ioc_rule->timestamp) 5666 ioc_rule->timestamp = sibling->timestamp; 5667 #ifdef INVARIANTS 5668 ++i; 5669 #endif 5670 } 5671 KASSERT(i == netisr_ncpus, 5672 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus)); 5673 5674 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 5675 5676 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 5677 } 5678 5679 static boolean_t 5680 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state) 5681 { 5682 struct ipfw_ioc_flowid *ioc_id; 5683 5684 if (trk->tc_expire == 0) { 5685 /* Not a scanned one. 
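 * tc_expire is only advanced by the per-cpu pass in
 * ipfw_state_copy_dispatch() below, so a zero here appears to mean
 * that no CPU reported a live track for this counter; skip it
 * rather than report a bogus expire time.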
*/ 5686 return (FALSE); 5687 } 5688 5689 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ? 5690 0 : trk->tc_expire - time_uptime; 5691 ioc_state->pcnt = 0; 5692 ioc_state->bcnt = 0; 5693 5694 ioc_state->dyn_type = O_LIMIT_PARENT; 5695 ioc_state->count = trk->tc_count; 5696 5697 ioc_state->rulenum = trk->tc_rulenum; 5698 5699 ioc_id = &ioc_state->id; 5700 ioc_id->type = ETHERTYPE_IP; 5701 ioc_id->u.ip.proto = trk->tc_proto; 5702 ioc_id->u.ip.src_ip = trk->tc_saddr; 5703 ioc_id->u.ip.dst_ip = trk->tc_daddr; 5704 ioc_id->u.ip.src_port = trk->tc_sport; 5705 ioc_id->u.ip.dst_port = trk->tc_dport; 5706 5707 return (TRUE); 5708 } 5709 5710 static boolean_t 5711 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state) 5712 { 5713 struct ipfw_ioc_flowid *ioc_id; 5714 5715 if (IPFW_STATE_SCANSKIP(s)) 5716 return (FALSE); 5717 5718 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ? 5719 0 : s->st_expire - time_uptime; 5720 ioc_state->pcnt = s->st_pcnt; 5721 ioc_state->bcnt = s->st_bcnt; 5722 5723 ioc_state->dyn_type = s->st_type; 5724 ioc_state->count = 0; 5725 5726 ioc_state->rulenum = s->st_rule->rulenum; 5727 5728 ioc_id = &ioc_state->id; 5729 ioc_id->type = ETHERTYPE_IP; 5730 ioc_id->u.ip.proto = s->st_proto; 5731 ipfw_key_4tuple(&s->st_key, 5732 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port, 5733 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port); 5734 5735 if (IPFW_ISXLAT(s->st_type)) { 5736 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 5737 5738 if (x->xlat_port == 0) 5739 ioc_state->xlat_port = ioc_id->u.ip.dst_port; 5740 else 5741 ioc_state->xlat_port = ntohs(x->xlat_port); 5742 ioc_state->xlat_addr = ntohl(x->xlat_addr); 5743 5744 ioc_state->pcnt += x->xlat_pair->xlat_pcnt; 5745 ioc_state->bcnt += x->xlat_pair->xlat_bcnt; 5746 } 5747 5748 return (TRUE); 5749 } 5750 5751 static void 5752 ipfw_state_copy_dispatch(netmsg_t nmsg) 5753 { 5754 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg; 5755 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5756 const struct ipfw_state *s; 5757 const struct ipfw_track *t; 5758 5759 ASSERT_NETISR_NCPUS(mycpuid); 5760 KASSERT(nm->state_cnt < nm->state_cntmax, 5761 ("invalid state count %d, max %d", 5762 nm->state_cnt, nm->state_cntmax)); 5763 5764 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) { 5765 if (ipfw_state_copy(s, nm->ioc_state)) { 5766 nm->ioc_state++; 5767 nm->state_cnt++; 5768 if (nm->state_cnt == nm->state_cntmax) 5769 goto done; 5770 } 5771 } 5772 5773 /* 5774 * Prepare tracks in the global track tree for userland. 5775 */ 5776 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) { 5777 struct ipfw_trkcnt *trk; 5778 5779 if (t->t_count == NULL) /* anchor */ 5780 continue; 5781 trk = t->t_trkcnt; 5782 5783 /* 5784 * Only one netisr can run this function at 5785 * any time, and only this function accesses 5786 * trkcnt's tc_expire, so this is safe w/o 5787 * ipfw_gd.ipfw_trkcnt_token. 5788 */ 5789 if (trk->tc_expire > t->t_expire) 5790 continue; 5791 trk->tc_expire = t->t_expire; 5792 } 5793 5794 /* 5795 * Copy tracks in the global track tree to userland in 5796 * the last netisr. 
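 * By then every netisr has refreshed tc_expire above; since the
 * trkcnt tree is shared between CPUs, the walk itself is done
 * under IPFW_TRKCNT_TOKGET/TOKREL.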
5797 */
5798 if (mycpuid == netisr_ncpus - 1) {
5799 struct ipfw_trkcnt *trk;
5800
5801 KASSERT(nm->state_cnt < nm->state_cntmax,
5802 ("invalid state count %d, max %d",
5803 nm->state_cnt, nm->state_cntmax));
5804
5805 IPFW_TRKCNT_TOKGET;
5806 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) {
5807 if (ipfw_track_copy(trk, nm->ioc_state)) {
5808 nm->ioc_state++;
5809 nm->state_cnt++;
5810 if (nm->state_cnt == nm->state_cntmax) {
5811 IPFW_TRKCNT_TOKREL;
5812 goto done;
5813 }
5814 }
5815 }
5816 IPFW_TRKCNT_TOKREL;
5817 }
5818 done:
5819 if (nm->state_cnt == nm->state_cntmax) {
5820 /* No more space; done. */
5821 netisr_replymsg(&nm->base, 0);
5822 } else {
5823 netisr_forwardmsg(&nm->base, mycpuid + 1);
5824 }
5825 }
5826
5827 static int
5828 ipfw_ctl_get_rules(struct sockopt *sopt)
5829 {
5830 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5831 struct ip_fw *rule;
5832 void *bp;
5833 size_t size;
5834 int state_cnt;
5835
5836 ASSERT_NETISR0;
5837
5838 /*
5839 * Pass up a copy of the current rules. Static rules
5840 * come first (the last of which has number IPFW_DEFAULT_RULE),
5841 * followed by a possibly empty list of states.
5842 */
5843
5844 size = static_ioc_len; /* size of static rules */
5845
5846 /*
5847 * Size of the states.
5848 * XXX take tracks as state for userland compat.
5849 */
5850 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt;
5851 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */
5852 size += state_cnt * sizeof(struct ipfw_ioc_state);
5853
5854 if (sopt->sopt_valsize < size) {
5855 /* short length, no need to return incomplete rules */
5856 /* XXX: if superuser, no need to zero buffer */
5857 bzero(sopt->sopt_val, sopt->sopt_valsize);
5858 return 0;
5859 }
5860 bp = sopt->sopt_val;
5861
5862 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5863 bp = ipfw_copy_rule(ctx, rule, bp);
5864
5865 if (state_cnt) {
5866 struct netmsg_cpstate nm;
5867 #ifdef INVARIANTS
5868 size_t old_size = size;
5869 #endif
5870
5871 netmsg_init(&nm.base, NULL, &curthread->td_msgport,
5872 MSGF_PRIORITY, ipfw_state_copy_dispatch);
5873 nm.ioc_state = bp;
5874 nm.state_cntmax = state_cnt;
5875 nm.state_cnt = 0;
5876 netisr_domsg_global(&nm.base);
5877
5878 /*
5879 * The number of states may have shrunk after the snapshot
5880 * of the state count was taken. To give the user a correct
5881 * state count, nm.state_cnt is used to recalculate
5882 * the actual size.
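 *
 * For example (hypothetical numbers): a snapshot of 40 states is
 * padded to 50 by the 25% headroom above; if only 30 survive the
 * copy, the size reported back is static_ioc_len plus
 * 30 * sizeof(struct ipfw_ioc_state).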
5883 */ 5884 size = static_ioc_len + 5885 (nm.state_cnt * sizeof(struct ipfw_ioc_state)); 5886 KKASSERT(size <= old_size); 5887 } 5888 5889 sopt->sopt_valsize = size; 5890 return 0; 5891 } 5892 5893 static void 5894 ipfw_set_disable_dispatch(netmsg_t nmsg) 5895 { 5896 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5897 5898 ASSERT_NETISR_NCPUS(mycpuid); 5899 5900 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32; 5901 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5902 } 5903 5904 static void 5905 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 5906 { 5907 struct netmsg_base nmsg; 5908 uint32_t set_disable; 5909 5910 ASSERT_NETISR0; 5911 5912 /* IPFW_DEFAULT_SET is always enabled */ 5913 enable |= (1 << IPFW_DEFAULT_SET); 5914 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 5915 5916 bzero(&nmsg, sizeof(nmsg)); 5917 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5918 ipfw_set_disable_dispatch); 5919 nmsg.lmsg.u.ms_result32 = set_disable; 5920 5921 netisr_domsg_global(&nmsg); 5922 } 5923 5924 static void 5925 ipfw_table_create_dispatch(netmsg_t nm) 5926 { 5927 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5928 int tblid = nm->lmsg.u.ms_result; 5929 5930 ASSERT_NETISR_NCPUS(mycpuid); 5931 5932 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid], 5933 rn_cpumaskhead(mycpuid), 32)) 5934 panic("ipfw: create table%d failed", tblid); 5935 5936 netisr_forwardmsg(&nm->base, mycpuid + 1); 5937 } 5938 5939 static int 5940 ipfw_table_create(struct sockopt *sopt) 5941 { 5942 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5943 struct ipfw_ioc_table *tbl; 5944 struct netmsg_base nm; 5945 5946 ASSERT_NETISR0; 5947 5948 if (sopt->sopt_valsize != sizeof(*tbl)) 5949 return (EINVAL); 5950 5951 tbl = sopt->sopt_val; 5952 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5953 return (EINVAL); 5954 5955 if (ctx->ipfw_tables[tbl->tableid] != NULL) 5956 return (EEXIST); 5957 5958 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5959 ipfw_table_create_dispatch); 5960 nm.lmsg.u.ms_result = tbl->tableid; 5961 netisr_domsg_global(&nm); 5962 5963 return (0); 5964 } 5965 5966 static void 5967 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn) 5968 { 5969 struct radix_node *ret; 5970 5971 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 5972 if (ret != rn) 5973 panic("deleted other table entry"); 5974 kfree(ret, M_IPFW); 5975 } 5976 5977 static int 5978 ipfw_table_killent(struct radix_node *rn, void *xrnh) 5979 { 5980 5981 ipfw_table_killrn(xrnh, rn); 5982 return (0); 5983 } 5984 5985 static void 5986 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, 5987 int destroy) 5988 { 5989 struct radix_node_head *rnh; 5990 5991 ASSERT_NETISR_NCPUS(mycpuid); 5992 5993 rnh = ctx->ipfw_tables[tableid]; 5994 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh); 5995 if (destroy) { 5996 Free(rnh); 5997 ctx->ipfw_tables[tableid] = NULL; 5998 } 5999 } 6000 6001 static void 6002 ipfw_table_flush_dispatch(netmsg_t nmsg) 6003 { 6004 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; 6005 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6006 6007 ASSERT_NETISR_NCPUS(mycpuid); 6008 6009 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); 6010 netisr_forwardmsg(&nm->base, mycpuid + 1); 6011 } 6012 6013 static void 6014 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) 6015 { 6016 int i; 6017 6018 ASSERT_NETISR_NCPUS(mycpuid); 6019 6020 for (i = 0; i < ipfw_table_max; ++i) { 6021 if (ctx->ipfw_tables[i] != NULL) 6022 
ipfw_table_flush_oncpu(ctx, i, destroy); 6023 } 6024 } 6025 6026 static void 6027 ipfw_table_flushall_dispatch(netmsg_t nmsg) 6028 { 6029 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6030 6031 ASSERT_NETISR_NCPUS(mycpuid); 6032 6033 ipfw_table_flushall_oncpu(ctx, 0); 6034 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6035 } 6036 6037 static int 6038 ipfw_table_flush(struct sockopt *sopt) 6039 { 6040 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6041 struct ipfw_ioc_table *tbl; 6042 struct netmsg_tblflush nm; 6043 6044 ASSERT_NETISR0; 6045 6046 if (sopt->sopt_valsize != sizeof(*tbl)) 6047 return (EINVAL); 6048 6049 tbl = sopt->sopt_val; 6050 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { 6051 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6052 MSGF_PRIORITY, ipfw_table_flushall_dispatch); 6053 netisr_domsg_global(&nm.base); 6054 return (0); 6055 } 6056 6057 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6058 return (EINVAL); 6059 6060 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6061 return (ENOENT); 6062 6063 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6064 ipfw_table_flush_dispatch); 6065 nm.tableid = tbl->tableid; 6066 nm.destroy = 0; 6067 if (sopt->sopt_name == IP_FW_TBL_DESTROY) 6068 nm.destroy = 1; 6069 netisr_domsg_global(&nm.base); 6070 6071 return (0); 6072 } 6073 6074 static int 6075 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) 6076 { 6077 int *cnt = xcnt; 6078 6079 (*cnt)++; 6080 return (0); 6081 } 6082 6083 static int 6084 ipfw_table_cpent(struct radix_node *rn, void *xcp) 6085 { 6086 struct ipfw_table_cp *cp = xcp; 6087 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6088 struct ipfw_ioc_tblent *ioc_te; 6089 #ifdef INVARIANTS 6090 int cnt; 6091 #endif 6092 6093 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", 6094 cp->te_idx, cp->te_cnt)); 6095 ioc_te = &cp->te[cp->te_idx]; 6096 6097 if (te->te_nodes->rn_mask != NULL) { 6098 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, 6099 *te->te_nodes->rn_mask); 6100 } else { 6101 ioc_te->netmask.sin_len = 0; 6102 } 6103 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); 6104 6105 ioc_te->use = te->te_use; 6106 ioc_te->last_used = te->te_lastuse; 6107 #ifdef INVARIANTS 6108 cnt = 1; 6109 #endif 6110 6111 while ((te = te->te_sibling) != NULL) { 6112 #ifdef INVARIANTS 6113 ++cnt; 6114 #endif 6115 ioc_te->use += te->te_use; 6116 if (te->te_lastuse > ioc_te->last_used) 6117 ioc_te->last_used = te->te_lastuse; 6118 } 6119 KASSERT(cnt == netisr_ncpus, 6120 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); 6121 6122 cp->te_idx++; 6123 6124 return (0); 6125 } 6126 6127 static int 6128 ipfw_table_get(struct sockopt *sopt) 6129 { 6130 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6131 struct radix_node_head *rnh; 6132 struct ipfw_ioc_table *tbl; 6133 struct ipfw_ioc_tblcont *cont; 6134 struct ipfw_table_cp cp; 6135 int cnt = 0, sz; 6136 6137 ASSERT_NETISR0; 6138 6139 if (sopt->sopt_valsize < sizeof(*tbl)) 6140 return (EINVAL); 6141 6142 tbl = sopt->sopt_val; 6143 if (tbl->tableid < 0) { 6144 struct ipfw_ioc_tbllist *list; 6145 int i; 6146 6147 /* 6148 * List available table ids. 
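 * A negative tableid acts as a wildcard query: count the tables
 * that exist, fail with E2BIG if the caller's buffer cannot hold
 * that many ids, then emit the ids in ascending order.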
6149 */ 6150 for (i = 0; i < ipfw_table_max; ++i) { 6151 if (ctx->ipfw_tables[i] != NULL) 6152 ++cnt; 6153 } 6154 6155 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); 6156 if (sopt->sopt_valsize < sz) { 6157 bzero(sopt->sopt_val, sopt->sopt_valsize); 6158 return (E2BIG); 6159 } 6160 list = sopt->sopt_val; 6161 list->tablecnt = cnt; 6162 6163 cnt = 0; 6164 for (i = 0; i < ipfw_table_max; ++i) { 6165 if (ctx->ipfw_tables[i] != NULL) { 6166 KASSERT(cnt < list->tablecnt, 6167 ("invalid idx %d, cnt %d", 6168 cnt, list->tablecnt)); 6169 list->tables[cnt++] = i; 6170 } 6171 } 6172 sopt->sopt_valsize = sz; 6173 return (0); 6174 } else if (tbl->tableid >= ipfw_table_max) { 6175 return (EINVAL); 6176 } 6177 6178 rnh = ctx->ipfw_tables[tbl->tableid]; 6179 if (rnh == NULL) 6180 return (ENOENT); 6181 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); 6182 6183 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); 6184 if (sopt->sopt_valsize < sz) { 6185 bzero(sopt->sopt_val, sopt->sopt_valsize); 6186 return (E2BIG); 6187 } 6188 cont = sopt->sopt_val; 6189 cont->entcnt = cnt; 6190 6191 cp.te = cont->ent; 6192 cp.te_idx = 0; 6193 cp.te_cnt = cnt; 6194 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); 6195 6196 sopt->sopt_valsize = sz; 6197 return (0); 6198 } 6199 6200 static void 6201 ipfw_table_add_dispatch(netmsg_t nmsg) 6202 { 6203 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6204 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6205 struct radix_node_head *rnh; 6206 struct ipfw_tblent *te; 6207 6208 ASSERT_NETISR_NCPUS(mycpuid); 6209 6210 rnh = ctx->ipfw_tables[nm->tableid]; 6211 6212 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO); 6213 te->te_nodes->rn_key = (char *)&te->te_key; 6214 memcpy(&te->te_key, nm->key, sizeof(te->te_key)); 6215 6216 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh, 6217 te->te_nodes) == NULL) { 6218 if (mycpuid == 0) { 6219 kfree(te, M_IPFW); 6220 netisr_replymsg(&nm->base, EEXIST); 6221 return; 6222 } 6223 panic("rnh_addaddr failed"); 6224 } 6225 6226 /* Link siblings. 
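 * As with rules, the per-CPU copies of a table entry are chained
 * through te_sibling in CPU order, which is what allows
 * ipfw_table_cpent() to aggregate te_use/te_lastuse with a simple
 * sibling walk.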
*/ 6227 if (nm->sibling != NULL) 6228 nm->sibling->te_sibling = te; 6229 nm->sibling = te; 6230 6231 netisr_forwardmsg(&nm->base, mycpuid + 1); 6232 } 6233 6234 static void 6235 ipfw_table_del_dispatch(netmsg_t nmsg) 6236 { 6237 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6238 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6239 struct radix_node_head *rnh; 6240 struct radix_node *rn; 6241 6242 ASSERT_NETISR_NCPUS(mycpuid); 6243 6244 rnh = ctx->ipfw_tables[nm->tableid]; 6245 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh); 6246 if (rn == NULL) { 6247 if (mycpuid == 0) { 6248 netisr_replymsg(&nm->base, ESRCH); 6249 return; 6250 } 6251 panic("rnh_deladdr failed"); 6252 } 6253 kfree(rn, M_IPFW); 6254 6255 netisr_forwardmsg(&nm->base, mycpuid + 1); 6256 } 6257 6258 static int 6259 ipfw_table_alt(struct sockopt *sopt) 6260 { 6261 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6262 struct ipfw_ioc_tblcont *tbl; 6263 struct ipfw_ioc_tblent *te; 6264 struct sockaddr_in key0; 6265 struct sockaddr *netmask = NULL, *key; 6266 struct netmsg_tblent nm; 6267 6268 ASSERT_NETISR0; 6269 6270 if (sopt->sopt_valsize != sizeof(*tbl)) 6271 return (EINVAL); 6272 tbl = sopt->sopt_val; 6273 6274 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6275 return (EINVAL); 6276 if (tbl->entcnt != 1) 6277 return (EINVAL); 6278 6279 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6280 return (ENOENT); 6281 te = &tbl->ent[0]; 6282 6283 if (te->key.sin_family != AF_INET || 6284 te->key.sin_port != 0 || 6285 te->key.sin_len != sizeof(struct sockaddr_in)) 6286 return (EINVAL); 6287 key = (struct sockaddr *)&te->key; 6288 6289 if (te->netmask.sin_len != 0) { 6290 if (te->netmask.sin_port != 0 || 6291 te->netmask.sin_len > sizeof(struct sockaddr_in)) 6292 return (EINVAL); 6293 netmask = (struct sockaddr *)&te->netmask; 6294 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask); 6295 key = (struct sockaddr *)&key0; 6296 } 6297 6298 if (sopt->sopt_name == IP_FW_TBL_ADD) { 6299 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6300 MSGF_PRIORITY, ipfw_table_add_dispatch); 6301 } else { 6302 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6303 MSGF_PRIORITY, ipfw_table_del_dispatch); 6304 } 6305 nm.key = key; 6306 nm.netmask = netmask; 6307 nm.tableid = tbl->tableid; 6308 nm.sibling = NULL; 6309 return (netisr_domsg_global(&nm.base)); 6310 } 6311 6312 static int 6313 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused) 6314 { 6315 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6316 6317 te->te_use = 0; 6318 te->te_lastuse = 0; 6319 return (0); 6320 } 6321 6322 static void 6323 ipfw_table_zero_dispatch(netmsg_t nmsg) 6324 { 6325 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6326 struct radix_node_head *rnh; 6327 6328 ASSERT_NETISR_NCPUS(mycpuid); 6329 6330 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result]; 6331 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6332 6333 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6334 } 6335 6336 static void 6337 ipfw_table_zeroall_dispatch(netmsg_t nmsg) 6338 { 6339 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6340 int i; 6341 6342 ASSERT_NETISR_NCPUS(mycpuid); 6343 6344 for (i = 0; i < ipfw_table_max; ++i) { 6345 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6346 6347 if (rnh != NULL) 6348 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6349 } 6350 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6351 } 6352 6353 static int 6354 ipfw_table_zero(struct sockopt *sopt) 6355 { 6356 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6357 struct netmsg_base 
static int
ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
{
	struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

	te->te_use = 0;
	te->te_lastuse = 0;
	return (0);
}

static void
ipfw_table_zero_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct radix_node_head *rnh;

	ASSERT_NETISR_NCPUS(mycpuid);

	rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
	rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_table_zeroall_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	int i;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (i = 0; i < ipfw_table_max; ++i) {
		struct radix_node_head *rnh = ctx->ipfw_tables[i];

		if (rnh != NULL)
			rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
	}
	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static int
ipfw_table_zero(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_base nm;
	struct ipfw_ioc_table *tbl;

	ASSERT_NETISR0;

	if (sopt->sopt_valsize != sizeof(*tbl))
		return (EINVAL);
	tbl = sopt->sopt_val;

	if (tbl->tableid < 0) {
		netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
		    ipfw_table_zeroall_dispatch);
		netisr_domsg_global(&nm);
		return (0);
	} else if (tbl->tableid >= ipfw_table_max) {
		return (EINVAL);
	} else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
		return (ENOENT);
	}

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_table_zero_dispatch);
	nm.lmsg.u.ms_result = tbl->tableid;
	netisr_domsg_global(&nm);

	return (0);
}

static int
ipfw_table_killexp(struct radix_node *rn, void *xnm)
{
	struct netmsg_tblexp *nm = xnm;
	struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

	if (te->te_expired) {
		ipfw_table_killrn(nm->rnh, rn);
		nm->expcnt++;
	}
	return (0);
}

static void
ipfw_table_expire_dispatch(netmsg_t nmsg)
{
	struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct radix_node_head *rnh;

	ASSERT_NETISR_NCPUS(mycpuid);

	rnh = ctx->ipfw_tables[nm->tableid];
	nm->rnh = rnh;
	rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);

	KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm->cnt * (mycpuid + 1), nm->expcnt));

	netisr_forwardmsg(&nm->base, mycpuid + 1);
}

static void
ipfw_table_expireall_dispatch(netmsg_t nmsg)
{
	struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	int i;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (i = 0; i < ipfw_table_max; ++i) {
		struct radix_node_head *rnh = ctx->ipfw_tables[i];

		if (rnh == NULL)
			continue;
		nm->rnh = rnh;
		rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
	}

	KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm->cnt * (mycpuid + 1), nm->expcnt));

	netisr_forwardmsg(&nm->base, mycpuid + 1);
}

static int
ipfw_table_markexp(struct radix_node *rn, void *xnm)
{
	struct netmsg_tblexp *nm = xnm;
	struct ipfw_tblent *te;
	time_t lastuse;

	te = (struct ipfw_tblent *)rn;
	lastuse = te->te_lastuse;

	while ((te = te->te_sibling) != NULL) {
		if (te->te_lastuse > lastuse)
			lastuse = te->te_lastuse;
	}
	if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
		/* Not expired */
		return (0);
	}

	te = (struct ipfw_tblent *)rn;
	te->te_expired = 1;
	while ((te = te->te_sibling) != NULL)
		te->te_expired = 1;
	nm->cnt++;

	return (0);
}
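/*
 * NOTE (summary, not in the original source):
 * Table expiration is two-phase.  ipfw_table_markexp() runs first on
 * netisr0 and marks an entry expired only if the most recent
 * te_lastuse among all of its per-cpu siblings is old enough, so a
 * prefix that is still hot on any cpu is kept everywhere.  The
 * *_expire_dispatch() handlers above then delete the marked entries
 * on every cpu; the running KASSERTs verify that each cpu deletes
 * exactly nm->cnt entries as the netmsg travels across the cpus.
 */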
static int
ipfw_table_expire(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_tblexp nm;
	struct ipfw_ioc_tblexp *tbl;
	struct radix_node_head *rnh;

	ASSERT_NETISR0;

	if (sopt->sopt_valsize != sizeof(*tbl))
		return (EINVAL);
	tbl = sopt->sopt_val;
	tbl->expcnt = 0;

	nm.expcnt = 0;
	nm.cnt = 0;
	nm.expire = tbl->expire;

	if (tbl->tableid < 0) {
		int i;

		for (i = 0; i < ipfw_table_max; ++i) {
			rnh = ctx->ipfw_tables[i];
			if (rnh == NULL)
				continue;
			rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
		}
		if (nm.cnt == 0) {
			/* No addresses can be expired. */
			return (0);
		}
		tbl->expcnt = nm.cnt;

		netmsg_init(&nm.base, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_table_expireall_dispatch);
		nm.tableid = -1;
		netisr_domsg_global(&nm.base);
		KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
		    ("not all expired addresses (%d) were deleted (%d)",
		     nm.cnt * netisr_ncpus, nm.expcnt));

		return (0);
	} else if (tbl->tableid >= ipfw_table_max) {
		return (EINVAL);
	}

	rnh = ctx->ipfw_tables[tbl->tableid];
	if (rnh == NULL)
		return (ENOENT);
	rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
	if (nm.cnt == 0) {
		/* No addresses can be expired. */
		return (0);
	}
	tbl->expcnt = nm.cnt;

	netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_table_expire_dispatch);
	nm.tableid = tbl->tableid;
	netisr_domsg_global(&nm.base);
	KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm.cnt * netisr_ncpus, nm.expcnt));
	return (0);
}

static void
ipfw_crossref_free_dispatch(netmsg_t nmsg)
{
	struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;

	KKASSERT((rule->rule_flags &
	    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
	    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
	ipfw_free_rule(rule);

	netisr_replymsg(&nmsg->base, 0);
}

static void
ipfw_crossref_reap(void)
{
	struct ip_fw *rule, *prev = NULL;

	ASSERT_NETISR0;

	rule = ipfw_gd.ipfw_crossref_free;
	while (rule != NULL) {
		uint64_t inflight = 0;
		int i;

		for (i = 0; i < netisr_ncpus; ++i)
			inflight += rule->cross_rules[i]->cross_refs;
		if (inflight == 0) {
			struct ip_fw *f = rule;

			/*
			 * Unlink.
			 */
			rule = rule->next;
			if (prev != NULL)
				prev->next = rule;
			else
				ipfw_gd.ipfw_crossref_free = rule;

			/*
			 * Free.
			 */
			for (i = 1; i < netisr_ncpus; ++i) {
				struct netmsg_base nm;

				netmsg_init(&nm, NULL, &curthread->td_msgport,
				    MSGF_PRIORITY, ipfw_crossref_free_dispatch);
				nm.lmsg.u.ms_resultp = f->cross_rules[i];
				netisr_domsg(&nm, i);
			}
			KKASSERT((f->rule_flags &
			    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
			    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
			ipfw_unref_rule(f);
		} else {
			prev = rule;
			rule = rule->next;
		}
	}

	if (ipfw_gd.ipfw_crossref_free != NULL) {
		callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
		    ipfw_crossref_timeo, NULL);
	}
}
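/*
 * NOTE (summary, not in the original source):
 * A cross-referenced rule may still be pointed at by mbufs inflight
 * to another cpu (see ipfw_defrag_redispatch()), so it is reclaimed
 * only once the sum of cross_refs over all netisr cpus drops to
 * zero.  The cpu0 copy is released with ipfw_unref_rule(); the other
 * cpus' copies are freed in their owning netisrs through
 * ipfw_crossref_free_dispatch().  Anything still referenced is
 * retried from a one second callout.
 */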
/*
 * {set|get}sockopt parser.
 */
static int
ipfw_ctl(struct sockopt *sopt)
{
	int error, rulenum;
	uint32_t *masks;
	size_t size;

	ASSERT_NETISR0;

	error = 0;

	switch (sopt->sopt_name) {
	case IP_FW_GET:
		error = ipfw_ctl_get_rules(sopt);
		break;

	case IP_FW_FLUSH:
		ipfw_flush(0 /* keep default rule */);
		break;

	case IP_FW_ADD:
		error = ipfw_ctl_add_rule(sopt);
		break;

	case IP_FW_DEL:
		/*
		 * IP_FW_DEL is used for deleting single rules or sets,
		 * and (ab)used to atomically manipulate sets.
		 * Argument size is used to distinguish between the two:
		 *	sizeof(uint32_t)
		 *		delete single rule or set of rules,
		 *		or reassign rules (or sets) to a different set.
		 *	2 * sizeof(uint32_t)
		 *		atomic disable/enable sets.
		 *		first uint32_t contains sets to be disabled,
		 *		second uint32_t contains sets to be enabled.
		 */
		masks = sopt->sopt_val;
		size = sopt->sopt_valsize;
		if (size == sizeof(*masks)) {
			/*
			 * Delete or reassign static rule
			 */
			error = ipfw_ctl_alter(masks[0]);
		} else if (size == (2 * sizeof(*masks))) {
			/*
			 * Set enable/disable
			 */
			ipfw_ctl_set_disable(masks[0], masks[1]);
		} else {
			error = EINVAL;
		}
		break;

	case IP_FW_ZERO:
	case IP_FW_RESETLOG: /* argument is an int, the rule number */
		rulenum = 0;

		if (sopt->sopt_val != 0) {
			error = soopt_to_kbuf(sopt, &rulenum,
			    sizeof(int), sizeof(int));
			if (error)
				break;
		}
		error = ipfw_ctl_zero_entry(rulenum,
		    sopt->sopt_name == IP_FW_RESETLOG);
		break;

	case IP_FW_TBL_CREATE:
		error = ipfw_table_create(sopt);
		break;

	case IP_FW_TBL_ADD:
	case IP_FW_TBL_DEL:
		error = ipfw_table_alt(sopt);
		break;

	case IP_FW_TBL_FLUSH:
	case IP_FW_TBL_DESTROY:
		error = ipfw_table_flush(sopt);
		break;

	case IP_FW_TBL_GET:
		error = ipfw_table_get(sopt);
		break;

	case IP_FW_TBL_ZERO:
		error = ipfw_table_zero(sopt);
		break;

	case IP_FW_TBL_EXPIRE:
		error = ipfw_table_expire(sopt);
		break;

	default:
		kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
		error = EINVAL;
	}

	ipfw_crossref_reap();
	return error;
}
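/*
 * Example (a sketch, not part of the original source): the
 * 2 * sizeof(uint32_t) form of IP_FW_DEL handled above, used to
 * atomically disable set 1 and enable set 2 from userland:
 *
 *	uint32_t masks[2];
 *
 *	masks[0] = 1 << 1;	-- sets to be disabled
 *	masks[1] = 1 << 2;	-- sets to be enabled
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, masks, sizeof(masks));
 */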
6759 */ 6760 if (s->st_type == O_ANCHOR) 6761 continue; 6762 6763 if (IPFW_STATE_ISDEAD(s)) { 6764 ipfw_state_remove(ctx, s); 6765 if (++expired >= ipfw_state_expire_max) { 6766 ipfw_keepalive_more(ctx); 6767 return; 6768 } 6769 continue; 6770 } 6771 6772 /* 6773 * Keep alive processing 6774 */ 6775 6776 if (s->st_proto != IPPROTO_TCP) 6777 continue; 6778 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN) 6779 continue; 6780 if (TIME_LEQ(time_uptime + dyn_keepalive_interval, 6781 s->st_expire)) 6782 continue; /* too early */ 6783 6784 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port, 6785 &id.dst_ip, &id.dst_port); 6786 ack_rev = s->st_ack_rev; 6787 ack_fwd = s->st_ack_fwd; 6788 6789 #define SEND_FWD 0x1 6790 #define SEND_REV 0x2 6791 6792 if (IPFW_ISXLAT(s->st_type)) { 6793 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 6794 6795 if (x->xlat_dir == MATCH_FORWARD) 6796 send_dir = SEND_FWD; 6797 else 6798 send_dir = SEND_REV; 6799 } else { 6800 send_dir = SEND_FWD | SEND_REV; 6801 } 6802 6803 if (send_dir & SEND_REV) 6804 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN); 6805 if (send_dir & SEND_FWD) 6806 send_pkt(&id, ack_fwd - 1, ack_rev, 0); 6807 6808 #undef SEND_FWD 6809 #undef SEND_REV 6810 6811 if (++kept >= ipfw_keepalive_max) { 6812 ipfw_keepalive_more(ctx); 6813 return; 6814 } 6815 } 6816 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6817 ipfw_keepalive_done(ctx); 6818 } 6819 6820 static void 6821 ipfw_keepalive_more_dispatch(netmsg_t nm) 6822 { 6823 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6824 struct ipfw_state *anchor; 6825 6826 ASSERT_NETISR_NCPUS(mycpuid); 6827 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6828 ("keepalive is not in progress")); 6829 6830 /* Reply ASAP */ 6831 netisr_replymsg(&nm->base, 0); 6832 6833 anchor = &ctx->ipfw_keepalive_anch; 6834 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6835 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6836 ipfw_keepalive_done(ctx); 6837 return; 6838 } 6839 ipfw_keepalive_loop(ctx, anchor); 6840 } 6841 6842 /* 6843 * This procedure is only used to handle keepalives. It is invoked 6844 * every dyn_keepalive_period 6845 */ 6846 static void 6847 ipfw_keepalive_dispatch(netmsg_t nm) 6848 { 6849 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6850 struct ipfw_state *anchor; 6851 6852 ASSERT_NETISR_NCPUS(mycpuid); 6853 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0, 6854 ("keepalive is in progress")); 6855 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE; 6856 6857 /* Reply ASAP */ 6858 crit_enter(); 6859 netisr_replymsg(&nm->base, 0); 6860 crit_exit(); 6861 6862 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6863 ipfw_keepalive_done(ctx); 6864 return; 6865 } 6866 6867 anchor = &ctx->ipfw_keepalive_anch; 6868 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 6869 ipfw_keepalive_loop(ctx, anchor); 6870 } 6871 6872 /* 6873 * This procedure is only used to handle keepalives. 
/*
 * This procedure is only used to handle keepalives.  It is invoked
 * every dyn_keepalive_period.
 */
static void
ipfw_keepalive(void *dummy __unused)
{
	struct netmsg_base *msg;

	KKASSERT(mycpuid < netisr_ncpus);
	msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm;

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

static void
ipfw_ip_input_dispatch(netmsg_t nmsg)
{
	struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct mbuf *m = nm->m;
	struct ip_fw *rule = nm->arg1;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT(rule->cpuid == mycpuid,
	    ("rule does not belong to cpu%d", mycpuid));
	KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
	    ("mbuf does not have ipfw continue rule"));

	KASSERT(ctx->ipfw_cont_rule == NULL,
	    ("pending ipfw continue rule"));
	ctx->ipfw_cont_rule = rule;
	ip_input(m);

	/* May not have been cleared, if ipfw was unloaded/disabled. */
	ctx->ipfw_cont_rule = NULL;

	/*
	 * This rule is no longer used; decrement its cross_refs,
	 * so this rule can be deleted.
	 */
	rule->cross_refs--;
}

static void
ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule)
{
	struct netmsg_genpkt *nm;

	KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid));

	/*
	 * NOTE:
	 * Bump cross_refs to prevent this rule and its siblings
	 * from being deleted, while this mbuf is inflight.  The
	 * cross_refs of the sibling rule on the target cpu will
	 * be decremented, once this mbuf is going to be filtered
	 * on the target cpu.
	 */
	rule->cross_refs++;
	m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;

	nm = &m->m_hdr.mh_genmsg;
	netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
	    ipfw_ip_input_dispatch);
	nm->m = m;
	nm->arg1 = rule->cross_rules[cpuid];
	netisr_sendmsg(&nm->base, cpuid);
}

static void
ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif)
{

	args->flags = 0;
	args->rule = NULL;
	args->xlat = NULL;

	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
		struct m_tag *mtag;

		/* Extract info from dummynet tag */
		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
		KKASSERT(mtag != NULL);
		args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
		KKASSERT(args->rule != NULL);

		m_tag_delete(m, mtag);
		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
	} else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
		struct ipfw_context *ctx = ipfw_ctx[mycpuid];

		KKASSERT(ctx->ipfw_cont_rule != NULL);
		args->rule = ctx->ipfw_cont_rule;
		ctx->ipfw_cont_rule = NULL;

		if (ctx->ipfw_cont_xlat != NULL) {
			args->xlat = ctx->ipfw_cont_xlat;
			ctx->ipfw_cont_xlat = NULL;
			if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) {
				args->flags |= IP_FWARG_F_XLATINS;
				m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS;
			}
			if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) {
				args->flags |= IP_FWARG_F_XLATFWD;
				m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD;
			}
		}
		KKASSERT((m->m_pkthdr.fw_flags &
		    (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0);

		args->flags |= IP_FWARG_F_CONT;
		m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE;
	}

	args->eh = NULL;
	args->oif = oif;
	args->m = m;
}
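/*
 * NOTE (summary, not in the original source):
 * Cross-cpu "continue" handoff: ipfw_defrag_redispatch() tags the
 * mbuf with IPFW_MBUF_CONTINUE and queues it to the target cpu with
 * that cpu's copy of the rule (cross_rules[cpuid]);
 * ipfw_ip_input_dispatch() parks that rule in ctx->ipfw_cont_rule
 * and reinjects the packet through ip_input(); ipfw_init_args()
 * above finally picks the rule up and sets IP_FWARG_F_CONT so that
 * ipfw_chk() can resume filtering where the original cpu stopped.
 */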
static int
ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
{
	struct ip_fw_args args;
	struct mbuf *m = *m0;
	int tee = 0, error = 0, ret;

	ipfw_init_args(&args, m, NULL);

	ret = ipfw_chk(&args);
	m = args.m;
	if (m == NULL) {
		if (ret != IP_FW_REDISPATCH)
			error = EACCES;
		goto back;
	}

	switch (ret) {
	case IP_FW_PASS:
		break;

	case IP_FW_DENY:
		m_freem(m);
		m = NULL;
		error = EACCES;
		break;

	case IP_FW_DUMMYNET:
		/* Send packet to the appropriate pipe */
		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
		break;

	case IP_FW_TEE:
		tee = 1;
		/* FALL THROUGH */

	case IP_FW_DIVERT:
		/*
		 * Must clear the bridge tag before diverting.
		 */
		m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
		if (ip_divert_p != NULL) {
			m = ip_divert_p(m, tee, 1);
		} else {
			m_freem(m);
			m = NULL;
			/* not sure this is the right error msg */
			error = EACCES;
		}
		break;

	default:
		panic("unknown ipfw return value: %d", ret);
	}
back:
	*m0 = m;
	return error;
}

static int
ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
{
	struct ip_fw_args args;
	struct mbuf *m = *m0;
	int tee = 0, error = 0, ret;

	ipfw_init_args(&args, m, ifp);

	ret = ipfw_chk(&args);
	m = args.m;
	if (m == NULL) {
		if (ret != IP_FW_REDISPATCH)
			error = EACCES;
		goto back;
	}

	switch (ret) {
	case IP_FW_PASS:
		break;

	case IP_FW_DENY:
		m_freem(m);
		m = NULL;
		error = EACCES;
		break;

	case IP_FW_DUMMYNET:
		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args);
		break;

	case IP_FW_TEE:
		tee = 1;
		/* FALL THROUGH */

	case IP_FW_DIVERT:
		if (ip_divert_p != NULL) {
			m = ip_divert_p(m, tee, 0);
		} else {
			m_freem(m);
			m = NULL;
			/* not sure this is the right error msg */
			error = EACCES;
		}
		break;

	default:
		panic("unknown ipfw return value: %d", ret);
	}
back:
	*m0 = m;
	return error;
}

static void
ipfw_hook(void)
{
	struct pfil_head *pfh;

	ASSERT_NETISR0;

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
	pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
}

static void
ipfw_dehook(void)
{
	struct pfil_head *pfh;

	ASSERT_NETISR0;

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
	pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
}
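/*
 * NOTE (summary, not in the original source):
 * For both pfil hooks above, returning with *m0 == NULL means the
 * packet was consumed.  EACCES tells the stack it was dropped, while
 * a NULL mbuf with error 0 -- the IP_FW_REDISPATCH, dummynet and
 * divert paths -- means it was handed off elsewhere and must not be
 * touched again by the caller.
 */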
static int
ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS)
{
	int dyn_cnt;

	dyn_cnt = ipfw_state_cntcoll();
	dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt;

	return (sysctl_handle_int(oidp, &dyn_cnt, 0, req));
}

static int
ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS)
{
	int state_cnt;

	state_cnt = ipfw_state_cntcoll();
	return (sysctl_handle_int(oidp, &state_cnt, 0, req));
}

static int
ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS)
{
	int state_max, error;

	state_max = ipfw_state_max;
	error = sysctl_handle_int(oidp, &state_max, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	if (state_max < 1)
		return (EINVAL);

	ipfw_state_max_set(state_max);
	return (0);
}

static int
ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS)
{
	int dyn_max, error;

	dyn_max = ipfw_state_max + ipfw_track_max;

	error = sysctl_handle_int(oidp, &dyn_max, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	if (dyn_max < 2)
		return (EINVAL);

	ipfw_state_max_set(dyn_max / 2);
	ipfw_track_max = dyn_max / 2;
	return (0);
}
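/*
 * Example (a sketch, not part of the original source): the legacy
 * dyn_max knob handled above splits its value evenly between states
 * and tracks, e.g.
 *
 *	sysctl net.inet.ip.fw.dyn_max=16384
 *
 * would set ipfw_state_max and ipfw_track_max to 8192 each.  The
 * exact sysctl path is an assumption here; see the SYSCTL
 * definitions elsewhere in this file for the authoritative names.
 */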
static void
ipfw_sysctl_enable_dispatch(netmsg_t nmsg)
{
	int enable = nmsg->lmsg.u.ms_result;

	ASSERT_NETISR0;

	if (fw_enable == enable)
		goto reply;

	fw_enable = enable;
	if (fw_enable)
		ipfw_hook();
	else
		ipfw_dehook();
reply:
	netisr_replymsg(&nmsg->base, 0);
}

static int
ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
{
	struct netmsg_base nmsg;
	int enable, error;

	enable = fw_enable;
	error = sysctl_handle_int(oidp, &enable, 0, req);
	if (error || req->newptr == NULL)
		return error;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_sysctl_enable_dispatch);
	nmsg.lmsg.u.ms_result = enable;

	return netisr_domsg(&nmsg, 0);
}

static int
ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
{
	return sysctl_int_range(oidp, arg1, arg2, req,
	    IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
}

static int
ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS)
{

	return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX);
}

static int
ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS)
{
	u_long stat = 0;
	int cpu, error;

	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2));

	error = sysctl_handle_long(oidp, &stat, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	/* Zero out this stat. */
	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		*((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0;
	return (0);
}

static void
ipfw_ctx_init_dispatch(netmsg_t nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ipfw_context *ctx;
	struct ip_fw *def_rule;

	ASSERT_NETISR_NCPUS(mycpuid);

	ctx = kmalloc(__offsetof(struct ipfw_context,
	    ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO);

	RB_INIT(&ctx->ipfw_state_tree);
	TAILQ_INIT(&ctx->ipfw_state_list);

	RB_INIT(&ctx->ipfw_track_tree);
	TAILQ_INIT(&ctx->ipfw_track_list);

	callout_init_mp(&ctx->ipfw_stateto_ch);
	netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch);
	ctx->ipfw_stateexp_anch.st_type = O_ANCHOR;
	netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_state_expire_more_dispatch);

	callout_init_mp(&ctx->ipfw_trackto_ch);
	netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch);
	netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_track_expire_more_dispatch);

	callout_init_mp(&ctx->ipfw_keepalive_ch);
	netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch);
	ctx->ipfw_keepalive_anch.st_type = O_ANCHOR;
	netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_keepalive_more_dispatch);

	callout_init_mp(&ctx->ipfw_xlatreap_ch);
	netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch);
	TAILQ_INIT(&ctx->ipfw_xlatreap);

	ipfw_ctx[mycpuid] = ctx;

	def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO);

	def_rule->act_ofs = 0;
	def_rule->rulenum = IPFW_DEFAULT_RULE;
	def_rule->cmd_len = 1;
	def_rule->set = IPFW_DEFAULT_SET;

	def_rule->cmd[0].len = 1;
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
	def_rule->cmd[0].opcode = O_ACCEPT;
#else
	if (filters_default_to_accept)
		def_rule->cmd[0].opcode = O_ACCEPT;
	else
		def_rule->cmd[0].opcode = O_DENY;
#endif

	def_rule->refcnt = 1;
	def_rule->cpuid = mycpuid;

	/* Install the default rule */
	ctx->ipfw_default_rule = def_rule;
	ctx->ipfw_layer3_chain = def_rule;

	/* Link rule CPU sibling */
	ipfw_link_sibling(fwmsg, def_rule);

	/* Statistics only need to be updated once */
	if (mycpuid == 0)
		ipfw_inc_static_count(def_rule);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}
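/*
 * NOTE (summary, not in the original source):
 * The per-cpu expire/keepalive netmsgs initialized above are all
 * MSGF_DROPABLE and each is paired with a callout (or IPI handler)
 * that resends the message to its own cpu only when it is MSGF_DONE
 * -- see ipfw_keepalive() for the pattern.  This bounds each handler
 * to at most one pending instance per cpu and lets module unload
 * flush them with netisr_dropmsg().
 */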
static void
ipfw_crossref_reap_dispatch(netmsg_t nmsg)
{

	crit_enter();
	/* Reply ASAP */
	netisr_replymsg(&nmsg->base, 0);
	crit_exit();
	ipfw_crossref_reap();
}

static void
ipfw_crossref_timeo(void *dummy __unused)
{
	struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm;

	KKASSERT(mycpuid == 0);

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

static void
ipfw_ifaddr_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ip_fw *f;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) {
		int l, cmdlen;
		ipfw_insn *cmd;

		if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0)
			continue;

		for (l = f->cmd_len, cmd = f->cmd; l > 0;
		     l -= cmdlen, cmd += cmdlen) {
			cmdlen = F_LEN(cmd);
			if (cmd->opcode == O_IP_SRC_IFIP ||
			    cmd->opcode == O_IP_DST_IFIP) {
				if (strncmp(ifp->if_xname,
				    ((ipfw_insn_ifip *)cmd)->ifname,
				    IFNAMSIZ) == 0)
					cmd->arg1 &= ~IPFW_IFIP_VALID;
			}
		}
	}
	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_ifaddr(void *arg __unused, struct ifnet *ifp,
    enum ifaddr_event event __unused, struct ifaddr *ifa __unused)
{
	struct netmsg_base nm;

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_ifaddr_dispatch);
	nm.lmsg.u.ms_resultp = ifp;
	netisr_domsg_global(&nm);
}
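/*
 * NOTE (summary, not in the original source):
 * O_IP_SRC_IFIP/O_IP_DST_IFIP instructions cache the resolved
 * interface address and mark it with IPFW_IFIP_VALID.  The handler
 * above clears that bit on every cpu whenever an address on the
 * named interface changes, forcing the next matching packet to
 * re-resolve the address.
 */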
"accept" : "deny"); 7451 7452 #ifdef IPFIREWALL_VERBOSE 7453 fw_verbose = 1; 7454 #endif 7455 #ifdef IPFIREWALL_VERBOSE_LIMIT 7456 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 7457 #endif 7458 if (fw_verbose == 0) { 7459 kprintf("disabled\n"); 7460 } else if (verbose_limit == 0) { 7461 kprintf("unlimited\n"); 7462 } else { 7463 kprintf("limited to %d packets/entry by default\n", 7464 verbose_limit); 7465 } 7466 7467 ip_fw_loaded = 1; 7468 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 7469 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz, 7470 ipfw_state_expire_ipifunc, NULL, cpu); 7471 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz, 7472 ipfw_track_expire_ipifunc, NULL, cpu); 7473 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz, 7474 ipfw_keepalive, NULL, cpu); 7475 } 7476 7477 if (fw_enable) 7478 ipfw_hook(); 7479 7480 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr, 7481 NULL, EVENTHANDLER_PRI_ANY); 7482 if (ipfw_ifaddr_event == NULL) 7483 kprintf("ipfw: ifaddr_event register failed\n"); 7484 7485 reply: 7486 netisr_replymsg(&nmsg->base, error); 7487 } 7488 7489 static int 7490 ipfw_init(void) 7491 { 7492 struct netmsg_base smsg; 7493 7494 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7495 ipfw_init_dispatch); 7496 return netisr_domsg(&smsg, 0); 7497 } 7498 7499 #ifdef KLD_MODULE 7500 7501 static void 7502 ipfw_ctx_fini_dispatch(netmsg_t nmsg) 7503 { 7504 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 7505 7506 ASSERT_NETISR_NCPUS(mycpuid); 7507 7508 callout_cancel(&ctx->ipfw_stateto_ch); 7509 callout_cancel(&ctx->ipfw_trackto_ch); 7510 callout_cancel(&ctx->ipfw_keepalive_ch); 7511 callout_cancel(&ctx->ipfw_xlatreap_ch); 7512 7513 crit_enter(); 7514 netisr_dropmsg(&ctx->ipfw_stateexp_more); 7515 netisr_dropmsg(&ctx->ipfw_stateexp_nm); 7516 netisr_dropmsg(&ctx->ipfw_trackexp_more); 7517 netisr_dropmsg(&ctx->ipfw_trackexp_nm); 7518 netisr_dropmsg(&ctx->ipfw_keepalive_more); 7519 netisr_dropmsg(&ctx->ipfw_keepalive_nm); 7520 netisr_dropmsg(&ctx->ipfw_xlatreap_nm); 7521 crit_exit(); 7522 7523 ipfw_table_flushall_oncpu(ctx, 1); 7524 7525 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7526 } 7527 7528 static void 7529 ipfw_fini_dispatch(netmsg_t nmsg) 7530 { 7531 struct netmsg_base nm; 7532 int error = 0, cpu; 7533 7534 ASSERT_NETISR0; 7535 7536 ipfw_crossref_reap(); 7537 7538 if (ipfw_gd.ipfw_refcnt != 0) { 7539 error = EBUSY; 7540 goto reply; 7541 } 7542 7543 ip_fw_loaded = 0; 7544 ipfw_dehook(); 7545 7546 /* Synchronize any inflight state/track expire IPIs. 
static void
ipfw_fini_dispatch(netmsg_t nmsg)
{
	struct netmsg_base nm;
	int error = 0, cpu;

	ASSERT_NETISR0;

	ipfw_crossref_reap();

	if (ipfw_gd.ipfw_refcnt != 0) {
		error = EBUSY;
		goto reply;
	}

	ip_fw_loaded = 0;
	ipfw_dehook();

	/* Synchronize any inflight state/track expire IPIs. */
	lwkt_synchronize_ipiqs("ipfwfini");

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_ctx_fini_dispatch);
	netisr_domsg_global(&nm);

	callout_cancel(&ipfw_gd.ipfw_crossref_ch);
	crit_enter();
	netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm);
	crit_exit();

	if (ipfw_ifaddr_event != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event);

	ip_fw_chk_ptr = NULL;
	ip_fw_ctl_ptr = NULL;
	ip_fw_dn_io_ptr = NULL;
	ipfw_flush(1 /* kill default rule */);

	/* Free per-cpu context */
	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		kfree(ipfw_ctx[cpu], M_IPFW);

	kprintf("IP firewall unloaded\n");
reply:
	netisr_replymsg(&nmsg->base, error);
}

static void
ipfw_fflush_dispatch(netmsg_t nmsg)
{

	ipfw_flush(0 /* keep default rule */);
	ipfw_crossref_reap();
	netisr_replymsg(&nmsg->base, 0);
}

static int
ipfw_fini(void)
{
	struct netmsg_base smsg;
	int i = 0;

	for (;;) {
		netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
		    ipfw_fflush_dispatch);
		netisr_domsg(&smsg, 0);

		if (ipfw_gd.ipfw_refcnt == 0)
			break;
		kprintf("ipfw: flush pending %d\n", ++i);
		tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2);
	}

	netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_fini_dispatch);
	return netisr_domsg(&smsg, 0);
}

#endif	/* KLD_MODULE */

static int
ipfw_modevent(module_t mod, int type, void *unused)
{
	int err = 0;

	switch (type) {
	case MOD_LOAD:
		err = ipfw_init();
		break;

	case MOD_UNLOAD:
#ifndef KLD_MODULE
		kprintf("ipfw statically compiled, cannot unload\n");
		err = EBUSY;
#else
		err = ipfw_fini();
#endif
		break;
	default:
		break;
	}
	return err;
}

static moduledata_t ipfwmod = {
	"ipfw",
	ipfw_modevent,
	0
};
DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY);
MODULE_VERSION(ipfw, 1);