/*
 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
 */

/*
 * Implement IP packet firewall (new version)
 */

#include "opt_ipfw.h"
#include "opt_inet.h"
#ifndef INET
#error IPFIREWALL requires INET.
#endif /* INET */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/ucred.h>
#include <sys/in_cksum.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/tree.h>

#include <net/if.h>
#include <net/route.h>
#include <net/pfil.h>
#include <net/dummynet/ip_dummynet.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>
#include <net/netmsg2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>
#include <netinet/if_ether.h>	/* XXX for ETHERTYPE_IP */

#include <net/ipfw/ip_fw2.h>

#ifdef IPFIREWALL_DEBUG
#define DPRINTF(fmt, ...) \
do { \
	if (fw_debug > 0) \
		kprintf(fmt, __VA_ARGS__); \
} while (0)
#else
#define DPRINTF(fmt, ...)	((void)0)
#endif

/*
 * Description of per-CPU rule duplication:
 *
 * Module loading/unloading and all ioctl operations are serialized
 * by netisr0, so we don't have any ordering or locking problems.
 *
 * The following graph shows how operations on the per-CPU rule lists
 * are performed [2 CPU case]:
 *
 *    CPU0                 CPU1
 *
 * netisr0  <------------------------------------+
 *  domsg                                        |
 *    :                                          |
 *    :(delete/add...)                           |
 *    :                                          |
 *    :         netmsg                           | netmsg
 *  forwardmsg---------->netisr1                 |
 *                          :                    |
 *                          :(delete/add...)     |
 *                          :                    |
 *                          :                    |
 *                       replymsg----------------+
 *
 *
 *
 * Rule structure [2 CPU case]
 *
 *    CPU0                       CPU1
 *
 * layer3_chain              layer3_chain
 *     |                         |
 *     V                         V
 * +-------+  sibling        +-------+  sibling
 * | rule1 |---------------->| rule1 |--------->NULL
 * +-------+                 +-------+
 *     |                         |
 *     |next                     |next
 *     V                         V
 * +-------+  sibling        +-------+  sibling
 * | rule2 |---------------->| rule2 |--------->NULL
 * +-------+                 +-------+
 *
 * ip_fw.sibling:
 * 1) Ease statistics calculation during IP_FW_GET.  We only need to
 *    iterate layer3_chain in netisr0; the current rule's duplicates
 *    on the other CPUs can safely be accessed read-only through
 *    ip_fw.sibling.
 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
 *    a) In netisr0 rule3 is determined to be inserted between rule1
 *       and rule2.  To make this decision we need to iterate the
 *       layer3_chain in netisr0.  The netmsg, which is used to insert
 *       the rule, will contain rule1 in netisr0 as prev_rule and rule2
 *       in netisr0 as next_rule.
 *    b) After the insertion in netisr0 is done, we will move on to
 *       netisr1.  But instead of relocating rule3's position in
 *       netisr1 by iterating the layer3_chain in netisr1, we set the
 *       netmsg's prev_rule to rule1->sibling and next_rule to
 *       rule2->sibling before the netmsg is forwarded to netisr1 from
 *       netisr0.
 */
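/*
 * Illustrative sketch (kept under #if 0, not compiled): how an
 * insertion netmsg can hop from netisr0 to the other netisrs while
 * following the sibling pointers, so that only netisr0 ever iterates
 * layer3_chain.  The netmsg_ipfw layout is the one defined below;
 * the dispatch function name and the rule duplication helper are
 * assumptions for illustration only.
 */
#if 0
static void
ipfw_add_rule_dispatch_example(netmsg_t nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ip_fw *rule;

	/* Duplicate the rule for this CPU and splice it in. */
	rule = ipfw_rule_dup_example(fwmsg->ioc_rule);	/* assumed helper */
	rule->next = fwmsg->next_rule;
	if (fwmsg->prev_rule != NULL)
		fwmsg->prev_rule->next = rule;
	else
		ipfw_ctx[mycpuid]->ipfw_layer3_chain = rule;

	/* Chain this CPU's copy to the previous CPU's copy. */
	if (fwmsg->sibling != NULL)
		fwmsg->sibling->sibling = rule;
	fwmsg->sibling = rule;

	/*
	 * Advance prev_rule/next_rule to their siblings, so the next
	 * netisr does not have to search its own layer3_chain.
	 */
	if (fwmsg->prev_rule != NULL)
		fwmsg->prev_rule = fwmsg->prev_rule->sibling;
	if (fwmsg->next_rule != NULL)
		fwmsg->next_rule = fwmsg->next_rule->sibling;

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}
#endif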
/*
 * Description of states and tracks.
 *
 * Both states and tracks are stored in per-cpu RB trees instead of
 * per-cpu hash tables to avoid the worst case hash degeneration.
 *
 * The lifetimes of states and tracks are regulated by dyn_*_lifetime,
 * measured in seconds and depending on the flags.
 *
 * When a packet is received, its address fields are first masked with
 * the mask defined for the rule, then matched against the entries in
 * the per-cpu state RB tree.  States are generated by the 'keep-state'
 * and 'limit' options.
 *
 * The max number of states is ipfw_state_max.  When we reach the
 * maximum number of states, we stop creating new ones.  This is done
 * to avoid consuming too much memory, but also too much time when
 * searching on each packet.
 *
 * Each state holds a pointer to the parent ipfw rule of the current
 * CPU so we know what action to perform.  States are removed when the
 * parent rule is deleted.  XXX we should make them survive.
 *
 * There are some limitations with states -- we do not obey the
 * 'randomized match', and we do not do multiple passes through the
 * firewall.  XXX check the latter!!!
 *
 * States grow independently on each CPU, e.g. 2 CPU case:
 *
 *        CPU0                     CPU1
 * ...................      ...................
 * :  state RB tree  :      :  state RB tree  :
 * :                 :      :                 :
 * : state1   state2 :      :     state3      :
 * :     |       |   :      :        |        :
 * :.....|.......|...:      :........|........:
 *       |       |                   |
 *       |       |                   |st_rule
 *       |       |                   |
 *       V       V                   V
 *     +-------+                 +-------+
 *     | rule1 |                 | rule1 |
 *     +-------+                 +-------+
 *
 * Tracks are used to enforce limits on the number of sessions.  Tracks
 * are generated by the 'limit' option.
 *
 * The max number of tracks is ipfw_track_max.  When we reach the
 * maximum number of tracks, we stop creating new ones.  This is done
 * to avoid consuming too much memory.
 *
 * Tracks are organized into two layers: the track counter RB tree is
 * shared between CPUs, while the track RB tree is per-cpu.  States
 * generated by the 'limit' option are linked to the track in addition
 * to the per-cpu state RB tree, mainly to ease expiration.  e.g. 2 CPU
 * case:
 *
 *             ..............................
 *             :   track counter RB tree    :
 *             :                            :
 *             :        +-----------+       :
 *             :        |  trkcnt1  |       :
 *             :        |           |       :
 *             :     +--->counter<----+     :
 *             :     |  |           | |     :
 *             :     |  +-----------+ |     :
 *             :.....|................|....:
 *                   |t_count         |t_count
 *     CPU0          |                |             CPU1
 * .................  |               |   .................
 * : track RB tree :  |               |   : track RB tree :
 * :               :  |               |   :               :
 * :       track1-----+               +-------track2      :
 * :         A     :                      :      A        :
 * :         |     :                      :      |        :
 * :.........|.....:                      :......|........:
 *           |                                   |
 *           |       ....................        |
 *           |       :  state RB tree   :        |st_track
 *           |       :                  :        |
 *           +---------state1    state2----------+
 *                   :    |         |   :
 *                   :....|.........|...:
 *                        |         |
 *                        |         |st_rule
 *                        V         V
 *                      +----------+
 *                      |  rule1   |
 *                      +----------+
 */
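/*
 * Illustrative sketch (kept under #if 0, not compiled): how a 'limit'
 * check can consult the shared track counter through t_count.  The
 * function name and the conlimit parameter are assumptions; the real
 * check lives in the state installation path later in this file.
 */
#if 0
static boolean_t
ipfw_track_withinlimit_example(struct ipfw_track *t, int conlimit)
{
	/*
	 * t->t_count points into the shared trkcnt entry, so the
	 * count covers the states created for this key on all CPUs.
	 */
	if (*t->t_count >= conlimit)
		return (FALSE);
	atomic_add_int(t->t_count, 1);
	return (TRUE);
}
#endif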
#define IPFW_AUTOINC_STEP_MIN	1
#define IPFW_AUTOINC_STEP_MAX	1000
#define IPFW_AUTOINC_STEP_DEF	100

#define IPFW_TABLE_MAX_DEF	64

#define IPFW_DEFAULT_RULE	65535	/* rulenum for the default rule */
#define IPFW_DEFAULT_SET	31	/* set number for the default rule */

#define MATCH_REVERSE		0
#define MATCH_FORWARD		1
#define MATCH_NONE		2
#define MATCH_UNKNOWN		3

#define TIME_LEQ(a, b)		((a) - (b) <= 0)

#define IPFW_STATE_TCPFLAGS	(TH_SYN | TH_FIN | TH_RST)
#define IPFW_STATE_TCPSTATES	(IPFW_STATE_TCPFLAGS |	\
				 (IPFW_STATE_TCPFLAGS << 8))

#define BOTH_SYN		(TH_SYN | (TH_SYN << 8))
#define BOTH_FIN		(TH_FIN | (TH_FIN << 8))
#define BOTH_RST		(TH_RST | (TH_RST << 8))
/* TH_ACK here means FIN was ACKed. */
#define BOTH_FINACK		(TH_ACK | (TH_ACK << 8))

#define IPFW_STATE_TCPCLOSED(s)	((s)->st_proto == IPPROTO_TCP &&	\
				 (((s)->st_state & BOTH_RST) ||		\
				  ((s)->st_state & BOTH_FINACK) == BOTH_FINACK))

#define O_ANCHOR		O_NOP

#define IPFW_ISXLAT(type)	((type) == O_REDIRECT)
#define IPFW_XLAT_INVALID(s)	(IPFW_ISXLAT((s)->st_type) &&	\
				 ((struct ipfw_xlat *)(s))->xlat_invalid)

#define IPFW_MBUF_XLATINS	FW_MBUF_PRIVATE1
#define IPFW_MBUF_XLATFWD	FW_MBUF_PRIVATE2

#define IPFW_XLATE_INSERT	0x0001
#define IPFW_XLATE_FORWARD	0x0002
#define IPFW_XLATE_OUTPUT	0x0004

struct netmsg_ipfw {
	struct netmsg_base	base;
	const struct ipfw_ioc_rule *ioc_rule;
	struct ip_fw		*next_rule;
	struct ip_fw		*prev_rule;
	struct ip_fw		*sibling;
	uint32_t		rule_flags;
	struct ip_fw		**cross_rules;
};

struct netmsg_del {
	struct netmsg_base	base;
	struct ip_fw		*start_rule;
	struct ip_fw		*prev_rule;
	uint16_t		rulenum;
	uint8_t			from_set;
	uint8_t			to_set;
};

struct netmsg_zent {
	struct netmsg_base	base;
	struct ip_fw		*start_rule;
	uint16_t		rulenum;
	uint16_t		log_only;
};

struct netmsg_cpstate {
	struct netmsg_base	base;
	struct ipfw_ioc_state	*ioc_state;
	int			state_cntmax;
	int			state_cnt;
};

struct netmsg_tblent {
	struct netmsg_base	base;
	struct sockaddr		*key;
	struct sockaddr		*netmask;
	struct ipfw_tblent	*sibling;
	int			tableid;
};

struct netmsg_tblflush {
	struct netmsg_base	base;
	int			tableid;
	int			destroy;
};

struct netmsg_tblexp {
	struct netmsg_base	base;
	time_t			expire;
	int			tableid;
	int			cnt;
	int			expcnt;
	struct radix_node_head	*rnh;
};

struct ipfw_table_cp {
	struct ipfw_ioc_tblent	*te;
	int			te_idx;
	int			te_cnt;
};

struct ip_fw_local {
	/*
	 * offset	The offset of a fragment.  offset != 0 means that
	 *	we have a fragment at this offset of an IPv4 packet.
	 *	offset == 0 means that (if this is an IPv4 packet)
	 *	this is the first or only fragment.
	 */
	u_short			offset;

	/*
	 * Local copies of addresses.  They are only valid if we have
	 * an IP packet.
	 *
	 * proto	The protocol.  Set to 0 for non-ip packets,
	 *	or to the protocol read from the packet otherwise.
	 *	proto != 0 means that we have an IPv4 packet.
	 *
	 * src_port, dst_port	port numbers, in HOST format.  Only
	 *	valid for TCP and UDP packets.
	 *
	 * src_ip, dst_ip	ip addresses, in NETWORK format.
	 *	Only valid for IPv4 packets.
	 */
	uint8_t			proto;
	uint16_t		src_port;	/* NOTE: host format */
	uint16_t		dst_port;	/* NOTE: host format */
	struct in_addr		src_ip;		/* NOTE: network format */
	struct in_addr		dst_ip;		/* NOTE: network format */
	uint16_t		ip_len;
	struct tcphdr		*tcp;
};

struct ipfw_addrs {
	uint32_t		addr1;	/* host byte order */
	uint32_t		addr2;	/* host byte order */
};

struct ipfw_ports {
	uint16_t		port1;	/* host byte order */
	uint16_t		port2;	/* host byte order */
};

struct ipfw_key {
	union {
		struct ipfw_addrs addrs;
		uint64_t	value;
	}			addr_u;
	union {
		struct ipfw_ports ports;
		uint32_t	value;
	}			port_u;
	uint8_t			proto;
	uint8_t			swap;	/* IPFW_KEY_SWAP_ */
	uint16_t		rsvd2;
};

#define IPFW_KEY_SWAP_ADDRS	0x1
#define IPFW_KEY_SWAP_PORTS	0x2
#define IPFW_KEY_SWAP_ALL	(IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS)

struct ipfw_trkcnt {
	RB_ENTRY(ipfw_trkcnt)	tc_rblink;
	struct ipfw_key		tc_key;
	uintptr_t		tc_ruleid;
	int			tc_refs;
	int			tc_count;
	time_t			tc_expire;	/* userland get-only */
	uint16_t		tc_rulenum;	/* userland get-only */
} __cachealign;

#define tc_addrs	tc_key.addr_u.value
#define tc_ports	tc_key.port_u.value
#define tc_proto	tc_key.proto
#define tc_saddr	tc_key.addr_u.addrs.addr1
#define tc_daddr	tc_key.addr_u.addrs.addr2
#define tc_sport	tc_key.port_u.ports.port1
#define tc_dport	tc_key.port_u.ports.port2

RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt);

struct ipfw_state;

struct ipfw_track {
	RB_ENTRY(ipfw_track)	t_rblink;
	struct ipfw_key		t_key;
	struct ip_fw		*t_rule;
	time_t			t_lastexp;
	LIST_HEAD(, ipfw_state)	t_state_list;
	time_t			t_expire;
	volatile int		*t_count;
	struct ipfw_trkcnt	*t_trkcnt;
	TAILQ_ENTRY(ipfw_track)	t_link;
};

#define t_addrs		t_key.addr_u.value
#define t_ports		t_key.port_u.value
#define t_proto		t_key.proto
#define t_saddr		t_key.addr_u.addrs.addr1
#define t_daddr		t_key.addr_u.addrs.addr2
#define t_sport		t_key.port_u.ports.port1
#define t_dport		t_key.port_u.ports.port2

RB_HEAD(ipfw_track_tree, ipfw_track);
TAILQ_HEAD(ipfw_track_list, ipfw_track);

struct ipfw_state {
	RB_ENTRY(ipfw_state)	st_rblink;
	struct ipfw_key		st_key;

	time_t			st_expire;	/* expire time */
	struct ip_fw		*st_rule;

	uint64_t		st_pcnt;	/* packets */
	uint64_t		st_bcnt;	/* bytes */

	/*
	 * st_state:
	 * State of this rule, typically a combination of TCP flags.
	 *
	 * st_ack_fwd/st_ack_rev:
	 * Most recent ACKs in the forward and reverse directions.
	 * They are used to generate keepalives.
	 */
	uint32_t		st_state;
	uint32_t		st_ack_fwd;	/* host byte order */
	uint32_t		st_seq_fwd;	/* host byte order */
	uint32_t		st_ack_rev;	/* host byte order */
	uint32_t		st_seq_rev;	/* host byte order */

	uint16_t		st_flags;	/* IPFW_STATE_F_ */
	uint16_t		st_type;	/* KEEP_STATE/LIMIT/RDR */
	struct ipfw_track	*st_track;

	LIST_ENTRY(ipfw_state)	st_trklink;
	TAILQ_ENTRY(ipfw_state)	st_link;
};

#define st_addrs	st_key.addr_u.value
#define st_ports	st_key.port_u.value
#define st_proto	st_key.proto
#define st_swap		st_key.swap

#define IPFW_STATE_F_ACKFWD	0x0001
#define IPFW_STATE_F_SEQFWD	0x0002
#define IPFW_STATE_F_ACKREV	0x0004
#define IPFW_STATE_F_SEQREV	0x0008
#define IPFW_STATE_F_XLATSRC	0x0010
#define IPFW_STATE_F_XLATSLAVE	0x0020
#define IPFW_STATE_F_LINKED	0x0040

#define IPFW_STATE_SCANSKIP(s)	((s)->st_type == O_ANCHOR ||	\
				 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE))

/* Expired or being deleted. */
#define IPFW_STATE_ISDEAD(s)	(TIME_LEQ((s)->st_expire, time_uptime) || \
				 IPFW_XLAT_INVALID((s)))

TAILQ_HEAD(ipfw_state_list, ipfw_state);
RB_HEAD(ipfw_state_tree, ipfw_state);

struct ipfw_xlat {
	struct ipfw_state	xlat_st;	/* MUST be the first field */
	uint32_t		xlat_addr;	/* network byte order */
	uint16_t		xlat_port;	/* network byte order */
	uint16_t		xlat_dir;	/* MATCH_ */
	struct ifnet		*xlat_ifp;	/* matching ifnet */
	struct ipfw_xlat	*xlat_pair;	/* paired state */
	int			xlat_pcpu;	/* paired cpu */
	volatile int		xlat_invalid;	/* invalid, but not dtor yet */
	volatile uint64_t	xlat_crefs;	/* cross references */
	struct netmsg_base	xlat_freenm;	/* for remote free */
};

#define xlat_type	xlat_st.st_type
#define xlat_flags	xlat_st.st_flags
#define xlat_rule	xlat_st.st_rule
#define xlat_bcnt	xlat_st.st_bcnt
#define xlat_pcnt	xlat_st.st_pcnt

struct ipfw_tblent {
	struct radix_node	te_nodes[2];
	struct sockaddr_in	te_key;
	u_long			te_use;
	time_t			te_lastuse;
	struct ipfw_tblent	*te_sibling;
	volatile int		te_expired;
};

struct ipfw_context {
	struct ip_fw		*ipfw_layer3_chain;	/* rules for layer3 */
	struct ip_fw		*ipfw_default_rule;	/* default rule */
	uint64_t		ipfw_norule_counter;	/* ipfw_log(NULL) stat */

	/*
	 * ipfw_set_disable contains one bit per set value (0..31).
	 * If the bit is set, all rules with the corresponding set
	 * are disabled.  Set IPFW_DEFAULT_SET is reserved for the
	 * default rule and CANNOT be disabled.
	 */
	uint32_t		ipfw_set_disable;

	uint8_t			ipfw_flags;	/* IPFW_FLAG_ */

	struct ip_fw		*ipfw_cont_rule;
	struct ipfw_xlat	*ipfw_cont_xlat;

	struct ipfw_state_tree	ipfw_state_tree;
	struct ipfw_state_list	ipfw_state_list;
	int			ipfw_state_loosecnt;
	int			ipfw_state_cnt;

	union {
		struct ipfw_state state;
		struct ipfw_track track;
		struct ipfw_trkcnt trkcnt;
	}			ipfw_tmpkey;

	struct ipfw_track_tree	ipfw_track_tree;
	struct ipfw_track_list	ipfw_track_list;
	struct ipfw_trkcnt	*ipfw_trkcnt_spare;

	struct callout		ipfw_stateto_ch;
	time_t			ipfw_state_lastexp;
	struct netmsg_base	ipfw_stateexp_nm;
	struct netmsg_base	ipfw_stateexp_more;
	struct ipfw_state	ipfw_stateexp_anch;

	struct callout		ipfw_trackto_ch;
	time_t			ipfw_track_lastexp;
	struct netmsg_base	ipfw_trackexp_nm;
	struct netmsg_base	ipfw_trackexp_more;
	struct ipfw_track	ipfw_trackexp_anch;

	struct callout		ipfw_keepalive_ch;
	struct netmsg_base	ipfw_keepalive_nm;
	struct netmsg_base	ipfw_keepalive_more;
	struct ipfw_state	ipfw_keepalive_anch;

	struct callout		ipfw_xlatreap_ch;
	struct netmsg_base	ipfw_xlatreap_nm;
	struct ipfw_state_list	ipfw_xlatreap;

	/*
	 * Statistics
	 */
	u_long			ipfw_sts_reap;
	u_long			ipfw_sts_reapfailed;
	u_long			ipfw_sts_overflow;
	u_long			ipfw_sts_nomem;
	u_long			ipfw_sts_tcprecycled;

	u_long			ipfw_tks_nomem;
	u_long			ipfw_tks_reap;
	u_long			ipfw_tks_reapfailed;
	u_long			ipfw_tks_overflow;
	u_long			ipfw_tks_cntnomem;

	u_long			ipfw_frags;
	u_long			ipfw_defraged;
	u_long			ipfw_defrag_remote;

	u_long			ipfw_xlated;
	u_long			ipfw_xlate_split;
	u_long			ipfw_xlate_conflicts;
	u_long			ipfw_xlate_cresolved;

	/* Last field */
	struct radix_node_head	*ipfw_tables[];
};

#define IPFW_FLAG_KEEPALIVE	0x01
#define IPFW_FLAG_STATEEXP	0x02
#define IPFW_FLAG_TRACKEXP	0x04
#define IPFW_FLAG_STATEREAP	0x08
#define IPFW_FLAG_TRACKREAP	0x10

#define ipfw_state_tmpkey	ipfw_tmpkey.state
#define ipfw_track_tmpkey	ipfw_tmpkey.track
#define ipfw_trkcnt_tmpkey	ipfw_tmpkey.trkcnt
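/*
 * Illustrative sketch (kept under #if 0, not compiled): testing
 * whether a rule is disabled through ipfw_set_disable, one bit per
 * set.  Bit IPFW_DEFAULT_SET can never be set, so the default rule
 * is always evaluated.  The function name is an assumption.
 */
#if 0
static __inline int
ipfw_rule_disabled_example(const struct ipfw_context *ctx,
    const struct ip_fw *rule)
{
	return ((ctx->ipfw_set_disable & (1 << rule->set)) != 0);
}
#endif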
struct ipfw_global {
	int			ipfw_state_loosecnt;	/* cache aligned */
	time_t			ipfw_state_globexp __cachealign;

	struct lwkt_token	ipfw_trkcnt_token __cachealign;
	struct ipfw_trkcnt_tree	ipfw_trkcnt_tree;
	int			ipfw_trkcnt_cnt;
	time_t			ipfw_track_globexp;

	/* Accessed in netisr0. */
	struct ip_fw		*ipfw_crossref_free __cachealign;
	struct callout		ipfw_crossref_ch;
	struct netmsg_base	ipfw_crossref_nm;

#ifdef KLD_MODULE
	/*
	 * The module cannot be unloaded if there are references
	 * to certain rules of ipfw(4), e.g. from dummynet(4).
	 */
	int			ipfw_refcnt __cachealign;
#endif
} __cachealign;

static struct ipfw_context	*ipfw_ctx[MAXCPU];

MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");

/*
 * The following two global variables are accessed and updated only
 * in netisr0.
 */
static uint32_t static_count;	/* # of static rules */
static uint32_t static_ioc_len;	/* bytes of static rules */

/*
 * If 1, then ipfw static rules are being flushed,
 * ipfw_chk() will skip to the default rule.
 */
static int ipfw_flushing;

static int fw_verbose;
static int verbose_limit;

static int fw_debug;
static int autoinc_step = IPFW_AUTOINC_STEP_DEF;

static int	ipfw_table_max = IPFW_TABLE_MAX_DEF;

static int	ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);

TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max);

SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0,
    "Firewall statistics");

SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
    &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
    &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
    "Rule number auto-increment step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_RW,
    &fw_one_pass, 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
    &fw_debug, 0, "Enable printing of debug ip_fw statements");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
    &fw_verbose, 0, "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
    &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD,
    &ipfw_table_max, 0, "Max # of tables");

static int	ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS);
static int	ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS);

/*
 * Timeouts for various events in handling states.
 *
 * NOTE:
 * 1 == 0~1 second.
 * 2 == 1~2 second(s).
 *
 * We use 2 seconds for the FIN lifetime, so that the states will
 * not be reaped prematurely.
 */
static uint32_t dyn_ack_lifetime = 300;
static uint32_t dyn_syn_lifetime = 20;
static uint32_t dyn_finwait_lifetime = 20;
static uint32_t dyn_fin_lifetime = 2;
static uint32_t dyn_rst_lifetime = 2;
static uint32_t dyn_udp_lifetime = 10;
static uint32_t dyn_short_lifetime = 5;	/* used by tracks too */

/*
 * Keepalives are sent if dyn_keepalive is set.  They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of the lifetime of a rule.
 */
static uint32_t dyn_keepalive_interval = 20;
static uint32_t dyn_keepalive_period = 5;
static uint32_t dyn_keepalive = 1;	/* do send keepalives */

static struct ipfw_global	ipfw_gd;
static int	ipfw_state_loosecnt_updthr;
static int	ipfw_state_max = 4096;	/* max # of states */
static int	ipfw_track_max = 4096;	/* max # of tracks */

static int	ipfw_state_headroom;	/* setup at module load time */
static int	ipfw_state_reap_min = 8;
static int	ipfw_state_expire_max = 32;
static int	ipfw_state_scan_max = 256;
static int	ipfw_keepalive_max = 8;
static int	ipfw_track_reap_max = 4;
static int	ipfw_track_expire_max = 16;
static int	ipfw_track_scan_max = 128;

static eventhandler_tag ipfw_ifaddr_event;

/* Compat */
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count,
    CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I",
    "Number of states and tracks");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max,
    CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I",
    "Max number of states and tracks");

SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt,
    CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I",
    "Number of states");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max,
    CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I",
    "Max number of states");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW,
    &ipfw_state_headroom, 0, "headroom for state reap");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD,
    &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW,
    &ipfw_track_max, 0, "Max number of tracks");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
    &static_count, 0, "Number of static rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
    &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
    &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW,
    &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW,
    &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW,
    &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
    &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
    &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
    &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt,
    "I", "# of states to scan for each expire iteration");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt,
    "I", "# of states to expire for each expire iteration");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt,
    "I", "# of keepalives to send for each expire iteration");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt,
    "I", "# of states to reap for state shortage");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt,
    "I", "# of tracks to scan for each expire iteration");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt,
    "I", "# of tracks to expire for each expire iteration");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max,
    CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt,
    "I", "# of tracks to reap for track shortage");

SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat,
    "LU", "# of state reaps due to states shortage");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat,
    "LU", "# of state reap failures");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat,
    "LU", "# of state overflows");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat,
    "LU", "# of state allocation failures");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat,
    "LU", "# of states deleted due to fast TCP port recycling");

SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat,
    "LU", "# of track allocation failures");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat,
    "LU", "# of track reaps due to tracks shortage");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat,
    "LU", "# of track reap failures");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat,
    "LU", "# of track overflows");
SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem,
    CTLTYPE_ULONG | CTLFLAG_RW, NULL,
    __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat,
    "LU", "# of track counter allocation failures");
counter allocation failure"); 854 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags, 855 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 856 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat, 857 "LU", "# of IP fragements defraged"); 858 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged, 859 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 860 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat, 861 "LU", "# of IP packets after defrag"); 862 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote, 863 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 864 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat, 865 "LU", "# of IP packets after defrag dispatched to remote cpus"); 866 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated, 867 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 868 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat, 869 "LU", "# address/port translations"); 870 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split, 871 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 872 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat, 873 "LU", "# address/port translations split between different cpus"); 874 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts, 875 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 876 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat, 877 "LU", "# address/port translations conflicts on remote cpu"); 878 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved, 879 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 880 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat, 881 "LU", "# address/port translations conflicts resolved on remote cpu"); 882 883 static int ipfw_state_cmp(struct ipfw_state *, 884 struct ipfw_state *); 885 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *, 886 struct ipfw_trkcnt *); 887 static int ipfw_track_cmp(struct ipfw_track *, 888 struct ipfw_track *); 889 890 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 891 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 892 893 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 894 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 895 896 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 897 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 898 899 static int ipfw_chk(struct ip_fw_args *); 900 static void ipfw_track_expire_ipifunc(void *); 901 static void ipfw_state_expire_ipifunc(void *); 902 static void ipfw_keepalive(void *); 903 static int ipfw_state_expire_start(struct ipfw_context *, 904 int, int); 905 static void ipfw_crossref_timeo(void *); 906 static void ipfw_state_remove(struct ipfw_context *, 907 struct ipfw_state *); 908 static void ipfw_xlat_reap_timeo(void *); 909 static void ipfw_defrag_redispatch(struct mbuf *, int, 910 struct ip_fw *); 911 912 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token) 913 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token) 914 #define IPFW_TRKCNT_TOKINIT \ 915 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); 916 917 static void 918 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 919 const struct sockaddr *netmask) 920 { 921 const u_char *cp1 = (const u_char *)src; 922 u_char *cp2 = (u_char *)dst; 923 const u_char *cp3 = (const u_char *)netmask; 924 u_char *cplim = cp2 + *cp3; 925 u_char *cplim2 = cp2 + *cp1; 926 927 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 928 cp3 += 2; 929 if (cplim > cplim2) 930 cplim = 
static __inline void
ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport,
    in_addr_t daddr, uint16_t dport, uint8_t proto)
{

	key->proto = proto;
	key->swap = 0;

	if (saddr < daddr) {
		key->addr_u.addrs.addr1 = daddr;
		key->addr_u.addrs.addr2 = saddr;
		key->swap |= IPFW_KEY_SWAP_ADDRS;
	} else {
		key->addr_u.addrs.addr1 = saddr;
		key->addr_u.addrs.addr2 = daddr;
	}

	if (sport < dport) {
		key->port_u.ports.port1 = dport;
		key->port_u.ports.port2 = sport;
		key->swap |= IPFW_KEY_SWAP_PORTS;
	} else {
		key->port_u.ports.port1 = sport;
		key->port_u.ports.port2 = dport;
	}

	if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS))
		key->swap |= IPFW_KEY_SWAP_PORTS;
	if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS))
		key->swap |= IPFW_KEY_SWAP_ADDRS;
}

static __inline void
ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport,
    in_addr_t *daddr, uint16_t *dport)
{

	if (key->swap & IPFW_KEY_SWAP_ADDRS) {
		*saddr = key->addr_u.addrs.addr2;
		*daddr = key->addr_u.addrs.addr1;
	} else {
		*saddr = key->addr_u.addrs.addr1;
		*daddr = key->addr_u.addrs.addr2;
	}

	if (key->swap & IPFW_KEY_SWAP_PORTS) {
		*sport = key->port_u.ports.port2;
		*dport = key->port_u.ports.port1;
	} else {
		*sport = key->port_u.ports.port1;
		*dport = key->port_u.ports.port2;
	}
}
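/*
 * Illustrative sketch (kept under #if 0, not compiled): both
 * directions of a flow build the same canonical key; only the swap
 * bits differ.  This is why ipfw_state_cmp() below can match the
 * reply direction with the very same RB lookup.  The addresses and
 * ports are arbitrary example values.
 */
#if 0
static void
ipfw_key_build_example(void)
{
	struct ipfw_key fwd, rev;

	/* 10.0.0.1:12345 -> 10.0.0.2:80 */
	ipfw_key_build(&fwd, htonl(0x0a000001), 12345,
	    htonl(0x0a000002), 80, IPPROTO_TCP);
	/* 10.0.0.2:80 -> 10.0.0.1:12345, i.e. the reply direction */
	ipfw_key_build(&rev, htonl(0x0a000002), 80,
	    htonl(0x0a000001), 12345, IPPROTO_TCP);

	/* Same canonicalized addresses and ports ... */
	KKASSERT(fwd.addr_u.value == rev.addr_u.value);
	KKASSERT(fwd.port_u.value == rev.port_u.value);
	/* ... opposite swap bits, which encode the direction. */
	KKASSERT((fwd.swap ^ rev.swap) == IPFW_KEY_SWAP_ALL);
}
#endif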
static int
ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2)
{

	if (s1->st_proto > s2->st_proto)
		return (1);
	if (s1->st_proto < s2->st_proto)
		return (-1);

	if (s1->st_addrs > s2->st_addrs)
		return (1);
	if (s1->st_addrs < s2->st_addrs)
		return (-1);

	if (s1->st_ports > s2->st_ports)
		return (1);
	if (s1->st_ports < s2->st_ports)
		return (-1);

	if (s1->st_swap == s2->st_swap ||
	    (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL)
		return (0);

	if (s1->st_swap > s2->st_swap)
		return (1);
	else
		return (-1);
}

static int
ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2)
{

	if (t1->tc_proto > t2->tc_proto)
		return (1);
	if (t1->tc_proto < t2->tc_proto)
		return (-1);

	if (t1->tc_addrs > t2->tc_addrs)
		return (1);
	if (t1->tc_addrs < t2->tc_addrs)
		return (-1);

	if (t1->tc_ports > t2->tc_ports)
		return (1);
	if (t1->tc_ports < t2->tc_ports)
		return (-1);

	if (t1->tc_ruleid > t2->tc_ruleid)
		return (1);
	if (t1->tc_ruleid < t2->tc_ruleid)
		return (-1);

	return (0);
}

static int
ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2)
{

	if (t1->t_proto > t2->t_proto)
		return (1);
	if (t1->t_proto < t2->t_proto)
		return (-1);

	if (t1->t_addrs > t2->t_addrs)
		return (1);
	if (t1->t_addrs < t2->t_addrs)
		return (-1);

	if (t1->t_ports > t2->t_ports)
		return (1);
	if (t1->t_ports < t2->t_ports)
		return (-1);

	if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule)
		return (1);
	if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule)
		return (-1);

	return (0);
}

static __inline struct ipfw_state *
ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s)
{
	struct ipfw_state *dup;

	KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0,
	    ("state %p was linked", s));
	dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s);
	if (dup == NULL) {
		TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link);
		s->st_flags |= IPFW_STATE_F_LINKED;
	}
	return (dup);
}

static __inline void
ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s)
{

	KASSERT(s->st_flags & IPFW_STATE_F_LINKED,
	    ("state %p was not linked", s));
	RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s);
	TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link);
	s->st_flags &= ~IPFW_STATE_F_LINKED;
}

static void
ipfw_state_max_set(int state_max)
{

	ipfw_state_max = state_max;
	/* Allow 5% states over-allocation. */
	ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus;
}

static __inline int
ipfw_state_cntcoll(void)
{
	int cpu, state_cnt = 0;

	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt;
	return (state_cnt);
}

static __inline int
ipfw_state_cntsync(void)
{
	int state_cnt;

	state_cnt = ipfw_state_cntcoll();
	ipfw_gd.ipfw_state_loosecnt = state_cnt;
	return (state_cnt);
}

static __inline int
ipfw_free_rule(struct ip_fw *rule)
{
	KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid));
	KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt));
	rule->refcnt--;
	if (rule->refcnt == 0) {
		if (rule->cross_rules != NULL)
			kfree(rule->cross_rules, M_IPFW);
		kfree(rule, M_IPFW);
		return 1;
	}
	return 0;
}

static void
ipfw_unref_rule(void *priv)
{
	ipfw_free_rule(priv);
#ifdef KLD_MODULE
	KASSERT(ipfw_gd.ipfw_refcnt > 0,
	    ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt));
	atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
}

static __inline void
ipfw_ref_rule(struct ip_fw *rule)
{
	KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid));
#ifdef KLD_MODULE
	atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
	rule->refcnt++;
}

/*
 * This macro maps an ip pointer into a layer3 header pointer of type T
 */
#define L3HDR(T, ip)	((T *)((uint32_t *)(ip) + (ip)->ip_hl))

static __inline int
icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
{
	int type = L3HDR(struct icmp, ip)->icmp_type;
	int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn);
	int idx = type / 32;

	if (idx >= idx_max)
		return (0);
	return (cmd->d[idx] & (1 << (type % 32)));
}

static __inline int
icmpcode_match(struct ip *ip, ipfw_insn_u32 *cmd)
{
	int code = L3HDR(struct icmp, ip)->icmp_code;
	int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn);
	int idx = code / 32;

	if (idx >= idx_max)
		return (0);
	return (cmd->d[idx] & (1 << (code % 32)));
}
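/*
 * Illustrative sketch (kept under #if 0, not compiled): how an
 * O_ICMPTYPE instruction encodes its set of types for
 * icmptype_match() above.  Each cmd->d[] word holds 32 types, so an
 * "icmptypes 0,8" match (echo reply/request) sets bits 0 and 8 of
 * d[0].
 */
#if 0
static void
icmptype_bitmap_example(ipfw_insn_u32 *cmd)
{
	cmd->o.opcode = O_ICMPTYPE;
	cmd->o.len = F_INSN_SIZE(ipfw_insn_u32);	/* one d[] word */
	cmd->d[0] = (1 << ICMP_ECHOREPLY) | (1 << ICMP_ECHO);
}
#endif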
#define TT	((1 << ICMP_ECHO) | \
		 (1 << ICMP_ROUTERSOLICIT) | \
		 (1 << ICMP_TSTAMP) | \
		 (1 << ICMP_IREQ) | \
		 (1 << ICMP_MASKREQ))

static int
is_icmp_query(struct ip *ip)
{
	int type = L3HDR(struct icmp, ip)->icmp_type;

	return (type < 32 && (TT & (1 << type)));
}

#undef TT

/*
 * The following checks use two arrays of 8 or 16 bits to store the
 * bits that we want set or clear, respectively.  They are in the
 * low and high half of cmd->arg1 or cmd->d[0].
 *
 * We scan options and store the bits we find set.  We succeed if
 *
 *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
 *
 * The code is sometimes optimized not to store additional variables.
 */
static int
flags_match(ipfw_insn *cmd, uint8_t bits)
{
	u_char want_clear;

	bits = ~bits;

	if (((cmd->arg1 & 0xff) & bits) != 0)
		return 0;	/* some bits we want set were clear */

	want_clear = (cmd->arg1 >> 8) & 0xff;
	if ((want_clear & bits) != want_clear)
		return 0;	/* some bits we want clear were set */
	return 1;
}

static int
ipopts_match(struct ip *ip, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	u_char *cp = (u_char *)(ip + 1);
	int x = (ip->ip_hl << 2) - sizeof(struct ip);

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[IPOPT_OPTVAL];

		if (opt == IPOPT_EOL)
			break;

		if (opt == IPOPT_NOP) {
			optlen = 1;
		} else {
			optlen = cp[IPOPT_OLEN];
			if (optlen <= 0 || optlen > x)
				return 0;	/* invalid or truncated */
		}

		switch (opt) {
		case IPOPT_LSRR:
			bits |= IP_FW_IPOPT_LSRR;
			break;

		case IPOPT_SSRR:
			bits |= IP_FW_IPOPT_SSRR;
			break;

		case IPOPT_RR:
			bits |= IP_FW_IPOPT_RR;
			break;

		case IPOPT_TS:
			bits |= IP_FW_IPOPT_TS;
			break;

		default:
			break;
		}
	}
	return (flags_match(cmd, bits));
}

static int
tcpopts_match(struct ip *ip, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	struct tcphdr *tcp = L3HDR(struct tcphdr, ip);
	u_char *cp = (u_char *)(tcp + 1);
	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[0];

		if (opt == TCPOPT_EOL)
			break;

		if (opt == TCPOPT_NOP) {
			optlen = 1;
		} else {
			optlen = cp[1];
			if (optlen <= 0)
				break;
		}

		switch (opt) {
		case TCPOPT_MAXSEG:
			bits |= IP_FW_TCPOPT_MSS;
			break;

		case TCPOPT_WINDOW:
			bits |= IP_FW_TCPOPT_WINDOW;
			break;

		case TCPOPT_SACK_PERMITTED:
		case TCPOPT_SACK:
			bits |= IP_FW_TCPOPT_SACK;
			break;

		case TCPOPT_TIMESTAMP:
			bits |= IP_FW_TCPOPT_TS;
			break;

		case TCPOPT_CC:
		case TCPOPT_CCNEW:
		case TCPOPT_CCECHO:
			bits |= IP_FW_TCPOPT_CC;
			break;

		default:
			break;
		}
	}
	return (flags_match(cmd, bits));
}
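/*
 * Illustrative sketch (kept under #if 0, not compiled): how "want
 * set" / "want clear" bits are packed into cmd->arg1 for
 * flags_match().  A "setup" TCP match (SYN set, ACK clear), for
 * example, would be encoded as below.
 */
#if 0
static void
tcpflags_setup_example(ipfw_insn *cmd)
{
	cmd->opcode = O_TCPFLAGS;
	/* Low byte: must be set; high byte: must be clear. */
	cmd->arg1 = TH_SYN | (TH_ACK << 8);
}
#endif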
static int
iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
{
	if (ifp == NULL)	/* no iface with this packet, match fails */
		return 0;

	/* Check by name or by IP address */
	if (cmd->name[0] != '\0') {	/* match by name */
		/* Check name */
		if (cmd->p.glob) {
			if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
				return(1);
		} else {
			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
				return(1);
		}
	} else {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ia = ifac->ifa;

			if (ia->ifa_addr == NULL)
				continue;
			if (ia->ifa_addr->sa_family != AF_INET)
				continue;
			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
			    (ia->ifa_addr))->sin_addr.s_addr)
				return(1);	/* match */
		}
	}
	return(0);	/* no match, fail ... */
}

#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0

/*
 * We enter here when we have a rule with O_LOG.
 * XXX this function alone takes about 2Kbytes of code!
 */
static void
ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen,
    struct ether_header *eh, struct mbuf *m, struct ifnet *oif)
{
	char *action;
	int limit_reached = 0;
	char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN];

	fragment[0] = '\0';
	proto[0] = '\0';

	if (f == NULL) {	/* bogus pkt */
		if (verbose_limit != 0 &&
		    ctx->ipfw_norule_counter >= verbose_limit)
			return;
		ctx->ipfw_norule_counter++;
		if (ctx->ipfw_norule_counter == verbose_limit)
			limit_reached = verbose_limit;
		action = "Refuse";
	} else {	/* O_LOG is the first action, find the real one */
		ipfw_insn *cmd = ACTION_PTR(f);
		ipfw_insn_log *l = (ipfw_insn_log *)cmd;

		if (l->max_log != 0 && l->log_left == 0)
			return;
		l->log_left--;
		if (l->log_left == 0)
			limit_reached = l->max_log;
		cmd += F_LEN(cmd);	/* point to first action */
		if (cmd->opcode == O_PROB)
			cmd += F_LEN(cmd);

		action = action2;
		switch (cmd->opcode) {
		case O_DENY:
			action = "Deny";
			break;

		case O_REJECT:
			if (cmd->arg1 == ICMP_REJECT_RST) {
				action = "Reset";
			} else if (cmd->arg1 == ICMP_UNREACH_HOST) {
				action = "Reject";
			} else {
				ksnprintf(SNPARGS(action2, 0), "Unreach %d",
					  cmd->arg1);
			}
			break;

		case O_ACCEPT:
			action = "Accept";
			break;

		case O_COUNT:
			action = "Count";
			break;

		case O_DIVERT:
			ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
			break;

		case O_TEE:
			ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
			break;

		case O_SKIPTO:
			ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
			break;

		case O_PIPE:
			ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
			break;

		case O_QUEUE:
			ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
			break;

		case O_FORWARD_IP:
		{
			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
			int len;

			len = ksnprintf(SNPARGS(action2, 0),
					"Forward to %s",
					kinet_ntoa(sa->sa.sin_addr, abuf));
			if (sa->sa.sin_port) {
				ksnprintf(SNPARGS(action2, len), ":%d",
					  sa->sa.sin_port);
			}
		}
			break;

		default:
			action = "UNKNOWN";
			break;
		}
	}

	if (hlen == 0) {	/* non-ip */
		ksnprintf(SNPARGS(proto, 0), "MAC");
	} else {
		struct ip *ip = mtod(m, struct ip *);
		/* these three are all aliases to the same thing */
		struct icmp *const icmp = L3HDR(struct icmp, ip);
		struct tcphdr *const tcp = (struct tcphdr *)icmp;
		struct udphdr *const udp = (struct udphdr *)icmp;

		int ip_off, offset, ip_len;
		int len;

		if (eh != NULL) {	/* layer 2 packets are as on the wire */
			ip_off = ntohs(ip->ip_off);
			ip_len = ntohs(ip->ip_len);
		} else {
			ip_off = ip->ip_off;
			ip_len = ip->ip_len;
		}
		offset = ip_off & IP_OFFMASK;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
					kinet_ntoa(ip->ip_src, abuf));
			if (offset == 0) {
				ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
					  ntohs(tcp->th_sport),
					  kinet_ntoa(ip->ip_dst, abuf),
					  ntohs(tcp->th_dport));
			} else {
				ksnprintf(SNPARGS(proto, len), " %s",
					  kinet_ntoa(ip->ip_dst, abuf));
			}
			break;

		case IPPROTO_UDP:
			len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
					kinet_ntoa(ip->ip_src, abuf));
			if (offset == 0) {
				ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
					  ntohs(udp->uh_sport),
					  kinet_ntoa(ip->ip_dst, abuf),
					  ntohs(udp->uh_dport));
			} else {
				ksnprintf(SNPARGS(proto, len), " %s",
					  kinet_ntoa(ip->ip_dst, abuf));
			}
			break;

		case IPPROTO_ICMP:
			if (offset == 0) {
				len = ksnprintf(SNPARGS(proto, 0),
						"ICMP:%u.%u ",
						icmp->icmp_type,
						icmp->icmp_code);
			} else {
				len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
			}
			len += ksnprintf(SNPARGS(proto, len), "%s",
					 kinet_ntoa(ip->ip_src, abuf));
			ksnprintf(SNPARGS(proto, len), " %s",
				  kinet_ntoa(ip->ip_dst, abuf));
			break;

		default:
			len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
					kinet_ntoa(ip->ip_src, abuf));
			ksnprintf(SNPARGS(proto, len), " %s",
				  kinet_ntoa(ip->ip_dst, abuf));
			break;
		}

		if (ip_off & (IP_MF | IP_OFFMASK)) {
			ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
				  ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
				  offset << 3, (ip_off & IP_MF) ? "+" : "");
		}
	}

	if (oif || m->m_pkthdr.rcvif) {
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s %s via %s%s\n",
		    f ? f->rulenum : -1,
		    action, proto, oif ? "out" : "in",
		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
		    fragment);
	} else {
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s [no if info]%s\n",
		    f ? f->rulenum : -1,
		    action, proto, fragment);
	}

	if (limit_reached) {
		log(LOG_SECURITY | LOG_NOTICE,
		    "ipfw: limit %d reached on entry %d\n",
		    limit_reached, f ? f->rulenum : -1);
	}
}

#undef SNPARGS

static void
ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x)
{
	struct ip_fw *rule = slave_x->xlat_rule;

	KKASSERT(rule->cpuid == mycpuid);

	/* No more cross references; free this pair now. */
	kfree(x, M_IPFW);
	kfree(slave_x, M_IPFW);

	/* See the comment in ipfw_ip_xlate_dispatch(). */
	rule->cross_refs--;
}

static void
ipfw_xlat_reap_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_state *s, *ns;

	ASSERT_NETISR_NCPUS(mycpuid);

	crit_enter();
	/* Reply ASAP. */
	netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0);
	crit_exit();

	/* TODO: limit scanning depth */
	TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) {
		struct ipfw_xlat *x = (struct ipfw_xlat *)s;
		struct ipfw_xlat *slave_x = x->xlat_pair;
		uint64_t crefs;

		crefs = slave_x->xlat_crefs + x->xlat_crefs;
		if (crefs == 0) {
			TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
			ipfw_xlat_reap(x, slave_x);
		}
	}
	if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
		callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
		    &ctx->ipfw_xlatreap_nm);
	}
}

static void
ipfw_xlat_reap_timeo(void *xnm)
{
	struct netmsg_base *nm = xnm;

	KKASSERT(mycpuid < netisr_ncpus);

	crit_enter();
	if (nm->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(nm);
	crit_exit();
}

static void
ipfw_xlat_free_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp;
	struct ipfw_xlat *slave_x = x->xlat_pair;
	uint64_t crefs;

	ASSERT_NETISR_NCPUS(mycpuid);

	KKASSERT(slave_x != NULL);
	KKASSERT(slave_x->xlat_invalid && x->xlat_invalid);

	KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0,
	    ("master xlat is still linked"));
	if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
		ipfw_state_unlink(ctx, &slave_x->xlat_st);

	/* See the comment in ipfw_ip_xlate_dispatch(). */
	slave_x->xlat_crefs--;

	crefs = slave_x->xlat_crefs + x->xlat_crefs;
	if (crefs == 0) {
		ipfw_xlat_reap(x, slave_x);
		return;
	}

	if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) {
		callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo,
		    &ctx->ipfw_xlatreap_nm);
	}

	/*
	 * This pair is still referenced; defer its destruction.
	 * YYY reuse st_link.
	 */
	TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link);
}

static __inline void
ipfw_xlat_invalidate(struct ipfw_xlat *x)
{

	x->xlat_invalid = 1;
	x->xlat_pair->xlat_invalid = 1;
}

static void
ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s)
{
	struct ipfw_xlat *x, *slave_x;
	struct netmsg_base *nm;

	KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT ||
	    IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type));
	KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0,
	    ("delete slave xlat"));

	KASSERT(ctx->ipfw_state_cnt > 0,
	    ("invalid state count %d", ctx->ipfw_state_cnt));
	ctx->ipfw_state_cnt--;
	if (ctx->ipfw_state_loosecnt > 0)
		ctx->ipfw_state_loosecnt--;

	/*
	 * Unhook this state.
	 */
	if (s->st_track != NULL) {
		struct ipfw_track *t = s->st_track;

		KASSERT(!LIST_EMPTY(&t->t_state_list),
		    ("track state list is empty"));
		LIST_REMOVE(s, st_trklink);

		KASSERT(*t->t_count > 0,
		    ("invalid track count %d", *t->t_count));
		atomic_subtract_int(t->t_count, 1);
	}
	ipfw_state_unlink(ctx, s);

	/*
	 * Free this state.  Xlats require special processing, since
	 * xlats are paired states and the pair could be on different
	 * cpus.
	 */

	if (!IPFW_ISXLAT(s->st_type)) {
		/* Not xlat; free now. */
		kfree(s, M_IPFW);
		/* Done! */
		return;
	}
	x = (struct ipfw_xlat *)s;

	if (x->xlat_pair == NULL) {
		/* Not setup yet; free now. */
		kfree(x, M_IPFW);
		/* Done! */
		return;
	}
	slave_x = x->xlat_pair;
	KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE);

	if (x->xlat_pcpu == mycpuid) {
		/*
		 * Paired states are on the same cpu; delete this
		 * pair now.
		 */
		KKASSERT(x->xlat_crefs == 0);
		KKASSERT(slave_x->xlat_crefs == 0);
		if (slave_x->xlat_flags & IPFW_STATE_F_LINKED)
			ipfw_state_unlink(ctx, &slave_x->xlat_st);
		kfree(x, M_IPFW);
		kfree(slave_x, M_IPFW);
		return;
	}

	/*
	 * Free the paired states on the cpu owning the slave xlat.
	 */

	/*
	 * Mark the state pair invalid; completely deleting them
	 * may take some time.
	 */
	ipfw_xlat_invalidate(x);

	nm = &x->xlat_freenm;
	netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY,
	    ipfw_xlat_free_dispatch);
	nm->lmsg.u.ms_resultp = x;

	/* See the comment in ipfw_xlate_redispatch(). */
	x->xlat_rule->cross_refs++;
	x->xlat_crefs++;

	netisr_sendmsg(nm, x->xlat_pcpu);
}

static void
ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s)
{

	if (s->st_flags & IPFW_STATE_F_XLATSLAVE) {
		KKASSERT(IPFW_ISXLAT(s->st_type));
		ipfw_xlat_invalidate((struct ipfw_xlat *)s);
		ipfw_state_unlink(ctx, s);
		return;
	}
	ipfw_state_del(ctx, s);
}

static int
ipfw_state_reap(struct ipfw_context *ctx, int reap_max)
{
	struct ipfw_state *s, *anchor;
	int expired;

	if (reap_max < ipfw_state_reap_min)
		reap_max = ipfw_state_reap_min;

	if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) {
		/*
		 * Kick start state expiring.  Ignore scan limit,
		 * we are short of states.
		 */
		ctx->ipfw_flags |= IPFW_FLAG_STATEREAP;
		expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max);
		ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP;
		return (expired);
	}

	/*
	 * States are being expired.
	 */

	if (ctx->ipfw_state_cnt == 0)
		return (0);

	expired = 0;
	anchor = &ctx->ipfw_stateexp_anch;
	while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
		/*
		 * Ignore scan limit; we are short of states.
		 */

		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

		if (IPFW_STATE_SCANSKIP(s))
			continue;

		if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) {
			ipfw_state_del(ctx, s);
			if (++expired >= reap_max)
				break;
			if ((expired & 0xff) == 0 &&
			    ipfw_state_cntcoll() + ipfw_state_headroom <=
			    ipfw_state_max)
				break;
		}
	}
	/*
	 * NOTE:
	 * Leave the anchor on the list, even if the end of the list has
	 * been reached.  ipfw_state_expire_more_dispatch() will handle
	 * the removal.
	 */
	return (expired);
}

static void
ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule)
{
	struct ipfw_state *s, *sn;

	TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) {
		if (IPFW_STATE_SCANSKIP(s))
			continue;
		if (rule != NULL && s->st_rule != rule)
			continue;
		ipfw_state_del(ctx, s);
	}
}

static void
ipfw_state_expire_done(struct ipfw_context *ctx)
{

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));
	ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP;
	callout_reset(&ctx->ipfw_stateto_ch, hz,
	    ipfw_state_expire_ipifunc, NULL);
}

static void
ipfw_state_expire_more(struct ipfw_context *ctx)
{
	struct netmsg_base *nm = &ctx->ipfw_stateexp_more;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));
	KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
	    ("stateexp more did not finish"));
	netisr_sendmsg_oncpu(nm);
}

static int
ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor,
    int scan_max, int expire_max)
{
	struct ipfw_state *s;
	int scanned = 0, expired = 0;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));

	while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
		if (scanned++ >= scan_max) {
			ipfw_state_expire_more(ctx);
			return (expired);
		}

		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

		if (IPFW_STATE_SCANSKIP(s))
			continue;

		if (IPFW_STATE_ISDEAD(s) ||
		    ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
		     IPFW_STATE_TCPCLOSED(s))) {
			ipfw_state_del(ctx, s);
			if (++expired >= expire_max) {
				ipfw_state_expire_more(ctx);
				return (expired);
			}
			if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
			    (expired & 0xff) == 0 &&
			    ipfw_state_cntcoll() + ipfw_state_headroom <=
			    ipfw_state_max) {
				ipfw_state_expire_more(ctx);
				return (expired);
			}
		}
	}
	TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
	ipfw_state_expire_done(ctx);
	return (expired);
}
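/*
 * Illustrative sketch (kept under #if 0, not compiled): the
 * anchor-based resumable scan used by the expiration code above.  A
 * dummy state whose st_type is O_ANCHOR stays in the state TAILQ to
 * remember where a scan stopped, so the scan can yield the netisr
 * and be continued by a later netmsg without rescanning from the
 * head.  IPFW_STATE_SCANSKIP() makes every real scan step over such
 * anchors.
 */
#if 0
static void
ipfw_resumable_scan_example(struct ipfw_context *ctx,
    struct ipfw_state *anchor, int budget)
{
	struct ipfw_state *s;

	while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
		if (budget-- == 0)
			return;		/* anchor stays put; resume later */

		/* Advance the anchor past the state just visited. */
		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

		if (IPFW_STATE_SCANSKIP(s))
			continue;
		/* ... visit s here ... */
	}
	TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
}
#endif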
1979 */ 1980 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 && 1981 ctx->ipfw_state_lastexp == time_uptime) { 1982 ipfw_state_expire_done(ctx); 1983 return (0); 1984 } 1985 ctx->ipfw_state_lastexp = time_uptime; 1986 1987 anchor = &ctx->ipfw_stateexp_anch; 1988 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 1989 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max)); 1990 } 1991 1992 static void 1993 ipfw_state_expire_dispatch(netmsg_t nm) 1994 { 1995 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1996 1997 ASSERT_NETISR_NCPUS(mycpuid); 1998 1999 /* Reply ASAP */ 2000 crit_enter(); 2001 netisr_replymsg(&nm->base, 0); 2002 crit_exit(); 2003 2004 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) { 2005 /* Running; done. */ 2006 return; 2007 } 2008 ipfw_state_expire_start(ctx, 2009 ipfw_state_scan_max, ipfw_state_expire_max); 2010 } 2011 2012 static void 2013 ipfw_state_expire_ipifunc(void *dummy __unused) 2014 { 2015 struct netmsg_base *msg; 2016 2017 KKASSERT(mycpuid < netisr_ncpus); 2018 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm; 2019 2020 crit_enter(); 2021 if (msg->lmsg.ms_flags & MSGF_DONE) 2022 netisr_sendmsg_oncpu(msg); 2023 crit_exit(); 2024 } 2025 2026 static boolean_t 2027 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp) 2028 { 2029 uint32_t seq = ntohl(tcp->th_seq); 2030 uint32_t ack = ntohl(tcp->th_ack); 2031 2032 if (tcp->th_flags & TH_RST) 2033 return (TRUE); 2034 2035 if (dir == MATCH_FORWARD) { 2036 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) { 2037 s->st_flags |= IPFW_STATE_F_SEQFWD; 2038 s->st_seq_fwd = seq; 2039 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) { 2040 s->st_seq_fwd = seq; 2041 } else { 2042 /* Out-of-sequence; done. */ 2043 return (FALSE); 2044 } 2045 if (tcp->th_flags & TH_ACK) { 2046 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) { 2047 s->st_flags |= IPFW_STATE_F_ACKFWD; 2048 s->st_ack_fwd = ack; 2049 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) { 2050 s->st_ack_fwd = ack; 2051 } else { 2052 /* Out-of-sequence; done. */ 2053 return (FALSE); 2054 } 2055 2056 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) == 2057 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1) 2058 s->st_state |= (TH_ACK << 8); 2059 } 2060 } else { 2061 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) { 2062 s->st_flags |= IPFW_STATE_F_SEQREV; 2063 s->st_seq_rev = seq; 2064 } else if (SEQ_GEQ(seq, s->st_seq_rev)) { 2065 s->st_seq_rev = seq; 2066 } else { 2067 /* Out-of-sequence; done. */ 2068 return (FALSE); 2069 } 2070 if (tcp->th_flags & TH_ACK) { 2071 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) { 2072 s->st_flags |= IPFW_STATE_F_ACKREV; 2073 s->st_ack_rev= ack; 2074 } else if (SEQ_GEQ(ack, s->st_ack_rev)) { 2075 s->st_ack_rev = ack; 2076 } else { 2077 /* Out-of-sequence; done. */ 2078 return (FALSE); 2079 } 2080 2081 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN && 2082 s->st_ack_rev == s->st_seq_fwd + 1) 2083 s->st_state |= TH_ACK; 2084 } 2085 } 2086 return (TRUE); 2087 } 2088 2089 static void 2090 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir, 2091 const struct tcphdr *tcp, struct ipfw_state *s) 2092 { 2093 2094 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 2095 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS; 2096 2097 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp)) 2098 return; 2099 2100 s->st_state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); 2101 switch (s->st_state & IPFW_STATE_TCPSTATES) { 2102 case TH_SYN: /* opening */ 2103 s->st_expire = time_uptime + dyn_syn_lifetime; 2104 break; 2105 2106 case BOTH_SYN: /* move to established */ 2107 case BOTH_SYN | TH_FIN: /* one side tries to close */ 2108 case BOTH_SYN | (TH_FIN << 8): 2109 s->st_expire = time_uptime + dyn_ack_lifetime; 2110 break; 2111 2112 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 2113 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) { 2114 /* And both FINs were ACKed. */ 2115 s->st_expire = time_uptime + dyn_fin_lifetime; 2116 } else { 2117 s->st_expire = time_uptime + 2118 dyn_finwait_lifetime; 2119 } 2120 break; 2121 2122 default: 2123 #if 0 2124 /* 2125 * reset or some invalid combination, but can also 2126 * occur if we use keep-state the wrong way. 2127 */ 2128 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0) 2129 kprintf("invalid state: 0x%x\n", s->st_state); 2130 #endif 2131 s->st_expire = time_uptime + dyn_rst_lifetime; 2132 break; 2133 } 2134 } else if (pkt->proto == IPPROTO_UDP) { 2135 s->st_expire = time_uptime + dyn_udp_lifetime; 2136 } else { 2137 /* other protocols */ 2138 s->st_expire = time_uptime + dyn_short_lifetime; 2139 } 2140 } 2141 2142 /* 2143 * Lookup a state. 2144 */ 2145 static struct ipfw_state * 2146 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt, 2147 int *match_direction, const struct tcphdr *tcp) 2148 { 2149 struct ipfw_state *key, *s; 2150 int dir = MATCH_NONE; 2151 2152 key = &ctx->ipfw_state_tmpkey; 2153 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port, 2154 pkt->dst_ip, pkt->dst_port, pkt->proto); 2155 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key); 2156 if (s == NULL) 2157 goto done; /* not found. */ 2158 if (IPFW_STATE_ISDEAD(s)) { 2159 ipfw_state_remove(ctx, s); 2160 s = NULL; 2161 goto done; 2162 } 2163 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) { 2164 /* TCP ports recycling is too fast. */ 2165 ctx->ipfw_sts_tcprecycled++; 2166 ipfw_state_remove(ctx, s); 2167 s = NULL; 2168 goto done; 2169 } 2170 2171 if (s->st_swap == key->st_swap) { 2172 dir = MATCH_FORWARD; 2173 } else { 2174 KASSERT((s->st_swap & key->st_swap) == 0, 2175 ("found mismatch state")); 2176 dir = MATCH_REVERSE; 2177 } 2178 2179 /* Update this state. */ 2180 ipfw_state_update(pkt, dir, tcp, s); 2181 2182 if (s->st_track != NULL) { 2183 /* This track has been used. 
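		 * Refresh its expiry below, so the 'limit' accounting
		 * stays alive as long as any of its states sees traffic.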
*/ 2184 s->st_track->t_expire = time_uptime + dyn_short_lifetime; 2185 } 2186 done: 2187 if (match_direction) 2188 *match_direction = dir; 2189 return (s); 2190 } 2191 2192 static struct ipfw_state * 2193 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2194 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp) 2195 { 2196 struct ipfw_state *s; 2197 size_t sz; 2198 2199 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type), 2200 ("invalid state type %u", type)); 2201 2202 sz = sizeof(struct ipfw_state); 2203 if (IPFW_ISXLAT(type)) 2204 sz = sizeof(struct ipfw_xlat); 2205 2206 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 2207 if (s == NULL) { 2208 ctx->ipfw_sts_nomem++; 2209 return (NULL); 2210 } 2211 2212 ipfw_key_build(&s->st_key, id->src_ip, id->src_port, 2213 id->dst_ip, id->dst_port, id->proto); 2214 2215 s->st_rule = rule; 2216 s->st_type = type; 2217 if (IPFW_ISXLAT(type)) { 2218 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2219 2220 x->xlat_dir = MATCH_NONE; 2221 x->xlat_pcpu = -1; 2222 } 2223 2224 /* 2225 * Update this state: 2226 * Set st_expire and st_state. 2227 */ 2228 ipfw_state_update(id, MATCH_FORWARD, tcp, s); 2229 2230 return (s); 2231 } 2232 2233 static struct ipfw_state * 2234 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2235 uint16_t type, struct ip_fw *rule, struct ipfw_track *t, 2236 const struct tcphdr *tcp) 2237 { 2238 struct ipfw_state *s, *dup; 2239 2240 s = ipfw_state_alloc(ctx, id, type, rule, tcp); 2241 if (s == NULL) 2242 return (NULL); 2243 2244 ctx->ipfw_state_cnt++; 2245 ctx->ipfw_state_loosecnt++; 2246 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) { 2247 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt; 2248 ctx->ipfw_state_loosecnt = 0; 2249 } 2250 2251 dup = ipfw_state_link(ctx, s); 2252 if (dup != NULL) 2253 panic("ipfw: %u state exists %p", type, dup); 2254 2255 if (t != NULL) { 2256 /* Keep the track referenced. */ 2257 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink); 2258 s->st_track = t; 2259 } 2260 return (s); 2261 } 2262 2263 static boolean_t 2264 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t) 2265 { 2266 struct ipfw_trkcnt *trk; 2267 boolean_t trk_freed = FALSE; 2268 2269 KASSERT(t->t_count != NULL, ("track anchor")); 2270 KASSERT(LIST_EMPTY(&t->t_state_list), 2271 ("invalid track is still referenced")); 2272 2273 trk = t->t_trkcnt; 2274 KASSERT(trk != NULL, ("track has no trkcnt")); 2275 2276 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2277 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link); 2278 kfree(t, M_IPFW); 2279 2280 /* 2281 * fdrop() style reference counting. 2282 * See kern/kern_descrip.c fdrop(). 2283 */ 2284 for (;;) { 2285 int refs = trk->tc_refs; 2286 2287 cpu_ccfence(); 2288 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs)); 2289 if (refs == 1) { 2290 IPFW_TRKCNT_TOKGET; 2291 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) { 2292 KASSERT(trk->tc_count == 0, 2293 ("%d states reference this trkcnt", 2294 trk->tc_count)); 2295 RB_REMOVE(ipfw_trkcnt_tree, 2296 &ipfw_gd.ipfw_trkcnt_tree, trk); 2297 2298 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0, 2299 ("invalid trkcnt cnt %d", 2300 ipfw_gd.ipfw_trkcnt_cnt)); 2301 ipfw_gd.ipfw_trkcnt_cnt--; 2302 IPFW_TRKCNT_TOKREL; 2303 2304 if (ctx->ipfw_trkcnt_spare == NULL) 2305 ctx->ipfw_trkcnt_spare = trk; 2306 else 2307 kfree(trk, M_IPFW); 2308 trk_freed = TRUE; 2309 break; /* done! 
*/ 2310 } 2311 IPFW_TRKCNT_TOKREL; 2312 /* retry */ 2313 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) { 2314 break; /* done! */ 2315 } 2316 /* retry */ 2317 } 2318 return (trk_freed); 2319 } 2320 2321 static void 2322 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule) 2323 { 2324 struct ipfw_track *t, *tn; 2325 2326 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) { 2327 if (t->t_count == NULL) /* anchor */ 2328 continue; 2329 if (rule != NULL && t->t_rule != rule) 2330 continue; 2331 ipfw_track_free(ctx, t); 2332 } 2333 } 2334 2335 static boolean_t 2336 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t, 2337 boolean_t reap) 2338 { 2339 struct ipfw_state *s, *sn; 2340 boolean_t ret = FALSE; 2341 2342 KASSERT(t->t_count != NULL, ("track anchor")); 2343 2344 if (LIST_EMPTY(&t->t_state_list)) 2345 return (FALSE); 2346 2347 /* 2348 * Do not expire more than once per second, it is useless. 2349 */ 2350 if (t->t_lastexp == time_uptime) 2351 return (FALSE); 2352 t->t_lastexp = time_uptime; 2353 2354 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) { 2355 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) { 2356 KASSERT(s->st_track == t, 2357 ("state track %p does not match %p", 2358 s->st_track, t)); 2359 ipfw_state_del(ctx, s); 2360 ret = TRUE; 2361 } 2362 } 2363 return (ret); 2364 } 2365 2366 static __inline struct ipfw_trkcnt * 2367 ipfw_trkcnt_alloc(struct ipfw_context *ctx) 2368 { 2369 struct ipfw_trkcnt *trk; 2370 2371 if (ctx->ipfw_trkcnt_spare != NULL) { 2372 trk = ctx->ipfw_trkcnt_spare; 2373 ctx->ipfw_trkcnt_spare = NULL; 2374 } else { 2375 trk = kmalloc(sizeof(*trk), M_IPFW, 2376 M_INTWAIT | M_NULLOK | M_CACHEALIGN); 2377 } 2378 return (trk); 2379 } 2380 2381 static void 2382 ipfw_track_expire_done(struct ipfw_context *ctx) 2383 { 2384 2385 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2386 ("trackexp is not in progress")); 2387 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP; 2388 callout_reset(&ctx->ipfw_trackto_ch, hz, 2389 ipfw_track_expire_ipifunc, NULL); 2390 } 2391 2392 static void 2393 ipfw_track_expire_more(struct ipfw_context *ctx) 2394 { 2395 struct netmsg_base *nm = &ctx->ipfw_trackexp_more; 2396 2397 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2398 ("trackexp is not in progress")); 2399 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 2400 ("trackexp more did not finish")); 2401 netisr_sendmsg_oncpu(nm); 2402 } 2403 2404 static int 2405 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor, 2406 int scan_max, int expire_max) 2407 { 2408 struct ipfw_track *t; 2409 int scanned = 0, expired = 0; 2410 boolean_t reap = FALSE; 2411 2412 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2413 ("trackexp is not in progress")); 2414 2415 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) 2416 reap = TRUE; 2417 2418 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2419 if (scanned++ >= scan_max) { 2420 ipfw_track_expire_more(ctx); 2421 return (expired); 2422 } 2423 2424 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2425 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2426 2427 if (t->t_count == NULL) /* anchor */ 2428 continue; 2429 2430 ipfw_track_state_expire(ctx, t, reap); 2431 if (!LIST_EMPTY(&t->t_state_list)) { 2432 /* There are states referencing this track. */ 2433 continue; 2434 } 2435 2436 if (TIME_LEQ(t->t_expire, time_uptime) || reap) { 2437 /* Expired. 
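			 * (either its lifetime ran out, or we are reaping
			 * because we are short of tracks)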
*/ 2438 if (ipfw_track_free(ctx, t)) { 2439 if (++expired >= expire_max) { 2440 ipfw_track_expire_more(ctx); 2441 return (expired); 2442 } 2443 } 2444 } 2445 } 2446 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2447 ipfw_track_expire_done(ctx); 2448 return (expired); 2449 } 2450 2451 static int 2452 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 2453 { 2454 struct ipfw_track *anchor; 2455 2456 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0, 2457 ("trackexp is in progress")); 2458 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP; 2459 2460 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2461 ipfw_track_expire_done(ctx); 2462 return (0); 2463 } 2464 2465 /* 2466 * Do not expire more than once per second, it is useless. 2467 */ 2468 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 && 2469 ctx->ipfw_track_lastexp == time_uptime) { 2470 ipfw_track_expire_done(ctx); 2471 return (0); 2472 } 2473 ctx->ipfw_track_lastexp = time_uptime; 2474 2475 anchor = &ctx->ipfw_trackexp_anch; 2476 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link); 2477 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max)); 2478 } 2479 2480 static void 2481 ipfw_track_expire_more_dispatch(netmsg_t nm) 2482 { 2483 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2484 struct ipfw_track *anchor; 2485 2486 ASSERT_NETISR_NCPUS(mycpuid); 2487 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2488 ("trackexp is not in progress")); 2489 2490 /* Reply ASAP */ 2491 netisr_replymsg(&nm->base, 0); 2492 2493 anchor = &ctx->ipfw_trackexp_anch; 2494 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2495 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2496 ipfw_track_expire_done(ctx); 2497 return; 2498 } 2499 ipfw_track_expire_loop(ctx, anchor, 2500 ipfw_track_scan_max, ipfw_track_expire_max); 2501 } 2502 2503 static void 2504 ipfw_track_expire_dispatch(netmsg_t nm) 2505 { 2506 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2507 2508 ASSERT_NETISR_NCPUS(mycpuid); 2509 2510 /* Reply ASAP */ 2511 crit_enter(); 2512 netisr_replymsg(&nm->base, 0); 2513 crit_exit(); 2514 2515 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) { 2516 /* Running; done. */ 2517 return; 2518 } 2519 ipfw_track_expire_start(ctx, 2520 ipfw_track_scan_max, ipfw_track_expire_max); 2521 } 2522 2523 static void 2524 ipfw_track_expire_ipifunc(void *dummy __unused) 2525 { 2526 struct netmsg_base *msg; 2527 2528 KKASSERT(mycpuid < netisr_ncpus); 2529 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm; 2530 2531 crit_enter(); 2532 if (msg->lmsg.ms_flags & MSGF_DONE) 2533 netisr_sendmsg_oncpu(msg); 2534 crit_exit(); 2535 } 2536 2537 static int 2538 ipfw_track_reap(struct ipfw_context *ctx) 2539 { 2540 struct ipfw_track *t, *anchor; 2541 int expired; 2542 2543 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) { 2544 /* 2545 * Kick start track expiring. Ignore scan limit, 2546 * we are short of tracks. 2547 */ 2548 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP; 2549 expired = ipfw_track_expire_start(ctx, INT_MAX, 2550 ipfw_track_reap_max); 2551 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP; 2552 return (expired); 2553 } 2554 2555 /* 2556 * Tracks are being expired. 2557 */ 2558 2559 if (RB_EMPTY(&ctx->ipfw_track_tree)) 2560 return (0); 2561 2562 expired = 0; 2563 anchor = &ctx->ipfw_trackexp_anch; 2564 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2565 /* 2566 * Ignore scan limit; we are short of tracks. 
2567 */ 2568 2569 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2570 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2571 2572 if (t->t_count == NULL) /* anchor */ 2573 continue; 2574 2575 ipfw_track_state_expire(ctx, t, TRUE); 2576 if (!LIST_EMPTY(&t->t_state_list)) { 2577 /* There are states referencing this track. */ 2578 continue; 2579 } 2580 2581 if (ipfw_track_free(ctx, t)) { 2582 if (++expired >= ipfw_track_reap_max) { 2583 ipfw_track_expire_more(ctx); 2584 break; 2585 } 2586 } 2587 } 2588 /* 2589 * NOTE: 2590 * Leave the anchor on the list, even if the end of the list has 2591 * been reached. ipfw_track_expire_more_dispatch() will handle 2592 * the removal. 2593 */ 2594 return (expired); 2595 } 2596 2597 static struct ipfw_track * 2598 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2599 uint16_t limit_mask, struct ip_fw *rule) 2600 { 2601 struct ipfw_track *key, *t, *dup; 2602 struct ipfw_trkcnt *trk, *ret; 2603 boolean_t do_expire = FALSE; 2604 2605 KASSERT(rule->track_ruleid != 0, 2606 ("rule %u has no track ruleid", rule->rulenum)); 2607 2608 key = &ctx->ipfw_track_tmpkey; 2609 key->t_proto = id->proto; 2610 key->t_addrs = 0; 2611 key->t_ports = 0; 2612 key->t_rule = rule; 2613 if (limit_mask & DYN_SRC_ADDR) 2614 key->t_saddr = id->src_ip; 2615 if (limit_mask & DYN_DST_ADDR) 2616 key->t_daddr = id->dst_ip; 2617 if (limit_mask & DYN_SRC_PORT) 2618 key->t_sport = id->src_port; 2619 if (limit_mask & DYN_DST_PORT) 2620 key->t_dport = id->dst_port; 2621 2622 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key); 2623 if (t != NULL) 2624 goto done; 2625 2626 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK); 2627 if (t == NULL) { 2628 ctx->ipfw_tks_nomem++; 2629 return (NULL); 2630 } 2631 2632 t->t_key = key->t_key; 2633 t->t_rule = rule; 2634 t->t_lastexp = 0; 2635 LIST_INIT(&t->t_state_list); 2636 2637 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) { 2638 time_t globexp, uptime; 2639 2640 trk = NULL; 2641 do_expire = TRUE; 2642 2643 /* 2644 * Do not expire globally more than once per second, 2645 * it is useless. 2646 */ 2647 uptime = time_uptime; 2648 globexp = ipfw_gd.ipfw_track_globexp; 2649 if (globexp != uptime && 2650 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp, 2651 globexp, uptime)) { 2652 int cpu; 2653 2654 /* Expire tracks on other CPUs. 
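			 * Tracks are per-cpu, so each cpu must reap its own;
			 * the IPIs below kick ipfw_track_expire_ipifunc() on
			 * all the other cpus.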
			 */
2655				for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2656					if (cpu == mycpuid)
2657						continue;
2658					lwkt_send_ipiq(globaldata_find(cpu),
2659					    ipfw_track_expire_ipifunc, NULL);
2660				}
2661			}
2662		} else {
2663			trk = ipfw_trkcnt_alloc(ctx);
2664		}
2665		if (trk == NULL) {
2666			struct ipfw_trkcnt *tkey;
2667
2668			tkey = &ctx->ipfw_trkcnt_tmpkey;
2669			key = NULL; /* tkey overlaps key */
2670
2671			tkey->tc_key = t->t_key;
2672			tkey->tc_ruleid = rule->track_ruleid;
2673
2674			IPFW_TRKCNT_TOKGET;
2675			trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2676			    tkey);
2677			if (trk == NULL) {
2678				IPFW_TRKCNT_TOKREL;
2679				if (do_expire) {
2680					ctx->ipfw_tks_reap++;
2681					if (ipfw_track_reap(ctx) > 0) {
2682						if (ipfw_gd.ipfw_trkcnt_cnt <
2683						    ipfw_track_max) {
2684							trk = ipfw_trkcnt_alloc(ctx);
2685							if (trk != NULL)
2686								goto install;
2687							ctx->ipfw_tks_cntnomem++;
2688						} else {
2689							ctx->ipfw_tks_overflow++;
2690						}
2691					} else {
2692						ctx->ipfw_tks_reapfailed++;
2693						ctx->ipfw_tks_overflow++;
2694					}
2695				} else {
2696					ctx->ipfw_tks_cntnomem++;
2697				}
2698				kfree(t, M_IPFW);
2699				return (NULL);
2700			}
2701			KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus,
2702			    ("invalid trkcnt refs %d", trk->tc_refs));
2703			atomic_add_int(&trk->tc_refs, 1);
2704			IPFW_TRKCNT_TOKREL;
2705		} else {
2706	install:
2707			trk->tc_key = t->t_key;
2708			trk->tc_ruleid = rule->track_ruleid;
2709			trk->tc_refs = 0;
2710			trk->tc_count = 0;
2711			trk->tc_expire = 0;
2712			trk->tc_rulenum = rule->rulenum;
2713
2714			IPFW_TRKCNT_TOKGET;
2715			ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree,
2716			    trk);
2717			if (ret != NULL) {
2718				KASSERT(ret->tc_refs > 0 &&
2719				    ret->tc_refs < netisr_ncpus,
2720				    ("invalid trkcnt refs %d", ret->tc_refs));
2721				KASSERT(ctx->ipfw_trkcnt_spare == NULL,
2722				    ("trkcnt spare was installed"));
2723				ctx->ipfw_trkcnt_spare = trk;
2724				trk = ret;
2725			} else {
2726				ipfw_gd.ipfw_trkcnt_cnt++;
2727			}
2728			atomic_add_int(&trk->tc_refs, 1);
2729			IPFW_TRKCNT_TOKREL;
2730		}
2731		t->t_count = &trk->tc_count;
2732		t->t_trkcnt = trk;
2733
2734		dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t);
2735		if (dup != NULL)
2736			panic("ipfw: track exists");
2737		TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link);
2738	done:
2739		t->t_expire = time_uptime + dyn_short_lifetime;
2740		return (t);
2741	}
2742
2743	/*
2744	 * Install state for rule type cmd->o.opcode
2745	 *
2746	 * Returns NULL if state is not installed because of errors or because
2747	 * state limitations are enforced.
2748	 */
2749	static struct ipfw_state *
2750	ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule,
2751	    ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp)
2752	{
2753		struct ipfw_state *s;
2754		struct ipfw_track *t;
2755		int count, diff;
2756
2757		if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max &&
2758		    (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) {
2759			boolean_t overflow = TRUE;
2760
2761			ctx->ipfw_sts_reap++;
2762			if (ipfw_state_reap(ctx, diff) == 0)
2763				ctx->ipfw_sts_reapfailed++;
2764			if (ipfw_state_cntsync() < ipfw_state_max)
2765				overflow = FALSE;
2766
2767			if (overflow) {
2768				time_t globexp, uptime;
2769				int cpu;
2770
2771				/*
2772				 * Do not expire globally more than once per second,
2773				 * it is useless.
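				 * (the atomic_cmpset_long() below elects exactly
				 * one cpu per second to broadcast the expire IPIs)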
2774				 */
2775				uptime = time_uptime;
2776				globexp = ipfw_gd.ipfw_state_globexp;
2777				if (globexp == uptime ||
2778				    !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp,
2779				    globexp, uptime)) {
2780					ctx->ipfw_sts_overflow++;
2781					return (NULL);
2782				}
2783
2784				/* Expire states on other CPUs. */
2785				for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
2786					if (cpu == mycpuid)
2787						continue;
2788					lwkt_send_ipiq(globaldata_find(cpu),
2789					    ipfw_state_expire_ipifunc, NULL);
2790				}
2791				ctx->ipfw_sts_overflow++;
2792				return (NULL);
2793			}
2794		}
2795
2796		switch (cmd->o.opcode) {
2797		case O_KEEP_STATE: /* bidir rule */
2798		case O_REDIRECT:
2799			s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL,
2800			    tcp);
2801			if (s == NULL)
2802				return (NULL);
2803			break;
2804
2805		case O_LIMIT: /* limit number of sessions */
2806			t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule);
2807			if (t == NULL)
2808				return (NULL);
2809
2810			if (*t->t_count >= cmd->conn_limit) {
2811				if (!ipfw_track_state_expire(ctx, t, TRUE))
2812					return (NULL);
2813			}
2814			for (;;) {
2815				count = *t->t_count;
2816				if (count >= cmd->conn_limit)
2817					return (NULL);
2818				if (atomic_cmpset_int(t->t_count, count, count + 1))
2819					break;
2820			}
2821
2822			s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp);
2823			if (s == NULL) {
2824				/* Undo damage. */
2825				atomic_subtract_int(t->t_count, 1);
2826				return (NULL);
2827			}
2828			break;
2829
2830		default:
2831			panic("unknown state type %u\n", cmd->o.opcode);
2832		}
2833
2834		if (s->st_type == O_REDIRECT) {
2835			struct ipfw_xlat *x = (struct ipfw_xlat *)s;
2836			ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd;
2837
2838			x->xlat_addr = r->addr.s_addr;
2839			x->xlat_port = r->port;
2840			x->xlat_ifp = args->m->m_pkthdr.rcvif;
2841			x->xlat_dir = MATCH_FORWARD;
2842			KKASSERT(x->xlat_ifp != NULL);
2843		}
2844		return (s);
2845	}
2846
2847	static int
2848	ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid,
2849	    const struct in_addr *in)
2850	{
2851		struct radix_node_head *rnh;
2852		struct sockaddr_in sin;
2853		struct ipfw_tblent *te;
2854
2855		KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid));
2856		rnh = ctx->ipfw_tables[tableid];
2857		if (rnh == NULL)
2858			return (0); /* no match */
2859
2860		memset(&sin, 0, sizeof(sin));
2861		sin.sin_family = AF_INET;
2862		sin.sin_len = sizeof(sin);
2863		sin.sin_addr = *in;
2864
2865		te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh);
2866		if (te == NULL)
2867			return (0); /* no match */
2868
2869		te->te_use++;
2870		te->te_lastuse = time_second;
2871		return (1); /* match */
2872	}
2873
2874	/*
2875	 * Transmit a TCP packet, containing either a RST or a keepalive.
2876	 * When flags & TH_RST, we are sending a RST packet, because a
2877	 * "reset" action matched the packet.
2878	 * Otherwise we are sending a keepalive, and flags & TH_SYN selects
2879	 * the direction: forward if set, reverse if clear.
2880	 * Only {src,dst}_{ip,port} of "id" are used.
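 *
 * For example, send_reject() answers an unwanted TCP segment by
 * reflecting it as a RST:
 *
 *	send_pkt(&args->f_id, ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 *	    tcp->th_flags | TH_RST);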
2881 */ 2882 static void 2883 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 2884 { 2885 struct mbuf *m; 2886 struct ip *ip; 2887 struct tcphdr *tcp; 2888 struct route sro; /* fake route */ 2889 2890 MGETHDR(m, M_NOWAIT, MT_HEADER); 2891 if (m == NULL) 2892 return; 2893 m->m_pkthdr.rcvif = NULL; 2894 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 2895 m->m_data += max_linkhdr; 2896 2897 ip = mtod(m, struct ip *); 2898 bzero(ip, m->m_len); 2899 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 2900 ip->ip_p = IPPROTO_TCP; 2901 tcp->th_off = 5; 2902 2903 /* 2904 * Assume we are sending a RST (or a keepalive in the reverse 2905 * direction), swap src and destination addresses and ports. 2906 */ 2907 ip->ip_src.s_addr = htonl(id->dst_ip); 2908 ip->ip_dst.s_addr = htonl(id->src_ip); 2909 tcp->th_sport = htons(id->dst_port); 2910 tcp->th_dport = htons(id->src_port); 2911 if (flags & TH_RST) { /* we are sending a RST */ 2912 if (flags & TH_ACK) { 2913 tcp->th_seq = htonl(ack); 2914 tcp->th_ack = htonl(0); 2915 tcp->th_flags = TH_RST; 2916 } else { 2917 if (flags & TH_SYN) 2918 seq++; 2919 tcp->th_seq = htonl(0); 2920 tcp->th_ack = htonl(seq); 2921 tcp->th_flags = TH_RST | TH_ACK; 2922 } 2923 } else { 2924 /* 2925 * We are sending a keepalive. flags & TH_SYN determines 2926 * the direction, forward if set, reverse if clear. 2927 * NOTE: seq and ack are always assumed to be correct 2928 * as set by the caller. This may be confusing... 2929 */ 2930 if (flags & TH_SYN) { 2931 /* 2932 * we have to rewrite the correct addresses! 2933 */ 2934 ip->ip_dst.s_addr = htonl(id->dst_ip); 2935 ip->ip_src.s_addr = htonl(id->src_ip); 2936 tcp->th_dport = htons(id->dst_port); 2937 tcp->th_sport = htons(id->src_port); 2938 } 2939 tcp->th_seq = htonl(seq); 2940 tcp->th_ack = htonl(ack); 2941 tcp->th_flags = TH_ACK; 2942 } 2943 2944 /* 2945 * set ip_len to the payload size so we can compute 2946 * the tcp checksum on the pseudoheader 2947 * XXX check this, could save a couple of words ? 2948 */ 2949 ip->ip_len = htons(sizeof(struct tcphdr)); 2950 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 2951 2952 /* 2953 * now fill fields left out earlier 2954 */ 2955 ip->ip_ttl = ip_defttl; 2956 ip->ip_len = m->m_pkthdr.len; 2957 2958 bzero(&sro, sizeof(sro)); 2959 ip_rtaddr(ip->ip_dst, &sro); 2960 2961 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 2962 ip_output(m, NULL, &sro, 0, NULL, NULL); 2963 if (sro.ro_rt) 2964 RTFREE(sro.ro_rt); 2965 } 2966 2967 /* 2968 * Send a reject message, consuming the mbuf passed as an argument. 2969 */ 2970 static void 2971 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 2972 { 2973 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 2974 /* We need the IP header in host order for icmp_error(). 
*/ 2975 if (args->eh != NULL) { 2976 struct ip *ip = mtod(args->m, struct ip *); 2977 2978 ip->ip_len = ntohs(ip->ip_len); 2979 ip->ip_off = ntohs(ip->ip_off); 2980 } 2981 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 2982 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 2983 struct tcphdr *const tcp = 2984 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 2985 2986 if ((tcp->th_flags & TH_RST) == 0) { 2987 send_pkt(&args->f_id, ntohl(tcp->th_seq), 2988 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 2989 } 2990 m_freem(args->m); 2991 } else { 2992 m_freem(args->m); 2993 } 2994 args->m = NULL; 2995 } 2996 2997 /* 2998 * Given an ip_fw *, lookup_next_rule will return a pointer 2999 * to the next rule, which can be either the jump 3000 * target (for skipto instructions) or the next one in the list (in 3001 * all other cases including a missing jump target). 3002 * The result is also written in the "next_rule" field of the rule. 3003 * Backward jumps are not allowed, so start looking from the next 3004 * rule... 3005 * 3006 * This never returns NULL -- in case we do not have an exact match, 3007 * the next rule is returned. When the ruleset is changed, 3008 * pointers are flushed so we are always correct. 3009 */ 3010 static struct ip_fw * 3011 lookup_next_rule(struct ip_fw *me) 3012 { 3013 struct ip_fw *rule = NULL; 3014 ipfw_insn *cmd; 3015 3016 /* look for action, in case it is a skipto */ 3017 cmd = ACTION_PTR(me); 3018 if (cmd->opcode == O_LOG) 3019 cmd += F_LEN(cmd); 3020 if (cmd->opcode == O_SKIPTO) { 3021 for (rule = me->next; rule; rule = rule->next) { 3022 if (rule->rulenum >= cmd->arg1) 3023 break; 3024 } 3025 } 3026 if (rule == NULL) /* failure or not a skipto */ 3027 rule = me->next; 3028 me->next_rule = rule; 3029 return rule; 3030 } 3031 3032 static int 3033 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 3034 enum ipfw_opcodes opcode, uid_t uid) 3035 { 3036 struct in_addr src_ip, dst_ip; 3037 struct inpcbinfo *pi; 3038 boolean_t wildcard; 3039 struct inpcb *pcb; 3040 3041 if (fid->proto == IPPROTO_TCP) { 3042 wildcard = FALSE; 3043 pi = &tcbinfo[mycpuid]; 3044 } else if (fid->proto == IPPROTO_UDP) { 3045 wildcard = TRUE; 3046 pi = &udbinfo[mycpuid]; 3047 } else { 3048 return 0; 3049 } 3050 3051 /* 3052 * Values in 'fid' are in host byte order 3053 */ 3054 dst_ip.s_addr = htonl(fid->dst_ip); 3055 src_ip.s_addr = htonl(fid->src_ip); 3056 if (oif) { 3057 pcb = in_pcblookup_hash(pi, 3058 dst_ip, htons(fid->dst_port), 3059 src_ip, htons(fid->src_port), 3060 wildcard, oif); 3061 } else { 3062 pcb = in_pcblookup_hash(pi, 3063 src_ip, htons(fid->src_port), 3064 dst_ip, htons(fid->dst_port), 3065 wildcard, NULL); 3066 } 3067 if (pcb == NULL || pcb->inp_socket == NULL) 3068 return 0; 3069 3070 if (opcode == O_UID) { 3071 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 3072 return !socheckuid(pcb->inp_socket, uid); 3073 #undef socheckuid 3074 } else { 3075 return groupmember(uid, pcb->inp_socket->so_cred); 3076 } 3077 } 3078 3079 static int 3080 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip) 3081 { 3082 3083 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) { 3084 struct ifaddr_container *ifac; 3085 struct ifnet *ifp; 3086 3087 ifp = ifunit_netisr(cmd->ifname); 3088 if (ifp == NULL) 3089 return (0); 3090 3091 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 3092 struct ifaddr *ia = ifac->ifa; 3093 3094 if (ia->ifa_addr == NULL) 3095 continue; 3096 if (ia->ifa_addr->sa_family != AF_INET) 3097 continue; 3098 3099 
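			/*
			 * Cache the first AF_INET address, masked as
			 * requested, in the instruction itself and mark it
			 * IPFW_IFIP_VALID, so later packets skip this
			 * address-list walk entirely.
			 */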
cmd->mask.s_addr = INADDR_ANY; 3100 if (cmd->o.arg1 & IPFW_IFIP_NET) { 3101 cmd->mask = ((struct sockaddr_in *) 3102 ia->ifa_netmask)->sin_addr; 3103 } 3104 if (cmd->mask.s_addr == INADDR_ANY) 3105 cmd->mask.s_addr = INADDR_BROADCAST; 3106 3107 cmd->addr = 3108 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr; 3109 cmd->addr.s_addr &= cmd->mask.s_addr; 3110 3111 cmd->o.arg1 |= IPFW_IFIP_VALID; 3112 break; 3113 } 3114 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0) 3115 return (0); 3116 } 3117 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr); 3118 } 3119 3120 static void 3121 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m, 3122 struct in_addr *old_addr, uint16_t *old_port) 3123 { 3124 struct ip *ip = mtod(m, struct ip *); 3125 struct in_addr *addr; 3126 uint16_t *port, *csum, dlen = 0; 3127 uint8_t udp = 0; 3128 boolean_t pseudo = FALSE; 3129 3130 if (x->xlat_flags & IPFW_STATE_F_XLATSRC) { 3131 addr = &ip->ip_src; 3132 switch (ip->ip_p) { 3133 case IPPROTO_TCP: 3134 port = &L3HDR(struct tcphdr, ip)->th_sport; 3135 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3136 break; 3137 case IPPROTO_UDP: 3138 port = &L3HDR(struct udphdr, ip)->uh_sport; 3139 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3140 udp = 1; 3141 break; 3142 default: 3143 panic("ipfw: unsupported src xlate proto %u", ip->ip_p); 3144 } 3145 } else { 3146 addr = &ip->ip_dst; 3147 switch (ip->ip_p) { 3148 case IPPROTO_TCP: 3149 port = &L3HDR(struct tcphdr, ip)->th_dport; 3150 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3151 break; 3152 case IPPROTO_UDP: 3153 port = &L3HDR(struct udphdr, ip)->uh_dport; 3154 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3155 udp = 1; 3156 break; 3157 default: 3158 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p); 3159 } 3160 } 3161 if (old_addr != NULL) 3162 *old_addr = *addr; 3163 if (old_port != NULL) { 3164 if (x->xlat_port != 0) 3165 *old_port = *port; 3166 else 3167 *old_port = 0; 3168 } 3169 3170 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 3171 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) 3172 dlen = ip->ip_len - (ip->ip_hl << 2); 3173 pseudo = TRUE; 3174 } 3175 3176 if (!pseudo) { 3177 const uint16_t *oaddr, *naddr; 3178 3179 oaddr = (const uint16_t *)&addr->s_addr; 3180 naddr = (const uint16_t *)&x->xlat_addr; 3181 3182 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum, 3183 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0); 3184 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum, 3185 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp); 3186 } 3187 addr->s_addr = x->xlat_addr; 3188 3189 if (x->xlat_port != 0) { 3190 if (!pseudo) { 3191 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port, 3192 udp); 3193 } 3194 *port = x->xlat_port; 3195 } 3196 3197 if (pseudo) { 3198 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 3199 htons(dlen + ip->ip_p)); 3200 } 3201 } 3202 3203 static void 3204 ipfw_ip_xlate_dispatch(netmsg_t nmsg) 3205 { 3206 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 3207 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3208 struct mbuf *m = nm->m; 3209 struct ipfw_xlat *x = nm->arg1; 3210 struct ip_fw *rule = x->xlat_rule; 3211 3212 ASSERT_NETISR_NCPUS(mycpuid); 3213 KASSERT(rule->cpuid == mycpuid, 3214 ("rule does not belong to cpu%d", mycpuid)); 3215 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 3216 ("mbuf does not have ipfw continue rule")); 3217 3218 KASSERT(ctx->ipfw_cont_rule == NULL, 3219 ("pending ipfw continue rule")); 3220 KASSERT(ctx->ipfw_cont_xlat == NULL, 3221 ("pending ipfw continue xlat")); 3222 
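	/*
	 * Publish the continue rule/xlat in the per-cpu context, so that
	 * they can be handed back to ipfw_chk() (args->rule and args->xlat
	 * on the IP_FWARG_F_CONT path) when the re-injected packet passes
	 * through the firewall again.
	 */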
	ctx->ipfw_cont_rule = rule;
3223		ctx->ipfw_cont_xlat = x;
3224
3225		if (nm->arg2 == 0)
3226			ip_input(m);
3227		else
3228			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
3229
3230		/* These may not have been cleared, if ipfw was unloaded/disabled. */
3231		ctx->ipfw_cont_rule = NULL;
3232		ctx->ipfw_cont_xlat = NULL;
3233
3234		/*
3235		 * This state is no longer used; decrement its xlat_crefs,
3236		 * so this state can be deleted.
3237		 */
3238		x->xlat_crefs--;
3239		/*
3240		 * This rule is no longer used; decrement its cross_refs,
3241		 * so this rule can be deleted.
3242		 *
3243		 * NOTE:
3244		 * Decrement cross_refs in the last step of this function,
3245		 * so that the module can be unloaded safely.
3246		 */
3247		rule->cross_refs--;
3248	}
3249
3250	static void
3251	ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x,
3252	    uint32_t flags)
3253	{
3254		struct netmsg_genpkt *nm;
3255
3256		KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d",
3257		    x->xlat_pcpu, cpuid));
3258
3259		/*
3260		 * Bump cross_refs to prevent this rule and its siblings
3261		 * from being deleted while this mbuf is in flight.  The
3262		 * cross_refs of the sibling rule on the target cpu will
3263		 * be decremented once this mbuf is filtered
3264		 * on the target cpu.
3265		 */
3266		x->xlat_rule->cross_refs++;
3267		/*
3268		 * Bump xlat_crefs to prevent this state and its paired
3269		 * state from being deleted while this mbuf is in flight.
3270		 * The xlat_crefs of the paired state on the target cpu
3271		 * will be decremented once this mbuf is
3272		 * filtered on the target cpu.
3273		 */
3274		x->xlat_crefs++;
3275
3276		m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;
3277		if (flags & IPFW_XLATE_INSERT)
3278			m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS;
3279		if (flags & IPFW_XLATE_FORWARD)
3280			m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD;
3281
3282		if ((flags & IPFW_XLATE_OUTPUT) == 0) {
3283			struct ip *ip = mtod(m, struct ip *);
3284
3285			/*
3286			 * NOTE:
3287			 * ip_input() expects ip_len/ip_off in network
3288			 * byte order.
3289			 */
3290			ip->ip_len = htons(ip->ip_len);
3291			ip->ip_off = htons(ip->ip_off);
3292		}
3293
3294		nm = &m->m_hdr.mh_genmsg;
3295		netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
3296		    ipfw_ip_xlate_dispatch);
3297		nm->m = m;
3298		nm->arg1 = x->xlat_pair;
3299		nm->arg2 = 0;
3300		if (flags & IPFW_XLATE_OUTPUT)
3301			nm->arg2 = 1;
3302		netisr_sendmsg(&nm->base, cpuid);
3303	}
3304
3305	static struct mbuf *
3306	ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args,
3307	    struct ip_fw_local *local, struct ip **ip0)
3308	{
3309		struct ip *ip = mtod(m, struct ip *);
3310		struct tcphdr *tcp;
3311		struct udphdr *udp;
3312
3313		/*
3314		 * Collect parameters into local variables for faster matching.
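	 * The protocol, addresses, ports, fragment offset and packet
	 * length are snapshotted into 'local' and args->f_id once, so
	 * the per-rule loop in ipfw_chk() does not have to re-parse
	 * the mbuf for every opcode.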
3315 */ 3316 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 3317 local->proto = args->f_id.proto = 0; /* mark f_id invalid */ 3318 goto done; 3319 } 3320 3321 local->proto = args->f_id.proto = ip->ip_p; 3322 local->src_ip = ip->ip_src; 3323 local->dst_ip = ip->ip_dst; 3324 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 3325 local->offset = ntohs(ip->ip_off) & IP_OFFMASK; 3326 local->ip_len = ntohs(ip->ip_len); 3327 } else { 3328 local->offset = ip->ip_off & IP_OFFMASK; 3329 local->ip_len = ip->ip_len; 3330 } 3331 3332 #define PULLUP_TO(len) \ 3333 do { \ 3334 if (m->m_len < (len)) { \ 3335 args->m = m = m_pullup(m, (len)); \ 3336 if (m == NULL) { \ 3337 ip = NULL; \ 3338 goto done; \ 3339 } \ 3340 ip = mtod(m, struct ip *); \ 3341 } \ 3342 } while (0) 3343 3344 if (local->offset == 0) { 3345 switch (local->proto) { 3346 case IPPROTO_TCP: 3347 PULLUP_TO(hlen + sizeof(struct tcphdr)); 3348 local->tcp = tcp = L3HDR(struct tcphdr, ip); 3349 local->dst_port = tcp->th_dport; 3350 local->src_port = tcp->th_sport; 3351 args->f_id.flags = tcp->th_flags; 3352 break; 3353 3354 case IPPROTO_UDP: 3355 PULLUP_TO(hlen + sizeof(struct udphdr)); 3356 udp = L3HDR(struct udphdr, ip); 3357 local->dst_port = udp->uh_dport; 3358 local->src_port = udp->uh_sport; 3359 break; 3360 3361 case IPPROTO_ICMP: 3362 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 3363 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 3364 break; 3365 3366 default: 3367 break; 3368 } 3369 } 3370 3371 #undef PULLUP_TO 3372 3373 args->f_id.src_ip = ntohl(local->src_ip.s_addr); 3374 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr); 3375 args->f_id.src_port = local->src_port = ntohs(local->src_port); 3376 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port); 3377 done: 3378 *ip0 = ip; 3379 return (m); 3380 } 3381 3382 static struct mbuf * 3383 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3384 struct ip_fw_local *local, struct ip **ip0) 3385 { 3386 struct ip *ip = mtod(m, struct ip *); 3387 3388 ip->ip_len = htons(ip->ip_len); 3389 ip->ip_off = htons(ip->ip_off); 3390 3391 m->m_flags &= ~M_HASH; 3392 ip_hashfn(&m, 0); 3393 args->m = m; 3394 if (m == NULL) { 3395 *ip0 = NULL; 3396 return (NULL); 3397 } 3398 KASSERT(m->m_flags & M_HASH, ("no hash")); 3399 3400 /* 'm' might be changed by ip_hashfn(). */ 3401 ip = mtod(m, struct ip *); 3402 ip->ip_len = ntohs(ip->ip_len); 3403 ip->ip_off = ntohs(ip->ip_off); 3404 3405 return (ipfw_setup_local(m, hlen, args, local, ip0)); 3406 } 3407 3408 /* 3409 * The main check routine for the firewall. 3410 * 3411 * All arguments are in args so we can modify them and return them 3412 * back to the caller. 3413 * 3414 * Parameters: 3415 * 3416 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 3417 * Starts with the IP header. 3418 * args->eh (in) Mac header if present, or NULL for layer3 packet. 3419 * args->oif Outgoing interface, or NULL if packet is incoming. 3420 * The incoming interface is in the mbuf. (in) 3421 * 3422 * args->rule Pointer to the last matching rule (in/out) 3423 * args->f_id Addresses grabbed from the packet (out) 3424 * 3425 * Return value: 3426 * 3427 * If the packet was denied/rejected and has been dropped, *m is equal 3428 * to NULL upon return. 3429 * 3430 * IP_FW_DENY the packet must be dropped. 3431 * IP_FW_PASS The packet is to be accepted and routed normally. 
3432	 * IP_FW_DIVERT	Divert the packet to port (args->cookie)
3433	 * IP_FW_TEE	Tee the packet to port (args->cookie)
3434	 * IP_FW_DUMMYNET	Send the packet to pipe/queue (args->cookie)
3435	 * IP_FW_CONTINUE	Continue processing on another cpu.
3436	 */
3437	static int
3438	ipfw_chk(struct ip_fw_args *args)
3439	{
3440		/*
3441		 * Local variables hold state during the processing of a packet.
3442		 *
3443		 * IMPORTANT NOTE: to speed up the processing of rules, there
3444		 * are some assumptions about the values of the variables, which
3445		 * are documented here.  Should you change them, please check
3446		 * the implementation of the various instructions to make sure
3447		 * that they still work.
3448		 *
3449		 * args->eh	The MAC header.  It is non-NULL for a layer-2
3450		 *	packet and NULL for a layer-3 packet.
3451		 *
3452		 * m | args->m	Pointer to the mbuf, as received from the caller.
3453		 *	It may change if ipfw_chk() does an m_pullup, or if it
3454		 *	consumes the packet because it calls send_reject().
3455		 *	XXX This has to change, so that ipfw_chk() never modifies
3456		 *	or consumes the buffer.
3457		 *	ip is simply an alias of the value of m, and it is kept
3458		 *	in sync with it (the packet is supposed to start with
3459		 *	the ip header).
3460		 */
3461		struct mbuf *m = args->m;
3462		struct ip *ip = mtod(m, struct ip *);
3463
3464		/*
3465		 * oif | args->oif	If NULL, ipfw_chk has been called on the
3466		 *	inbound path (ether_input, ip_input).
3467		 *	If non-NULL, ipfw_chk has been called on the outbound path
3468		 *	(ether_output, ip_output).
3469		 */
3470		struct ifnet *oif = args->oif;
3471
3472		struct ip_fw *f = NULL;	/* matching rule */
3473		int retval = IP_FW_PASS;
3474		struct m_tag *mtag;
3475		struct divert_info *divinfo;
3476		struct ipfw_state *s;
3477
3478		/*
3479		 * hlen	The length of the IPv4 header.
3480		 *	hlen >0 means we have an IPv4 packet.
3481		 */
3482		u_int hlen = 0;	/* hlen >0 means we have an IP pkt */
3483
3484		struct ip_fw_local lc;
3485
3486		/*
3487		 * dyn_dir = MATCH_UNKNOWN when rules unchecked,
3488		 *	MATCH_NONE when checked and not matched (dyn_f = NULL),
3489		 *	MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
3490		 */
3491		int dyn_dir = MATCH_UNKNOWN;
3492		struct ip_fw *dyn_f = NULL;
3493		int cpuid = mycpuid;
3494		struct ipfw_context *ctx;
3495
3496		ASSERT_NETISR_NCPUS(cpuid);
3497		ctx = ipfw_ctx[cpuid];
3498
3499		if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
3500			return IP_FW_PASS;	/* accept */
3501
3502		if (args->eh == NULL ||	/* layer 3 packet */
3503		    (m->m_pkthdr.len >= sizeof(struct ip) &&
3504		     ntohs(args->eh->ether_type) == ETHERTYPE_IP))
3505			hlen = ip->ip_hl << 2;
3506
3507		memset(&lc, 0, sizeof(lc));
3508
3509		m = ipfw_setup_local(m, hlen, args, &lc, &ip);
3510		if (m == NULL)
3511			goto pullup_failed;
3512
3513		if (args->rule) {
3514			/*
3515			 * Packet has already been tagged.  Look for the next rule
3516			 * to restart processing.
3517			 *
3518			 * If fw_one_pass != 0 then just accept it.
3519			 * XXX should not happen here, but optimized out in
3520			 * the caller.
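		 *
		 * IP_FWARG_F_CONT marks the cross-cpu continuation path
		 * (xlat/defrag redispatch); such packets must not take
		 * the fw_one_pass shortcut below.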
3521 */ 3522 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0) 3523 return IP_FW_PASS; 3524 args->flags &= ~IP_FWARG_F_CONT; 3525 3526 /* This rule is being/has been flushed */ 3527 if (ipfw_flushing) 3528 return IP_FW_DENY; 3529 3530 KASSERT(args->rule->cpuid == cpuid, 3531 ("rule used on cpu%d", cpuid)); 3532 3533 /* This rule was deleted */ 3534 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 3535 return IP_FW_DENY; 3536 3537 if (args->xlat != NULL) { 3538 struct ipfw_xlat *x = args->xlat; 3539 3540 /* This xlat is being deleted. */ 3541 if (x->xlat_invalid) 3542 return IP_FW_DENY; 3543 3544 f = args->rule; 3545 3546 dyn_f = f; 3547 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ? 3548 MATCH_FORWARD : MATCH_REVERSE; 3549 3550 if (args->flags & IP_FWARG_F_XLATINS) { 3551 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE, 3552 ("not slave %u state", x->xlat_type)); 3553 s = ipfw_state_link(ctx, &x->xlat_st); 3554 if (s != NULL) { 3555 ctx->ipfw_xlate_conflicts++; 3556 if (IPFW_STATE_ISDEAD(s)) { 3557 ipfw_state_remove(ctx, s); 3558 s = ipfw_state_link(ctx, 3559 &x->xlat_st); 3560 } 3561 if (s != NULL) { 3562 if (bootverbose) { 3563 kprintf("ipfw: " 3564 "slave %u state " 3565 "conflicts %u state\n", 3566 x->xlat_type, 3567 s->st_type); 3568 } 3569 ipfw_xlat_invalidate(x); 3570 return IP_FW_DENY; 3571 } 3572 ctx->ipfw_xlate_cresolved++; 3573 } 3574 } else { 3575 ipfw_state_update(&args->f_id, dyn_dir, 3576 lc.tcp, &x->xlat_st); 3577 } 3578 } else { 3579 /* TODO: setup dyn_f, dyn_dir */ 3580 3581 f = args->rule->next_rule; 3582 if (f == NULL) 3583 f = lookup_next_rule(args->rule); 3584 } 3585 } else { 3586 /* 3587 * Find the starting rule. It can be either the first 3588 * one, or the one after divert_rule if asked so. 3589 */ 3590 int skipto; 3591 3592 KKASSERT((args->flags & 3593 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0); 3594 KKASSERT(args->xlat == NULL); 3595 3596 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 3597 if (mtag != NULL) { 3598 divinfo = m_tag_data(mtag); 3599 skipto = divinfo->skipto; 3600 } else { 3601 skipto = 0; 3602 } 3603 3604 f = ctx->ipfw_layer3_chain; 3605 if (args->eh == NULL && skipto != 0) { 3606 /* No skipto during rule flushing */ 3607 if (ipfw_flushing) 3608 return IP_FW_DENY; 3609 3610 if (skipto >= IPFW_DEFAULT_RULE) 3611 return IP_FW_DENY; /* invalid */ 3612 3613 while (f && f->rulenum <= skipto) 3614 f = f->next; 3615 if (f == NULL) /* drop packet */ 3616 return IP_FW_DENY; 3617 } else if (ipfw_flushing) { 3618 /* Rules are being flushed; skip to default rule */ 3619 f = ctx->ipfw_default_rule; 3620 } 3621 } 3622 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 3623 m_tag_delete(m, mtag); 3624 3625 /* 3626 * Now scan the rules, and parse microinstructions for each rule. 3627 */ 3628 for (; f; f = f->next) { 3629 int l, cmdlen; 3630 ipfw_insn *cmd; 3631 int skip_or; /* skip rest of OR block */ 3632 3633 again: 3634 if (ctx->ipfw_set_disable & (1 << f->set)) { 3635 args->xlat = NULL; 3636 continue; 3637 } 3638 3639 if (args->xlat != NULL) { 3640 args->xlat = NULL; 3641 l = f->cmd_len - f->act_ofs; 3642 cmd = ACTION_PTR(f); 3643 } else { 3644 l = f->cmd_len; 3645 cmd = f->cmd; 3646 } 3647 3648 skip_or = 0; 3649 for (; l > 0; l -= cmdlen, cmd += cmdlen) { 3650 int match; 3651 3652 /* 3653 * check_body is a jump target used when we find a 3654 * CHECK_STATE, and need to jump to the body of 3655 * the target rule. 3656 */ 3657 check_body: 3658 cmdlen = F_LEN(cmd); 3659 /* 3660 * An OR block (insn_1 || .. 
|| insn_n) has the 3661 * F_OR bit set in all but the last instruction. 3662 * The first match will set "skip_or", and cause 3663 * the following instructions to be skipped until 3664 * past the one with the F_OR bit clear. 3665 */ 3666 if (skip_or) { /* skip this instruction */ 3667 if ((cmd->len & F_OR) == 0) 3668 skip_or = 0; /* next one is good */ 3669 continue; 3670 } 3671 match = 0; /* set to 1 if we succeed */ 3672 3673 switch (cmd->opcode) { 3674 /* 3675 * The first set of opcodes compares the packet's 3676 * fields with some pattern, setting 'match' if a 3677 * match is found. At the end of the loop there is 3678 * logic to deal with F_NOT and F_OR flags associated 3679 * with the opcode. 3680 */ 3681 case O_NOP: 3682 match = 1; 3683 break; 3684 3685 case O_FORWARD_MAC: 3686 kprintf("ipfw: opcode %d unimplemented\n", 3687 cmd->opcode); 3688 break; 3689 3690 case O_GID: 3691 case O_UID: 3692 /* 3693 * We only check offset == 0 && proto != 0, 3694 * as this ensures that we have an IPv4 3695 * packet with the ports info. 3696 */ 3697 if (lc.offset!=0) 3698 break; 3699 3700 match = ipfw_match_uid(&args->f_id, oif, 3701 cmd->opcode, 3702 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 3703 break; 3704 3705 case O_RECV: 3706 match = iface_match(m->m_pkthdr.rcvif, 3707 (ipfw_insn_if *)cmd); 3708 break; 3709 3710 case O_XMIT: 3711 match = iface_match(oif, (ipfw_insn_if *)cmd); 3712 break; 3713 3714 case O_VIA: 3715 match = iface_match(oif ? oif : 3716 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 3717 break; 3718 3719 case O_MACADDR2: 3720 if (args->eh != NULL) { /* have MAC header */ 3721 uint32_t *want = (uint32_t *) 3722 ((ipfw_insn_mac *)cmd)->addr; 3723 uint32_t *mask = (uint32_t *) 3724 ((ipfw_insn_mac *)cmd)->mask; 3725 uint32_t *hdr = (uint32_t *)args->eh; 3726 3727 match = 3728 (want[0] == (hdr[0] & mask[0]) && 3729 want[1] == (hdr[1] & mask[1]) && 3730 want[2] == (hdr[2] & mask[2])); 3731 } 3732 break; 3733 3734 case O_MAC_TYPE: 3735 if (args->eh != NULL) { 3736 uint16_t t = 3737 ntohs(args->eh->ether_type); 3738 uint16_t *p = 3739 ((ipfw_insn_u16 *)cmd)->ports; 3740 int i; 3741 3742 /* Special vlan handling */ 3743 if (m->m_flags & M_VLANTAG) 3744 t = ETHERTYPE_VLAN; 3745 3746 for (i = cmdlen - 1; !match && i > 0; 3747 i--, p += 2) { 3748 match = 3749 (t >= p[0] && t <= p[1]); 3750 } 3751 } 3752 break; 3753 3754 case O_FRAG: 3755 match = (hlen > 0 && lc.offset != 0); 3756 break; 3757 3758 case O_IPFRAG: 3759 if (hlen > 0) { 3760 uint16_t off; 3761 3762 if (args->eh != NULL) 3763 off = ntohs(ip->ip_off); 3764 else 3765 off = ip->ip_off; 3766 if (off & (IP_MF | IP_OFFMASK)) 3767 match = 1; 3768 } 3769 break; 3770 3771 case O_IN: /* "out" is "not in" */ 3772 match = (oif == NULL); 3773 break; 3774 3775 case O_LAYER2: 3776 match = (args->eh != NULL); 3777 break; 3778 3779 case O_PROTO: 3780 /* 3781 * We do not allow an arg of 0 so the 3782 * check of "proto" only suffices. 
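				 * (non-IP packets leave lc.proto at 0, so they
				 * can never match a valid O_PROTO argument)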
3783 */ 3784 match = (lc.proto == cmd->arg1); 3785 break; 3786 3787 case O_IP_SRC: 3788 match = (hlen > 0 && 3789 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3790 lc.src_ip.s_addr); 3791 break; 3792 3793 case O_IP_SRC_MASK: 3794 match = (hlen > 0 && 3795 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3796 (lc.src_ip.s_addr & 3797 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3798 break; 3799 3800 case O_IP_SRC_ME: 3801 if (hlen > 0) { 3802 struct ifnet *tif; 3803 3804 tif = INADDR_TO_IFP(&lc.src_ip); 3805 match = (tif != NULL); 3806 } 3807 break; 3808 3809 case O_IP_SRC_TABLE: 3810 match = ipfw_table_lookup(ctx, cmd->arg1, 3811 &lc.src_ip); 3812 break; 3813 3814 case O_IP_SRC_IFIP: 3815 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3816 &lc.src_ip); 3817 break; 3818 3819 case O_IP_DST_SET: 3820 case O_IP_SRC_SET: 3821 if (hlen > 0) { 3822 uint32_t *d = (uint32_t *)(cmd + 1); 3823 uint32_t addr = 3824 cmd->opcode == O_IP_DST_SET ? 3825 args->f_id.dst_ip : 3826 args->f_id.src_ip; 3827 3828 if (addr < d[0]) 3829 break; 3830 addr -= d[0]; /* subtract base */ 3831 match = 3832 (addr < cmd->arg1) && 3833 (d[1 + (addr >> 5)] & 3834 (1 << (addr & 0x1f))); 3835 } 3836 break; 3837 3838 case O_IP_DST: 3839 match = (hlen > 0 && 3840 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3841 lc.dst_ip.s_addr); 3842 break; 3843 3844 case O_IP_DST_MASK: 3845 match = (hlen > 0) && 3846 (((ipfw_insn_ip *)cmd)->addr.s_addr == 3847 (lc.dst_ip.s_addr & 3848 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3849 break; 3850 3851 case O_IP_DST_ME: 3852 if (hlen > 0) { 3853 struct ifnet *tif; 3854 3855 tif = INADDR_TO_IFP(&lc.dst_ip); 3856 match = (tif != NULL); 3857 } 3858 break; 3859 3860 case O_IP_DST_TABLE: 3861 match = ipfw_table_lookup(ctx, cmd->arg1, 3862 &lc.dst_ip); 3863 break; 3864 3865 case O_IP_DST_IFIP: 3866 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3867 &lc.dst_ip); 3868 break; 3869 3870 case O_IP_SRCPORT: 3871 case O_IP_DSTPORT: 3872 /* 3873 * offset == 0 && proto != 0 is enough 3874 * to guarantee that we have an IPv4 3875 * packet with port info. 3876 */ 3877 if ((lc.proto==IPPROTO_UDP || 3878 lc.proto==IPPROTO_TCP) 3879 && lc.offset == 0) { 3880 uint16_t x = 3881 (cmd->opcode == O_IP_SRCPORT) ? 
3882 lc.src_port : lc.dst_port; 3883 uint16_t *p = 3884 ((ipfw_insn_u16 *)cmd)->ports; 3885 int i; 3886 3887 for (i = cmdlen - 1; !match && i > 0; 3888 i--, p += 2) { 3889 match = 3890 (x >= p[0] && x <= p[1]); 3891 } 3892 } 3893 break; 3894 3895 case O_ICMPCODE: 3896 match = (lc.offset == 0 && 3897 lc.proto==IPPROTO_ICMP && 3898 icmpcode_match(ip, (ipfw_insn_u32 *)cmd)); 3899 break; 3900 3901 case O_ICMPTYPE: 3902 match = (lc.offset == 0 && 3903 lc.proto==IPPROTO_ICMP && 3904 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 3905 break; 3906 3907 case O_IPOPT: 3908 match = (hlen > 0 && ipopts_match(ip, cmd)); 3909 break; 3910 3911 case O_IPVER: 3912 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 3913 break; 3914 3915 case O_IPTTL: 3916 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 3917 break; 3918 3919 case O_IPID: 3920 match = (hlen > 0 && 3921 cmd->arg1 == ntohs(ip->ip_id)); 3922 break; 3923 3924 case O_IPLEN: 3925 match = (hlen > 0 && cmd->arg1 == lc.ip_len); 3926 break; 3927 3928 case O_IPPRECEDENCE: 3929 match = (hlen > 0 && 3930 (cmd->arg1 == (ip->ip_tos & 0xe0))); 3931 break; 3932 3933 case O_IPTOS: 3934 match = (hlen > 0 && 3935 flags_match(cmd, ip->ip_tos)); 3936 break; 3937 3938 case O_TCPFLAGS: 3939 match = (lc.proto == IPPROTO_TCP && 3940 lc.offset == 0 && 3941 flags_match(cmd, 3942 L3HDR(struct tcphdr,ip)->th_flags)); 3943 break; 3944 3945 case O_TCPOPTS: 3946 match = (lc.proto == IPPROTO_TCP && 3947 lc.offset == 0 && tcpopts_match(ip, cmd)); 3948 break; 3949 3950 case O_TCPSEQ: 3951 match = (lc.proto == IPPROTO_TCP && 3952 lc.offset == 0 && 3953 ((ipfw_insn_u32 *)cmd)->d[0] == 3954 L3HDR(struct tcphdr,ip)->th_seq); 3955 break; 3956 3957 case O_TCPACK: 3958 match = (lc.proto == IPPROTO_TCP && 3959 lc.offset == 0 && 3960 ((ipfw_insn_u32 *)cmd)->d[0] == 3961 L3HDR(struct tcphdr,ip)->th_ack); 3962 break; 3963 3964 case O_TCPWIN: 3965 match = (lc.proto == IPPROTO_TCP && 3966 lc.offset == 0 && 3967 cmd->arg1 == 3968 L3HDR(struct tcphdr,ip)->th_win); 3969 break; 3970 3971 case O_ESTAB: 3972 /* reject packets which have SYN only */ 3973 /* XXX should i also check for TH_ACK ? */ 3974 match = (lc.proto == IPPROTO_TCP && 3975 lc.offset == 0 && 3976 (L3HDR(struct tcphdr,ip)->th_flags & 3977 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 3978 break; 3979 3980 case O_LOG: 3981 if (fw_verbose) { 3982 ipfw_log(ctx, f, hlen, args->eh, m, 3983 oif); 3984 } 3985 match = 1; 3986 break; 3987 3988 case O_PROB: 3989 match = (krandom() < 3990 ((ipfw_insn_u32 *)cmd)->d[0]); 3991 break; 3992 3993 /* 3994 * The second set of opcodes represents 'actions', 3995 * i.e. the terminal part of a rule once the packet 3996 * matches all previous patterns. 3997 * Typically there is only one action for each rule, 3998 * and the opcode is stored at the end of the rule 3999 * (but there are exceptions -- see below). 4000 * 4001 * In general, here we set retval and terminate the 4002 * outer loop (would be a 'break 3' in some language, 4003 * but we need to do a 'goto done'). 4004 * 4005 * Exceptions: 4006 * O_COUNT and O_SKIPTO actions: 4007 * instead of terminating, we jump to the next rule 4008 * ('goto next_rule', equivalent to a 'break 2'), 4009 * or to the SKIPTO target ('goto again' after 4010 * having set f, cmd and l), respectively. 4011 * 4012 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes 4013 * are not real 'actions', and are stored right 4014 * before the 'action' part of the rule. 
4015			 * These opcodes try to install an entry in the
4016			 * state tables; if successful, we continue with
4017			 * the next opcode (match=1; break;), otherwise
4018			 * the packet must be dropped ('goto done' after
4019			 * setting retval).  If static rules are changed
4020			 * during the state installation, the packet will
4021			 * be dropped and the rule's stats will not be updated
4022			 * ('return IP_FW_DENY').
4023			 *
4024			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
4025			 * cause a lookup of the state table, and a jump
4026			 * to the 'action' part of the parent rule
4027			 * ('goto check_body') if an entry is found, or
4028			 * (CHECK_STATE only) a jump to the next rule if
4029			 * the entry is not found ('goto next_rule').
4030			 * The result of the lookup is cached, so that
4031			 * further instances of these opcodes become
4032			 * effectively NOPs.  If static rules are changed
4033			 * during the state lookup, the packet will
4034			 * be dropped and the rule's stats will not be updated
4035			 * ('return IP_FW_DENY').
4036			 */
4037			case O_REDIRECT:
4038				if (f->cross_rules == NULL) {
4039					/*
4040					 * This rule was not completely setup;
4041					 * move on to the next rule.
4042					 */
4043					goto next_rule;
4044				}
4045				/*
4046				 * Apply redirect only on input path and
4047				 * only to non-fragment TCP segments or
4048				 * UDP datagrams.
4049				 *
4050				 * Does _not_ work with layer2 filtering.
4051				 */
4052				if (oif != NULL || args->eh != NULL ||
4053				    (ip->ip_off & (IP_MF | IP_OFFMASK)) ||
4054				    (lc.proto != IPPROTO_TCP &&
4055				     lc.proto != IPPROTO_UDP))
4056					break;
4057				/* FALL THROUGH */
4058			case O_LIMIT:
4059			case O_KEEP_STATE:
4060				if (hlen == 0)
4061					break;
4062				s = ipfw_state_install(ctx, f,
4063				    (ipfw_insn_limit *)cmd, args, lc.tcp);
4064				if (s == NULL) {
4065					retval = IP_FW_DENY;
4066					goto done; /* error/limit violation */
4067				}
4068				s->st_pcnt++;
4069				s->st_bcnt += lc.ip_len;
4070
4071				if (s->st_type == O_REDIRECT) {
4072					struct in_addr oaddr;
4073					uint16_t oport;
4074					struct ipfw_xlat *slave_x, *x;
4075					struct ipfw_state *dup;
4076
4077					x = (struct ipfw_xlat *)s;
4078					ipfw_xlate(x, m, &oaddr, &oport);
4079					m = ipfw_rehashm(m, hlen, args, &lc,
4080					    &ip);
4081					if (m == NULL) {
4082						ipfw_state_del(ctx, s);
4083						goto pullup_failed;
4084					}
4085
4086					cpuid = netisr_hashcpu(
4087					    m->m_pkthdr.hash);
4088
4089					slave_x = (struct ipfw_xlat *)
4090					    ipfw_state_alloc(ctx, &args->f_id,
4091					    O_REDIRECT, f->cross_rules[cpuid],
4092					    lc.tcp);
4093					if (slave_x == NULL) {
4094						ipfw_state_del(ctx, s);
4095						retval = IP_FW_DENY;
4096						goto done;
4097					}
4098					slave_x->xlat_addr = oaddr.s_addr;
4099					slave_x->xlat_port = oport;
4100					slave_x->xlat_dir = MATCH_REVERSE;
4101					slave_x->xlat_flags |=
4102					    IPFW_STATE_F_XLATSRC |
4103					    IPFW_STATE_F_XLATSLAVE;
4104
4105					slave_x->xlat_pair = x;
4106					slave_x->xlat_pcpu = mycpuid;
4107					x->xlat_pair = slave_x;
4108					x->xlat_pcpu = cpuid;
4109
4110					ctx->ipfw_xlated++;
4111					if (cpuid != mycpuid) {
4112						ctx->ipfw_xlate_split++;
4113						ipfw_xlate_redispatch(
4114						    m, cpuid, x,
4115						    IPFW_XLATE_INSERT |
4116						    IPFW_XLATE_FORWARD);
4117						args->m = NULL;
4118						return (IP_FW_REDISPATCH);
4119					}
4120
4121					dup = ipfw_state_link(ctx,
4122					    &slave_x->xlat_st);
4123					if (dup != NULL) {
4124						ctx->ipfw_xlate_conflicts++;
4125						if (IPFW_STATE_ISDEAD(dup)) {
4126							ipfw_state_remove(ctx,
4127							    dup);
4128							dup = ipfw_state_link(
4129							    ctx, &slave_x->xlat_st);
4130						}
4131						if (dup != NULL) {
4132							if (bootverbose) {
4133								kprintf("ipfw: "
4134								    "slave %u state "
4135								    "conflicts "
4136								    "%u state\n",
4137								    x->xlat_type,
4138								    s->st_type);
4139							}
4140							ipfw_state_del(ctx, s);
4141 return (IP_FW_DENY); 4142 } 4143 ctx->ipfw_xlate_cresolved++; 4144 } 4145 } 4146 match = 1; 4147 break; 4148 4149 case O_PROBE_STATE: 4150 case O_CHECK_STATE: 4151 /* 4152 * States are checked at the first keep-state or 4153 * check-state occurrence, with the result 4154 * being stored in dyn_dir. The compiler 4155 * introduces a PROBE_STATE instruction for 4156 * us when we have a KEEP_STATE/LIMIT/RDR 4157 * (because PROBE_STATE needs to be run first). 4158 */ 4159 s = NULL; 4160 if (dyn_dir == MATCH_UNKNOWN) { 4161 s = ipfw_state_lookup(ctx, 4162 &args->f_id, &dyn_dir, lc.tcp); 4163 } 4164 if (s == NULL || 4165 (s->st_type == O_REDIRECT && 4166 (args->eh != NULL || 4167 (ip->ip_off & (IP_MF | IP_OFFMASK)) || 4168 (lc.proto != IPPROTO_TCP && 4169 lc.proto != IPPROTO_UDP)))) { 4170 /* 4171 * State not found. If CHECK_STATE, 4172 * skip to the next rule; if PROBE_STATE, 4173 * just ignore and continue with the next 4174 * opcode. 4175 */ 4176 if (cmd->opcode == O_CHECK_STATE) 4177 goto next_rule; 4178 match = 1; 4179 break; 4180 } 4181 4182 s->st_pcnt++; 4183 s->st_bcnt += lc.ip_len; 4184 4185 if (s->st_type == O_REDIRECT) { 4186 struct ipfw_xlat *x = 4187 (struct ipfw_xlat *)s; 4188 4189 if (oif != NULL && 4190 x->xlat_ifp == NULL) { 4191 KASSERT(x->xlat_flags & 4192 IPFW_STATE_F_XLATSLAVE, 4193 ("master rdr state " 4194 "missing ifp")); 4195 x->xlat_ifp = oif; 4196 } else if ( 4197 (oif != NULL && x->xlat_ifp != oif) || 4198 (oif == NULL && 4199 x->xlat_ifp != m->m_pkthdr.rcvif)) { 4200 retval = IP_FW_DENY; 4201 goto done; 4202 } 4203 if (x->xlat_dir != dyn_dir) 4204 goto skip_xlate; 4205 4206 ipfw_xlate(x, m, NULL, NULL); 4207 m = ipfw_rehashm(m, hlen, args, &lc, 4208 &ip); 4209 if (m == NULL) 4210 goto pullup_failed; 4211 4212 cpuid = netisr_hashcpu( 4213 m->m_pkthdr.hash); 4214 if (cpuid != mycpuid) { 4215 uint32_t xlate = 0; 4216 4217 if (oif != NULL) { 4218 xlate |= 4219 IPFW_XLATE_OUTPUT; 4220 } 4221 if (dyn_dir == MATCH_FORWARD) { 4222 xlate |= 4223 IPFW_XLATE_FORWARD; 4224 } 4225 ipfw_xlate_redispatch(m, cpuid, 4226 x, xlate); 4227 args->m = NULL; 4228 return (IP_FW_REDISPATCH); 4229 } 4230 4231 KKASSERT(x->xlat_pcpu == mycpuid); 4232 ipfw_state_update(&args->f_id, dyn_dir, 4233 lc.tcp, &x->xlat_pair->xlat_st); 4234 } 4235 skip_xlate: 4236 /* 4237 * Found a rule from a state; jump to the 4238 * 'action' part of the rule. 4239 */ 4240 f = s->st_rule; 4241 KKASSERT(f->cpuid == mycpuid); 4242 4243 cmd = ACTION_PTR(f); 4244 l = f->cmd_len - f->act_ofs; 4245 dyn_f = f; 4246 goto check_body; 4247 4248 case O_ACCEPT: 4249 retval = IP_FW_PASS; /* accept */ 4250 goto done; 4251 4252 case O_DEFRAG: 4253 if (f->cross_rules == NULL) { 4254 /* 4255 * This rule was not completely set up; 4256 * move on to the next rule. 4257 */ 4258 goto next_rule; 4259 } 4260 4261 /* 4262 * Don't defrag for l2 packets, output packets 4263 * or non-fragments.
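 * (A datagram counts as a fragment when IP_MF is set or its fragment offset is non-zero, i.e. ip->ip_off & (IP_MF | IP_OFFMASK) != 0, which is exactly the test below.)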
4264 */ 4265 if (oif != NULL || args->eh != NULL || 4266 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0) 4267 goto next_rule; 4268 4269 ctx->ipfw_frags++; 4270 m = ip_reass(m); 4271 args->m = m; 4272 if (m == NULL) { 4273 retval = IP_FW_PASS; 4274 goto done; 4275 } 4276 ctx->ipfw_defraged++; 4277 KASSERT((m->m_flags & M_HASH) == 0, 4278 ("hash not cleared")); 4279 4280 /* Update statistics */ 4281 f->pcnt++; 4282 f->bcnt += lc.ip_len; 4283 f->timestamp = time_second; 4284 4285 ip = mtod(m, struct ip *); 4286 hlen = ip->ip_hl << 2; 4287 ip->ip_len += hlen; 4288 4289 ip->ip_len = htons(ip->ip_len); 4290 ip->ip_off = htons(ip->ip_off); 4291 4292 ip_hashfn(&m, 0); 4293 args->m = m; 4294 if (m == NULL) 4295 goto pullup_failed; 4296 4297 KASSERT(m->m_flags & M_HASH, ("no hash")); 4298 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 4299 if (cpuid != mycpuid) { 4300 /* 4301 * NOTE: 4302 * ip_len/ip_off are in network byte 4303 * order. 4304 */ 4305 ctx->ipfw_defrag_remote++; 4306 ipfw_defrag_redispatch(m, cpuid, f); 4307 args->m = NULL; 4308 return (IP_FW_REDISPATCH); 4309 } 4310 4311 /* 'm' might be changed by ip_hashfn(). */ 4312 ip = mtod(m, struct ip *); 4313 ip->ip_len = ntohs(ip->ip_len); 4314 ip->ip_off = ntohs(ip->ip_off); 4315 4316 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 4317 if (m == NULL) 4318 goto pullup_failed; 4319 4320 /* Move on. */ 4321 goto next_rule; 4322 4323 case O_PIPE: 4324 case O_QUEUE: 4325 args->rule = f; /* report matching rule */ 4326 args->cookie = cmd->arg1; 4327 retval = IP_FW_DUMMYNET; 4328 goto done; 4329 4330 case O_DIVERT: 4331 case O_TEE: 4332 if (args->eh) /* not on layer 2 */ 4333 break; 4334 4335 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 4336 sizeof(*divinfo), M_INTWAIT | M_NULLOK); 4337 if (mtag == NULL) { 4338 retval = IP_FW_DENY; 4339 goto done; 4340 } 4341 divinfo = m_tag_data(mtag); 4342 4343 divinfo->skipto = f->rulenum; 4344 divinfo->port = cmd->arg1; 4345 divinfo->tee = (cmd->opcode == O_TEE); 4346 m_tag_prepend(m, mtag); 4347 4348 args->cookie = cmd->arg1; 4349 retval = (cmd->opcode == O_DIVERT) ? 4350 IP_FW_DIVERT : IP_FW_TEE; 4351 goto done; 4352 4353 case O_COUNT: 4354 case O_SKIPTO: 4355 f->pcnt++; /* update stats */ 4356 f->bcnt += lc.ip_len; 4357 f->timestamp = time_second; 4358 if (cmd->opcode == O_COUNT) 4359 goto next_rule; 4360 /* handle skipto */ 4361 if (f->next_rule == NULL) 4362 lookup_next_rule(f); 4363 f = f->next_rule; 4364 goto again; 4365 4366 case O_REJECT: 4367 /* 4368 * Drop the packet and send a reject notice 4369 * if the packet is not ICMP (or is an ICMP 4370 * query), and it is not multicast/broadcast. 
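 * (Answering ICMP errors or multicast/broadcast traffic could trigger reply loops or amplification, so such packets fall through to O_DENY instead.)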
4371 */ 4372 if (hlen > 0 && 4373 (lc.proto != IPPROTO_ICMP || 4374 is_icmp_query(ip)) && 4375 !(m->m_flags & (M_BCAST|M_MCAST)) && 4376 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) { 4377 send_reject(args, cmd->arg1, 4378 lc.offset, lc.ip_len); 4379 retval = IP_FW_DENY; 4380 goto done; 4381 } 4382 /* FALLTHROUGH */ 4383 case O_DENY: 4384 retval = IP_FW_DENY; 4385 goto done; 4386 4387 case O_FORWARD_IP: 4388 if (args->eh) /* not valid on layer2 pkts */ 4389 break; 4390 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 4391 struct sockaddr_in *sin; 4392 4393 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 4394 sizeof(*sin), M_INTWAIT | M_NULLOK); 4395 if (mtag == NULL) { 4396 retval = IP_FW_DENY; 4397 goto done; 4398 } 4399 sin = m_tag_data(mtag); 4400 4401 /* Structure copy */ 4402 *sin = ((ipfw_insn_sa *)cmd)->sa; 4403 4404 m_tag_prepend(m, mtag); 4405 m->m_pkthdr.fw_flags |= 4406 IPFORWARD_MBUF_TAGGED; 4407 m->m_pkthdr.fw_flags &= 4408 ~BRIDGE_MBUF_TAGGED; 4409 } 4410 retval = IP_FW_PASS; 4411 goto done; 4412 4413 default: 4414 panic("-- unknown opcode %d", cmd->opcode); 4415 } /* end of switch() on opcodes */ 4416 4417 if (cmd->len & F_NOT) 4418 match = !match; 4419 4420 if (match) { 4421 if (cmd->len & F_OR) 4422 skip_or = 1; 4423 } else { 4424 if (!(cmd->len & F_OR)) /* not an OR block, */ 4425 break; /* try next rule */ 4426 } 4427 4428 } /* end of inner for, scan opcodes */ 4429 4430 next_rule:; /* try next rule */ 4431 4432 } /* end of outer for, scan rules */ 4433 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 4434 return IP_FW_DENY; 4435 4436 done: 4437 /* Update statistics */ 4438 f->pcnt++; 4439 f->bcnt += lc.ip_len; 4440 f->timestamp = time_second; 4441 return retval; 4442 4443 pullup_failed: 4444 if (fw_verbose) 4445 kprintf("pullup failed\n"); 4446 return IP_FW_DENY; 4447 } 4448 4449 static struct mbuf * 4450 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 4451 { 4452 struct m_tag *mtag; 4453 struct dn_pkt *pkt; 4454 ipfw_insn *cmd; 4455 const struct ipfw_flow_id *id; 4456 struct dn_flow_id *fid; 4457 4458 M_ASSERTPKTHDR(m); 4459 4460 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), 4461 M_INTWAIT | M_NULLOK); 4462 if (mtag == NULL) { 4463 m_freem(m); 4464 return (NULL); 4465 } 4466 m_tag_prepend(m, mtag); 4467 4468 pkt = m_tag_data(mtag); 4469 bzero(pkt, sizeof(*pkt)); 4470 4471 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 4472 if (cmd->opcode == O_LOG) 4473 cmd += F_LEN(cmd); 4474 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 4475 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode)); 4476 4477 pkt->dn_m = m; 4478 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 4479 pkt->ifp = fwa->oif; 4480 pkt->pipe_nr = pipe_nr; 4481 4482 pkt->cpuid = mycpuid; 4483 pkt->msgport = netisr_curport(); 4484 4485 id = &fwa->f_id; 4486 fid = &pkt->id; 4487 fid->fid_dst_ip = id->dst_ip; 4488 fid->fid_src_ip = id->src_ip; 4489 fid->fid_dst_port = id->dst_port; 4490 fid->fid_src_port = id->src_port; 4491 fid->fid_proto = id->proto; 4492 fid->fid_flags = id->flags; 4493 4494 ipfw_ref_rule(fwa->rule); 4495 pkt->dn_priv = fwa->rule; 4496 pkt->dn_unref_priv = ipfw_unref_rule; 4497 4498 if (cmd->opcode == O_PIPE) 4499 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 4500 4501 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 4502 return (m); 4503 } 4504 4505 /* 4506 * When a rule is added/deleted, clear the next_rule pointers in all rules. 4507 * These will be reconstructed on the fly as packets are matched. 
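 * (See O_SKIPTO above: lookup_next_rule() lazily repopulates f->next_rule the first time a skipto executes after a change.)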
4508 */ 4509 static void 4510 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 4511 { 4512 struct ip_fw *rule; 4513 4514 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 4515 rule->next_rule = NULL; 4516 } 4517 4518 static void 4519 ipfw_inc_static_count(struct ip_fw *rule) 4520 { 4521 /* Static rule's counts are updated only on CPU0 */ 4522 KKASSERT(mycpuid == 0); 4523 4524 static_count++; 4525 static_ioc_len += IOC_RULESIZE(rule); 4526 } 4527 4528 static void 4529 ipfw_dec_static_count(struct ip_fw *rule) 4530 { 4531 int l = IOC_RULESIZE(rule); 4532 4533 /* Static rule's counts are updated only on CPU0 */ 4534 KKASSERT(mycpuid == 0); 4535 4536 KASSERT(static_count > 0, ("invalid static count %u", static_count)); 4537 static_count--; 4538 4539 KASSERT(static_ioc_len >= l, 4540 ("invalid static len %u", static_ioc_len)); 4541 static_ioc_len -= l; 4542 } 4543 4544 static void 4545 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 4546 { 4547 if (fwmsg->sibling != NULL) { 4548 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 4549 fwmsg->sibling->sibling = rule; 4550 } 4551 fwmsg->sibling = rule; 4552 } 4553 4554 static struct ip_fw * 4555 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4556 { 4557 struct ip_fw *rule; 4558 4559 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 4560 4561 rule->act_ofs = ioc_rule->act_ofs; 4562 rule->cmd_len = ioc_rule->cmd_len; 4563 rule->rulenum = ioc_rule->rulenum; 4564 rule->set = ioc_rule->set; 4565 rule->usr_flags = ioc_rule->usr_flags; 4566 4567 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 4568 4569 rule->refcnt = 1; 4570 rule->cpuid = mycpuid; 4571 rule->rule_flags = rule_flags; 4572 4573 return rule; 4574 } 4575 4576 static void 4577 ipfw_add_rule_dispatch(netmsg_t nmsg) 4578 { 4579 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4580 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4581 struct ip_fw *rule; 4582 4583 ASSERT_NETISR_NCPUS(mycpuid); 4584 4585 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags); 4586 4587 /* 4588 * Insert rule into the pre-determined position 4589 */ 4590 if (fwmsg->prev_rule != NULL) { 4591 struct ip_fw *prev, *next; 4592 4593 prev = fwmsg->prev_rule; 4594 KKASSERT(prev->cpuid == mycpuid); 4595 4596 next = fwmsg->next_rule; 4597 KKASSERT(next->cpuid == mycpuid); 4598 4599 rule->next = next; 4600 prev->next = rule; 4601 4602 /* 4603 * Move to the position on the next CPU 4604 * before the msg is forwarded. 4605 */ 4606 fwmsg->prev_rule = prev->sibling; 4607 fwmsg->next_rule = next->sibling; 4608 } else { 4609 KKASSERT(fwmsg->next_rule == NULL); 4610 rule->next = ctx->ipfw_layer3_chain; 4611 ctx->ipfw_layer3_chain = rule; 4612 } 4613 4614 /* Link rule CPU sibling */ 4615 ipfw_link_sibling(fwmsg, rule); 4616 4617 ipfw_flush_rule_ptrs(ctx); 4618 4619 if (mycpuid == 0) { 4620 /* Statistics only need to be updated once */ 4621 ipfw_inc_static_count(rule); 4622 4623 /* Return the rule on CPU0 */ 4624 nmsg->lmsg.u.ms_resultp = rule; 4625 } 4626 4627 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) 4628 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp; 4629 4630 if (fwmsg->cross_rules != NULL) { 4631 /* Save rules for later use. 
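 * Each netisr records its per-CPU duplicate in this array so that ipfw_crossref_rule_dispatch() below can copy the completed array into every duplicate's cross_rules.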
*/ 4632 fwmsg->cross_rules[mycpuid] = rule; 4633 } 4634 4635 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4636 } 4637 4638 static void 4639 ipfw_crossref_rule_dispatch(netmsg_t nmsg) 4640 { 4641 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4642 struct ip_fw *rule = fwmsg->sibling; 4643 int sz = sizeof(struct ip_fw *) * netisr_ncpus; 4644 4645 ASSERT_NETISR_NCPUS(mycpuid); 4646 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF, 4647 ("not crossref rule")); 4648 4649 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK); 4650 memcpy(rule->cross_rules, fwmsg->cross_rules, sz); 4651 4652 fwmsg->sibling = rule->sibling; 4653 netisr_forwardmsg(&fwmsg->base, mycpuid + 1); 4654 } 4655 4656 /* 4657 * Add a new rule to the list. Copy the rule into a malloc'ed area, 4658 * then possibly assign it a rule number, and link it into the list. 4659 * Update the rule_number in the input struct so the caller knows 4660 * it as well. 4661 */ 4662 static void 4663 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4664 { 4665 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4666 struct netmsg_ipfw fwmsg; 4667 struct ip_fw *f, *prev, *rule; 4668 4669 ASSERT_NETISR0; 4670 4671 /* 4672 * If rulenum is 0, find the highest numbered rule before the 4673 * default rule, and add the auto-increment step to it. 4674 */ 4675 if (ioc_rule->rulenum == 0) { 4676 int step = autoinc_step; 4677 4678 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN && 4679 step <= IPFW_AUTOINC_STEP_MAX); 4680 4681 /* 4682 * Locate the highest numbered rule before default 4683 */ 4684 for (f = ctx->ipfw_layer3_chain; f; f = f->next) { 4685 if (f->rulenum == IPFW_DEFAULT_RULE) 4686 break; 4687 ioc_rule->rulenum = f->rulenum; 4688 } 4689 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step) 4690 ioc_rule->rulenum += step; 4691 } 4692 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE && 4693 ioc_rule->rulenum != 0, 4694 ("invalid rule num %d", ioc_rule->rulenum)); 4695 4696 /* 4697 * Now find the right place for the new rule in the sorted list. 4698 */ 4699 for (prev = NULL, f = ctx->ipfw_layer3_chain; f; 4700 prev = f, f = f->next) { 4701 if (f->rulenum > ioc_rule->rulenum) { 4702 /* Found the location */ 4703 break; 4704 } 4705 } 4706 KASSERT(f != NULL, ("no default rule?!")); 4707 4708 /* 4709 * Duplicate the rule onto each CPU. 4710 * The rule duplicated on CPU0 will be returned. 4711 */ 4712 bzero(&fwmsg, sizeof(fwmsg)); 4713 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4714 ipfw_add_rule_dispatch); 4715 fwmsg.ioc_rule = ioc_rule; 4716 fwmsg.prev_rule = prev; 4717 fwmsg.next_rule = prev == NULL ?
NULL : f; 4718 fwmsg.rule_flags = rule_flags; 4719 if (rule_flags & IPFW_RULE_F_CROSSREF) { 4720 fwmsg.cross_rules = kmalloc( 4721 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP, 4722 M_WAITOK | M_ZERO); 4723 } 4724 4725 netisr_domsg_global(&fwmsg.base); 4726 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); 4727 4728 rule = fwmsg.base.lmsg.u.ms_resultp; 4729 KKASSERT(rule != NULL && rule->cpuid == mycpuid); 4730 4731 if (fwmsg.cross_rules != NULL) { 4732 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, 4733 MSGF_PRIORITY, ipfw_crossref_rule_dispatch); 4734 fwmsg.sibling = rule; 4735 netisr_domsg_global(&fwmsg.base); 4736 KKASSERT(fwmsg.sibling == NULL); 4737 4738 kfree(fwmsg.cross_rules, M_TEMP); 4739 4740 #ifdef KLD_MODULE 4741 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 4742 #endif 4743 } 4744 4745 DPRINTF("++ installed rule %d, static count now %d\n", 4746 rule->rulenum, static_count); 4747 } 4748 4749 /* 4750 * Free storage associated with a static rule (including derived 4751 * states/tracks). 4752 * The caller is in charge of clearing rule pointers to avoid 4753 * dangling pointers. 4754 * @return a pointer to the next entry. 4755 * Arguments are not checked, so they had better be correct. 4756 */ 4757 static struct ip_fw * 4758 ipfw_delete_rule(struct ipfw_context *ctx, 4759 struct ip_fw *prev, struct ip_fw *rule) 4760 { 4761 struct ip_fw *n; 4762 4763 n = rule->next; 4764 if (prev == NULL) 4765 ctx->ipfw_layer3_chain = n; 4766 else 4767 prev->next = n; 4768 4769 /* Mark the rule as invalid */ 4770 rule->rule_flags |= IPFW_RULE_F_INVALID; 4771 rule->next_rule = NULL; 4772 rule->sibling = NULL; 4773 #ifdef foo 4774 /* Don't reset cpuid here; keep various assertions working */ 4775 rule->cpuid = -1; 4776 #endif 4777 4778 /* Statistics only need to be updated once */ 4779 if (mycpuid == 0) 4780 ipfw_dec_static_count(rule); 4781 4782 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) { 4783 /* Try to free this rule */ 4784 ipfw_free_rule(rule); 4785 } else { 4786 /* TODO: check staging area. */ 4787 if (mycpuid == 0) { 4788 rule->next = ipfw_gd.ipfw_crossref_free; 4789 ipfw_gd.ipfw_crossref_free = rule; 4790 } 4791 } 4792 4793 /* Return the next rule */ 4794 return n; 4795 } 4796 4797 static void 4798 ipfw_flush_dispatch(netmsg_t nmsg) 4799 { 4800 int kill_default = nmsg->lmsg.u.ms_result; 4801 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4802 struct ip_fw *rule; 4803 4804 ASSERT_NETISR_NCPUS(mycpuid); 4805 4806 /* 4807 * Flush states. 4808 */ 4809 ipfw_state_flush(ctx, NULL); 4810 KASSERT(ctx->ipfw_state_cnt == 0, 4811 ("%d pcpu states remain", ctx->ipfw_state_cnt)); 4812 ctx->ipfw_state_loosecnt = 0; 4813 ctx->ipfw_state_lastexp = 0; 4814 4815 /* 4816 * Flush tracks. 4817 */ 4818 ipfw_track_flush(ctx, NULL); 4819 ctx->ipfw_track_lastexp = 0; 4820 if (ctx->ipfw_trkcnt_spare != NULL) { 4821 kfree(ctx->ipfw_trkcnt_spare, M_IPFW); 4822 ctx->ipfw_trkcnt_spare = NULL; 4823 } 4824 4825 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ 4826 4827 while ((rule = ctx->ipfw_layer3_chain) != NULL && 4828 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) 4829 ipfw_delete_rule(ctx, NULL, rule); 4830 4831 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4832 } 4833 4834 /* 4835 * Deletes all rules from a chain (including the default rule 4836 * if the second argument is set).
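 * E.g. the IP_FW_FLUSH sockopt handler below calls ipfw_flush(0) so that the default rule survives.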
4837 */ 4838 static void 4839 ipfw_flush(int kill_default) 4840 { 4841 struct netmsg_base nmsg; 4842 #ifdef INVARIANTS 4843 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4844 int state_cnt; 4845 #endif 4846 4847 ASSERT_NETISR0; 4848 4849 /* 4850 * If 'kill_default' then caller has done the necessary 4851 * msgport syncing; unnecessary to do it again. 4852 */ 4853 if (!kill_default) { 4854 /* 4855 * Let ipfw_chk() know the rules are going to 4856 * be flushed, so it could jump directly to 4857 * the default rule. 4858 */ 4859 ipfw_flushing = 1; 4860 /* XXX use priority sync */ 4861 netmsg_service_sync(); 4862 } 4863 4864 /* 4865 * Press the 'flush' button 4866 */ 4867 bzero(&nmsg, sizeof(nmsg)); 4868 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4869 ipfw_flush_dispatch); 4870 nmsg.lmsg.u.ms_result = kill_default; 4871 netisr_domsg_global(&nmsg); 4872 ipfw_gd.ipfw_state_loosecnt = 0; 4873 ipfw_gd.ipfw_state_globexp = 0; 4874 ipfw_gd.ipfw_track_globexp = 0; 4875 4876 #ifdef INVARIANTS 4877 state_cnt = ipfw_state_cntcoll(); 4878 KASSERT(state_cnt == 0, ("%d states remain", state_cnt)); 4879 4880 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0, 4881 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt)); 4882 4883 if (kill_default) { 4884 KASSERT(static_count == 0, 4885 ("%u static rules remain", static_count)); 4886 KASSERT(static_ioc_len == 0, 4887 ("%u bytes of static rules remain", static_ioc_len)); 4888 } else { 4889 KASSERT(static_count == 1, 4890 ("%u static rules remain", static_count)); 4891 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 4892 ("%u bytes of static rules remain, should be %lu", 4893 static_ioc_len, 4894 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule))); 4895 } 4896 #endif 4897 4898 /* Flush is done */ 4899 ipfw_flushing = 0; 4900 } 4901 4902 static void 4903 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg) 4904 { 4905 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4906 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4907 struct ip_fw *rule, *prev; 4908 4909 ASSERT_NETISR_NCPUS(mycpuid); 4910 4911 rule = dmsg->start_rule; 4912 KKASSERT(rule->cpuid == mycpuid); 4913 dmsg->start_rule = rule->sibling; 4914 4915 prev = dmsg->prev_rule; 4916 if (prev != NULL) { 4917 KKASSERT(prev->cpuid == mycpuid); 4918 4919 /* 4920 * Move to the position on the next CPU 4921 * before the msg is forwarded. 4922 */ 4923 dmsg->prev_rule = prev->sibling; 4924 } 4925 4926 /* 4927 * flush pointers outside the loop, then delete all matching 4928 * rules. 'prev' remains the same throughout the cycle. 4929 */ 4930 ipfw_flush_rule_ptrs(ctx); 4931 while (rule && rule->rulenum == dmsg->rulenum) { 4932 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4933 /* Flush states generated by this rule. */ 4934 ipfw_state_flush(ctx, rule); 4935 } 4936 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4937 /* Flush tracks generated by this rule. 
*/ 4938 ipfw_track_flush(ctx, rule); 4939 } 4940 rule = ipfw_delete_rule(ctx, prev, rule); 4941 } 4942 4943 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4944 } 4945 4946 static int 4947 ipfw_alt_delete_rule(uint16_t rulenum) 4948 { 4949 struct ip_fw *prev, *rule; 4950 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4951 struct netmsg_del dmsg; 4952 4953 ASSERT_NETISR0; 4954 4955 /* 4956 * Locate the first rule to delete 4957 */ 4958 for (prev = NULL, rule = ctx->ipfw_layer3_chain; 4959 rule && rule->rulenum < rulenum; 4960 prev = rule, rule = rule->next) 4961 ; /* EMPTY */ 4962 if (rule->rulenum != rulenum) 4963 return EINVAL; 4964 4965 /* 4966 * Get rid of the rule duplications on all CPUs 4967 */ 4968 bzero(&dmsg, sizeof(dmsg)); 4969 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4970 ipfw_alt_delete_rule_dispatch); 4971 dmsg.prev_rule = prev; 4972 dmsg.start_rule = rule; 4973 dmsg.rulenum = rulenum; 4974 4975 netisr_domsg_global(&dmsg.base); 4976 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL); 4977 return 0; 4978 } 4979 4980 static void 4981 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg) 4982 { 4983 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4984 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4985 struct ip_fw *prev, *rule; 4986 #ifdef INVARIANTS 4987 int del = 0; 4988 #endif 4989 4990 ASSERT_NETISR_NCPUS(mycpuid); 4991 4992 ipfw_flush_rule_ptrs(ctx); 4993 4994 prev = NULL; 4995 rule = ctx->ipfw_layer3_chain; 4996 while (rule != NULL) { 4997 if (rule->set == dmsg->from_set) { 4998 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4999 /* Flush states generated by this rule. */ 5000 ipfw_state_flush(ctx, rule); 5001 } 5002 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 5003 /* Flush tracks generated by this rule. */ 5004 ipfw_track_flush(ctx, rule); 5005 } 5006 rule = ipfw_delete_rule(ctx, prev, rule); 5007 #ifdef INVARIANTS 5008 del = 1; 5009 #endif 5010 } else { 5011 prev = rule; 5012 rule = rule->next; 5013 } 5014 } 5015 KASSERT(del, ("no match set?!")); 5016 5017 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5018 } 5019 5020 static int 5021 ipfw_alt_delete_ruleset(uint8_t set) 5022 { 5023 struct netmsg_del dmsg; 5024 int del; 5025 struct ip_fw *rule; 5026 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5027 5028 ASSERT_NETISR0; 5029 5030 /* 5031 * Check whether the 'set' exists at all; bail out early 5032 * if it does not, since the dispatch above asserts that 5033 * at least one rule matches the set. 5034 */ 5035 del = 0; 5036 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5037 if (rule->set == set) 5038 del = 1; 5039 } 5040 if (!del) 5041 return 0; /* XXX EINVAL? */ 5042 5043 /* 5044 * Delete this set 5045 */ 5046 bzero(&dmsg, sizeof(dmsg)); 5047 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5048 ipfw_alt_delete_ruleset_dispatch); 5049 dmsg.from_set = set; 5050 netisr_domsg_global(&dmsg.base); 5051 5052 return 0; 5053 } 5054 5055 static void 5056 ipfw_alt_move_rule_dispatch(netmsg_t nmsg) 5057 { 5058 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5059 struct ip_fw *rule; 5060 5061 ASSERT_NETISR_NCPUS(mycpuid); 5062 5063 rule = dmsg->start_rule; 5064 KKASSERT(rule->cpuid == mycpuid); 5065 5066 /* 5067 * Move to the position on the next CPU 5068 * before the msg is forwarded.
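 * (rule->sibling points at the same rule's duplicate on the next CPU, so the next netisr can pick up its copy without walking its own chain.)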
5069 */ 5070 dmsg->start_rule = rule->sibling; 5071 5072 while (rule && rule->rulenum <= dmsg->rulenum) { 5073 if (rule->rulenum == dmsg->rulenum) 5074 rule->set = dmsg->to_set; 5075 rule = rule->next; 5076 } 5077 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5078 } 5079 5080 static int 5081 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) 5082 { 5083 struct netmsg_del dmsg; 5084 struct netmsg_base *nmsg; 5085 struct ip_fw *rule; 5086 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5087 5088 ASSERT_NETISR0; 5089 5090 /* 5091 * Locate first rule to move 5092 */ 5093 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; 5094 rule = rule->next) { 5095 if (rule->rulenum == rulenum && rule->set != set) 5096 break; 5097 } 5098 if (rule == NULL || rule->rulenum > rulenum) 5099 return 0; /* XXX error? */ 5100 5101 bzero(&dmsg, sizeof(dmsg)); 5102 nmsg = &dmsg.base; 5103 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5104 ipfw_alt_move_rule_dispatch); 5105 dmsg.start_rule = rule; 5106 dmsg.rulenum = rulenum; 5107 dmsg.to_set = set; 5108 5109 netisr_domsg_global(nmsg); 5110 KKASSERT(dmsg.start_rule == NULL); 5111 return 0; 5112 } 5113 5114 static void 5115 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg) 5116 { 5117 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5118 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5119 struct ip_fw *rule; 5120 5121 ASSERT_NETISR_NCPUS(mycpuid); 5122 5123 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5124 if (rule->set == dmsg->from_set) 5125 rule->set = dmsg->to_set; 5126 } 5127 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5128 } 5129 5130 static int 5131 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) 5132 { 5133 struct netmsg_del dmsg; 5134 struct netmsg_base *nmsg; 5135 5136 ASSERT_NETISR0; 5137 5138 bzero(&dmsg, sizeof(dmsg)); 5139 nmsg = &dmsg.base; 5140 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5141 ipfw_alt_move_ruleset_dispatch); 5142 dmsg.from_set = from_set; 5143 dmsg.to_set = to_set; 5144 5145 netisr_domsg_global(nmsg); 5146 return 0; 5147 } 5148 5149 static void 5150 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg) 5151 { 5152 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5153 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5154 struct ip_fw *rule; 5155 5156 ASSERT_NETISR_NCPUS(mycpuid); 5157 5158 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5159 if (rule->set == dmsg->from_set) 5160 rule->set = dmsg->to_set; 5161 else if (rule->set == dmsg->to_set) 5162 rule->set = dmsg->from_set; 5163 } 5164 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5165 } 5166 5167 static int 5168 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) 5169 { 5170 struct netmsg_del dmsg; 5171 struct netmsg_base *nmsg; 5172 5173 ASSERT_NETISR0; 5174 5175 bzero(&dmsg, sizeof(dmsg)); 5176 nmsg = &dmsg.base; 5177 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5178 ipfw_alt_swap_ruleset_dispatch); 5179 dmsg.from_set = set1; 5180 dmsg.to_set = set2; 5181 5182 netisr_domsg_global(nmsg); 5183 return 0; 5184 } 5185 5186 /* 5187 * Remove all rules with given number, and also do set manipulation. 5188 * 5189 * The argument is an uint32_t. 
The low 16 bits are the rule or set number, 5190 * the next 8 bits are the new set, and the top 8 bits are the command: 5191 * 5192 * 0 delete rules with given number 5193 * 1 delete rules with given set number 5194 * 2 move rules with given number to new set 5195 * 3 move rules with given set number to new set 5196 * 4 swap sets with given numbers 5197 */ 5198 static int 5199 ipfw_ctl_alter(uint32_t arg) 5200 { 5201 uint16_t rulenum; 5202 uint8_t cmd, new_set; 5203 int error = 0; 5204 5205 ASSERT_NETISR0; 5206 5207 rulenum = arg & 0xffff; 5208 cmd = (arg >> 24) & 0xff; 5209 new_set = (arg >> 16) & 0xff; 5210 5211 if (cmd > 4) 5212 return EINVAL; 5213 if (new_set >= IPFW_DEFAULT_SET) 5214 return EINVAL; 5215 if (cmd == 0 || cmd == 2) { 5216 if (rulenum == IPFW_DEFAULT_RULE) 5217 return EINVAL; 5218 } else { 5219 if (rulenum >= IPFW_DEFAULT_SET) 5220 return EINVAL; 5221 } 5222 5223 switch (cmd) { 5224 case 0: /* delete rules with given number */ 5225 error = ipfw_alt_delete_rule(rulenum); 5226 break; 5227 5228 case 1: /* delete all rules with given set number */ 5229 error = ipfw_alt_delete_ruleset(rulenum); 5230 break; 5231 5232 case 2: /* move rules with given number to new set */ 5233 error = ipfw_alt_move_rule(rulenum, new_set); 5234 break; 5235 5236 case 3: /* move rules with given set number to new set */ 5237 error = ipfw_alt_move_ruleset(rulenum, new_set); 5238 break; 5239 5240 case 4: /* swap two sets */ 5241 error = ipfw_alt_swap_ruleset(rulenum, new_set); 5242 break; 5243 } 5244 return error; 5245 } 5246 5247 /* 5248 * Clear counters for a specific rule. 5249 */ 5250 static void 5251 clear_counters(struct ip_fw *rule, int log_only) 5252 { 5253 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); 5254 5255 if (log_only == 0) { 5256 rule->bcnt = rule->pcnt = 0; 5257 rule->timestamp = 0; 5258 } 5259 if (l->o.opcode == O_LOG) 5260 l->log_left = l->max_log; 5261 } 5262 5263 static void 5264 ipfw_zero_entry_dispatch(netmsg_t nmsg) 5265 { 5266 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg; 5267 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5268 struct ip_fw *rule; 5269 5270 ASSERT_NETISR_NCPUS(mycpuid); 5271 5272 if (zmsg->rulenum == 0) { 5273 KKASSERT(zmsg->start_rule == NULL); 5274 5275 ctx->ipfw_norule_counter = 0; 5276 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5277 clear_counters(rule, zmsg->log_only); 5278 } else { 5279 struct ip_fw *start = zmsg->start_rule; 5280 5281 KKASSERT(start->cpuid == mycpuid); 5282 KKASSERT(start->rulenum == zmsg->rulenum); 5283 5284 /* 5285 * We can have multiple rules with the same number, so we 5286 * need to clear them all. 5287 */ 5288 for (rule = start; rule && rule->rulenum == zmsg->rulenum; 5289 rule = rule->next) 5290 clear_counters(rule, zmsg->log_only); 5291 5292 /* 5293 * Move to the position on the next CPU 5294 * before the msg is forwarded. 5295 */ 5296 zmsg->start_rule = start->sibling; 5297 } 5298 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5299 } 5300 5301 /* 5302 * Reset some or all counters on firewall rules. 5303 * @arg rulenum is 0 to clear all entries, or contains a specific 5304 * rule number. 5305 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
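 * For example (illustrative only): ipfw_ctl_zero_entry(100, 0) zeroes pcnt/bcnt and the timestamp of every rule numbered 100 on every CPU, while ipfw_ctl_zero_entry(0, 1) merely restores log_left to max_log on all logging rules; see clear_counters() above.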
5306 */ 5307 static int 5308 ipfw_ctl_zero_entry(int rulenum, int log_only) 5309 { 5310 struct netmsg_zent zmsg; 5311 struct netmsg_base *nmsg; 5312 const char *msg; 5313 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5314 5315 ASSERT_NETISR0; 5316 5317 bzero(&zmsg, sizeof(zmsg)); 5318 nmsg = &zmsg.base; 5319 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5320 ipfw_zero_entry_dispatch); 5321 zmsg.log_only = log_only; 5322 5323 if (rulenum == 0) { 5324 msg = log_only ? "ipfw: All logging counts reset.\n" 5325 : "ipfw: Accounting cleared.\n"; 5326 } else { 5327 struct ip_fw *rule; 5328 5329 /* 5330 * Locate the first rule with 'rulenum' 5331 */ 5332 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5333 if (rule->rulenum == rulenum) 5334 break; 5335 } 5336 if (rule == NULL) /* we did not find any matching rules */ 5337 return (EINVAL); 5338 zmsg.start_rule = rule; 5339 zmsg.rulenum = rulenum; 5340 5341 msg = log_only ? "ipfw: Entry %d logging count reset.\n" 5342 : "ipfw: Entry %d cleared.\n"; 5343 } 5344 netisr_domsg_global(nmsg); 5345 KKASSERT(zmsg.start_rule == NULL); 5346 5347 if (fw_verbose) 5348 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 5349 return (0); 5350 } 5351 5352 /* 5353 * Check the validity of the structure before insertion. 5354 * Fortunately rules are simple, so this mostly needs to check rule sizes. 5355 */ 5356 static int 5357 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) 5358 { 5359 int l, cmdlen = 0; 5360 int have_action = 0; 5361 ipfw_insn *cmd; 5362 5363 *rule_flags = 0; 5364 5365 /* Check for valid size */ 5366 if (size < sizeof(*rule)) { 5367 kprintf("ipfw: rule too short\n"); 5368 return EINVAL; 5369 } 5370 l = IOC_RULESIZE(rule); 5371 if (l != size) { 5372 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 5373 return EINVAL; 5374 } 5375 5376 /* Check rule number */ 5377 if (rule->rulenum == IPFW_DEFAULT_RULE) { 5378 kprintf("ipfw: invalid rule number\n"); 5379 return EINVAL; 5380 } 5381 5382 /* 5383 * Now go for the individual checks. Very simple ones, basically only 5384 * instruction sizes. 5385 */ 5386 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 5387 l -= cmdlen, cmd += cmdlen) { 5388 cmdlen = F_LEN(cmd); 5389 if (cmdlen > l) { 5390 kprintf("ipfw: opcode %d size truncated\n", 5391 cmd->opcode); 5392 return EINVAL; 5393 } 5394 5395 DPRINTF("ipfw: opcode %d\n", cmd->opcode); 5396 5397 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT || 5398 IPFW_ISXLAT(cmd->opcode)) { 5399 /* This rule will generate states.
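 * (The flags collected here drive later operations: rules marked F_GENSTATE/F_GENTRACK must have their derived states/tracks flushed when they are deleted, and F_CROSSREF rules get a per-CPU cross_rules array in ipfw_add_rule().)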
*/ 5400 *rule_flags |= IPFW_RULE_F_GENSTATE; 5401 if (cmd->opcode == O_LIMIT) 5402 *rule_flags |= IPFW_RULE_F_GENTRACK; 5403 } 5404 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode)) 5405 *rule_flags |= IPFW_RULE_F_CROSSREF; 5406 if (cmd->opcode == O_IP_SRC_IFIP || 5407 cmd->opcode == O_IP_DST_IFIP) { 5408 *rule_flags |= IPFW_RULE_F_DYNIFADDR; 5409 cmd->arg1 &= IPFW_IFIP_SETTINGS; 5410 } 5411 5412 switch (cmd->opcode) { 5413 case O_NOP: 5414 case O_PROBE_STATE: 5415 case O_KEEP_STATE: 5416 case O_PROTO: 5417 case O_IP_SRC_ME: 5418 case O_IP_DST_ME: 5419 case O_LAYER2: 5420 case O_IN: 5421 case O_FRAG: 5422 case O_IPFRAG: 5423 case O_IPOPT: 5424 case O_IPLEN: 5425 case O_IPID: 5426 case O_IPTOS: 5427 case O_IPPRECEDENCE: 5428 case O_IPTTL: 5429 case O_IPVER: 5430 case O_TCPWIN: 5431 case O_TCPFLAGS: 5432 case O_TCPOPTS: 5433 case O_ESTAB: 5434 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5435 goto bad_size; 5436 break; 5437 5438 case O_IP_SRC_TABLE: 5439 case O_IP_DST_TABLE: 5440 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5441 goto bad_size; 5442 if (cmd->arg1 >= ipfw_table_max) { 5443 kprintf("ipfw: invalid table id %u, max %d\n", 5444 cmd->arg1, ipfw_table_max); 5445 return EINVAL; 5446 } 5447 break; 5448 5449 case O_IP_SRC_IFIP: 5450 case O_IP_DST_IFIP: 5451 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip)) 5452 goto bad_size; 5453 break; 5454 5455 case O_ICMPCODE: 5456 case O_ICMPTYPE: 5457 if (cmdlen < F_INSN_SIZE(ipfw_insn_u32)) 5458 goto bad_size; 5459 break; 5460 5461 case O_UID: 5462 case O_GID: 5463 case O_IP_SRC: 5464 case O_IP_DST: 5465 case O_TCPSEQ: 5466 case O_TCPACK: 5467 case O_PROB: 5468 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 5469 goto bad_size; 5470 break; 5471 5472 case O_LIMIT: 5473 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 5474 goto bad_size; 5475 break; 5476 case O_REDIRECT: 5477 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr)) 5478 goto bad_size; 5479 break; 5480 5481 case O_LOG: 5482 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 5483 goto bad_size; 5484 5485 ((ipfw_insn_log *)cmd)->log_left = 5486 ((ipfw_insn_log *)cmd)->max_log; 5487 5488 break; 5489 5490 case O_IP_SRC_MASK: 5491 case O_IP_DST_MASK: 5492 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 5493 goto bad_size; 5494 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 5495 kprintf("ipfw: opcode %d, useless rule\n", 5496 cmd->opcode); 5497 return EINVAL; 5498 } 5499 break; 5500 5501 case O_IP_SRC_SET: 5502 case O_IP_DST_SET: 5503 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 5504 kprintf("ipfw: invalid set size %d\n", 5505 cmd->arg1); 5506 return EINVAL; 5507 } 5508 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 5509 (cmd->arg1+31)/32 ) 5510 goto bad_size; 5511 break; 5512 5513 case O_MACADDR2: 5514 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 5515 goto bad_size; 5516 break; 5517 5518 case O_MAC_TYPE: 5519 case O_IP_SRCPORT: 5520 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 5521 if (cmdlen < 2 || cmdlen > 31) 5522 goto bad_size; 5523 break; 5524 5525 case O_RECV: 5526 case O_XMIT: 5527 case O_VIA: 5528 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 5529 goto bad_size; 5530 break; 5531 5532 case O_PIPE: 5533 case O_QUEUE: 5534 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 5535 goto bad_size; 5536 goto check_action; 5537 5538 case O_FORWARD_IP: 5539 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 5540 goto bad_size; 5541 } else { 5542 in_addr_t fwd_addr; 5543 5544 fwd_addr = ((ipfw_insn_sa *)cmd)-> 5545 sa.sin_addr.s_addr; 5546 if (IN_MULTICAST(ntohl(fwd_addr))) { 5547 kprintf("ipfw: try forwarding to " 5548 "multicast address\n"); 5549 return 
EINVAL; 5550 } 5551 } 5552 goto check_action; 5553 5554 case O_FORWARD_MAC: /* XXX not implemented yet */ 5555 case O_CHECK_STATE: 5556 case O_COUNT: 5557 case O_ACCEPT: 5558 case O_DENY: 5559 case O_REJECT: 5560 case O_SKIPTO: 5561 case O_DIVERT: 5562 case O_TEE: 5563 case O_DEFRAG: 5564 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5565 goto bad_size; 5566 check_action: 5567 if (have_action) { 5568 kprintf("ipfw: opcode %d, multiple actions" 5569 " not allowed\n", 5570 cmd->opcode); 5571 return EINVAL; 5572 } 5573 have_action = 1; 5574 if (l != cmdlen) { 5575 kprintf("ipfw: opcode %d, action must be" 5576 " last opcode\n", 5577 cmd->opcode); 5578 return EINVAL; 5579 } 5580 break; 5581 default: 5582 kprintf("ipfw: opcode %d, unknown opcode\n", 5583 cmd->opcode); 5584 return EINVAL; 5585 } 5586 } 5587 if (have_action == 0) { 5588 kprintf("ipfw: missing action\n"); 5589 return EINVAL; 5590 } 5591 return 0; 5592 5593 bad_size: 5594 kprintf("ipfw: opcode %d size %d wrong\n", 5595 cmd->opcode, cmdlen); 5596 return EINVAL; 5597 } 5598 5599 static int 5600 ipfw_ctl_add_rule(struct sockopt *sopt) 5601 { 5602 struct ipfw_ioc_rule *ioc_rule; 5603 size_t size; 5604 uint32_t rule_flags; 5605 int error; 5606 5607 ASSERT_NETISR0; 5608 5609 size = sopt->sopt_valsize; 5610 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 5611 size < sizeof(*ioc_rule)) { 5612 return EINVAL; 5613 } 5614 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 5615 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 5616 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 5617 } 5618 ioc_rule = sopt->sopt_val; 5619 5620 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 5621 if (error) 5622 return error; 5623 5624 ipfw_add_rule(ioc_rule, rule_flags); 5625 5626 if (sopt->sopt_dir == SOPT_GET) 5627 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 5628 return 0; 5629 } 5630 5631 static void * 5632 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule, 5633 struct ipfw_ioc_rule *ioc_rule) 5634 { 5635 const struct ip_fw *sibling; 5636 #ifdef INVARIANTS 5637 int i; 5638 #endif 5639 5640 ASSERT_NETISR0; 5641 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0")); 5642 5643 ioc_rule->act_ofs = rule->act_ofs; 5644 ioc_rule->cmd_len = rule->cmd_len; 5645 ioc_rule->rulenum = rule->rulenum; 5646 ioc_rule->set = rule->set; 5647 ioc_rule->usr_flags = rule->usr_flags; 5648 5649 ioc_rule->set_disable = ctx->ipfw_set_disable; 5650 ioc_rule->static_count = static_count; 5651 ioc_rule->static_len = static_ioc_len; 5652 5653 /* 5654 * Visit (read-only) all of the rule's duplications to get 5655 * the necessary statistics 5656 */ 5657 #ifdef INVARIANTS 5658 i = 0; 5659 #endif 5660 ioc_rule->pcnt = 0; 5661 ioc_rule->bcnt = 0; 5662 ioc_rule->timestamp = 0; 5663 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 5664 ioc_rule->pcnt += sibling->pcnt; 5665 ioc_rule->bcnt += sibling->bcnt; 5666 if (sibling->timestamp > ioc_rule->timestamp) 5667 ioc_rule->timestamp = sibling->timestamp; 5668 #ifdef INVARIANTS 5669 ++i; 5670 #endif 5671 } 5672 KASSERT(i == netisr_ncpus, 5673 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus)); 5674 5675 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 5676 5677 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 5678 } 5679 5680 static boolean_t 5681 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state) 5682 { 5683 struct ipfw_ioc_flowid *ioc_id; 5684 5685 if (trk->tc_expire == 0) { 5686 /* Not a scanned one. 
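 * (tc_expire is refreshed from the per-CPU tracks by ipfw_state_copy_dispatch(); a zero value means no netisr has scanned this track yet, so there is nothing meaningful to report.)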
*/ 5687 return (FALSE); 5688 } 5689 5690 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ? 5691 0 : trk->tc_expire - time_uptime; 5692 ioc_state->pcnt = 0; 5693 ioc_state->bcnt = 0; 5694 5695 ioc_state->dyn_type = O_LIMIT_PARENT; 5696 ioc_state->count = trk->tc_count; 5697 5698 ioc_state->rulenum = trk->tc_rulenum; 5699 5700 ioc_id = &ioc_state->id; 5701 ioc_id->type = ETHERTYPE_IP; 5702 ioc_id->u.ip.proto = trk->tc_proto; 5703 ioc_id->u.ip.src_ip = trk->tc_saddr; 5704 ioc_id->u.ip.dst_ip = trk->tc_daddr; 5705 ioc_id->u.ip.src_port = trk->tc_sport; 5706 ioc_id->u.ip.dst_port = trk->tc_dport; 5707 5708 return (TRUE); 5709 } 5710 5711 static boolean_t 5712 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state) 5713 { 5714 struct ipfw_ioc_flowid *ioc_id; 5715 5716 if (IPFW_STATE_SCANSKIP(s)) 5717 return (FALSE); 5718 5719 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ? 5720 0 : s->st_expire - time_uptime; 5721 ioc_state->pcnt = s->st_pcnt; 5722 ioc_state->bcnt = s->st_bcnt; 5723 5724 ioc_state->dyn_type = s->st_type; 5725 ioc_state->count = 0; 5726 5727 ioc_state->rulenum = s->st_rule->rulenum; 5728 5729 ioc_id = &ioc_state->id; 5730 ioc_id->type = ETHERTYPE_IP; 5731 ioc_id->u.ip.proto = s->st_proto; 5732 ipfw_key_4tuple(&s->st_key, 5733 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port, 5734 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port); 5735 5736 if (IPFW_ISXLAT(s->st_type)) { 5737 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 5738 5739 if (x->xlat_port == 0) 5740 ioc_state->xlat_port = ioc_id->u.ip.dst_port; 5741 else 5742 ioc_state->xlat_port = ntohs(x->xlat_port); 5743 ioc_state->xlat_addr = ntohl(x->xlat_addr); 5744 5745 ioc_state->pcnt += x->xlat_pair->xlat_pcnt; 5746 ioc_state->bcnt += x->xlat_pair->xlat_bcnt; 5747 } 5748 5749 return (TRUE); 5750 } 5751 5752 static void 5753 ipfw_state_copy_dispatch(netmsg_t nmsg) 5754 { 5755 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg; 5756 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5757 const struct ipfw_state *s; 5758 const struct ipfw_track *t; 5759 5760 ASSERT_NETISR_NCPUS(mycpuid); 5761 KASSERT(nm->state_cnt < nm->state_cntmax, 5762 ("invalid state count %d, max %d", 5763 nm->state_cnt, nm->state_cntmax)); 5764 5765 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) { 5766 if (ipfw_state_copy(s, nm->ioc_state)) { 5767 nm->ioc_state++; 5768 nm->state_cnt++; 5769 if (nm->state_cnt == nm->state_cntmax) 5770 goto done; 5771 } 5772 } 5773 5774 /* 5775 * Prepare tracks in the global track tree for userland. 5776 */ 5777 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) { 5778 struct ipfw_trkcnt *trk; 5779 5780 if (t->t_count == NULL) /* anchor */ 5781 continue; 5782 trk = t->t_trkcnt; 5783 5784 /* 5785 * Only one netisr can run this function at 5786 * any time, and only this function accesses 5787 * trkcnt's tc_expire, so this is safe w/o 5788 * ipfw_gd.ipfw_trkcnt_token. 5789 */ 5790 if (trk->tc_expire > t->t_expire) 5791 continue; 5792 trk->tc_expire = t->t_expire; 5793 } 5794 5795 /* 5796 * Copy tracks in the global track tree to userland in 5797 * the last netisr. 
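 * By the time the last netisr runs, every CPU has refreshed tc_expire in the loop above, so the values copied out are consistent.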
5798 */ 5799 if (mycpuid == netisr_ncpus - 1) { 5800 struct ipfw_trkcnt *trk; 5801 5802 KASSERT(nm->state_cnt < nm->state_cntmax, 5803 ("invalid state count %d, max %d", 5804 nm->state_cnt, nm->state_cntmax)); 5805 5806 IPFW_TRKCNT_TOKGET; 5807 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) { 5808 if (ipfw_track_copy(trk, nm->ioc_state)) { 5809 nm->ioc_state++; 5810 nm->state_cnt++; 5811 if (nm->state_cnt == nm->state_cntmax) { 5812 IPFW_TRKCNT_TOKREL; 5813 goto done; 5814 } 5815 } 5816 } 5817 IPFW_TRKCNT_TOKREL; 5818 } 5819 done: 5820 if (nm->state_cnt == nm->state_cntmax) { 5821 /* No more space; done. */ 5822 netisr_replymsg(&nm->base, 0); 5823 } else { 5824 netisr_forwardmsg(&nm->base, mycpuid + 1); 5825 } 5826 } 5827 5828 static int 5829 ipfw_ctl_get_rules(struct sockopt *sopt) 5830 { 5831 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5832 struct ip_fw *rule; 5833 void *bp; 5834 size_t size; 5835 int state_cnt; 5836 5837 ASSERT_NETISR0; 5838 5839 /* 5840 * Pass up a copy of the current rules. Static rules 5841 * come first (the last of which has number IPFW_DEFAULT_RULE), 5842 * followed by a possibly empty list of states. 5843 */ 5844 5845 size = static_ioc_len; /* size of static rules */ 5846 5847 /* 5848 * Size of the states. 5849 * XXX take tracks as state for userland compat. 5850 */ 5851 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt; 5852 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */ 5853 size += state_cnt * sizeof(struct ipfw_ioc_state); 5854 5855 if (sopt->sopt_valsize < size) { 5856 /* short length, no need to return incomplete rules */ 5857 /* XXX: if superuser, no need to zero buffer */ 5858 bzero(sopt->sopt_val, sopt->sopt_valsize); 5859 return 0; 5860 } 5861 bp = sopt->sopt_val; 5862 5863 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5864 bp = ipfw_copy_rule(ctx, rule, bp); 5865 5866 if (state_cnt) { 5867 struct netmsg_cpstate nm; 5868 #ifdef INVARIANTS 5869 size_t old_size = size; 5870 #endif 5871 5872 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5873 MSGF_PRIORITY, ipfw_state_copy_dispatch); 5874 nm.ioc_state = bp; 5875 nm.state_cntmax = state_cnt; 5876 nm.state_cnt = 0; 5877 netisr_domsg_global(&nm.base); 5878 5879 /* 5880 * The number of states may have shrunk after the snapshot 5881 * of the state count was taken. To give the user a correct 5882 * state count, nm.state_cnt is used to recalculate 5883 * the actual size.
5884 */ 5885 size = static_ioc_len + 5886 (nm.state_cnt * sizeof(struct ipfw_ioc_state)); 5887 KKASSERT(size <= old_size); 5888 } 5889 5890 sopt->sopt_valsize = size; 5891 return 0; 5892 } 5893 5894 static void 5895 ipfw_set_disable_dispatch(netmsg_t nmsg) 5896 { 5897 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5898 5899 ASSERT_NETISR_NCPUS(mycpuid); 5900 5901 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32; 5902 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5903 } 5904 5905 static void 5906 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 5907 { 5908 struct netmsg_base nmsg; 5909 uint32_t set_disable; 5910 5911 ASSERT_NETISR0; 5912 5913 /* IPFW_DEFAULT_SET is always enabled */ 5914 enable |= (1 << IPFW_DEFAULT_SET); 5915 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 5916 5917 bzero(&nmsg, sizeof(nmsg)); 5918 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5919 ipfw_set_disable_dispatch); 5920 nmsg.lmsg.u.ms_result32 = set_disable; 5921 5922 netisr_domsg_global(&nmsg); 5923 } 5924 5925 static void 5926 ipfw_table_create_dispatch(netmsg_t nm) 5927 { 5928 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5929 int tblid = nm->lmsg.u.ms_result; 5930 5931 ASSERT_NETISR_NCPUS(mycpuid); 5932 5933 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid], 5934 rn_cpumaskhead(mycpuid), 32)) 5935 panic("ipfw: create table%d failed", tblid); 5936 5937 netisr_forwardmsg(&nm->base, mycpuid + 1); 5938 } 5939 5940 static int 5941 ipfw_table_create(struct sockopt *sopt) 5942 { 5943 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5944 struct ipfw_ioc_table *tbl; 5945 struct netmsg_base nm; 5946 5947 ASSERT_NETISR0; 5948 5949 if (sopt->sopt_valsize != sizeof(*tbl)) 5950 return (EINVAL); 5951 5952 tbl = sopt->sopt_val; 5953 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5954 return (EINVAL); 5955 5956 if (ctx->ipfw_tables[tbl->tableid] != NULL) 5957 return (EEXIST); 5958 5959 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5960 ipfw_table_create_dispatch); 5961 nm.lmsg.u.ms_result = tbl->tableid; 5962 netisr_domsg_global(&nm); 5963 5964 return (0); 5965 } 5966 5967 static void 5968 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn) 5969 { 5970 struct radix_node *ret; 5971 5972 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 5973 if (ret != rn) 5974 panic("deleted other table entry"); 5975 kfree(ret, M_IPFW); 5976 } 5977 5978 static int 5979 ipfw_table_killent(struct radix_node *rn, void *xrnh) 5980 { 5981 5982 ipfw_table_killrn(xrnh, rn); 5983 return (0); 5984 } 5985 5986 static void 5987 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, 5988 int destroy) 5989 { 5990 struct radix_node_head *rnh; 5991 5992 ASSERT_NETISR_NCPUS(mycpuid); 5993 5994 rnh = ctx->ipfw_tables[tableid]; 5995 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh); 5996 if (destroy) { 5997 Free(rnh); 5998 ctx->ipfw_tables[tableid] = NULL; 5999 } 6000 } 6001 6002 static void 6003 ipfw_table_flush_dispatch(netmsg_t nmsg) 6004 { 6005 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; 6006 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6007 6008 ASSERT_NETISR_NCPUS(mycpuid); 6009 6010 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); 6011 netisr_forwardmsg(&nm->base, mycpuid + 1); 6012 } 6013 6014 static void 6015 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) 6016 { 6017 int i; 6018 6019 ASSERT_NETISR_NCPUS(mycpuid); 6020 6021 for (i = 0; i < ipfw_table_max; ++i) { 6022 if (ctx->ipfw_tables[i] != NULL) 6023 
ipfw_table_flush_oncpu(ctx, i, destroy); 6024 } 6025 } 6026 6027 static void 6028 ipfw_table_flushall_dispatch(netmsg_t nmsg) 6029 { 6030 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6031 6032 ASSERT_NETISR_NCPUS(mycpuid); 6033 6034 ipfw_table_flushall_oncpu(ctx, 0); 6035 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6036 } 6037 6038 static int 6039 ipfw_table_flush(struct sockopt *sopt) 6040 { 6041 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6042 struct ipfw_ioc_table *tbl; 6043 struct netmsg_tblflush nm; 6044 6045 ASSERT_NETISR0; 6046 6047 if (sopt->sopt_valsize != sizeof(*tbl)) 6048 return (EINVAL); 6049 6050 tbl = sopt->sopt_val; 6051 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { 6052 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6053 MSGF_PRIORITY, ipfw_table_flushall_dispatch); 6054 netisr_domsg_global(&nm.base); 6055 return (0); 6056 } 6057 6058 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6059 return (EINVAL); 6060 6061 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6062 return (ENOENT); 6063 6064 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6065 ipfw_table_flush_dispatch); 6066 nm.tableid = tbl->tableid; 6067 nm.destroy = 0; 6068 if (sopt->sopt_name == IP_FW_TBL_DESTROY) 6069 nm.destroy = 1; 6070 netisr_domsg_global(&nm.base); 6071 6072 return (0); 6073 } 6074 6075 static int 6076 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) 6077 { 6078 int *cnt = xcnt; 6079 6080 (*cnt)++; 6081 return (0); 6082 } 6083 6084 static int 6085 ipfw_table_cpent(struct radix_node *rn, void *xcp) 6086 { 6087 struct ipfw_table_cp *cp = xcp; 6088 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6089 struct ipfw_ioc_tblent *ioc_te; 6090 #ifdef INVARIANTS 6091 int cnt; 6092 #endif 6093 6094 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", 6095 cp->te_idx, cp->te_cnt)); 6096 ioc_te = &cp->te[cp->te_idx]; 6097 6098 if (te->te_nodes->rn_mask != NULL) { 6099 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, 6100 *te->te_nodes->rn_mask); 6101 } else { 6102 ioc_te->netmask.sin_len = 0; 6103 } 6104 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); 6105 6106 ioc_te->use = te->te_use; 6107 ioc_te->last_used = te->te_lastuse; 6108 #ifdef INVARIANTS 6109 cnt = 1; 6110 #endif 6111 6112 while ((te = te->te_sibling) != NULL) { 6113 #ifdef INVARIANTS 6114 ++cnt; 6115 #endif 6116 ioc_te->use += te->te_use; 6117 if (te->te_lastuse > ioc_te->last_used) 6118 ioc_te->last_used = te->te_lastuse; 6119 } 6120 KASSERT(cnt == netisr_ncpus, 6121 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); 6122 6123 cp->te_idx++; 6124 6125 return (0); 6126 } 6127 6128 static int 6129 ipfw_table_get(struct sockopt *sopt) 6130 { 6131 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6132 struct radix_node_head *rnh; 6133 struct ipfw_ioc_table *tbl; 6134 struct ipfw_ioc_tblcont *cont; 6135 struct ipfw_table_cp cp; 6136 int cnt = 0, sz; 6137 6138 ASSERT_NETISR0; 6139 6140 if (sopt->sopt_valsize < sizeof(*tbl)) 6141 return (EINVAL); 6142 6143 tbl = sopt->sopt_val; 6144 if (tbl->tableid < 0) { 6145 struct ipfw_ioc_tbllist *list; 6146 int i; 6147 6148 /* 6149 * List available table ids. 
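 * (Requested by passing a negative tableid; the reply is an ipfw_ioc_tbllist sized to the number of tables currently in use.)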
6150 */ 6151 for (i = 0; i < ipfw_table_max; ++i) { 6152 if (ctx->ipfw_tables[i] != NULL) 6153 ++cnt; 6154 } 6155 6156 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); 6157 if (sopt->sopt_valsize < sz) { 6158 bzero(sopt->sopt_val, sopt->sopt_valsize); 6159 return (E2BIG); 6160 } 6161 list = sopt->sopt_val; 6162 list->tablecnt = cnt; 6163 6164 cnt = 0; 6165 for (i = 0; i < ipfw_table_max; ++i) { 6166 if (ctx->ipfw_tables[i] != NULL) { 6167 KASSERT(cnt < list->tablecnt, 6168 ("invalid idx %d, cnt %d", 6169 cnt, list->tablecnt)); 6170 list->tables[cnt++] = i; 6171 } 6172 } 6173 sopt->sopt_valsize = sz; 6174 return (0); 6175 } else if (tbl->tableid >= ipfw_table_max) { 6176 return (EINVAL); 6177 } 6178 6179 rnh = ctx->ipfw_tables[tbl->tableid]; 6180 if (rnh == NULL) 6181 return (ENOENT); 6182 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); 6183 6184 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); 6185 if (sopt->sopt_valsize < sz) { 6186 bzero(sopt->sopt_val, sopt->sopt_valsize); 6187 return (E2BIG); 6188 } 6189 cont = sopt->sopt_val; 6190 cont->entcnt = cnt; 6191 6192 cp.te = cont->ent; 6193 cp.te_idx = 0; 6194 cp.te_cnt = cnt; 6195 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); 6196 6197 sopt->sopt_valsize = sz; 6198 return (0); 6199 } 6200 6201 static void 6202 ipfw_table_add_dispatch(netmsg_t nmsg) 6203 { 6204 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6205 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6206 struct radix_node_head *rnh; 6207 struct ipfw_tblent *te; 6208 6209 ASSERT_NETISR_NCPUS(mycpuid); 6210 6211 rnh = ctx->ipfw_tables[nm->tableid]; 6212 6213 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO); 6214 te->te_nodes->rn_key = (char *)&te->te_key; 6215 memcpy(&te->te_key, nm->key, sizeof(te->te_key)); 6216 6217 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh, 6218 te->te_nodes) == NULL) { 6219 if (mycpuid == 0) { 6220 kfree(te, M_IPFW); 6221 netisr_replymsg(&nm->base, EEXIST); 6222 return; 6223 } 6224 panic("rnh_addaddr failed"); 6225 } 6226 6227 /* Link siblings. 
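 * Table entries, like rules, are duplicated on each CPU; te_sibling chains the duplicates so that ipfw_table_cpent() can sum te_use and take the most recent te_lastuse across all CPUs.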
*/ 6228 if (nm->sibling != NULL) 6229 nm->sibling->te_sibling = te; 6230 nm->sibling = te; 6231 6232 netisr_forwardmsg(&nm->base, mycpuid + 1); 6233 } 6234 6235 static void 6236 ipfw_table_del_dispatch(netmsg_t nmsg) 6237 { 6238 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6239 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6240 struct radix_node_head *rnh; 6241 struct radix_node *rn; 6242 6243 ASSERT_NETISR_NCPUS(mycpuid); 6244 6245 rnh = ctx->ipfw_tables[nm->tableid]; 6246 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh); 6247 if (rn == NULL) { 6248 if (mycpuid == 0) { 6249 netisr_replymsg(&nm->base, ESRCH); 6250 return; 6251 } 6252 panic("rnh_deladdr failed"); 6253 } 6254 kfree(rn, M_IPFW); 6255 6256 netisr_forwardmsg(&nm->base, mycpuid + 1); 6257 } 6258 6259 static int 6260 ipfw_table_alt(struct sockopt *sopt) 6261 { 6262 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6263 struct ipfw_ioc_tblcont *tbl; 6264 struct ipfw_ioc_tblent *te; 6265 struct sockaddr_in key0; 6266 struct sockaddr *netmask = NULL, *key; 6267 struct netmsg_tblent nm; 6268 6269 ASSERT_NETISR0; 6270 6271 if (sopt->sopt_valsize != sizeof(*tbl)) 6272 return (EINVAL); 6273 tbl = sopt->sopt_val; 6274 6275 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6276 return (EINVAL); 6277 if (tbl->entcnt != 1) 6278 return (EINVAL); 6279 6280 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6281 return (ENOENT); 6282 te = &tbl->ent[0]; 6283 6284 if (te->key.sin_family != AF_INET || 6285 te->key.sin_port != 0 || 6286 te->key.sin_len != sizeof(struct sockaddr_in)) 6287 return (EINVAL); 6288 key = (struct sockaddr *)&te->key; 6289 6290 if (te->netmask.sin_len != 0) { 6291 if (te->netmask.sin_port != 0 || 6292 te->netmask.sin_len > sizeof(struct sockaddr_in)) 6293 return (EINVAL); 6294 netmask = (struct sockaddr *)&te->netmask; 6295 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask); 6296 key = (struct sockaddr *)&key0; 6297 } 6298 6299 if (sopt->sopt_name == IP_FW_TBL_ADD) { 6300 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6301 MSGF_PRIORITY, ipfw_table_add_dispatch); 6302 } else { 6303 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6304 MSGF_PRIORITY, ipfw_table_del_dispatch); 6305 } 6306 nm.key = key; 6307 nm.netmask = netmask; 6308 nm.tableid = tbl->tableid; 6309 nm.sibling = NULL; 6310 return (netisr_domsg_global(&nm.base)); 6311 } 6312 6313 static int 6314 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused) 6315 { 6316 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6317 6318 te->te_use = 0; 6319 te->te_lastuse = 0; 6320 return (0); 6321 } 6322 6323 static void 6324 ipfw_table_zero_dispatch(netmsg_t nmsg) 6325 { 6326 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6327 struct radix_node_head *rnh; 6328 6329 ASSERT_NETISR_NCPUS(mycpuid); 6330 6331 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result]; 6332 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6333 6334 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6335 } 6336 6337 static void 6338 ipfw_table_zeroall_dispatch(netmsg_t nmsg) 6339 { 6340 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6341 int i; 6342 6343 ASSERT_NETISR_NCPUS(mycpuid); 6344 6345 for (i = 0; i < ipfw_table_max; ++i) { 6346 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6347 6348 if (rnh != NULL) 6349 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6350 } 6351 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6352 } 6353 6354 static int 6355 ipfw_table_zero(struct sockopt *sopt) 6356 { 6357 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6358 struct netmsg_base 
static int
ipfw_table_alt(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_ioc_tblcont *tbl;
	struct ipfw_ioc_tblent *te;
	struct sockaddr_in key0;
	struct sockaddr *netmask = NULL, *key;
	struct netmsg_tblent nm;

	ASSERT_NETISR0;

	if (sopt->sopt_valsize != sizeof(*tbl))
		return (EINVAL);
	tbl = sopt->sopt_val;

	if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
		return (EINVAL);
	if (tbl->entcnt != 1)
		return (EINVAL);

	if (ctx->ipfw_tables[tbl->tableid] == NULL)
		return (ENOENT);
	te = &tbl->ent[0];

	if (te->key.sin_family != AF_INET ||
	    te->key.sin_port != 0 ||
	    te->key.sin_len != sizeof(struct sockaddr_in))
		return (EINVAL);
	key = (struct sockaddr *)&te->key;

	if (te->netmask.sin_len != 0) {
		if (te->netmask.sin_port != 0 ||
		    te->netmask.sin_len > sizeof(struct sockaddr_in))
			return (EINVAL);
		netmask = (struct sockaddr *)&te->netmask;
		sa_maskedcopy(key, (struct sockaddr *)&key0, netmask);
		key = (struct sockaddr *)&key0;
	}

	if (sopt->sopt_name == IP_FW_TBL_ADD) {
		netmsg_init(&nm.base, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_table_add_dispatch);
	} else {
		netmsg_init(&nm.base, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_table_del_dispatch);
	}
	nm.key = key;
	nm.netmask = netmask;
	nm.tableid = tbl->tableid;
	nm.sibling = NULL;
	return (netisr_domsg_global(&nm.base));
}

static int
ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
{
	struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

	te->te_use = 0;
	te->te_lastuse = 0;
	return (0);
}

static void
ipfw_table_zero_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct radix_node_head *rnh;

	ASSERT_NETISR_NCPUS(mycpuid);

	rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
	rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_table_zeroall_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	int i;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (i = 0; i < ipfw_table_max; ++i) {
		struct radix_node_head *rnh = ctx->ipfw_tables[i];

		if (rnh != NULL)
			rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
	}
	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static int
ipfw_table_zero(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_base nm;
	struct ipfw_ioc_table *tbl;

	ASSERT_NETISR0;

	if (sopt->sopt_valsize != sizeof(*tbl))
		return (EINVAL);
	tbl = sopt->sopt_val;

	if (tbl->tableid < 0) {
		netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
		    ipfw_table_zeroall_dispatch);
		netisr_domsg_global(&nm);
		return (0);
	} else if (tbl->tableid >= ipfw_table_max) {
		return (EINVAL);
	} else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
		return (ENOENT);
	}

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_table_zero_dispatch);
	nm.lmsg.u.ms_result = tbl->tableid;
	netisr_domsg_global(&nm);

	return (0);
}

static int
ipfw_table_killexp(struct radix_node *rn, void *xnm)
{
	struct netmsg_tblexp *nm = xnm;
	struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

	if (te->te_expired) {
		ipfw_table_killrn(nm->rnh, rn);
		nm->expcnt++;
	}
	return (0);
}

static void
ipfw_table_expire_dispatch(netmsg_t nmsg)
{
	struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct radix_node_head *rnh;

	ASSERT_NETISR_NCPUS(mycpuid);

	rnh = ctx->ipfw_tables[nm->tableid];
	nm->rnh = rnh;
	rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);

	KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm->cnt * (mycpuid + 1), nm->expcnt));

	netisr_forwardmsg(&nm->base, mycpuid + 1);
}

static void
ipfw_table_expireall_dispatch(netmsg_t nmsg)
{
	struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	int i;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (i = 0; i < ipfw_table_max; ++i) {
		struct radix_node_head *rnh = ctx->ipfw_tables[i];

		if (rnh == NULL)
			continue;
		nm->rnh = rnh;
		rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
	}

	KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm->cnt * (mycpuid + 1), nm->expcnt));

	netisr_forwardmsg(&nm->base, mycpuid + 1);
}

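/*
 * Table expiration is a two-phase protocol.  ipfw_table_markexp()
 * below runs on netisr0 only: it takes the most recent te_lastuse over
 * an entry and all of its per-CPU siblings, and if that is at least
 * nm->expire seconds in the past, it marks the whole sibling chain
 * te_expired and counts the chain once in nm->cnt.  The expire
 * dispatchers above then sweep every CPU with ipfw_table_killexp() to
 * free the marked entries; each CPU deletes exactly nm->cnt entries,
 * hence the assertion nm->expcnt == nm->cnt * (mycpuid + 1).
 */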
static int
ipfw_table_markexp(struct radix_node *rn, void *xnm)
{
	struct netmsg_tblexp *nm = xnm;
	struct ipfw_tblent *te;
	time_t lastuse;

	te = (struct ipfw_tblent *)rn;
	lastuse = te->te_lastuse;

	while ((te = te->te_sibling) != NULL) {
		if (te->te_lastuse > lastuse)
			lastuse = te->te_lastuse;
	}
	if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
		/* Not expired */
		return (0);
	}

	te = (struct ipfw_tblent *)rn;
	te->te_expired = 1;
	while ((te = te->te_sibling) != NULL)
		te->te_expired = 1;
	nm->cnt++;

	return (0);
}

static int
ipfw_table_expire(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_tblexp nm;
	struct ipfw_ioc_tblexp *tbl;
	struct radix_node_head *rnh;

	ASSERT_NETISR0;

	if (sopt->sopt_valsize != sizeof(*tbl))
		return (EINVAL);
	tbl = sopt->sopt_val;
	tbl->expcnt = 0;

	nm.expcnt = 0;
	nm.cnt = 0;
	nm.expire = tbl->expire;

	if (tbl->tableid < 0) {
		int i;

		for (i = 0; i < ipfw_table_max; ++i) {
			rnh = ctx->ipfw_tables[i];
			if (rnh == NULL)
				continue;
			rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
		}
		if (nm.cnt == 0) {
			/* No addresses can be expired. */
			return (0);
		}
		tbl->expcnt = nm.cnt;

		netmsg_init(&nm.base, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_table_expireall_dispatch);
		nm.tableid = -1;
		netisr_domsg_global(&nm.base);
		KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
		    ("not all expired addresses (%d) were deleted (%d)",
		     nm.cnt * netisr_ncpus, nm.expcnt));

		return (0);
	} else if (tbl->tableid >= ipfw_table_max) {
		return (EINVAL);
	}

	rnh = ctx->ipfw_tables[tbl->tableid];
	if (rnh == NULL)
		return (ENOENT);
	rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
	if (nm.cnt == 0) {
		/* No addresses can be expired. */
		return (0);
	}
	tbl->expcnt = nm.cnt;

	netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_table_expire_dispatch);
	nm.tableid = tbl->tableid;
	netisr_domsg_global(&nm.base);
	KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
	    ("not all expired addresses (%d) were deleted (%d)",
	     nm.cnt * netisr_ncpus, nm.expcnt));
	return (0);
}

static void
ipfw_crossref_free_dispatch(netmsg_t nmsg)
{
	struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;

	KKASSERT((rule->rule_flags &
	    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
	    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
	ipfw_free_rule(rule);

	netisr_replymsg(&nmsg->base, 0);
}

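/*
 * Reap rules parked on the crossref free list.  A rule may only be
 * freed once no in-flight mbuf references it anymore, i.e. the sum of
 * cross_refs over all of its per-CPU duplications has dropped to zero.
 * Rules still in flight stay on the list, and a one-second callout
 * retries the reap.
 */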
static void
ipfw_crossref_reap(void)
{
	struct ip_fw *rule, *prev = NULL;

	ASSERT_NETISR0;

	rule = ipfw_gd.ipfw_crossref_free;
	while (rule != NULL) {
		uint64_t inflight = 0;
		int i;

		for (i = 0; i < netisr_ncpus; ++i)
			inflight += rule->cross_rules[i]->cross_refs;
		if (inflight == 0) {
			struct ip_fw *f = rule;

			/*
			 * Unlink.
			 */
			rule = rule->next;
			if (prev != NULL)
				prev->next = rule;
			else
				ipfw_gd.ipfw_crossref_free = rule;

			/*
			 * Free.
			 */
			for (i = 1; i < netisr_ncpus; ++i) {
				struct netmsg_base nm;

				netmsg_init(&nm, NULL, &curthread->td_msgport,
				    MSGF_PRIORITY, ipfw_crossref_free_dispatch);
				nm.lmsg.u.ms_resultp = f->cross_rules[i];
				netisr_domsg(&nm, i);
			}
			KKASSERT((f->rule_flags &
			    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
			    (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
			ipfw_unref_rule(f);
		} else {
			prev = rule;
			rule = rule->next;
		}
	}

	if (ipfw_gd.ipfw_crossref_free != NULL) {
		callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
		    ipfw_crossref_timeo, NULL);
	}
}

/*
 * {set|get}sockopt parser.
 */
static int
ipfw_ctl(struct sockopt *sopt)
{
	int error, rulenum;
	uint32_t *masks;
	size_t size;

	ASSERT_NETISR0;

	error = 0;

	switch (sopt->sopt_name) {
	case IP_FW_GET:
		error = ipfw_ctl_get_rules(sopt);
		break;

	case IP_FW_FLUSH:
		ipfw_flush(0 /* keep default rule */);
		break;

	case IP_FW_ADD:
		error = ipfw_ctl_add_rule(sopt);
		break;

	case IP_FW_DEL:
		/*
		 * IP_FW_DEL is used for deleting single rules or sets,
		 * and (ab)used to atomically manipulate sets.
		 * Argument size is used to distinguish between the two:
		 *	sizeof(uint32_t)
		 *		delete single rule or set of rules,
		 *		or reassign rules (or sets) to a different set.
		 *	2 * sizeof(uint32_t)
		 *		atomic disable/enable sets.
		 *		first uint32_t contains sets to be disabled,
		 *		second uint32_t contains sets to be enabled.
		 */
		masks = sopt->sopt_val;
		size = sopt->sopt_valsize;
		if (size == sizeof(*masks)) {
			/*
			 * Delete or reassign static rule
			 */
			error = ipfw_ctl_alter(masks[0]);
		} else if (size == (2 * sizeof(*masks))) {
			/*
			 * Set enable/disable
			 */
			ipfw_ctl_set_disable(masks[0], masks[1]);
		} else {
			error = EINVAL;
		}
		break;

	case IP_FW_ZERO:
	case IP_FW_RESETLOG: /* argument is an int, the rule number */
		rulenum = 0;

		if (sopt->sopt_val != 0) {
			error = soopt_to_kbuf(sopt, &rulenum,
			    sizeof(int), sizeof(int));
			if (error)
				break;
		}
		error = ipfw_ctl_zero_entry(rulenum,
		    sopt->sopt_name == IP_FW_RESETLOG);
		break;

	case IP_FW_TBL_CREATE:
		error = ipfw_table_create(sopt);
		break;

	case IP_FW_TBL_ADD:
	case IP_FW_TBL_DEL:
		error = ipfw_table_alt(sopt);
		break;

	case IP_FW_TBL_FLUSH:
	case IP_FW_TBL_DESTROY:
		error = ipfw_table_flush(sopt);
		break;

	case IP_FW_TBL_GET:
		error = ipfw_table_get(sopt);
		break;

	case IP_FW_TBL_ZERO:
		error = ipfw_table_zero(sopt);
		break;

	case IP_FW_TBL_EXPIRE:
		error = ipfw_table_expire(sopt);
		break;

	default:
		kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
		error = EINVAL;
	}

	ipfw_crossref_reap();
	return error;
}

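/*
 * Illustrative userland sketch of the IP_FW_DEL calling conventions
 * parsed above (not kernel code; how the single uint32_t encodes
 * "delete" versus "reassign" is up to ipfw_ctl_alter(), and the set
 * masks are assumed to use bit N for set N):
 *
 *	uint32_t arg = 100;			// e.g. delete rule 100
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, &arg, sizeof(arg));
 *
 *	uint32_t masks[2] = { 0x1, 0x2 };	// disable set 0, enable set 1
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, masks, sizeof(masks));
 */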
6760 */ 6761 if (s->st_type == O_ANCHOR) 6762 continue; 6763 6764 if (IPFW_STATE_ISDEAD(s)) { 6765 ipfw_state_remove(ctx, s); 6766 if (++expired >= ipfw_state_expire_max) { 6767 ipfw_keepalive_more(ctx); 6768 return; 6769 } 6770 continue; 6771 } 6772 6773 /* 6774 * Keep alive processing 6775 */ 6776 6777 if (s->st_proto != IPPROTO_TCP) 6778 continue; 6779 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN) 6780 continue; 6781 if (TIME_LEQ(time_uptime + dyn_keepalive_interval, 6782 s->st_expire)) 6783 continue; /* too early */ 6784 6785 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port, 6786 &id.dst_ip, &id.dst_port); 6787 ack_rev = s->st_ack_rev; 6788 ack_fwd = s->st_ack_fwd; 6789 6790 #define SEND_FWD 0x1 6791 #define SEND_REV 0x2 6792 6793 if (IPFW_ISXLAT(s->st_type)) { 6794 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 6795 6796 if (x->xlat_dir == MATCH_FORWARD) 6797 send_dir = SEND_FWD; 6798 else 6799 send_dir = SEND_REV; 6800 } else { 6801 send_dir = SEND_FWD | SEND_REV; 6802 } 6803 6804 if (send_dir & SEND_REV) 6805 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN); 6806 if (send_dir & SEND_FWD) 6807 send_pkt(&id, ack_fwd - 1, ack_rev, 0); 6808 6809 #undef SEND_FWD 6810 #undef SEND_REV 6811 6812 if (++kept >= ipfw_keepalive_max) { 6813 ipfw_keepalive_more(ctx); 6814 return; 6815 } 6816 } 6817 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6818 ipfw_keepalive_done(ctx); 6819 } 6820 6821 static void 6822 ipfw_keepalive_more_dispatch(netmsg_t nm) 6823 { 6824 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6825 struct ipfw_state *anchor; 6826 6827 ASSERT_NETISR_NCPUS(mycpuid); 6828 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6829 ("keepalive is not in progress")); 6830 6831 /* Reply ASAP */ 6832 netisr_replymsg(&nm->base, 0); 6833 6834 anchor = &ctx->ipfw_keepalive_anch; 6835 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6836 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6837 ipfw_keepalive_done(ctx); 6838 return; 6839 } 6840 ipfw_keepalive_loop(ctx, anchor); 6841 } 6842 6843 /* 6844 * This procedure is only used to handle keepalives. It is invoked 6845 * every dyn_keepalive_period 6846 */ 6847 static void 6848 ipfw_keepalive_dispatch(netmsg_t nm) 6849 { 6850 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6851 struct ipfw_state *anchor; 6852 6853 ASSERT_NETISR_NCPUS(mycpuid); 6854 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0, 6855 ("keepalive is in progress")); 6856 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE; 6857 6858 /* Reply ASAP */ 6859 crit_enter(); 6860 netisr_replymsg(&nm->base, 0); 6861 crit_exit(); 6862 6863 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6864 ipfw_keepalive_done(ctx); 6865 return; 6866 } 6867 6868 anchor = &ctx->ipfw_keepalive_anch; 6869 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 6870 ipfw_keepalive_loop(ctx, anchor); 6871 } 6872 6873 /* 6874 * This procedure is only used to handle keepalives. 
/*
 * Start a keepalive scan of this CPU's state list; dispatched through
 * the keepalive netmsg sent by ipfw_keepalive().
 */
static void
ipfw_keepalive_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_state *anchor;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0,
	    ("keepalive is in progress"));
	ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE;

	/* Reply ASAP */
	crit_enter();
	netisr_replymsg(&nm->base, 0);
	crit_exit();

	if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
		ipfw_keepalive_done(ctx);
		return;
	}

	anchor = &ctx->ipfw_keepalive_anch;
	TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
	ipfw_keepalive_loop(ctx, anchor);
}

/*
 * This procedure is only used to handle keepalives.  It is invoked
 * every dyn_keepalive_period.
 */
static void
ipfw_keepalive(void *dummy __unused)
{
	struct netmsg_base *msg;

	KKASSERT(mycpuid < netisr_ncpus);
	msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm;

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

static void
ipfw_ip_input_dispatch(netmsg_t nmsg)
{
	struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct mbuf *m = nm->m;
	struct ip_fw *rule = nm->arg1;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT(rule->cpuid == mycpuid,
	    ("rule does not belong to cpu%d", mycpuid));
	KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE,
	    ("mbuf does not have ipfw continue rule"));

	KASSERT(ctx->ipfw_cont_rule == NULL,
	    ("pending ipfw continue rule"));
	ctx->ipfw_cont_rule = rule;
	ip_input(m);

	/* May not have been cleared, if ipfw was unloaded/disabled. */
	ctx->ipfw_cont_rule = NULL;

	/*
	 * This rule is no longer used; decrement its cross_refs,
	 * so this rule can be deleted.
	 */
	rule->cross_refs--;
}

static void
ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule)
{
	struct netmsg_genpkt *nm;

	KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid));

	/*
	 * NOTE:
	 * Bump cross_refs to prevent this rule and its siblings
	 * from being deleted, while this mbuf is inflight.  The
	 * cross_refs of the sibling rule on the target cpu will
	 * be decremented once this mbuf is filtered on the
	 * target cpu.
	 */
	rule->cross_refs++;
	m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE;

	nm = &m->m_hdr.mh_genmsg;
	netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0,
	    ipfw_ip_input_dispatch);
	nm->m = m;
	nm->arg1 = rule->cross_rules[cpuid];
	netisr_sendmsg(&nm->base, cpuid);
}

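/*
 * Prime ip_fw_args for ipfw_chk() from the mbuf's firewall state:
 * a packet reinjected by dummynet carries its matching rule in a
 * PACKET_TAG_DUMMYNET m_tag, while a packet redispatched to this CPU
 * for continued filtering picks up the rule (and any pending xlat)
 * parked in the per-CPU context by ipfw_ip_input_dispatch().  In both
 * cases the corresponding mbuf flags are consumed here, so the saved
 * state is interpreted exactly once.
 */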
static void
ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif)
{

	args->flags = 0;
	args->rule = NULL;
	args->xlat = NULL;

	if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) {
		struct m_tag *mtag;

		/* Extract info from dummynet tag */
		mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
		KKASSERT(mtag != NULL);
		args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv;
		KKASSERT(args->rule != NULL);

		m_tag_delete(m, mtag);
		m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED;
	} else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) {
		struct ipfw_context *ctx = ipfw_ctx[mycpuid];

		KKASSERT(ctx->ipfw_cont_rule != NULL);
		args->rule = ctx->ipfw_cont_rule;
		ctx->ipfw_cont_rule = NULL;

		if (ctx->ipfw_cont_xlat != NULL) {
			args->xlat = ctx->ipfw_cont_xlat;
			ctx->ipfw_cont_xlat = NULL;
			if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) {
				args->flags |= IP_FWARG_F_XLATINS;
				m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS;
			}
			if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) {
				args->flags |= IP_FWARG_F_XLATFWD;
				m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD;
			}
		}
		KKASSERT((m->m_pkthdr.fw_flags &
		    (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0);

		args->flags |= IP_FWARG_F_CONT;
		m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE;
	}

	args->eh = NULL;
	args->oif = oif;
	args->m = m;
}

static int
ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
{
	struct ip_fw_args args;
	struct mbuf *m = *m0;
	int tee = 0, error = 0, ret;

	ipfw_init_args(&args, m, NULL);

	ret = ipfw_chk(&args);
	m = args.m;
	if (m == NULL) {
		if (ret != IP_FW_REDISPATCH)
			error = EACCES;
		goto back;
	}

	switch (ret) {
	case IP_FW_PASS:
		break;

	case IP_FW_DENY:
		m_freem(m);
		m = NULL;
		error = EACCES;
		break;

	case IP_FW_DUMMYNET:
		/* Send packet to the appropriate pipe */
		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
		break;

	case IP_FW_TEE:
		tee = 1;
		/* FALL THROUGH */

	case IP_FW_DIVERT:
		/*
		 * Must clear the bridge tag when handing the packet
		 * off to the divert path.
		 */
		m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED;
		if (ip_divert_p != NULL) {
			m = ip_divert_p(m, tee, 1);
		} else {
			m_freem(m);
			m = NULL;
			/* not sure this is the right error msg */
			error = EACCES;
		}
		break;

	default:
		panic("unknown ipfw return value: %d", ret);
	}
back:
	*m0 = m;
	return error;
}

static int
ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir)
{
	struct ip_fw_args args;
	struct mbuf *m = *m0;
	int tee = 0, error = 0, ret;

	ipfw_init_args(&args, m, ifp);

	ret = ipfw_chk(&args);
	m = args.m;
	if (m == NULL) {
		if (ret != IP_FW_REDISPATCH)
			error = EACCES;
		goto back;
	}

	switch (ret) {
	case IP_FW_PASS:
		break;

	case IP_FW_DENY:
		m_freem(m);
		m = NULL;
		error = EACCES;
		break;

	case IP_FW_DUMMYNET:
		m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args);
		break;

	case IP_FW_TEE:
		tee = 1;
		/* FALL THROUGH */

	case IP_FW_DIVERT:
		if (ip_divert_p != NULL) {
			m = ip_divert_p(m, tee, 0);
		} else {
			m_freem(m);
			m = NULL;
			/* not sure this is the right error msg */
			error = EACCES;
		}
		break;

	default:
		panic("unknown ipfw return value: %d", ret);
	}
back:
	*m0 = m;
	return error;
}

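/*
 * Attach the filtering entry points to the AF_INET pfil head, so that
 * ipfw_check_in()/ipfw_check_out() see every inbound and outbound IPv4
 * packet.  Both ipfw_hook() and ipfw_dehook() run in netisr0; the
 * fw_enable sysctl handler below uses them to toggle the firewall at
 * runtime.
 */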
static void
ipfw_hook(void)
{
	struct pfil_head *pfh;

	ASSERT_NETISR0;

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
	pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
}

static void
ipfw_dehook(void)
{
	struct pfil_head *pfh;

	ASSERT_NETISR0;

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
	pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
}

static int
ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS)
{
	int dyn_cnt;

	dyn_cnt = ipfw_state_cntcoll();
	dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt;

	return (sysctl_handle_int(oidp, &dyn_cnt, 0, req));
}

static int
ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS)
{
	int state_cnt;

	state_cnt = ipfw_state_cntcoll();
	return (sysctl_handle_int(oidp, &state_cnt, 0, req));
}

static int
ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS)
{
	int state_max, error;

	state_max = ipfw_state_max;
	error = sysctl_handle_int(oidp, &state_max, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	if (state_max < 1)
		return (EINVAL);

	ipfw_state_max_set(state_max);
	return (0);
}

static int
ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS)
{
	int dyn_max, error;

	dyn_max = ipfw_state_max + ipfw_track_max;

	error = sysctl_handle_int(oidp, &dyn_max, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	if (dyn_max < 2)
		return (EINVAL);

	ipfw_state_max_set(dyn_max / 2);
	ipfw_track_max = dyn_max / 2;
	return (0);
}

static void
ipfw_sysctl_enable_dispatch(netmsg_t nmsg)
{
	int enable = nmsg->lmsg.u.ms_result;

	ASSERT_NETISR0;

	if (fw_enable == enable)
		goto reply;

	fw_enable = enable;
	if (fw_enable)
		ipfw_hook();
	else
		ipfw_dehook();
reply:
	netisr_replymsg(&nmsg->base, 0);
}

static int
ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
{
	struct netmsg_base nmsg;
	int enable, error;

	enable = fw_enable;
	error = sysctl_handle_int(oidp, &enable, 0, req);
	if (error || req->newptr == NULL)
		return error;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_sysctl_enable_dispatch);
	nmsg.lmsg.u.ms_result = enable;

	return netisr_domsg(&nmsg, 0);
}

static int
ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
{
	return sysctl_int_range(oidp, arg1, arg2, req,
	    IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
}

static int
ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS)
{

	return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX);
}

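/*
 * Generic handler for the per-CPU statistics counters.  arg2 is the
 * byte offset of a u_long inside struct ipfw_context; a read sums that
 * counter across all per-CPU contexts, while writing any value resets
 * the counter on every CPU.
 */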
static int
ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS)
{
	u_long stat = 0;
	int cpu, error;

	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2));

	error = sysctl_handle_long(oidp, &stat, 0, req);
	if (error || req->newptr == NULL)
		return (error);

	/* Zero out this stat. */
	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		*((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0;
	return (0);
}

static void
ipfw_ctx_init_dispatch(netmsg_t nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ipfw_context *ctx;
	struct ip_fw *def_rule;

	ASSERT_NETISR_NCPUS(mycpuid);

	ctx = kmalloc(__offsetof(struct ipfw_context,
	    ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO);

	RB_INIT(&ctx->ipfw_state_tree);
	TAILQ_INIT(&ctx->ipfw_state_list);

	RB_INIT(&ctx->ipfw_track_tree);
	TAILQ_INIT(&ctx->ipfw_track_list);

	callout_init_mp(&ctx->ipfw_stateto_ch);
	netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch);
	ctx->ipfw_stateexp_anch.st_type = O_ANCHOR;
	netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_state_expire_more_dispatch);

	callout_init_mp(&ctx->ipfw_trackto_ch);
	netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch);
	netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_track_expire_more_dispatch);

	callout_init_mp(&ctx->ipfw_keepalive_ch);
	netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch);
	ctx->ipfw_keepalive_anch.st_type = O_ANCHOR;
	netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE, ipfw_keepalive_more_dispatch);

	callout_init_mp(&ctx->ipfw_xlatreap_ch);
	netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport,
	    MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch);
	TAILQ_INIT(&ctx->ipfw_xlatreap);

	ipfw_ctx[mycpuid] = ctx;

	def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO);

	def_rule->act_ofs = 0;
	def_rule->rulenum = IPFW_DEFAULT_RULE;
	def_rule->cmd_len = 1;
	def_rule->set = IPFW_DEFAULT_SET;

	def_rule->cmd[0].len = 1;
#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
	def_rule->cmd[0].opcode = O_ACCEPT;
#else
	if (filters_default_to_accept)
		def_rule->cmd[0].opcode = O_ACCEPT;
	else
		def_rule->cmd[0].opcode = O_DENY;
#endif

	def_rule->refcnt = 1;
	def_rule->cpuid = mycpuid;

	/* Install the default rule */
	ctx->ipfw_default_rule = def_rule;
	ctx->ipfw_layer3_chain = def_rule;

	/* Link rule CPU sibling */
	ipfw_link_sibling(fwmsg, def_rule);

	/* Statistics only need to be updated once */
	if (mycpuid == 0)
		ipfw_inc_static_count(def_rule);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_crossref_reap_dispatch(netmsg_t nmsg)
{

	crit_enter();
	/* Reply ASAP */
	netisr_replymsg(&nmsg->base, 0);
	crit_exit();
	ipfw_crossref_reap();
}

static void
ipfw_crossref_timeo(void *dummy __unused)
{
	struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm;

	KKASSERT(mycpuid == 0);

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

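/*
 * An interface gained or lost an address: walk this CPU's rule chain
 * and clear IPFW_IFIP_VALID on every O_IP_SRC_IFIP/O_IP_DST_IFIP
 * instruction naming that interface, so the cached interface address
 * is looked up again the next time the instruction is evaluated.
 */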
static void
ipfw_ifaddr_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ip_fw *f;

	ASSERT_NETISR_NCPUS(mycpuid);

	for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) {
		int l, cmdlen;
		ipfw_insn *cmd;

		if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0)
			continue;

		for (l = f->cmd_len, cmd = f->cmd; l > 0;
		     l -= cmdlen, cmd += cmdlen) {
			cmdlen = F_LEN(cmd);
			if (cmd->opcode == O_IP_SRC_IFIP ||
			    cmd->opcode == O_IP_DST_IFIP) {
				if (strncmp(ifp->if_xname,
				    ((ipfw_insn_ifip *)cmd)->ifname,
				    IFNAMSIZ) == 0)
					cmd->arg1 &= ~IPFW_IFIP_VALID;
			}
		}
	}
	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_ifaddr(void *arg __unused, struct ifnet *ifp,
    enum ifaddr_event event __unused, struct ifaddr *ifa __unused)
{
	struct netmsg_base nm;

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_ifaddr_dispatch);
	nm.lmsg.u.ms_resultp = ifp;
	netisr_domsg_global(&nm);
}

static void
ipfw_init_dispatch(netmsg_t nmsg)
{
	struct netmsg_ipfw fwmsg;
	int error = 0, cpu;

	ASSERT_NETISR0;

	if (IPFW_LOADED) {
		kprintf("IP firewall already loaded\n");
		error = EEXIST;
		goto reply;
	}

	if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0)
		ipfw_table_max = UINT16_MAX;

	/* Initialize global track tree. */
	RB_INIT(&ipfw_gd.ipfw_trkcnt_tree);
	IPFW_TRKCNT_TOKINIT;

	/* GC for freed crossref rules. */
	callout_init_mp(&ipfw_gd.ipfw_crossref_ch);
	netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch);

	ipfw_state_max_set(ipfw_state_max);
	ipfw_state_headroom = 8 * netisr_ncpus;

	bzero(&fwmsg, sizeof(fwmsg));
	netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_ctx_init_dispatch);
	netisr_domsg_global(&fwmsg.base);

	ip_fw_chk_ptr = ipfw_chk;
	ip_fw_ctl_ptr = ipfw_ctl;
	ip_fw_dn_io_ptr = ipfw_dummynet_io;

	kprintf("ipfw2 initialized, default to %s, logging ",
	    ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode ==
	    O_ACCEPT ? "accept" : "deny");

#ifdef IPFIREWALL_VERBOSE
	fw_verbose = 1;
#endif
#ifdef IPFIREWALL_VERBOSE_LIMIT
	verbose_limit = IPFIREWALL_VERBOSE_LIMIT;
#endif
	if (fw_verbose == 0) {
		kprintf("disabled\n");
	} else if (verbose_limit == 0) {
		kprintf("unlimited\n");
	} else {
		kprintf("limited to %d packets/entry by default\n",
		    verbose_limit);
	}

	ip_fw_loaded = 1;
	for (cpu = 0; cpu < netisr_ncpus; ++cpu) {
		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz,
		    ipfw_state_expire_ipifunc, NULL, cpu);
		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz,
		    ipfw_track_expire_ipifunc, NULL, cpu);
		callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz,
		    ipfw_keepalive, NULL, cpu);
	}

	if (fw_enable)
		ipfw_hook();

	ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr,
	    NULL, EVENTHANDLER_PRI_ANY);
	if (ipfw_ifaddr_event == NULL)
		kprintf("ipfw: ifaddr_event register failed\n");

reply:
	netisr_replymsg(&nmsg->base, error);
}

static int
ipfw_init(void)
{
	struct netmsg_base smsg;

	netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_init_dispatch);
	return netisr_domsg(&smsg, 0);
}

#ifdef KLD_MODULE

static void
ipfw_ctx_fini_dispatch(netmsg_t nmsg)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ASSERT_NETISR_NCPUS(mycpuid);

	callout_cancel(&ctx->ipfw_stateto_ch);
	callout_cancel(&ctx->ipfw_trackto_ch);
	callout_cancel(&ctx->ipfw_keepalive_ch);
	callout_cancel(&ctx->ipfw_xlatreap_ch);

	crit_enter();
	netisr_dropmsg(&ctx->ipfw_stateexp_more);
	netisr_dropmsg(&ctx->ipfw_stateexp_nm);
	netisr_dropmsg(&ctx->ipfw_trackexp_more);
	netisr_dropmsg(&ctx->ipfw_trackexp_nm);
	netisr_dropmsg(&ctx->ipfw_keepalive_more);
	netisr_dropmsg(&ctx->ipfw_keepalive_nm);
	netisr_dropmsg(&ctx->ipfw_xlatreap_nm);
	crit_exit();

	ipfw_table_flushall_oncpu(ctx, 1);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

static void
ipfw_fini_dispatch(netmsg_t nmsg)
{
	struct netmsg_base nm;
	int error = 0, cpu;

	ASSERT_NETISR0;

	ipfw_crossref_reap();

	if (ipfw_gd.ipfw_refcnt != 0) {
		error = EBUSY;
		goto reply;
	}

	ip_fw_loaded = 0;
	ipfw_dehook();

	/* Synchronize any inflight state/track expire IPIs. */
	lwkt_synchronize_ipiqs("ipfwfini");

	netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_ctx_fini_dispatch);
	netisr_domsg_global(&nm);

	callout_cancel(&ipfw_gd.ipfw_crossref_ch);
	crit_enter();
	netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm);
	crit_exit();

	if (ipfw_ifaddr_event != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event);

	ip_fw_chk_ptr = NULL;
	ip_fw_ctl_ptr = NULL;
	ip_fw_dn_io_ptr = NULL;
	ipfw_flush(1 /* kill default rule */);

	/* Free per-cpu context */
	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		kfree(ipfw_ctx[cpu], M_IPFW);

	kprintf("IP firewall unloaded\n");
reply:
	netisr_replymsg(&nmsg->base, error);
}

static void
ipfw_fflush_dispatch(netmsg_t nmsg)
{

	ipfw_flush(0 /* keep default rule */);
	ipfw_crossref_reap();
	netisr_replymsg(&nmsg->base, 0);
}

static int
ipfw_fini(void)
{
	struct netmsg_base smsg;
	int i = 0;

	for (;;) {
		netmsg_init(&smsg, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_fflush_dispatch);
		netisr_domsg(&smsg, 0);

		if (ipfw_gd.ipfw_refcnt == 0)
			break;
		kprintf("ipfw: flush pending %d\n", ++i);
		tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2);
	}

	netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_fini_dispatch);
	return netisr_domsg(&smsg, 0);
}

#endif /* KLD_MODULE */

static int
ipfw_modevent(module_t mod, int type, void *unused)
{
	int err = 0;

	switch (type) {
	case MOD_LOAD:
		err = ipfw_init();
		break;

	case MOD_UNLOAD:
#ifndef KLD_MODULE
		kprintf("ipfw statically compiled, cannot unload\n");
		err = EBUSY;
#else
		err = ipfw_fini();
#endif
		break;
	default:
		break;
	}
	return err;
}

static moduledata_t ipfwmod = {
	"ipfw",
	ipfw_modevent,
	0
};
DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY);
MODULE_VERSION(ipfw, 1);