1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 */ 27 28 /* 29 * Implement IP packet firewall (new version) 30 */ 31 32 #include "opt_ipfw.h" 33 #include "opt_inet.h" 34 #ifndef INET 35 #error IPFIREWALL requires INET. 36 #endif /* INET */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/in_cksum.h> 50 #include <sys/limits.h> 51 #include <sys/lock.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <net/pfil.h> 57 #include <net/dummynet/ip_dummynet.h> 58 59 #include <sys/thread2.h> 60 #include <net/netmsg2.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/in_var.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/ip_icmp.h> 69 #include <netinet/tcp.h> 70 #include <netinet/tcp_seq.h> 71 #include <netinet/tcp_timer.h> 72 #include <netinet/tcp_var.h> 73 #include <netinet/tcpip.h> 74 #include <netinet/udp.h> 75 #include <netinet/udp_var.h> 76 #include <netinet/ip_divert.h> 77 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 78 79 #include <net/ipfw/ip_fw2.h> 80 81 #ifdef IPFIREWALL_DEBUG 82 #define DPRINTF(fmt, ...) \ 83 do { \ 84 if (fw_debug > 0) \ 85 kprintf(fmt, __VA_ARGS__); \ 86 } while (0) 87 #else 88 #define DPRINTF(fmt, ...) ((void)0) 89 #endif 90 91 /* 92 * Description about per-CPU rule duplication: 93 * 94 * Module loading/unloading and all ioctl operations are serialized 95 * by netisr0, so we don't have any ordering or locking problems. 96 * 97 * Following graph shows how operation on per-CPU rule list is 98 * performed [2 CPU case]: 99 * 100 * CPU0 CPU1 101 * 102 * netisr0 <------------------------------------+ 103 * domsg | 104 * : | 105 * :(delete/add...) | 106 * : | 107 * : netmsg | netmsg 108 * forwardmsg---------->netisr1 | 109 * : | 110 * :(delete/add...) 
| 111 * : | 112 * : | 113 * replymsg--------------+ 114 * 115 * 116 * 117 * Rule structure [2 CPU case] 118 * 119 * CPU0 CPU1 120 * 121 * layer3_chain layer3_chain 122 * | | 123 * V V 124 * +-------+ sibling +-------+ sibling 125 * | rule1 |--------->| rule1 |--------->NULL 126 * +-------+ +-------+ 127 * | | 128 * |next |next 129 * V V 130 * +-------+ sibling +-------+ sibling 131 * | rule2 |--------->| rule2 |--------->NULL 132 * +-------+ +-------+ 133 * 134 * ip_fw.sibling: 135 * 1) Ease statistics calculation during IP_FW_GET. We only need to 136 * iterate layer3_chain in netisr0; the current rule's duplication 137 * to the other CPUs could safely be read-only accessed through 138 * ip_fw.sibling. 139 * 2) Accelerate rule insertion and deletion, e.g. rule insertion: 140 * a) In netisr0 rule3 is determined to be inserted between rule1 141 * and rule2. To make this decision we need to iterate the 142 * layer3_chain in netisr0. The netmsg, which is used to insert 143 * the rule, will contain rule1 in netisr0 as prev_rule and rule2 144 * in netisr0 as next_rule. 145 * b) After the insertion in netisr0 is done, we will move on to 146 * netisr1. But instead of relocating the rule3's position in 147 * netisr1 by iterating the layer3_chain in netisr1, we set the 148 * netmsg's prev_rule to rule1->sibling and next_rule to 149 * rule2->sibling before the netmsg is forwarded to netisr1 from 150 * netisr0. 151 */ 152 153 /* 154 * Description of states and tracks. 155 * 156 * Both states and tracks are stored in per-cpu RB trees instead of 157 * per-cpu hash tables to avoid the worst case hash degeneration. 158 * 159 * The lifetimes of states and tracks are regulated by dyn_*_lifetime, 160 * measured in seconds and depending on the flags. 161 * 162 * When a packet is received, its address fields are first masked with 163 * the mask defined for the rule, then matched against the entries in 164 * the per-cpu state RB tree. States are generated by 'keep-state' 165 * and 'limit' options. 166 * 167 * The max number of states is ipfw_state_max. When we reach the 168 * maximum number of states we do not create anymore. This is done to 169 * avoid consuming too much memory, but also too much time when 170 * searching on each packet. 171 * 172 * Each state holds a pointer to the parent ipfw rule of the current 173 * CPU so we know what action to perform. States are removed when the 174 * parent rule is deleted. XXX we should make them survive. 175 * 176 * There are some limitations with states -- we do not obey the 177 * 'randomized match', and we do not do multiple passes through the 178 * firewall. XXX check the latter!!! 179 * 180 * States grow independently on each CPU, e.g. 2 CPU case: 181 * 182 * CPU0 CPU1 183 * ................... ................... 184 * : state RB tree : : state RB tree : 185 * : : : : 186 * : state1 state2 : : state3 : 187 * : | | : : | : 188 * :.....|....|......: :........|........: 189 * | | | 190 * | | |st_rule 191 * | | | 192 * V V V 193 * +-------+ +-------+ 194 * | rule1 | | rule1 | 195 * +-------+ +-------+ 196 * 197 * Tracks are used to enforce limits on the number of sessions. Tracks 198 * are generated by 'limit' option. 199 * 200 * The max number of tracks is ipfw_track_max. When we reach the 201 * maximum number of tracks we do not create anymore. This is done to 202 * avoid consuming too much memory. 203 * 204 * Tracks are organized into two layers, track counter RB tree is 205 * shared between CPUs, track RB tree is per-cpu. 
States generated by 206 * 'limit' option are linked to the track in addition to the per-cpu 207 * state RB tree; mainly to ease expiration. e.g. 2 CPU case: 208 * 209 * .............................. 210 * : track counter RB tree : 211 * : : 212 * : +-----------+ : 213 * : | trkcnt1 | : 214 * : | | : 215 * : +--->counter<----+ : 216 * : | | | | : 217 * : | +-----------+ | : 218 * :......|................|....: 219 * | | 220 * CPU0 | | CPU1 221 * ................. |t_count | ................. 222 * : track RB tree : | | : track RB tree : 223 * : : | | : : 224 * : +-->track1-------+ +--------track2 : 225 * : | A : : : 226 * : | | : : : 227 * :.|.....|.......: :...............: 228 * | +----------------+ 229 * | .................... | 230 * | : state RB tree : |st_track 231 * | : : | 232 * +---state1 state2---+ 233 * : | | : 234 * :.....|.......|....: 235 * | | 236 * | |st_rule 237 * V V 238 * +----------+ 239 * | rule1 | 240 * +----------+ 241 */ 242 243 #define IPFW_AUTOINC_STEP_MIN 1 244 #define IPFW_AUTOINC_STEP_MAX 1000 245 #define IPFW_AUTOINC_STEP_DEF 100 246 247 #define IPFW_TABLE_MAX_DEF 64 248 249 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ 250 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ 251 252 #define MATCH_REVERSE 0 253 #define MATCH_FORWARD 1 254 #define MATCH_NONE 2 255 #define MATCH_UNKNOWN 3 256 257 #define TIME_LEQ(a, b) ((a) - (b) <= 0) 258 259 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST) 260 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \ 261 (IPFW_STATE_TCPFLAGS << 8)) 262 263 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 264 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 265 #define BOTH_RST (TH_RST | (TH_RST << 8)) 266 /* TH_ACK here means FIN was ACKed. */ 267 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8)) 268 269 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \ 270 (((s)->st_state & BOTH_RST) || \ 271 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK)) 272 273 #define O_ANCHOR O_NOP 274 275 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT) 276 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \ 277 ((struct ipfw_xlat *)(s))->xlat_invalid) 278 279 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1 280 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2 281 282 #define IPFW_XLATE_INSERT 0x0001 283 #define IPFW_XLATE_FORWARD 0x0002 284 #define IPFW_XLATE_OUTPUT 0x0004 285 286 struct netmsg_ipfw { 287 struct netmsg_base base; 288 const struct ipfw_ioc_rule *ioc_rule; 289 struct ip_fw *next_rule; 290 struct ip_fw *prev_rule; 291 struct ip_fw *sibling; 292 uint32_t rule_flags; 293 struct ip_fw **cross_rules; 294 }; 295 296 struct netmsg_del { 297 struct netmsg_base base; 298 struct ip_fw *start_rule; 299 struct ip_fw *prev_rule; 300 uint16_t rulenum; 301 uint8_t from_set; 302 uint8_t to_set; 303 }; 304 305 struct netmsg_zent { 306 struct netmsg_base base; 307 struct ip_fw *start_rule; 308 uint16_t rulenum; 309 uint16_t log_only; 310 }; 311 312 struct netmsg_cpstate { 313 struct netmsg_base base; 314 struct ipfw_ioc_state *ioc_state; 315 int state_cntmax; 316 int state_cnt; 317 }; 318 319 struct netmsg_tblent { 320 struct netmsg_base base; 321 struct sockaddr *key; 322 struct sockaddr *netmask; 323 struct ipfw_tblent *sibling; 324 int tableid; 325 }; 326 327 struct netmsg_tblflush { 328 struct netmsg_base base; 329 int tableid; 330 int destroy; 331 }; 332 333 struct netmsg_tblexp { 334 struct netmsg_base base; 335 time_t expire; 336 int tableid; 337 int cnt; 338 int expcnt; 339 struct radix_node_head *rnh; 340 }; 341 342 
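/*
 * Illustrative sketch (not the actual handler) of the sibling
 * propagation described in the "per-CPU rule duplication" comment
 * above: each netisr splices its own duplicate of a new rule
 * between the prev_rule/next_rule carried in struct netmsg_ipfw,
 * then advances both pointers to their ->sibling copies, so the
 * next netisr never has to walk its own layer3_chain.
 * ipfw_create_rule_sketch() is a hypothetical helper and the tail
 * is simplified (the last cpu would reply back to netisr0 instead
 * of forwarding); kept under #if 0 on purpose.
 */
#if 0
static void
ipfw_add_rule_sketch_dispatch(netmsg_t nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ip_fw *rule;

	/* Duplicate the rule for this cpu and splice it in. */
	rule = ipfw_create_rule_sketch(fwmsg->ioc_rule);
	rule->next = fwmsg->next_rule;
	if (fwmsg->prev_rule != NULL)
		fwmsg->prev_rule->next = rule;

	/* Chain the previous cpu's duplicate to this one. */
	if (fwmsg->sibling != NULL)
		fwmsg->sibling->sibling = rule;
	fwmsg->sibling = rule;

	/*
	 * Let the next cpu find its insertion point without a scan:
	 * just follow the sibling pointers set up by earlier cpus.
	 */
	if (fwmsg->prev_rule != NULL)
		fwmsg->prev_rule = fwmsg->prev_rule->sibling;
	if (fwmsg->next_rule != NULL)
		fwmsg->next_rule = fwmsg->next_rule->sibling;

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}
#endif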
struct ipfw_table_cp { 343 struct ipfw_ioc_tblent *te; 344 int te_idx; 345 int te_cnt; 346 }; 347 348 struct ip_fw_local { 349 /* 350 * offset The offset of a fragment. offset != 0 means that 351 * we have a fragment at this offset of an IPv4 packet. 352 * offset == 0 means that (if this is an IPv4 packet) 353 * this is the first or only fragment. 354 */ 355 u_short offset; 356 357 /* 358 * Local copies of addresses. They are only valid if we have 359 * an IP packet. 360 * 361 * proto The protocol. Set to 0 for non-ip packets, 362 * or to the protocol read from the packet otherwise. 363 * proto != 0 means that we have an IPv4 packet. 364 * 365 * src_port, dst_port port numbers, in HOST format. Only 366 * valid for TCP and UDP packets. 367 * 368 * src_ip, dst_ip ip addresses, in NETWORK format. 369 * Only valid for IPv4 packets. 370 */ 371 uint8_t proto; 372 uint16_t src_port; /* NOTE: host format */ 373 uint16_t dst_port; /* NOTE: host format */ 374 struct in_addr src_ip; /* NOTE: network format */ 375 struct in_addr dst_ip; /* NOTE: network format */ 376 uint16_t ip_len; /* NOTE: host format */ 377 struct tcphdr *tcp; 378 }; 379 380 struct ipfw_addrs { 381 uint32_t addr1; /* host byte order */ 382 uint32_t addr2; /* host byte order */ 383 }; 384 385 struct ipfw_ports { 386 uint16_t port1; /* host byte order */ 387 uint16_t port2; /* host byte order */ 388 }; 389 390 struct ipfw_key { 391 union { 392 struct ipfw_addrs addrs; 393 uint64_t value; 394 } addr_u; 395 union { 396 struct ipfw_ports ports; 397 uint32_t value; 398 } port_u; 399 uint8_t proto; 400 uint8_t swap; /* IPFW_KEY_SWAP_ */ 401 uint16_t rsvd2; 402 }; 403 404 #define IPFW_KEY_SWAP_ADDRS 0x1 405 #define IPFW_KEY_SWAP_PORTS 0x2 406 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS) 407 408 struct ipfw_trkcnt { 409 RB_ENTRY(ipfw_trkcnt) tc_rblink; 410 struct ipfw_key tc_key; 411 uintptr_t tc_ruleid; 412 int tc_refs; 413 int tc_count; 414 time_t tc_expire; /* userland get-only */ 415 uint16_t tc_rulenum; /* userland get-only */ 416 } __cachealign; 417 418 #define tc_addrs tc_key.addr_u.value 419 #define tc_ports tc_key.port_u.value 420 #define tc_proto tc_key.proto 421 #define tc_saddr tc_key.addr_u.addrs.addr1 422 #define tc_daddr tc_key.addr_u.addrs.addr2 423 #define tc_sport tc_key.port_u.ports.port1 424 #define tc_dport tc_key.port_u.ports.port2 425 426 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt); 427 428 struct ipfw_state; 429 430 struct ipfw_track { 431 RB_ENTRY(ipfw_track) t_rblink; 432 struct ipfw_key t_key; 433 struct ip_fw *t_rule; 434 time_t t_lastexp; 435 LIST_HEAD(, ipfw_state) t_state_list; 436 time_t t_expire; 437 volatile int *t_count; 438 struct ipfw_trkcnt *t_trkcnt; 439 TAILQ_ENTRY(ipfw_track) t_link; 440 }; 441 442 #define t_addrs t_key.addr_u.value 443 #define t_ports t_key.port_u.value 444 #define t_proto t_key.proto 445 #define t_saddr t_key.addr_u.addrs.addr1 446 #define t_daddr t_key.addr_u.addrs.addr2 447 #define t_sport t_key.port_u.ports.port1 448 #define t_dport t_key.port_u.ports.port2 449 450 RB_HEAD(ipfw_track_tree, ipfw_track); 451 TAILQ_HEAD(ipfw_track_list, ipfw_track); 452 453 struct ipfw_state { 454 RB_ENTRY(ipfw_state) st_rblink; 455 struct ipfw_key st_key; 456 457 time_t st_expire; /* expire time */ 458 struct ip_fw *st_rule; 459 460 uint64_t st_pcnt; /* packets */ 461 uint64_t st_bcnt; /* bytes */ 462 463 /* 464 * st_state: 465 * State of this rule, typically a combination of TCP flags. 466 * 467 * st_ack_fwd/st_ack_rev: 468 * Most recent ACKs in forward and reverse direction. 
They 469 * are used to generate keepalives. 470 */ 471 uint32_t st_state; 472 uint32_t st_ack_fwd; /* host byte order */ 473 uint32_t st_seq_fwd; /* host byte order */ 474 uint32_t st_ack_rev; /* host byte order */ 475 uint32_t st_seq_rev; /* host byte order */ 476 477 uint16_t st_flags; /* IPFW_STATE_F_ */ 478 uint16_t st_type; /* KEEP_STATE/LIMIT/RDR */ 479 struct ipfw_track *st_track; 480 481 LIST_ENTRY(ipfw_state) st_trklink; 482 TAILQ_ENTRY(ipfw_state) st_link; 483 }; 484 485 #define st_addrs st_key.addr_u.value 486 #define st_ports st_key.port_u.value 487 #define st_proto st_key.proto 488 #define st_swap st_key.swap 489 490 #define IPFW_STATE_F_ACKFWD 0x0001 491 #define IPFW_STATE_F_SEQFWD 0x0002 492 #define IPFW_STATE_F_ACKREV 0x0004 493 #define IPFW_STATE_F_SEQREV 0x0008 494 #define IPFW_STATE_F_XLATSRC 0x0010 495 #define IPFW_STATE_F_XLATSLAVE 0x0020 496 #define IPFW_STATE_F_LINKED 0x0040 497 498 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \ 499 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE)) 500 501 /* Expired or being deleted. */ 502 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \ 503 IPFW_XLAT_INVALID((s))) 504 505 TAILQ_HEAD(ipfw_state_list, ipfw_state); 506 RB_HEAD(ipfw_state_tree, ipfw_state); 507 508 struct ipfw_xlat { 509 struct ipfw_state xlat_st; /* MUST be the first field */ 510 uint32_t xlat_addr; /* network byte order */ 511 uint16_t xlat_port; /* network byte order */ 512 uint16_t xlat_dir; /* MATCH_ */ 513 struct ifnet *xlat_ifp; /* matching ifnet */ 514 struct ipfw_xlat *xlat_pair; /* paired state */ 515 int xlat_pcpu; /* paired cpu */ 516 volatile int xlat_invalid; /* invalid, but not dtor yet */ 517 volatile uint64_t xlat_crefs; /* cross references */ 518 struct netmsg_base xlat_freenm; /* for remote free */ 519 }; 520 521 #define xlat_type xlat_st.st_type 522 #define xlat_flags xlat_st.st_flags 523 #define xlat_rule xlat_st.st_rule 524 #define xlat_bcnt xlat_st.st_bcnt 525 #define xlat_pcnt xlat_st.st_pcnt 526 527 struct ipfw_tblent { 528 struct radix_node te_nodes[2]; 529 struct sockaddr_in te_key; 530 u_long te_use; 531 time_t te_lastuse; 532 struct ipfw_tblent *te_sibling; 533 volatile int te_expired; 534 }; 535 536 struct ipfw_context { 537 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */ 538 struct ip_fw *ipfw_default_rule; /* default rule */ 539 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/ 540 541 /* 542 * ipfw_set_disable contains one bit per set value (0..31). 543 * If the bit is set, all rules with the corresponding set 544 * are disabled. Set IPDW_DEFAULT_SET is reserved for the 545 * default rule and CANNOT be disabled. 
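	 *
	 * For example (a sketch of the test, not a quote of ipfw_chk()):
	 * a rule in set N is skipped when
	 * (ctx->ipfw_set_disable & (1 << N)) != 0; bit IPFW_DEFAULT_SET
	 * (31) is never turned on here, so the default rule always runs.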
546 */ 547 uint32_t ipfw_set_disable; 548 549 uint8_t ipfw_flags; /* IPFW_FLAG_ */ 550 551 struct ip_fw *ipfw_cont_rule; 552 struct ipfw_xlat *ipfw_cont_xlat; 553 554 struct ipfw_state_tree ipfw_state_tree; 555 struct ipfw_state_list ipfw_state_list; 556 int ipfw_state_loosecnt; 557 int ipfw_state_cnt; 558 559 union { 560 struct ipfw_state state; 561 struct ipfw_track track; 562 struct ipfw_trkcnt trkcnt; 563 } ipfw_tmpkey; 564 565 struct ipfw_track_tree ipfw_track_tree; 566 struct ipfw_track_list ipfw_track_list; 567 struct ipfw_trkcnt *ipfw_trkcnt_spare; 568 569 struct callout ipfw_stateto_ch; 570 time_t ipfw_state_lastexp; 571 struct netmsg_base ipfw_stateexp_nm; 572 struct netmsg_base ipfw_stateexp_more; 573 struct ipfw_state ipfw_stateexp_anch; 574 575 struct callout ipfw_trackto_ch; 576 time_t ipfw_track_lastexp; 577 struct netmsg_base ipfw_trackexp_nm; 578 struct netmsg_base ipfw_trackexp_more; 579 struct ipfw_track ipfw_trackexp_anch; 580 581 struct callout ipfw_keepalive_ch; 582 struct netmsg_base ipfw_keepalive_nm; 583 struct netmsg_base ipfw_keepalive_more; 584 struct ipfw_state ipfw_keepalive_anch; 585 586 struct callout ipfw_xlatreap_ch; 587 struct netmsg_base ipfw_xlatreap_nm; 588 struct ipfw_state_list ipfw_xlatreap; 589 590 /* 591 * Statistics 592 */ 593 u_long ipfw_sts_reap; 594 u_long ipfw_sts_reapfailed; 595 u_long ipfw_sts_overflow; 596 u_long ipfw_sts_nomem; 597 u_long ipfw_sts_tcprecycled; 598 599 u_long ipfw_tks_nomem; 600 u_long ipfw_tks_reap; 601 u_long ipfw_tks_reapfailed; 602 u_long ipfw_tks_overflow; 603 u_long ipfw_tks_cntnomem; 604 605 u_long ipfw_frags; 606 u_long ipfw_defraged; 607 u_long ipfw_defrag_remote; 608 609 u_long ipfw_xlated; 610 u_long ipfw_xlate_split; 611 u_long ipfw_xlate_conflicts; 612 u_long ipfw_xlate_cresolved; 613 614 /* Last field */ 615 struct radix_node_head *ipfw_tables[]; 616 }; 617 618 #define IPFW_FLAG_KEEPALIVE 0x01 619 #define IPFW_FLAG_STATEEXP 0x02 620 #define IPFW_FLAG_TRACKEXP 0x04 621 #define IPFW_FLAG_STATEREAP 0x08 622 #define IPFW_FLAG_TRACKREAP 0x10 623 624 #define ipfw_state_tmpkey ipfw_tmpkey.state 625 #define ipfw_track_tmpkey ipfw_tmpkey.track 626 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt 627 628 struct ipfw_global { 629 int ipfw_state_loosecnt; /* cache aligned */ 630 time_t ipfw_state_globexp __cachealign; 631 632 struct lwkt_token ipfw_trkcnt_token __cachealign; 633 struct ipfw_trkcnt_tree ipfw_trkcnt_tree; 634 int ipfw_trkcnt_cnt; 635 time_t ipfw_track_globexp; 636 637 /* Accessed in netisr0. */ 638 struct ip_fw *ipfw_crossref_free __cachealign; 639 struct callout ipfw_crossref_ch; 640 struct netmsg_base ipfw_crossref_nm; 641 642 #ifdef KLD_MODULE 643 /* 644 * Module can not be unloaded, if there are references to 645 * certains rules of ipfw(4), e.g. dummynet(4) 646 */ 647 int ipfw_refcnt __cachealign; 648 #endif 649 } __cachealign; 650 651 static struct ipfw_context *ipfw_ctx[MAXCPU]; 652 653 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 654 655 /* 656 * Following two global variables are accessed and updated only 657 * in netisr0. 658 */ 659 static uint32_t static_count; /* # of static rules */ 660 static uint32_t static_ioc_len; /* bytes of static rules */ 661 662 /* 663 * If 1, then ipfw static rules are being flushed, 664 * ipfw_chk() will skip to the default rule. 
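 * Conceptually (a sketch, not the literal code in ipfw_chk()):
 * if (ipfw_flushing) start matching at ctx->ipfw_default_rule.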
665 */ 666 static int ipfw_flushing; 667 668 static int fw_verbose; 669 static int verbose_limit; 670 671 static int fw_debug; 672 static int autoinc_step = IPFW_AUTOINC_STEP_DEF; 673 674 static int ipfw_table_max = IPFW_TABLE_MAX_DEF; 675 676 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); 677 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); 678 679 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max); 680 681 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 682 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0, 683 "Firewall statistics"); 684 685 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, 686 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw"); 687 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW, 688 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I", 689 "Rule number autincrement step"); 690 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, 691 &fw_one_pass, 0, 692 "Only do a single pass through ipfw when using dummynet(4)"); 693 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 694 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 695 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, 696 &fw_verbose, 0, "Log matches to ipfw rules"); 697 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 698 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 699 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD, 700 &ipfw_table_max, 0, "Max # of tables"); 701 702 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS); 703 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS); 704 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS); 705 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS); 706 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS); 707 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS); 708 709 /* 710 * Timeouts for various events in handing states. 711 * 712 * NOTE: 713 * 1 == 0~1 second. 714 * 2 == 1~2 second(s). 715 * 716 * We use 2 seconds for FIN lifetime, so that the states will not be 717 * ripped prematurely. 718 */ 719 static uint32_t dyn_ack_lifetime = 300; 720 static uint32_t dyn_syn_lifetime = 20; 721 static uint32_t dyn_finwait_lifetime = 20; 722 static uint32_t dyn_fin_lifetime = 2; 723 static uint32_t dyn_rst_lifetime = 2; 724 static uint32_t dyn_udp_lifetime = 10; 725 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */ 726 727 /* 728 * Keepalives are sent if dyn_keepalive is set. They are sent every 729 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 730 * seconds of lifetime of a rule. 
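 *
 * With the defaults below (dyn_keepalive_interval = 20,
 * dyn_keepalive_period = 5) that means: once a state has less than
 * 20 seconds of lifetime left, a probe is sent roughly every 5
 * seconds, i.e. up to about 4 probes before the state expires,
 * unless real traffic refreshes it first.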
731 */ 732 static uint32_t dyn_keepalive_interval = 20; 733 static uint32_t dyn_keepalive_period = 5; 734 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 735 736 static struct ipfw_global ipfw_gd; 737 static int ipfw_state_loosecnt_updthr; 738 static int ipfw_state_max = 4096; /* max # of states */ 739 static int ipfw_track_max = 4096; /* max # of tracks */ 740 741 static int ipfw_state_headroom; /* setup at module load time */ 742 static int ipfw_state_reap_min = 8; 743 static int ipfw_state_expire_max = 32; 744 static int ipfw_state_scan_max = 256; 745 static int ipfw_keepalive_max = 8; 746 static int ipfw_track_reap_max = 4; 747 static int ipfw_track_expire_max = 16; 748 static int ipfw_track_scan_max = 128; 749 750 static eventhandler_tag ipfw_ifaddr_event; 751 752 /* Compat */ 753 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, 754 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I", 755 "Number of states and tracks"); 756 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 757 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I", 758 "Max number of states and tracks"); 759 760 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt, 761 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I", 762 "Number of states"); 763 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max, 764 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I", 765 "Max number of states"); 766 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW, 767 &ipfw_state_headroom, 0, "headroom for state reap"); 768 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD, 769 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks"); 770 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW, 771 &ipfw_track_max, 0, "Max number of tracks"); 772 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 773 &static_count, 0, "Number of static rules"); 774 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 775 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 776 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 777 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 778 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 779 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 780 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW, 781 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait"); 782 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 783 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 784 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 785 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 786 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 787 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 788 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 789 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 790 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max, 791 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt, 792 "I", "# of states to scan for each expire iteration"); 793 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max, 794 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt, 795 "I", "# of states to expire for each expire iteration"); 796 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max, 797 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt, 798 "I", "# of states to expire for each expire iteration"); 799 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min, 800 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt, 801 "I", "# of states to reap for state shortage"); 802 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max, 803 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt, 804 "I", "# of tracks to scan for each expire iteration"); 805 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max, 806 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt, 807 "I", "# of tracks to expire for each expire iteration"); 808 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max, 809 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt, 810 "I", "# of tracks to reap for track shortage"); 811 812 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap, 813 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 814 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat, 815 "LU", "# of state reaps due to states shortage"); 816 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed, 817 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 818 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat, 819 "LU", "# of state reap failure"); 820 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow, 821 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 822 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat, 823 "LU", "# of state overflow"); 824 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem, 825 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 826 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat, 827 "LU", "# of state allocation failure"); 828 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled, 829 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 830 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat, 831 "LU", "# of state deleted due to fast TCP port recycling"); 832 833 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem, 834 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 835 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat, 836 "LU", "# of track allocation failure"); 837 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap, 838 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 839 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat, 840 "LU", "# of track reap due to tracks shortage"); 841 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed, 842 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 843 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat, 844 "LU", "# of track reap failure"); 845 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow, 846 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 847 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat, 848 "LU", "# of track overflow"); 849 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem, 850 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 851 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat, 852 "LU", "# of track 
counter allocation failure"); 853 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags, 854 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 855 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat, 856 "LU", "# of IP fragements defraged"); 857 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged, 858 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 859 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat, 860 "LU", "# of IP packets after defrag"); 861 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote, 862 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 863 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat, 864 "LU", "# of IP packets after defrag dispatched to remote cpus"); 865 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated, 866 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 867 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat, 868 "LU", "# address/port translations"); 869 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split, 870 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 871 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat, 872 "LU", "# address/port translations split between different cpus"); 873 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts, 874 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 875 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat, 876 "LU", "# address/port translations conflicts on remote cpu"); 877 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved, 878 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 879 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat, 880 "LU", "# address/port translations conflicts resolved on remote cpu"); 881 882 static int ipfw_state_cmp(struct ipfw_state *, 883 struct ipfw_state *); 884 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *, 885 struct ipfw_trkcnt *); 886 static int ipfw_track_cmp(struct ipfw_track *, 887 struct ipfw_track *); 888 889 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 890 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 891 892 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 893 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 894 895 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 896 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 897 898 static int ipfw_chk(struct ip_fw_args *); 899 static void ipfw_track_expire_ipifunc(void *); 900 static void ipfw_state_expire_ipifunc(void *); 901 static void ipfw_keepalive(void *); 902 static int ipfw_state_expire_start(struct ipfw_context *, 903 int, int); 904 static void ipfw_crossref_timeo(void *); 905 static void ipfw_state_remove(struct ipfw_context *, 906 struct ipfw_state *); 907 static void ipfw_xlat_reap_timeo(void *); 908 static void ipfw_defrag_redispatch(struct mbuf *, int, 909 struct ip_fw *); 910 911 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token) 912 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token) 913 #define IPFW_TRKCNT_TOKINIT \ 914 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); 915 916 static void 917 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 918 const struct sockaddr *netmask) 919 { 920 const u_char *cp1 = (const u_char *)src; 921 u_char *cp2 = (u_char *)dst; 922 const u_char *cp3 = (const u_char *)netmask; 923 u_char *cplim = cp2 + *cp3; 924 u_char *cplim2 = cp2 + *cp1; 925 926 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 927 cp3 += 2; 928 if (cplim > cplim2) 929 cplim = 
cplim2; 930 while (cp2 < cplim) 931 *cp2++ = *cp1++ & *cp3++; 932 if (cp2 < cplim2) 933 bzero(cp2, cplim2 - cp2); 934 } 935 936 static __inline uint16_t 937 pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp) 938 { 939 uint32_t l; 940 941 if (udp && !cksum) 942 return (0x0000); 943 l = cksum + old - new; 944 l = (l >> 16) + (l & 65535); 945 l = l & 65535; 946 if (udp && !l) 947 return (0xFFFF); 948 return (l); 949 } 950 951 static __inline void 952 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport, 953 in_addr_t daddr, uint16_t dport, uint8_t proto) 954 { 955 956 key->proto = proto; 957 key->swap = 0; 958 959 if (saddr < daddr) { 960 key->addr_u.addrs.addr1 = daddr; 961 key->addr_u.addrs.addr2 = saddr; 962 key->swap |= IPFW_KEY_SWAP_ADDRS; 963 } else { 964 key->addr_u.addrs.addr1 = saddr; 965 key->addr_u.addrs.addr2 = daddr; 966 } 967 968 if (sport < dport) { 969 key->port_u.ports.port1 = dport; 970 key->port_u.ports.port2 = sport; 971 key->swap |= IPFW_KEY_SWAP_PORTS; 972 } else { 973 key->port_u.ports.port1 = sport; 974 key->port_u.ports.port2 = dport; 975 } 976 977 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS)) 978 key->swap |= IPFW_KEY_SWAP_PORTS; 979 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS)) 980 key->swap |= IPFW_KEY_SWAP_ADDRS; 981 } 982 983 static __inline void 984 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport, 985 in_addr_t *daddr, uint16_t *dport) 986 { 987 988 if (key->swap & IPFW_KEY_SWAP_ADDRS) { 989 *saddr = key->addr_u.addrs.addr2; 990 *daddr = key->addr_u.addrs.addr1; 991 } else { 992 *saddr = key->addr_u.addrs.addr1; 993 *daddr = key->addr_u.addrs.addr2; 994 } 995 996 if (key->swap & IPFW_KEY_SWAP_PORTS) { 997 *sport = key->port_u.ports.port2; 998 *dport = key->port_u.ports.port1; 999 } else { 1000 *sport = key->port_u.ports.port1; 1001 *dport = key->port_u.ports.port2; 1002 } 1003 } 1004 1005 static int 1006 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2) 1007 { 1008 1009 if (s1->st_proto > s2->st_proto) 1010 return (1); 1011 if (s1->st_proto < s2->st_proto) 1012 return (-1); 1013 1014 if (s1->st_addrs > s2->st_addrs) 1015 return (1); 1016 if (s1->st_addrs < s2->st_addrs) 1017 return (-1); 1018 1019 if (s1->st_ports > s2->st_ports) 1020 return (1); 1021 if (s1->st_ports < s2->st_ports) 1022 return (-1); 1023 1024 if (s1->st_swap == s2->st_swap || 1025 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL) 1026 return (0); 1027 1028 if (s1->st_swap > s2->st_swap) 1029 return (1); 1030 else 1031 return (-1); 1032 } 1033 1034 static int 1035 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2) 1036 { 1037 1038 if (t1->tc_proto > t2->tc_proto) 1039 return (1); 1040 if (t1->tc_proto < t2->tc_proto) 1041 return (-1); 1042 1043 if (t1->tc_addrs > t2->tc_addrs) 1044 return (1); 1045 if (t1->tc_addrs < t2->tc_addrs) 1046 return (-1); 1047 1048 if (t1->tc_ports > t2->tc_ports) 1049 return (1); 1050 if (t1->tc_ports < t2->tc_ports) 1051 return (-1); 1052 1053 if (t1->tc_ruleid > t2->tc_ruleid) 1054 return (1); 1055 if (t1->tc_ruleid < t2->tc_ruleid) 1056 return (-1); 1057 1058 return (0); 1059 } 1060 1061 static int 1062 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2) 1063 { 1064 1065 if (t1->t_proto > t2->t_proto) 1066 return (1); 1067 if (t1->t_proto < t2->t_proto) 1068 return (-1); 1069 1070 if (t1->t_addrs > t2->t_addrs) 1071 return (1); 1072 if (t1->t_addrs < t2->t_addrs) 1073 return (-1); 1074 1075 if (t1->t_ports > t2->t_ports) 1076 return (1); 1077 
if (t1->t_ports < t2->t_ports) 1078 return (-1); 1079 1080 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule) 1081 return (1); 1082 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule) 1083 return (-1); 1084 1085 return (0); 1086 } 1087 1088 static __inline struct ipfw_state * 1089 ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s) 1090 { 1091 struct ipfw_state *dup; 1092 1093 KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0, 1094 ("state %p was linked", s)); 1095 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1096 if (dup == NULL) { 1097 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link); 1098 s->st_flags |= IPFW_STATE_F_LINKED; 1099 } 1100 return (dup); 1101 } 1102 1103 static __inline void 1104 ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s) 1105 { 1106 1107 KASSERT(s->st_flags & IPFW_STATE_F_LINKED, 1108 ("state %p was not linked", s)); 1109 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1110 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link); 1111 s->st_flags &= ~IPFW_STATE_F_LINKED; 1112 } 1113 1114 static void 1115 ipfw_state_max_set(int state_max) 1116 { 1117 1118 ipfw_state_max = state_max; 1119 /* Allow 5% states over-allocation. */ 1120 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus; 1121 } 1122 1123 static __inline int 1124 ipfw_state_cntcoll(void) 1125 { 1126 int cpu, state_cnt = 0; 1127 1128 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 1129 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt; 1130 return (state_cnt); 1131 } 1132 1133 static __inline int 1134 ipfw_state_cntsync(void) 1135 { 1136 int state_cnt; 1137 1138 state_cnt = ipfw_state_cntcoll(); 1139 ipfw_gd.ipfw_state_loosecnt = state_cnt; 1140 return (state_cnt); 1141 } 1142 1143 static __inline int 1144 ipfw_free_rule(struct ip_fw *rule) 1145 { 1146 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid)); 1147 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt)); 1148 rule->refcnt--; 1149 if (rule->refcnt == 0) { 1150 if (rule->cross_rules != NULL) 1151 kfree(rule->cross_rules, M_IPFW); 1152 kfree(rule, M_IPFW); 1153 return 1; 1154 } 1155 return 0; 1156 } 1157 1158 static void 1159 ipfw_unref_rule(void *priv) 1160 { 1161 ipfw_free_rule(priv); 1162 #ifdef KLD_MODULE 1163 KASSERT(ipfw_gd.ipfw_refcnt > 0, 1164 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt)); 1165 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1); 1166 #endif 1167 } 1168 1169 static __inline void 1170 ipfw_ref_rule(struct ip_fw *rule) 1171 { 1172 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid)); 1173 #ifdef KLD_MODULE 1174 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 1175 #endif 1176 rule->refcnt++; 1177 } 1178 1179 /* 1180 * This macro maps an ip pointer into a layer3 header pointer of type T 1181 */ 1182 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 1183 1184 static __inline int 1185 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 1186 { 1187 int type = L3HDR(struct icmp,ip)->icmp_type; 1188 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn); 1189 int idx = type / 32; 1190 1191 if (idx >= idx_max) 1192 return (0); 1193 return (cmd->d[idx] & (1 << (type % 32))); 1194 } 1195 1196 static __inline int 1197 icmpcode_match(struct ip *ip, ipfw_insn_u32 *cmd) 1198 { 1199 int code = L3HDR(struct icmp,ip)->icmp_code; 1200 int idx_max = F_LEN(&cmd->o) - F_INSN_SIZE(ipfw_insn); 1201 int idx = code / 32; 1202 1203 if (idx >= idx_max) 1204 return (0); 1205 return (cmd->d[idx] & (1 << (code % 32))); 1206 } 1207 1208 #define TT ((1 << ICMP_ECHO) | \ 1209 (1 << 
ICMP_ROUTERSOLICIT) | \ 1210 (1 << ICMP_TSTAMP) | \ 1211 (1 << ICMP_IREQ) | \ 1212 (1 << ICMP_MASKREQ)) 1213 1214 static int 1215 is_icmp_query(struct ip *ip) 1216 { 1217 int type = L3HDR(struct icmp, ip)->icmp_type; 1218 1219 return (type < 32 && (TT & (1 << type))); 1220 } 1221 1222 #undef TT 1223 1224 /* 1225 * The following checks use two arrays of 8 or 16 bits to store the 1226 * bits that we want set or clear, respectively. They are in the 1227 * low and high half of cmd->arg1 or cmd->d[0]. 1228 * 1229 * We scan options and store the bits we find set. We succeed if 1230 * 1231 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 1232 * 1233 * The code is sometimes optimized not to store additional variables. 1234 */ 1235 static int 1236 flags_match(ipfw_insn *cmd, uint8_t bits) 1237 { 1238 u_char want_clear; 1239 bits = ~bits; 1240 1241 if (((cmd->arg1 & 0xff) & bits) != 0) 1242 return 0; /* some bits we want set were clear */ 1243 1244 want_clear = (cmd->arg1 >> 8) & 0xff; 1245 if ((want_clear & bits) != want_clear) 1246 return 0; /* some bits we want clear were set */ 1247 return 1; 1248 } 1249 1250 static int 1251 ipopts_match(struct ip *ip, ipfw_insn *cmd) 1252 { 1253 int optlen, bits = 0; 1254 u_char *cp = (u_char *)(ip + 1); 1255 int x = (ip->ip_hl << 2) - sizeof(struct ip); 1256 1257 for (; x > 0; x -= optlen, cp += optlen) { 1258 int opt = cp[IPOPT_OPTVAL]; 1259 1260 if (opt == IPOPT_EOL) 1261 break; 1262 1263 if (opt == IPOPT_NOP) { 1264 optlen = 1; 1265 } else { 1266 optlen = cp[IPOPT_OLEN]; 1267 if (optlen <= 0 || optlen > x) 1268 return 0; /* invalid or truncated */ 1269 } 1270 1271 switch (opt) { 1272 case IPOPT_LSRR: 1273 bits |= IP_FW_IPOPT_LSRR; 1274 break; 1275 1276 case IPOPT_SSRR: 1277 bits |= IP_FW_IPOPT_SSRR; 1278 break; 1279 1280 case IPOPT_RR: 1281 bits |= IP_FW_IPOPT_RR; 1282 break; 1283 1284 case IPOPT_TS: 1285 bits |= IP_FW_IPOPT_TS; 1286 break; 1287 1288 default: 1289 break; 1290 } 1291 } 1292 return (flags_match(cmd, bits)); 1293 } 1294 1295 static int 1296 tcpopts_match(struct ip *ip, ipfw_insn *cmd) 1297 { 1298 int optlen, bits = 0; 1299 struct tcphdr *tcp = L3HDR(struct tcphdr,ip); 1300 u_char *cp = (u_char *)(tcp + 1); 1301 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 1302 1303 for (; x > 0; x -= optlen, cp += optlen) { 1304 int opt = cp[0]; 1305 1306 if (opt == TCPOPT_EOL) 1307 break; 1308 1309 if (opt == TCPOPT_NOP) { 1310 optlen = 1; 1311 } else { 1312 optlen = cp[1]; 1313 if (optlen <= 0) 1314 break; 1315 } 1316 1317 switch (opt) { 1318 case TCPOPT_MAXSEG: 1319 bits |= IP_FW_TCPOPT_MSS; 1320 break; 1321 1322 case TCPOPT_WINDOW: 1323 bits |= IP_FW_TCPOPT_WINDOW; 1324 break; 1325 1326 case TCPOPT_SACK_PERMITTED: 1327 case TCPOPT_SACK: 1328 bits |= IP_FW_TCPOPT_SACK; 1329 break; 1330 1331 case TCPOPT_TIMESTAMP: 1332 bits |= IP_FW_TCPOPT_TS; 1333 break; 1334 1335 case TCPOPT_CC: 1336 case TCPOPT_CCNEW: 1337 case TCPOPT_CCECHO: 1338 bits |= IP_FW_TCPOPT_CC; 1339 break; 1340 1341 default: 1342 break; 1343 } 1344 } 1345 return (flags_match(cmd, bits)); 1346 } 1347 1348 static int 1349 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 1350 { 1351 if (ifp == NULL) /* no iface with this packet, match fails */ 1352 return 0; 1353 1354 /* Check by name or by IP address */ 1355 if (cmd->name[0] != '\0') { /* match by name */ 1356 /* Check name */ 1357 if (cmd->p.glob) { 1358 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) 1359 return(1); 1360 } else { 1361 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 1362 return(1); 1363 } 1364 } else 
{ 1365 struct ifaddr_container *ifac; 1366 1367 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1368 struct ifaddr *ia = ifac->ifa; 1369 1370 if (ia->ifa_addr == NULL) 1371 continue; 1372 if (ia->ifa_addr->sa_family != AF_INET) 1373 continue; 1374 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 1375 (ia->ifa_addr))->sin_addr.s_addr) 1376 return(1); /* match */ 1377 } 1378 } 1379 return(0); /* no match, fail ... */ 1380 } 1381 1382 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 1383 1384 /* 1385 * We enter here when we have a rule with O_LOG. 1386 * XXX this function alone takes about 2Kbytes of code! 1387 */ 1388 static void 1389 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen, 1390 struct ether_header *eh, struct mbuf *m, struct ifnet *oif) 1391 { 1392 char *action; 1393 int limit_reached = 0; 1394 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN]; 1395 1396 fragment[0] = '\0'; 1397 proto[0] = '\0'; 1398 1399 if (f == NULL) { /* bogus pkt */ 1400 if (verbose_limit != 0 && 1401 ctx->ipfw_norule_counter >= verbose_limit) 1402 return; 1403 ctx->ipfw_norule_counter++; 1404 if (ctx->ipfw_norule_counter == verbose_limit) 1405 limit_reached = verbose_limit; 1406 action = "Refuse"; 1407 } else { /* O_LOG is the first action, find the real one */ 1408 ipfw_insn *cmd = ACTION_PTR(f); 1409 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 1410 1411 if (l->max_log != 0 && l->log_left == 0) 1412 return; 1413 l->log_left--; 1414 if (l->log_left == 0) 1415 limit_reached = l->max_log; 1416 cmd += F_LEN(cmd); /* point to first action */ 1417 if (cmd->opcode == O_PROB) 1418 cmd += F_LEN(cmd); 1419 1420 action = action2; 1421 switch (cmd->opcode) { 1422 case O_DENY: 1423 action = "Deny"; 1424 break; 1425 1426 case O_REJECT: 1427 if (cmd->arg1==ICMP_REJECT_RST) { 1428 action = "Reset"; 1429 } else if (cmd->arg1==ICMP_UNREACH_HOST) { 1430 action = "Reject"; 1431 } else { 1432 ksnprintf(SNPARGS(action2, 0), "Unreach %d", 1433 cmd->arg1); 1434 } 1435 break; 1436 1437 case O_ACCEPT: 1438 action = "Accept"; 1439 break; 1440 1441 case O_COUNT: 1442 action = "Count"; 1443 break; 1444 1445 case O_DIVERT: 1446 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); 1447 break; 1448 1449 case O_TEE: 1450 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); 1451 break; 1452 1453 case O_SKIPTO: 1454 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); 1455 break; 1456 1457 case O_PIPE: 1458 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); 1459 break; 1460 1461 case O_QUEUE: 1462 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); 1463 break; 1464 1465 case O_FORWARD_IP: 1466 { 1467 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 1468 int len; 1469 1470 len = ksnprintf(SNPARGS(action2, 0), 1471 "Forward to %s", 1472 kinet_ntoa(sa->sa.sin_addr, abuf)); 1473 if (sa->sa.sin_port) { 1474 ksnprintf(SNPARGS(action2, len), ":%d", 1475 sa->sa.sin_port); 1476 } 1477 } 1478 break; 1479 1480 default: 1481 action = "UNKNOWN"; 1482 break; 1483 } 1484 } 1485 1486 if (hlen == 0) { /* non-ip */ 1487 ksnprintf(SNPARGS(proto, 0), "MAC"); 1488 } else { 1489 struct ip *ip = mtod(m, struct ip *); 1490 /* these three are all aliases to the same thing */ 1491 struct icmp *const icmp = L3HDR(struct icmp, ip); 1492 struct tcphdr *const tcp = (struct tcphdr *)icmp; 1493 struct udphdr *const udp = (struct udphdr *)icmp; 1494 1495 int ip_off, offset, ip_len; 1496 int len; 1497 1498 ip_off = ntohs(ip->ip_off); 1499 ip_len = ntohs(ip->ip_len); 1500 offset = ip_off & IP_OFFMASK; 1501 
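		/*
		 * offset != 0 means this is a non-first fragment, so the
		 * TCP/UDP header (and thus the ports) is not in this
		 * packet; the cases below only print ports when
		 * offset == 0.
		 */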
1502 switch (ip->ip_p) { 1503 case IPPROTO_TCP: 1504 len = ksnprintf(SNPARGS(proto, 0), "TCP %s", 1505 kinet_ntoa(ip->ip_src, abuf)); 1506 if (offset == 0) { 1507 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1508 ntohs(tcp->th_sport), 1509 kinet_ntoa(ip->ip_dst, abuf), 1510 ntohs(tcp->th_dport)); 1511 } else { 1512 ksnprintf(SNPARGS(proto, len), " %s", 1513 kinet_ntoa(ip->ip_dst, abuf)); 1514 } 1515 break; 1516 1517 case IPPROTO_UDP: 1518 len = ksnprintf(SNPARGS(proto, 0), "UDP %s", 1519 kinet_ntoa(ip->ip_src, abuf)); 1520 if (offset == 0) { 1521 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1522 ntohs(udp->uh_sport), 1523 kinet_ntoa(ip->ip_dst, abuf), 1524 ntohs(udp->uh_dport)); 1525 } else { 1526 ksnprintf(SNPARGS(proto, len), " %s", 1527 kinet_ntoa(ip->ip_dst, abuf)); 1528 } 1529 break; 1530 1531 case IPPROTO_ICMP: 1532 if (offset == 0) { 1533 len = ksnprintf(SNPARGS(proto, 0), 1534 "ICMP:%u.%u ", 1535 icmp->icmp_type, 1536 icmp->icmp_code); 1537 } else { 1538 len = ksnprintf(SNPARGS(proto, 0), "ICMP "); 1539 } 1540 len += ksnprintf(SNPARGS(proto, len), "%s", 1541 kinet_ntoa(ip->ip_src, abuf)); 1542 ksnprintf(SNPARGS(proto, len), " %s", 1543 kinet_ntoa(ip->ip_dst, abuf)); 1544 break; 1545 1546 default: 1547 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 1548 kinet_ntoa(ip->ip_src, abuf)); 1549 ksnprintf(SNPARGS(proto, len), " %s", 1550 kinet_ntoa(ip->ip_dst, abuf)); 1551 break; 1552 } 1553 1554 if (ip_off & (IP_MF | IP_OFFMASK)) { 1555 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 1556 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 1557 offset << 3, (ip_off & IP_MF) ? "+" : ""); 1558 } 1559 } 1560 1561 if (oif || m->m_pkthdr.rcvif) { 1562 log(LOG_SECURITY | LOG_INFO, 1563 "ipfw: %d %s %s %s via %s%s\n", 1564 f ? f->rulenum : -1, 1565 action, proto, oif ? "out" : "in", 1566 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 1567 fragment); 1568 } else { 1569 log(LOG_SECURITY | LOG_INFO, 1570 "ipfw: %d %s %s [no if info]%s\n", 1571 f ? f->rulenum : -1, 1572 action, proto, fragment); 1573 } 1574 1575 if (limit_reached) { 1576 log(LOG_SECURITY | LOG_NOTICE, 1577 "ipfw: limit %d reached on entry %d\n", 1578 limit_reached, f ? f->rulenum : -1); 1579 } 1580 } 1581 1582 #undef SNPARGS 1583 1584 static void 1585 ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x) 1586 { 1587 struct ip_fw *rule = slave_x->xlat_rule; 1588 1589 KKASSERT(rule->cpuid == mycpuid); 1590 1591 /* No more cross references; free this pair now. */ 1592 kfree(x, M_IPFW); 1593 kfree(slave_x, M_IPFW); 1594 1595 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1596 rule->cross_refs--; 1597 } 1598 1599 static void 1600 ipfw_xlat_reap_dispatch(netmsg_t nm) 1601 { 1602 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1603 struct ipfw_state *s, *ns; 1604 1605 ASSERT_NETISR_NCPUS(mycpuid); 1606 1607 crit_enter(); 1608 /* Reply ASAP. 
*/ 1609 netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0); 1610 crit_exit(); 1611 1612 /* TODO: limit scanning depth */ 1613 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) { 1614 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 1615 struct ipfw_xlat *slave_x = x->xlat_pair; 1616 uint64_t crefs; 1617 1618 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1619 if (crefs == 0) { 1620 TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1621 ipfw_xlat_reap(x, slave_x); 1622 } 1623 } 1624 if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1625 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1626 &ctx->ipfw_xlatreap_nm); 1627 } 1628 } 1629 1630 static void 1631 ipfw_xlat_reap_timeo(void *xnm) 1632 { 1633 struct netmsg_base *nm = xnm; 1634 1635 KKASSERT(mycpuid < netisr_ncpus); 1636 1637 crit_enter(); 1638 if (nm->lmsg.ms_flags & MSGF_DONE) 1639 netisr_sendmsg_oncpu(nm); 1640 crit_exit(); 1641 } 1642 1643 static void 1644 ipfw_xlat_free_dispatch(netmsg_t nmsg) 1645 { 1646 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1647 struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp; 1648 struct ipfw_xlat *slave_x = x->xlat_pair; 1649 uint64_t crefs; 1650 1651 ASSERT_NETISR_NCPUS(mycpuid); 1652 1653 KKASSERT(slave_x != NULL); 1654 KKASSERT(slave_x->xlat_invalid && x->xlat_invalid); 1655 1656 KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0, 1657 ("master xlat is still linked")); 1658 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1659 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1660 1661 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1662 slave_x->xlat_crefs--; 1663 1664 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1665 if (crefs == 0) { 1666 ipfw_xlat_reap(x, slave_x); 1667 return; 1668 } 1669 1670 if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1671 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1672 &ctx->ipfw_xlatreap_nm); 1673 } 1674 1675 /* 1676 * This pair is still referenced; defer its destruction. 1677 * YYY reuse st_link. 1678 */ 1679 TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1680 } 1681 1682 static __inline void 1683 ipfw_xlat_invalidate(struct ipfw_xlat *x) 1684 { 1685 1686 x->xlat_invalid = 1; 1687 x->xlat_pair->xlat_invalid = 1; 1688 } 1689 1690 static void 1691 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s) 1692 { 1693 struct ipfw_xlat *x, *slave_x; 1694 struct netmsg_base *nm; 1695 1696 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT || 1697 IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type)); 1698 KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0, 1699 ("delete slave xlat")); 1700 1701 KASSERT(ctx->ipfw_state_cnt > 0, 1702 ("invalid state count %d", ctx->ipfw_state_cnt)); 1703 ctx->ipfw_state_cnt--; 1704 if (ctx->ipfw_state_loosecnt > 0) 1705 ctx->ipfw_state_loosecnt--; 1706 1707 /* 1708 * Unhook this state. 1709 */ 1710 if (s->st_track != NULL) { 1711 struct ipfw_track *t = s->st_track; 1712 1713 KASSERT(!LIST_EMPTY(&t->t_state_list), 1714 ("track state list is empty")); 1715 LIST_REMOVE(s, st_trklink); 1716 1717 KASSERT(*t->t_count > 0, 1718 ("invalid track count %d", *t->t_count)); 1719 atomic_subtract_int(t->t_count, 1); 1720 } 1721 ipfw_state_unlink(ctx, s); 1722 1723 /* 1724 * Free this state. Xlat requires special processing, 1725 * since xlat are paired state and they could be on 1726 * different cpus. 1727 */ 1728 1729 if (!IPFW_ISXLAT(s->st_type)) { 1730 /* Not xlat; free now. */ 1731 kfree(s, M_IPFW); 1732 /* Done! 
*/ 1733 return; 1734 } 1735 x = (struct ipfw_xlat *)s; 1736 1737 if (x->xlat_pair == NULL) { 1738 /* Not setup yet; free now. */ 1739 kfree(x, M_IPFW); 1740 /* Done! */ 1741 return; 1742 } 1743 slave_x = x->xlat_pair; 1744 KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE); 1745 1746 if (x->xlat_pcpu == mycpuid) { 1747 /* 1748 * Paired states are on the same cpu; delete this 1749 * pair now. 1750 */ 1751 KKASSERT(x->xlat_crefs == 0); 1752 KKASSERT(slave_x->xlat_crefs == 0); 1753 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1754 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1755 kfree(x, M_IPFW); 1756 kfree(slave_x, M_IPFW); 1757 return; 1758 } 1759 1760 /* 1761 * Free the paired states on the cpu owning the slave xlat. 1762 */ 1763 1764 /* 1765 * Mark the state pair invalid; completely deleting them 1766 * may take some time. 1767 */ 1768 ipfw_xlat_invalidate(x); 1769 1770 nm = &x->xlat_freenm; 1771 netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY, 1772 ipfw_xlat_free_dispatch); 1773 nm->lmsg.u.ms_resultp = x; 1774 1775 /* See the comment in ipfw_xlate_redispatch(). */ 1776 x->xlat_rule->cross_refs++; 1777 x->xlat_crefs++; 1778 1779 netisr_sendmsg(nm, x->xlat_pcpu); 1780 } 1781 1782 static void 1783 ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s) 1784 { 1785 1786 if (s->st_flags & IPFW_STATE_F_XLATSLAVE) { 1787 KKASSERT(IPFW_ISXLAT(s->st_type)); 1788 ipfw_xlat_invalidate((struct ipfw_xlat *)s); 1789 ipfw_state_unlink(ctx, s); 1790 return; 1791 } 1792 ipfw_state_del(ctx, s); 1793 } 1794 1795 static int 1796 ipfw_state_reap(struct ipfw_context *ctx, int reap_max) 1797 { 1798 struct ipfw_state *s, *anchor; 1799 int expired; 1800 1801 if (reap_max < ipfw_state_reap_min) 1802 reap_max = ipfw_state_reap_min; 1803 1804 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) { 1805 /* 1806 * Kick start state expiring. Ignore scan limit, 1807 * we are short of states. 1808 */ 1809 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP; 1810 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max); 1811 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP; 1812 return (expired); 1813 } 1814 1815 /* 1816 * States are being expired. 1817 */ 1818 1819 if (ctx->ipfw_state_cnt == 0) 1820 return (0); 1821 1822 expired = 0; 1823 anchor = &ctx->ipfw_stateexp_anch; 1824 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1825 /* 1826 * Ignore scan limit; we are short of states. 1827 */ 1828 1829 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1830 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1831 1832 if (IPFW_STATE_SCANSKIP(s)) 1833 continue; 1834 1835 if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) { 1836 ipfw_state_del(ctx, s); 1837 if (++expired >= reap_max) 1838 break; 1839 if ((expired & 0xff) == 0 && 1840 ipfw_state_cntcoll() + ipfw_state_headroom <= 1841 ipfw_state_max) 1842 break; 1843 } 1844 } 1845 /* 1846 * NOTE: 1847 * Leave the anchor on the list, even if the end of the list has 1848 * been reached. ipfw_state_expire_more_dispatch() will handle 1849 * the removal. 
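	 * (State expiration is still marked in progress on this cpu,
	 * so that path will run again and take the anchor back off
	 * the list.)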
1850 */ 1851 return (expired); 1852 } 1853 1854 static void 1855 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule) 1856 { 1857 struct ipfw_state *s, *sn; 1858 1859 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) { 1860 if (IPFW_STATE_SCANSKIP(s)) 1861 continue; 1862 if (rule != NULL && s->st_rule != rule) 1863 continue; 1864 ipfw_state_del(ctx, s); 1865 } 1866 } 1867 1868 static void 1869 ipfw_state_expire_done(struct ipfw_context *ctx) 1870 { 1871 1872 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1873 ("stateexp is not in progress")); 1874 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP; 1875 callout_reset(&ctx->ipfw_stateto_ch, hz, 1876 ipfw_state_expire_ipifunc, NULL); 1877 } 1878 1879 static void 1880 ipfw_state_expire_more(struct ipfw_context *ctx) 1881 { 1882 struct netmsg_base *nm = &ctx->ipfw_stateexp_more; 1883 1884 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1885 ("stateexp is not in progress")); 1886 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 1887 ("stateexp more did not finish")); 1888 netisr_sendmsg_oncpu(nm); 1889 } 1890 1891 static int 1892 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor, 1893 int scan_max, int expire_max) 1894 { 1895 struct ipfw_state *s; 1896 int scanned = 0, expired = 0; 1897 1898 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1899 ("stateexp is not in progress")); 1900 1901 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1902 if (scanned++ >= scan_max) { 1903 ipfw_state_expire_more(ctx); 1904 return (expired); 1905 } 1906 1907 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1908 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1909 1910 if (IPFW_STATE_SCANSKIP(s)) 1911 continue; 1912 1913 if (IPFW_STATE_ISDEAD(s) || 1914 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1915 IPFW_STATE_TCPCLOSED(s))) { 1916 ipfw_state_del(ctx, s); 1917 if (++expired >= expire_max) { 1918 ipfw_state_expire_more(ctx); 1919 return (expired); 1920 } 1921 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1922 (expired & 0xff) == 0 && 1923 ipfw_state_cntcoll() + ipfw_state_headroom <= 1924 ipfw_state_max) { 1925 ipfw_state_expire_more(ctx); 1926 return (expired); 1927 } 1928 } 1929 } 1930 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1931 ipfw_state_expire_done(ctx); 1932 return (expired); 1933 } 1934 1935 static void 1936 ipfw_state_expire_more_dispatch(netmsg_t nm) 1937 { 1938 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1939 struct ipfw_state *anchor; 1940 1941 ASSERT_NETISR_NCPUS(mycpuid); 1942 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1943 ("statexp is not in progress")); 1944 1945 /* Reply ASAP */ 1946 netisr_replymsg(&nm->base, 0); 1947 1948 anchor = &ctx->ipfw_stateexp_anch; 1949 if (ctx->ipfw_state_cnt == 0) { 1950 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1951 ipfw_state_expire_done(ctx); 1952 return; 1953 } 1954 ipfw_state_expire_loop(ctx, anchor, 1955 ipfw_state_scan_max, ipfw_state_expire_max); 1956 } 1957 1958 static int 1959 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 1960 { 1961 struct ipfw_state *anchor; 1962 1963 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0, 1964 ("stateexp is in progress")); 1965 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP; 1966 1967 if (ctx->ipfw_state_cnt == 0) { 1968 ipfw_state_expire_done(ctx); 1969 return (0); 1970 } 1971 1972 /* 1973 * Do not expire more than once per second, it is useless. 
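	 * (time_uptime has one-second granularity, so comparing it with
	 * ipfw_state_lastexp caps the normal path at one expire pass per
	 * second; the IPFW_FLAG_STATEREAP path skips the check because
	 * we are short of states.)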
1974 */ 1975 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 && 1976 ctx->ipfw_state_lastexp == time_uptime) { 1977 ipfw_state_expire_done(ctx); 1978 return (0); 1979 } 1980 ctx->ipfw_state_lastexp = time_uptime; 1981 1982 anchor = &ctx->ipfw_stateexp_anch; 1983 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 1984 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max)); 1985 } 1986 1987 static void 1988 ipfw_state_expire_dispatch(netmsg_t nm) 1989 { 1990 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1991 1992 ASSERT_NETISR_NCPUS(mycpuid); 1993 1994 /* Reply ASAP */ 1995 crit_enter(); 1996 netisr_replymsg(&nm->base, 0); 1997 crit_exit(); 1998 1999 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) { 2000 /* Running; done. */ 2001 return; 2002 } 2003 ipfw_state_expire_start(ctx, 2004 ipfw_state_scan_max, ipfw_state_expire_max); 2005 } 2006 2007 static void 2008 ipfw_state_expire_ipifunc(void *dummy __unused) 2009 { 2010 struct netmsg_base *msg; 2011 2012 KKASSERT(mycpuid < netisr_ncpus); 2013 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm; 2014 2015 crit_enter(); 2016 if (msg->lmsg.ms_flags & MSGF_DONE) 2017 netisr_sendmsg_oncpu(msg); 2018 crit_exit(); 2019 } 2020 2021 static boolean_t 2022 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp) 2023 { 2024 uint32_t seq = ntohl(tcp->th_seq); 2025 uint32_t ack = ntohl(tcp->th_ack); 2026 2027 if (tcp->th_flags & TH_RST) 2028 return (TRUE); 2029 2030 if (dir == MATCH_FORWARD) { 2031 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) { 2032 s->st_flags |= IPFW_STATE_F_SEQFWD; 2033 s->st_seq_fwd = seq; 2034 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) { 2035 s->st_seq_fwd = seq; 2036 } else { 2037 /* Out-of-sequence; done. */ 2038 return (FALSE); 2039 } 2040 if (tcp->th_flags & TH_ACK) { 2041 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) { 2042 s->st_flags |= IPFW_STATE_F_ACKFWD; 2043 s->st_ack_fwd = ack; 2044 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) { 2045 s->st_ack_fwd = ack; 2046 } else { 2047 /* Out-of-sequence; done. */ 2048 return (FALSE); 2049 } 2050 2051 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) == 2052 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1) 2053 s->st_state |= (TH_ACK << 8); 2054 } 2055 } else { 2056 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) { 2057 s->st_flags |= IPFW_STATE_F_SEQREV; 2058 s->st_seq_rev = seq; 2059 } else if (SEQ_GEQ(seq, s->st_seq_rev)) { 2060 s->st_seq_rev = seq; 2061 } else { 2062 /* Out-of-sequence; done. */ 2063 return (FALSE); 2064 } 2065 if (tcp->th_flags & TH_ACK) { 2066 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) { 2067 s->st_flags |= IPFW_STATE_F_ACKREV; 2068 s->st_ack_rev= ack; 2069 } else if (SEQ_GEQ(ack, s->st_ack_rev)) { 2070 s->st_ack_rev = ack; 2071 } else { 2072 /* Out-of-sequence; done. */ 2073 return (FALSE); 2074 } 2075 2076 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN && 2077 s->st_ack_rev == s->st_seq_fwd + 1) 2078 s->st_state |= TH_ACK; 2079 } 2080 } 2081 return (TRUE); 2082 } 2083 2084 static void 2085 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir, 2086 const struct tcphdr *tcp, struct ipfw_state *s) 2087 { 2088 2089 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 2090 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS; 2091 2092 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp)) 2093 return; 2094 2095 s->st_state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); 2096 switch (s->st_state & IPFW_STATE_TCPSTATES) { 2097 case TH_SYN: /* opening */ 2098 s->st_expire = time_uptime + dyn_syn_lifetime; 2099 break; 2100 2101 case BOTH_SYN: /* move to established */ 2102 case BOTH_SYN | TH_FIN: /* one side tries to close */ 2103 case BOTH_SYN | (TH_FIN << 8): 2104 s->st_expire = time_uptime + dyn_ack_lifetime; 2105 break; 2106 2107 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 2108 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) { 2109 /* And both FINs were ACKed. */ 2110 s->st_expire = time_uptime + dyn_fin_lifetime; 2111 } else { 2112 s->st_expire = time_uptime + 2113 dyn_finwait_lifetime; 2114 } 2115 break; 2116 2117 default: 2118 #if 0 2119 /* 2120 * reset or some invalid combination, but can also 2121 * occur if we use keep-state the wrong way. 2122 */ 2123 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0) 2124 kprintf("invalid state: 0x%x\n", s->st_state); 2125 #endif 2126 s->st_expire = time_uptime + dyn_rst_lifetime; 2127 break; 2128 } 2129 } else if (pkt->proto == IPPROTO_UDP) { 2130 s->st_expire = time_uptime + dyn_udp_lifetime; 2131 } else { 2132 /* other protocols */ 2133 s->st_expire = time_uptime + dyn_short_lifetime; 2134 } 2135 } 2136 2137 /* 2138 * Lookup a state. 2139 */ 2140 static struct ipfw_state * 2141 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt, 2142 int *match_direction, const struct tcphdr *tcp) 2143 { 2144 struct ipfw_state *key, *s; 2145 int dir = MATCH_NONE; 2146 2147 key = &ctx->ipfw_state_tmpkey; 2148 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port, 2149 pkt->dst_ip, pkt->dst_port, pkt->proto); 2150 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key); 2151 if (s == NULL) 2152 goto done; /* not found. */ 2153 if (IPFW_STATE_ISDEAD(s)) { 2154 ipfw_state_remove(ctx, s); 2155 s = NULL; 2156 goto done; 2157 } 2158 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) { 2159 /* TCP ports recycling is too fast. */ 2160 ctx->ipfw_sts_tcprecycled++; 2161 ipfw_state_remove(ctx, s); 2162 s = NULL; 2163 goto done; 2164 } 2165 2166 if (s->st_swap == key->st_swap) { 2167 dir = MATCH_FORWARD; 2168 } else { 2169 KASSERT((s->st_swap & key->st_swap) == 0, 2170 ("found mismatch state")); 2171 dir = MATCH_REVERSE; 2172 } 2173 2174 /* Update this state. */ 2175 ipfw_state_update(pkt, dir, tcp, s); 2176 2177 if (s->st_track != NULL) { 2178 /* This track has been used. 
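 * Refresh its expire time below as well, so the 'limit' accounting
 * stays alive as long as any of its states keeps seeing traffic.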
*/ 2179 s->st_track->t_expire = time_uptime + dyn_short_lifetime; 2180 } 2181 done: 2182 if (match_direction) 2183 *match_direction = dir; 2184 return (s); 2185 } 2186 2187 static struct ipfw_state * 2188 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2189 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp) 2190 { 2191 struct ipfw_state *s; 2192 size_t sz; 2193 2194 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type), 2195 ("invalid state type %u", type)); 2196 2197 sz = sizeof(struct ipfw_state); 2198 if (IPFW_ISXLAT(type)) 2199 sz = sizeof(struct ipfw_xlat); 2200 2201 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 2202 if (s == NULL) { 2203 ctx->ipfw_sts_nomem++; 2204 return (NULL); 2205 } 2206 2207 ipfw_key_build(&s->st_key, id->src_ip, id->src_port, 2208 id->dst_ip, id->dst_port, id->proto); 2209 2210 s->st_rule = rule; 2211 s->st_type = type; 2212 if (IPFW_ISXLAT(type)) { 2213 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2214 2215 x->xlat_dir = MATCH_NONE; 2216 x->xlat_pcpu = -1; 2217 } 2218 2219 /* 2220 * Update this state: 2221 * Set st_expire and st_state. 2222 */ 2223 ipfw_state_update(id, MATCH_FORWARD, tcp, s); 2224 2225 return (s); 2226 } 2227 2228 static struct ipfw_state * 2229 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2230 uint16_t type, struct ip_fw *rule, struct ipfw_track *t, 2231 const struct tcphdr *tcp) 2232 { 2233 struct ipfw_state *s, *dup; 2234 2235 s = ipfw_state_alloc(ctx, id, type, rule, tcp); 2236 if (s == NULL) 2237 return (NULL); 2238 2239 ctx->ipfw_state_cnt++; 2240 ctx->ipfw_state_loosecnt++; 2241 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) { 2242 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt; 2243 ctx->ipfw_state_loosecnt = 0; 2244 } 2245 2246 dup = ipfw_state_link(ctx, s); 2247 if (dup != NULL) 2248 panic("ipfw: %u state exists %p", type, dup); 2249 2250 if (t != NULL) { 2251 /* Keep the track referenced. */ 2252 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink); 2253 s->st_track = t; 2254 } 2255 return (s); 2256 } 2257 2258 static boolean_t 2259 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t) 2260 { 2261 struct ipfw_trkcnt *trk; 2262 boolean_t trk_freed = FALSE; 2263 2264 KASSERT(t->t_count != NULL, ("track anchor")); 2265 KASSERT(LIST_EMPTY(&t->t_state_list), 2266 ("invalid track is still referenced")); 2267 2268 trk = t->t_trkcnt; 2269 KASSERT(trk != NULL, ("track has no trkcnt")); 2270 2271 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2272 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link); 2273 kfree(t, M_IPFW); 2274 2275 /* 2276 * fdrop() style reference counting. 2277 * See kern/kern_descrip.c fdrop(). 2278 */ 2279 for (;;) { 2280 int refs = trk->tc_refs; 2281 2282 cpu_ccfence(); 2283 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs)); 2284 if (refs == 1) { 2285 IPFW_TRKCNT_TOKGET; 2286 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) { 2287 KASSERT(trk->tc_count == 0, 2288 ("%d states reference this trkcnt", 2289 trk->tc_count)); 2290 RB_REMOVE(ipfw_trkcnt_tree, 2291 &ipfw_gd.ipfw_trkcnt_tree, trk); 2292 2293 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0, 2294 ("invalid trkcnt cnt %d", 2295 ipfw_gd.ipfw_trkcnt_cnt)); 2296 ipfw_gd.ipfw_trkcnt_cnt--; 2297 IPFW_TRKCNT_TOKREL; 2298 2299 if (ctx->ipfw_trkcnt_spare == NULL) 2300 ctx->ipfw_trkcnt_spare = trk; 2301 else 2302 kfree(trk, M_IPFW); 2303 trk_freed = TRUE; 2304 break; /* done! 
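 * The last reference is gone: the trkcnt has been detached from the
 * global tree and either cached as this cpu's spare or freed.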
*/ 2305 } 2306 IPFW_TRKCNT_TOKREL; 2307 /* retry */ 2308 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) { 2309 break; /* done! */ 2310 } 2311 /* retry */ 2312 } 2313 return (trk_freed); 2314 } 2315 2316 static void 2317 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule) 2318 { 2319 struct ipfw_track *t, *tn; 2320 2321 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) { 2322 if (t->t_count == NULL) /* anchor */ 2323 continue; 2324 if (rule != NULL && t->t_rule != rule) 2325 continue; 2326 ipfw_track_free(ctx, t); 2327 } 2328 } 2329 2330 static boolean_t 2331 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t, 2332 boolean_t reap) 2333 { 2334 struct ipfw_state *s, *sn; 2335 boolean_t ret = FALSE; 2336 2337 KASSERT(t->t_count != NULL, ("track anchor")); 2338 2339 if (LIST_EMPTY(&t->t_state_list)) 2340 return (FALSE); 2341 2342 /* 2343 * Do not expire more than once per second, it is useless. 2344 */ 2345 if (t->t_lastexp == time_uptime) 2346 return (FALSE); 2347 t->t_lastexp = time_uptime; 2348 2349 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) { 2350 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) { 2351 KASSERT(s->st_track == t, 2352 ("state track %p does not match %p", 2353 s->st_track, t)); 2354 ipfw_state_del(ctx, s); 2355 ret = TRUE; 2356 } 2357 } 2358 return (ret); 2359 } 2360 2361 static __inline struct ipfw_trkcnt * 2362 ipfw_trkcnt_alloc(struct ipfw_context *ctx) 2363 { 2364 struct ipfw_trkcnt *trk; 2365 2366 if (ctx->ipfw_trkcnt_spare != NULL) { 2367 trk = ctx->ipfw_trkcnt_spare; 2368 ctx->ipfw_trkcnt_spare = NULL; 2369 } else { 2370 trk = kmalloc(sizeof(*trk), M_IPFW, 2371 M_INTWAIT | M_NULLOK | M_CACHEALIGN); 2372 } 2373 return (trk); 2374 } 2375 2376 static void 2377 ipfw_track_expire_done(struct ipfw_context *ctx) 2378 { 2379 2380 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2381 ("trackexp is not in progress")); 2382 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP; 2383 callout_reset(&ctx->ipfw_trackto_ch, hz, 2384 ipfw_track_expire_ipifunc, NULL); 2385 } 2386 2387 static void 2388 ipfw_track_expire_more(struct ipfw_context *ctx) 2389 { 2390 struct netmsg_base *nm = &ctx->ipfw_trackexp_more; 2391 2392 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2393 ("trackexp is not in progress")); 2394 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 2395 ("trackexp more did not finish")); 2396 netisr_sendmsg_oncpu(nm); 2397 } 2398 2399 static int 2400 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor, 2401 int scan_max, int expire_max) 2402 { 2403 struct ipfw_track *t; 2404 int scanned = 0, expired = 0; 2405 boolean_t reap = FALSE; 2406 2407 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2408 ("trackexp is not in progress")); 2409 2410 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) 2411 reap = TRUE; 2412 2413 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2414 if (scanned++ >= scan_max) { 2415 ipfw_track_expire_more(ctx); 2416 return (expired); 2417 } 2418 2419 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2420 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2421 2422 if (t->t_count == NULL) /* anchor */ 2423 continue; 2424 2425 ipfw_track_state_expire(ctx, t, reap); 2426 if (!LIST_EMPTY(&t->t_state_list)) { 2427 /* There are states referencing this track. */ 2428 continue; 2429 } 2430 2431 if (TIME_LEQ(t->t_expire, time_uptime) || reap) { 2432 /* Expired. 
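 * Free the track; it only counts against expire_max when
 * ipfw_track_free() reports that the shared trkcnt was released too.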
*/ 2433 if (ipfw_track_free(ctx, t)) { 2434 if (++expired >= expire_max) { 2435 ipfw_track_expire_more(ctx); 2436 return (expired); 2437 } 2438 } 2439 } 2440 } 2441 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2442 ipfw_track_expire_done(ctx); 2443 return (expired); 2444 } 2445 2446 static int 2447 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 2448 { 2449 struct ipfw_track *anchor; 2450 2451 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0, 2452 ("trackexp is in progress")); 2453 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP; 2454 2455 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2456 ipfw_track_expire_done(ctx); 2457 return (0); 2458 } 2459 2460 /* 2461 * Do not expire more than once per second, it is useless. 2462 */ 2463 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 && 2464 ctx->ipfw_track_lastexp == time_uptime) { 2465 ipfw_track_expire_done(ctx); 2466 return (0); 2467 } 2468 ctx->ipfw_track_lastexp = time_uptime; 2469 2470 anchor = &ctx->ipfw_trackexp_anch; 2471 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link); 2472 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max)); 2473 } 2474 2475 static void 2476 ipfw_track_expire_more_dispatch(netmsg_t nm) 2477 { 2478 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2479 struct ipfw_track *anchor; 2480 2481 ASSERT_NETISR_NCPUS(mycpuid); 2482 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2483 ("trackexp is not in progress")); 2484 2485 /* Reply ASAP */ 2486 netisr_replymsg(&nm->base, 0); 2487 2488 anchor = &ctx->ipfw_trackexp_anch; 2489 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2490 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2491 ipfw_track_expire_done(ctx); 2492 return; 2493 } 2494 ipfw_track_expire_loop(ctx, anchor, 2495 ipfw_track_scan_max, ipfw_track_expire_max); 2496 } 2497 2498 static void 2499 ipfw_track_expire_dispatch(netmsg_t nm) 2500 { 2501 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2502 2503 ASSERT_NETISR_NCPUS(mycpuid); 2504 2505 /* Reply ASAP */ 2506 crit_enter(); 2507 netisr_replymsg(&nm->base, 0); 2508 crit_exit(); 2509 2510 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) { 2511 /* Running; done. */ 2512 return; 2513 } 2514 ipfw_track_expire_start(ctx, 2515 ipfw_track_scan_max, ipfw_track_expire_max); 2516 } 2517 2518 static void 2519 ipfw_track_expire_ipifunc(void *dummy __unused) 2520 { 2521 struct netmsg_base *msg; 2522 2523 KKASSERT(mycpuid < netisr_ncpus); 2524 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm; 2525 2526 crit_enter(); 2527 if (msg->lmsg.ms_flags & MSGF_DONE) 2528 netisr_sendmsg_oncpu(msg); 2529 crit_exit(); 2530 } 2531 2532 static int 2533 ipfw_track_reap(struct ipfw_context *ctx) 2534 { 2535 struct ipfw_track *t, *anchor; 2536 int expired; 2537 2538 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) { 2539 /* 2540 * Kick start track expiring. Ignore scan limit, 2541 * we are short of tracks. 2542 */ 2543 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP; 2544 expired = ipfw_track_expire_start(ctx, INT_MAX, 2545 ipfw_track_reap_max); 2546 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP; 2547 return (expired); 2548 } 2549 2550 /* 2551 * Tracks are being expired. 2552 */ 2553 2554 if (RB_EMPTY(&ctx->ipfw_track_tree)) 2555 return (0); 2556 2557 expired = 0; 2558 anchor = &ctx->ipfw_trackexp_anch; 2559 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2560 /* 2561 * Ignore scan limit; we are short of tracks. 
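 * Unlike a regular ipfw_track_expire_loop() pass, every track on the
 * list is visited here.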
2562 */ 2563 2564 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2565 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2566 2567 if (t->t_count == NULL) /* anchor */ 2568 continue; 2569 2570 ipfw_track_state_expire(ctx, t, TRUE); 2571 if (!LIST_EMPTY(&t->t_state_list)) { 2572 /* There are states referencing this track. */ 2573 continue; 2574 } 2575 2576 if (ipfw_track_free(ctx, t)) { 2577 if (++expired >= ipfw_track_reap_max) { 2578 ipfw_track_expire_more(ctx); 2579 break; 2580 } 2581 } 2582 } 2583 /* 2584 * NOTE: 2585 * Leave the anchor on the list, even if the end of the list has 2586 * been reached. ipfw_track_expire_more_dispatch() will handle 2587 * the removal. 2588 */ 2589 return (expired); 2590 } 2591 2592 static struct ipfw_track * 2593 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2594 uint16_t limit_mask, struct ip_fw *rule) 2595 { 2596 struct ipfw_track *key, *t, *dup; 2597 struct ipfw_trkcnt *trk, *ret; 2598 boolean_t do_expire = FALSE; 2599 2600 KASSERT(rule->track_ruleid != 0, 2601 ("rule %u has no track ruleid", rule->rulenum)); 2602 2603 key = &ctx->ipfw_track_tmpkey; 2604 key->t_proto = id->proto; 2605 key->t_addrs = 0; 2606 key->t_ports = 0; 2607 key->t_rule = rule; 2608 if (limit_mask & DYN_SRC_ADDR) 2609 key->t_saddr = id->src_ip; 2610 if (limit_mask & DYN_DST_ADDR) 2611 key->t_daddr = id->dst_ip; 2612 if (limit_mask & DYN_SRC_PORT) 2613 key->t_sport = id->src_port; 2614 if (limit_mask & DYN_DST_PORT) 2615 key->t_dport = id->dst_port; 2616 2617 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key); 2618 if (t != NULL) 2619 goto done; 2620 2621 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK); 2622 if (t == NULL) { 2623 ctx->ipfw_tks_nomem++; 2624 return (NULL); 2625 } 2626 2627 t->t_key = key->t_key; 2628 t->t_rule = rule; 2629 t->t_lastexp = 0; 2630 LIST_INIT(&t->t_state_list); 2631 2632 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) { 2633 time_t globexp, uptime; 2634 2635 trk = NULL; 2636 do_expire = TRUE; 2637 2638 /* 2639 * Do not expire globally more than once per second, 2640 * it is useless. 2641 */ 2642 uptime = time_uptime; 2643 globexp = ipfw_gd.ipfw_track_globexp; 2644 if (globexp != uptime && 2645 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp, 2646 globexp, uptime)) { 2647 int cpu; 2648 2649 /* Expire tracks on other CPUs. 
*/ 2650 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2651 if (cpu == mycpuid) 2652 continue; 2653 lwkt_send_ipiq(globaldata_find(cpu), 2654 ipfw_track_expire_ipifunc, NULL); 2655 } 2656 } 2657 } else { 2658 trk = ipfw_trkcnt_alloc(ctx); 2659 } 2660 if (trk == NULL) { 2661 struct ipfw_trkcnt *tkey; 2662 2663 tkey = &ctx->ipfw_trkcnt_tmpkey; 2664 key = NULL; /* tkey overlaps key */ 2665 2666 tkey->tc_key = t->t_key; 2667 tkey->tc_ruleid = rule->track_ruleid; 2668 2669 IPFW_TRKCNT_TOKGET; 2670 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2671 tkey); 2672 if (trk == NULL) { 2673 IPFW_TRKCNT_TOKREL; 2674 if (do_expire) { 2675 ctx->ipfw_tks_reap++; 2676 if (ipfw_track_reap(ctx) > 0) { 2677 if (ipfw_gd.ipfw_trkcnt_cnt < 2678 ipfw_track_max) { 2679 trk = ipfw_trkcnt_alloc(ctx); 2680 if (trk != NULL) 2681 goto install; 2682 ctx->ipfw_tks_cntnomem++; 2683 } else { 2684 ctx->ipfw_tks_overflow++; 2685 } 2686 } else { 2687 ctx->ipfw_tks_reapfailed++; 2688 ctx->ipfw_tks_overflow++; 2689 } 2690 } else { 2691 ctx->ipfw_tks_cntnomem++; 2692 } 2693 kfree(t, M_IPFW); 2694 return (NULL); 2695 } 2696 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus, 2697 ("invalid trkcnt refs %d", trk->tc_refs)); 2698 atomic_add_int(&trk->tc_refs, 1); 2699 IPFW_TRKCNT_TOKREL; 2700 } else { 2701 install: 2702 trk->tc_key = t->t_key; 2703 trk->tc_ruleid = rule->track_ruleid; 2704 trk->tc_refs = 0; 2705 trk->tc_count = 0; 2706 trk->tc_expire = 0; 2707 trk->tc_rulenum = rule->rulenum; 2708 2709 IPFW_TRKCNT_TOKGET; 2710 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2711 trk); 2712 if (ret != NULL) { 2713 KASSERT(ret->tc_refs > 0 && 2714 ret->tc_refs < netisr_ncpus, 2715 ("invalid trkcnt refs %d", ret->tc_refs)); 2716 KASSERT(ctx->ipfw_trkcnt_spare == NULL, 2717 ("trkcnt spare was installed")); 2718 ctx->ipfw_trkcnt_spare = trk; 2719 trk = ret; 2720 } else { 2721 ipfw_gd.ipfw_trkcnt_cnt++; 2722 } 2723 atomic_add_int(&trk->tc_refs, 1); 2724 IPFW_TRKCNT_TOKREL; 2725 } 2726 t->t_count = &trk->tc_count; 2727 t->t_trkcnt = trk; 2728 2729 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2730 if (dup != NULL) 2731 panic("ipfw: track exists"); 2732 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link); 2733 done: 2734 t->t_expire = time_uptime + dyn_short_lifetime; 2735 return (t); 2736 } 2737 2738 /* 2739 * Install state for rule type cmd->o.opcode 2740 * 2741 * Returns NULL if state is not installed because of errors or because 2742 * states limitations are enforced. 2743 */ 2744 static struct ipfw_state * 2745 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule, 2746 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp) 2747 { 2748 struct ipfw_state *s; 2749 struct ipfw_track *t; 2750 int count, diff; 2751 2752 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max && 2753 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) { 2754 boolean_t overflow = TRUE; 2755 2756 ctx->ipfw_sts_reap++; 2757 if (ipfw_state_reap(ctx, diff) == 0) 2758 ctx->ipfw_sts_reapfailed++; 2759 if (ipfw_state_cntsync() < ipfw_state_max) 2760 overflow = FALSE; 2761 2762 if (overflow) { 2763 time_t globexp, uptime; 2764 int cpu; 2765 2766 /* 2767 * Do not expire globally more than once per second, 2768 * it is useless. 
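 * The expire thresholds are whole seconds of time_uptime, so a second
 * pass within the same second cannot free anything new.  Only the cpu
 * that wins the atomic_cmpset_long() below gets to kick the other
 * cpus; the losers simply count an overflow and fail the installation.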
2769 */ 2770 uptime = time_uptime; 2771 globexp = ipfw_gd.ipfw_state_globexp; 2772 if (globexp == uptime || 2773 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp, 2774 globexp, uptime)) { 2775 ctx->ipfw_sts_overflow++; 2776 return (NULL); 2777 } 2778 2779 /* Expire states on other CPUs. */ 2780 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2781 if (cpu == mycpuid) 2782 continue; 2783 lwkt_send_ipiq(globaldata_find(cpu), 2784 ipfw_state_expire_ipifunc, NULL); 2785 } 2786 ctx->ipfw_sts_overflow++; 2787 return (NULL); 2788 } 2789 } 2790 2791 switch (cmd->o.opcode) { 2792 case O_KEEP_STATE: /* bidir rule */ 2793 case O_REDIRECT: 2794 s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL, 2795 tcp); 2796 if (s == NULL) 2797 return (NULL); 2798 break; 2799 2800 case O_LIMIT: /* limit number of sessions */ 2801 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule); 2802 if (t == NULL) 2803 return (NULL); 2804 2805 if (*t->t_count >= cmd->conn_limit) { 2806 if (!ipfw_track_state_expire(ctx, t, TRUE)) 2807 return (NULL); 2808 } 2809 for (;;) { 2810 count = *t->t_count; 2811 if (count >= cmd->conn_limit) 2812 return (NULL); 2813 if (atomic_cmpset_int(t->t_count, count, count + 1)) 2814 break; 2815 } 2816 2817 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp); 2818 if (s == NULL) { 2819 /* Undo damage. */ 2820 atomic_subtract_int(t->t_count, 1); 2821 return (NULL); 2822 } 2823 break; 2824 2825 default: 2826 panic("unknown state type %u\n", cmd->o.opcode); 2827 } 2828 2829 if (s->st_type == O_REDIRECT) { 2830 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2831 ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd; 2832 2833 x->xlat_addr = r->addr.s_addr; 2834 x->xlat_port = r->port; 2835 x->xlat_ifp = args->m->m_pkthdr.rcvif; 2836 x->xlat_dir = MATCH_FORWARD; 2837 KKASSERT(x->xlat_ifp != NULL); 2838 } 2839 return (s); 2840 } 2841 2842 static int 2843 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid, 2844 const struct in_addr *in) 2845 { 2846 struct radix_node_head *rnh; 2847 struct sockaddr_in sin; 2848 struct ipfw_tblent *te; 2849 2850 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid)); 2851 rnh = ctx->ipfw_tables[tableid]; 2852 if (rnh == NULL) 2853 return (0); /* no match */ 2854 2855 memset(&sin, 0, sizeof(sin)); 2856 sin.sin_family = AF_INET; 2857 sin.sin_len = sizeof(sin); 2858 sin.sin_addr = *in; 2859 2860 te = (struct ipfw_tblent *)rnh->rnh_matchaddr(&sin, rnh); 2861 if (te == NULL) 2862 return (0); /* no match */ 2863 2864 te->te_use++; 2865 te->te_lastuse = time_second; 2866 return (1); /* match */ 2867 } 2868 2869 /* 2870 * Transmit a TCP packet, containing either a RST or a keepalive. 2871 * When flags & TH_RST, we are sending a RST packet, because of a 2872 * "reset" action matched the packet. 2873 * Otherwise we are sending a keepalive, and flags & TH_ 2874 * 2875 * Only {src,dst}_{ip,port} of "id" are used. 
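 *
 * If TH_RST is not set a keepalive is sent instead; flags & TH_SYN
 * then selects the direction (forward if set, reverse if clear) and
 * 'seq'/'ack' are used exactly as passed in by the caller.  For
 * example, send_reject() below issues the RST variant as:
 *
 *	send_pkt(&args->f_id, ntohl(tcp->th_seq), ntohl(tcp->th_ack),
 *	    tcp->th_flags | TH_RST);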
2876 */ 2877 static void 2878 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 2879 { 2880 struct mbuf *m; 2881 struct ip *ip; 2882 struct tcphdr *tcp; 2883 struct route sro; /* fake route */ 2884 2885 MGETHDR(m, M_NOWAIT, MT_HEADER); 2886 if (m == NULL) 2887 return; 2888 m->m_pkthdr.rcvif = NULL; 2889 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 2890 m->m_data += max_linkhdr; 2891 2892 ip = mtod(m, struct ip *); 2893 bzero(ip, m->m_len); 2894 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 2895 ip->ip_p = IPPROTO_TCP; 2896 tcp->th_off = 5; 2897 2898 /* 2899 * Assume we are sending a RST (or a keepalive in the reverse 2900 * direction), swap src and destination addresses and ports. 2901 */ 2902 ip->ip_src.s_addr = htonl(id->dst_ip); 2903 ip->ip_dst.s_addr = htonl(id->src_ip); 2904 tcp->th_sport = htons(id->dst_port); 2905 tcp->th_dport = htons(id->src_port); 2906 if (flags & TH_RST) { /* we are sending a RST */ 2907 if (flags & TH_ACK) { 2908 tcp->th_seq = htonl(ack); 2909 tcp->th_ack = htonl(0); 2910 tcp->th_flags = TH_RST; 2911 } else { 2912 if (flags & TH_SYN) 2913 seq++; 2914 tcp->th_seq = htonl(0); 2915 tcp->th_ack = htonl(seq); 2916 tcp->th_flags = TH_RST | TH_ACK; 2917 } 2918 } else { 2919 /* 2920 * We are sending a keepalive. flags & TH_SYN determines 2921 * the direction, forward if set, reverse if clear. 2922 * NOTE: seq and ack are always assumed to be correct 2923 * as set by the caller. This may be confusing... 2924 */ 2925 if (flags & TH_SYN) { 2926 /* 2927 * we have to rewrite the correct addresses! 2928 */ 2929 ip->ip_dst.s_addr = htonl(id->dst_ip); 2930 ip->ip_src.s_addr = htonl(id->src_ip); 2931 tcp->th_dport = htons(id->dst_port); 2932 tcp->th_sport = htons(id->src_port); 2933 } 2934 tcp->th_seq = htonl(seq); 2935 tcp->th_ack = htonl(ack); 2936 tcp->th_flags = TH_ACK; 2937 } 2938 2939 /* 2940 * set ip_len to the payload size so we can compute 2941 * the tcp checksum on the pseudoheader 2942 * XXX check this, could save a couple of words ? 2943 */ 2944 ip->ip_len = htons(sizeof(struct tcphdr)); 2945 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 2946 2947 /* 2948 * now fill fields left out earlier 2949 */ 2950 ip->ip_ttl = ip_defttl; 2951 ip->ip_len = htons(m->m_pkthdr.len); 2952 2953 bzero(&sro, sizeof(sro)); 2954 ip_rtaddr(ip->ip_dst, &sro); 2955 2956 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 2957 ip_output(m, NULL, &sro, 0, NULL, NULL); 2958 if (sro.ro_rt) 2959 RTFREE(sro.ro_rt); 2960 } 2961 2962 /* 2963 * Send a reject message, consuming the mbuf passed as an argument. 2964 */ 2965 static void 2966 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 2967 { 2968 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 2969 /* IP header is always left in network order */ 2970 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 2971 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 2972 struct tcphdr *const tcp = 2973 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 2974 2975 if ((tcp->th_flags & TH_RST) == 0) { 2976 send_pkt(&args->f_id, ntohl(tcp->th_seq), 2977 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 2978 } 2979 m_freem(args->m); 2980 } else { 2981 m_freem(args->m); 2982 } 2983 args->m = NULL; 2984 } 2985 2986 /* 2987 * Given an ip_fw *, lookup_next_rule will return a pointer 2988 * to the next rule, which can be either the jump 2989 * target (for skipto instructions) or the next one in the list (in 2990 * all other cases including a missing jump target). 
2991 * The result is also written in the "next_rule" field of the rule. 2992 * Backward jumps are not allowed, so start looking from the next 2993 * rule... 2994 * 2995 * This never returns NULL -- in case we do not have an exact match, 2996 * the next rule is returned. When the ruleset is changed, 2997 * pointers are flushed so we are always correct. 2998 */ 2999 static struct ip_fw * 3000 lookup_next_rule(struct ip_fw *me) 3001 { 3002 struct ip_fw *rule = NULL; 3003 ipfw_insn *cmd; 3004 3005 /* look for action, in case it is a skipto */ 3006 cmd = ACTION_PTR(me); 3007 if (cmd->opcode == O_LOG) 3008 cmd += F_LEN(cmd); 3009 if (cmd->opcode == O_SKIPTO) { 3010 for (rule = me->next; rule; rule = rule->next) { 3011 if (rule->rulenum >= cmd->arg1) 3012 break; 3013 } 3014 } 3015 if (rule == NULL) /* failure or not a skipto */ 3016 rule = me->next; 3017 me->next_rule = rule; 3018 return rule; 3019 } 3020 3021 static int 3022 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 3023 enum ipfw_opcodes opcode, uid_t uid) 3024 { 3025 struct in_addr src_ip, dst_ip; 3026 struct inpcbinfo *pi; 3027 boolean_t wildcard; 3028 struct inpcb *pcb; 3029 3030 if (fid->proto == IPPROTO_TCP) { 3031 wildcard = FALSE; 3032 pi = &tcbinfo[mycpuid]; 3033 } else if (fid->proto == IPPROTO_UDP) { 3034 wildcard = TRUE; 3035 pi = &udbinfo[mycpuid]; 3036 } else { 3037 return 0; 3038 } 3039 3040 /* 3041 * Values in 'fid' are in host byte order 3042 */ 3043 dst_ip.s_addr = htonl(fid->dst_ip); 3044 src_ip.s_addr = htonl(fid->src_ip); 3045 if (oif) { 3046 pcb = in_pcblookup_hash(pi, 3047 dst_ip, htons(fid->dst_port), 3048 src_ip, htons(fid->src_port), 3049 wildcard, oif); 3050 } else { 3051 pcb = in_pcblookup_hash(pi, 3052 src_ip, htons(fid->src_port), 3053 dst_ip, htons(fid->dst_port), 3054 wildcard, NULL); 3055 } 3056 if (pcb == NULL || pcb->inp_socket == NULL) 3057 return 0; 3058 3059 if (opcode == O_UID) { 3060 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 3061 return !socheckuid(pcb->inp_socket, uid); 3062 #undef socheckuid 3063 } else { 3064 return groupmember(uid, pcb->inp_socket->so_cred); 3065 } 3066 } 3067 3068 static int 3069 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip) 3070 { 3071 3072 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) { 3073 struct ifaddr_container *ifac; 3074 struct ifnet *ifp; 3075 3076 ifp = ifunit_netisr(cmd->ifname); 3077 if (ifp == NULL) 3078 return (0); 3079 3080 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 3081 struct ifaddr *ia = ifac->ifa; 3082 3083 if (ia->ifa_addr == NULL) 3084 continue; 3085 if (ia->ifa_addr->sa_family != AF_INET) 3086 continue; 3087 3088 cmd->mask.s_addr = INADDR_ANY; 3089 if (cmd->o.arg1 & IPFW_IFIP_NET) { 3090 cmd->mask = ((struct sockaddr_in *) 3091 ia->ifa_netmask)->sin_addr; 3092 } 3093 if (cmd->mask.s_addr == INADDR_ANY) 3094 cmd->mask.s_addr = INADDR_BROADCAST; 3095 3096 cmd->addr = 3097 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr; 3098 cmd->addr.s_addr &= cmd->mask.s_addr; 3099 3100 cmd->o.arg1 |= IPFW_IFIP_VALID; 3101 break; 3102 } 3103 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0) 3104 return (0); 3105 } 3106 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr); 3107 } 3108 3109 static void 3110 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m, 3111 struct in_addr *old_addr, uint16_t *old_port) 3112 { 3113 struct ip *ip = mtod(m, struct ip *); 3114 struct in_addr *addr; 3115 uint16_t *port, *csum, dlen = 0; 3116 uint8_t udp = 0; 3117 boolean_t pseudo = FALSE; 3118 3119 if 
(x->xlat_flags & IPFW_STATE_F_XLATSRC) { 3120 addr = &ip->ip_src; 3121 switch (ip->ip_p) { 3122 case IPPROTO_TCP: 3123 port = &L3HDR(struct tcphdr, ip)->th_sport; 3124 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3125 break; 3126 case IPPROTO_UDP: 3127 port = &L3HDR(struct udphdr, ip)->uh_sport; 3128 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3129 udp = 1; 3130 break; 3131 default: 3132 panic("ipfw: unsupported src xlate proto %u", ip->ip_p); 3133 } 3134 } else { 3135 addr = &ip->ip_dst; 3136 switch (ip->ip_p) { 3137 case IPPROTO_TCP: 3138 port = &L3HDR(struct tcphdr, ip)->th_dport; 3139 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3140 break; 3141 case IPPROTO_UDP: 3142 port = &L3HDR(struct udphdr, ip)->uh_dport; 3143 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3144 udp = 1; 3145 break; 3146 default: 3147 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p); 3148 } 3149 } 3150 if (old_addr != NULL) 3151 *old_addr = *addr; 3152 if (old_port != NULL) { 3153 if (x->xlat_port != 0) 3154 *old_port = *port; 3155 else 3156 *old_port = 0; 3157 } 3158 3159 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 3160 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) 3161 dlen = ntohs(ip->ip_len) - (ip->ip_hl << 2); 3162 pseudo = TRUE; 3163 } 3164 3165 if (!pseudo) { 3166 const uint16_t *oaddr, *naddr; 3167 3168 oaddr = (const uint16_t *)&addr->s_addr; 3169 naddr = (const uint16_t *)&x->xlat_addr; 3170 3171 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum, 3172 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0); 3173 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum, 3174 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp); 3175 } 3176 addr->s_addr = x->xlat_addr; 3177 3178 if (x->xlat_port != 0) { 3179 if (!pseudo) { 3180 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port, 3181 udp); 3182 } 3183 *port = x->xlat_port; 3184 } 3185 3186 if (pseudo) { 3187 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 3188 htons(dlen + ip->ip_p)); 3189 } 3190 } 3191 3192 static void 3193 ipfw_ip_xlate_dispatch(netmsg_t nmsg) 3194 { 3195 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 3196 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3197 struct mbuf *m = nm->m; 3198 struct ipfw_xlat *x = nm->arg1; 3199 struct ip_fw *rule = x->xlat_rule; 3200 3201 ASSERT_NETISR_NCPUS(mycpuid); 3202 KASSERT(rule->cpuid == mycpuid, 3203 ("rule does not belong to cpu%d", mycpuid)); 3204 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 3205 ("mbuf does not have ipfw continue rule")); 3206 3207 KASSERT(ctx->ipfw_cont_rule == NULL, 3208 ("pending ipfw continue rule")); 3209 KASSERT(ctx->ipfw_cont_xlat == NULL, 3210 ("pending ipfw continue xlat")); 3211 ctx->ipfw_cont_rule = rule; 3212 ctx->ipfw_cont_xlat = x; 3213 3214 if (nm->arg2 == 0) 3215 ip_input(m); 3216 else 3217 ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); 3218 3219 /* May not be cleared, if ipfw was unload/disabled. */ 3220 ctx->ipfw_cont_rule = NULL; 3221 ctx->ipfw_cont_xlat = NULL; 3222 3223 /* 3224 * This state is no longer used; decrement its xlat_crefs, 3225 * so this state can be deleted. 3226 */ 3227 x->xlat_crefs--; 3228 /* 3229 * This rule is no longer used; decrement its cross_refs, 3230 * so this rule can be deleted. 3231 * 3232 * NOTE: 3233 * Decrement cross_refs in the last step of this function, 3234 * so that the module could be unloaded safely. 
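 * A non-zero cross_refs keeps this rule and its siblings, and thus the
 * module, from being torn down while redispatched mbufs are still in
 * flight; see ipfw_xlate_redispatch() below.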
3235 */ 3236 rule->cross_refs--; 3237 } 3238 3239 static void 3240 ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x, 3241 uint32_t flags) 3242 { 3243 struct netmsg_genpkt *nm; 3244 3245 KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d", 3246 x->xlat_pcpu, cpuid)); 3247 3248 /* 3249 * Bump cross_refs to prevent this rule and its siblings 3250 * from being deleted, while this mbuf is inflight. The 3251 * cross_refs of the sibling rule on the target cpu will 3252 * be decremented, once this mbuf is going to be filtered 3253 * on the target cpu. 3254 */ 3255 x->xlat_rule->cross_refs++; 3256 /* 3257 * Bump xlat_crefs to prevent this state and its paired 3258 * state from being deleted, while this mbuf is inflight. 3259 * The xlat_crefs of the paired state on the target cpu 3260 * will be decremented, once this mbuf is going to be 3261 * filtered on the target cpu. 3262 */ 3263 x->xlat_crefs++; 3264 3265 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE; 3266 if (flags & IPFW_XLATE_INSERT) 3267 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS; 3268 if (flags & IPFW_XLATE_FORWARD) 3269 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD; 3270 3271 /* 3272 * NOTE: We always leave ip_len and ip_off in network 3273 * order across all network layers. 3274 */ 3275 nm = &m->m_hdr.mh_genmsg; 3276 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0, 3277 ipfw_ip_xlate_dispatch); 3278 nm->m = m; 3279 nm->arg1 = x->xlat_pair; 3280 nm->arg2 = 0; 3281 if (flags & IPFW_XLATE_OUTPUT) 3282 nm->arg2 = 1; 3283 netisr_sendmsg(&nm->base, cpuid); 3284 } 3285 3286 static struct mbuf * 3287 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3288 struct ip_fw_local *local, struct ip **ip0) 3289 { 3290 struct ip *ip = mtod(m, struct ip *); 3291 struct tcphdr *tcp; 3292 struct udphdr *udp; 3293 3294 /* 3295 * Collect parameters into local variables for faster matching. 3296 */ 3297 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 3298 local->proto = args->f_id.proto = 0; /* mark f_id invalid */ 3299 goto done; 3300 } 3301 3302 local->proto = args->f_id.proto = ip->ip_p; 3303 local->src_ip = ip->ip_src; 3304 local->dst_ip = ip->ip_dst; 3305 local->offset = ntohs(ip->ip_off) & IP_OFFMASK; 3306 local->ip_len = ntohs(ip->ip_len); 3307 3308 #define PULLUP_TO(len) \ 3309 do { \ 3310 if (m->m_len < (len)) { \ 3311 args->m = m = m_pullup(m, (len)); \ 3312 if (m == NULL) { \ 3313 ip = NULL; \ 3314 goto done; \ 3315 } \ 3316 ip = mtod(m, struct ip *); \ 3317 } \ 3318 } while (0) 3319 3320 if (local->offset == 0) { 3321 switch (local->proto) { 3322 case IPPROTO_TCP: 3323 PULLUP_TO(hlen + sizeof(struct tcphdr)); 3324 local->tcp = tcp = L3HDR(struct tcphdr, ip); 3325 local->dst_port = tcp->th_dport; 3326 local->src_port = tcp->th_sport; 3327 args->f_id.flags = tcp->th_flags; 3328 break; 3329 3330 case IPPROTO_UDP: 3331 PULLUP_TO(hlen + sizeof(struct udphdr)); 3332 udp = L3HDR(struct udphdr, ip); 3333 local->dst_port = udp->uh_dport; 3334 local->src_port = udp->uh_sport; 3335 break; 3336 3337 case IPPROTO_ICMP: 3338 PULLUP_TO(hlen + 4); /* type, code and checksum. 
*/ 3339 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 3340 break; 3341 3342 default: 3343 break; 3344 } 3345 } 3346 3347 #undef PULLUP_TO 3348 3349 args->f_id.src_ip = ntohl(local->src_ip.s_addr); 3350 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr); 3351 args->f_id.src_port = local->src_port = ntohs(local->src_port); 3352 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port); 3353 done: 3354 *ip0 = ip; 3355 return (m); 3356 } 3357 3358 static struct mbuf * 3359 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3360 struct ip_fw_local *local, struct ip **ip0) 3361 { 3362 m->m_flags &= ~M_HASH; 3363 ip_hashfn(&m, 0); 3364 args->m = m; 3365 if (m == NULL) { 3366 *ip0 = NULL; 3367 return (NULL); 3368 } 3369 KASSERT(m->m_flags & M_HASH, ("no hash")); 3370 3371 /* 'm' might be changed by ip_hashfn(). */ 3372 return (ipfw_setup_local(m, hlen, args, local, ip0)); 3373 } 3374 3375 /* 3376 * The main check routine for the firewall. 3377 * 3378 * All arguments are in args so we can modify them and return them 3379 * back to the caller. 3380 * 3381 * Parameters: 3382 * 3383 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 3384 * Starts with the IP header. 3385 * args->eh (in) Mac header if present, or NULL for layer3 packet. 3386 * args->oif Outgoing interface, or NULL if packet is incoming. 3387 * The incoming interface is in the mbuf. (in) 3388 * 3389 * args->rule Pointer to the last matching rule (in/out) 3390 * args->f_id Addresses grabbed from the packet (out) 3391 * 3392 * Return value: 3393 * 3394 * If the packet was denied/rejected and has been dropped, *m is equal 3395 * to NULL upon return. 3396 * 3397 * IP_FW_DENY the packet must be dropped. 3398 * IP_FW_PASS The packet is to be accepted and routed normally. 3399 * IP_FW_DIVERT Divert the packet to port (args->cookie) 3400 * IP_FW_TEE Tee the packet to port (args->cookie) 3401 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie) 3402 * IP_FW_CONTINUE Continue processing on another cpu. 3403 */ 3404 static int 3405 ipfw_chk(struct ip_fw_args *args) 3406 { 3407 /* 3408 * Local variables hold state during the processing of a packet. 3409 * 3410 * IMPORTANT NOTE: to speed up the processing of rules, there 3411 * are some assumption on the values of the variables, which 3412 * are documented here. Should you change them, please check 3413 * the implementation of the various instructions to make sure 3414 * that they still work. 3415 * 3416 * args->eh The MAC header. It is non-null for a layer2 3417 * packet, it is NULL for a layer-3 packet. 3418 * 3419 * m | args->m Pointer to the mbuf, as received from the caller. 3420 * It may change if ipfw_chk() does an m_pullup, or if it 3421 * consumes the packet because it calls send_reject(). 3422 * XXX This has to change, so that ipfw_chk() never modifies 3423 * or consumes the buffer. 3424 * ip is simply an alias of the value of m, and it is kept 3425 * in sync with it (the packet is supposed to start with 3426 * the ip header). 3427 */ 3428 struct mbuf *m = args->m; 3429 struct ip *ip = mtod(m, struct ip *); 3430 3431 /* 3432 * oif | args->oif If NULL, ipfw_chk has been called on the 3433 * inbound path (ether_input, ip_input). 3434 * If non-NULL, ipfw_chk has been called on the outbound path 3435 * (ether_output, ip_output). 
3436 */ 3437 struct ifnet *oif = args->oif; 3438 3439 struct ip_fw *f = NULL; /* matching rule */ 3440 int retval = IP_FW_PASS; 3441 struct m_tag *mtag; 3442 struct divert_info *divinfo; 3443 struct ipfw_state *s; 3444 3445 /* 3446 * hlen The length of the IPv4 header. 3447 * hlen >0 means we have an IPv4 packet. 3448 */ 3449 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 3450 3451 struct ip_fw_local lc; 3452 3453 /* 3454 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 3455 * MATCH_NONE when checked and not matched (dyn_f = NULL), 3456 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL) 3457 */ 3458 int dyn_dir = MATCH_UNKNOWN; 3459 struct ip_fw *dyn_f = NULL; 3460 int cpuid = mycpuid; 3461 struct ipfw_context *ctx; 3462 3463 ASSERT_NETISR_NCPUS(cpuid); 3464 ctx = ipfw_ctx[cpuid]; 3465 3466 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) 3467 return IP_FW_PASS; /* accept */ 3468 3469 if (args->eh == NULL || /* layer 3 packet */ 3470 (m->m_pkthdr.len >= sizeof(struct ip) && 3471 ntohs(args->eh->ether_type) == ETHERTYPE_IP)) 3472 hlen = ip->ip_hl << 2; 3473 3474 memset(&lc, 0, sizeof(lc)); 3475 3476 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 3477 if (m == NULL) 3478 goto pullup_failed; 3479 3480 if (args->rule) { 3481 /* 3482 * Packet has already been tagged. Look for the next rule 3483 * to restart processing. 3484 * 3485 * If fw_one_pass != 0 then just accept it. 3486 * XXX should not happen here, but optimized out in 3487 * the caller. 3488 */ 3489 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0) 3490 return IP_FW_PASS; 3491 args->flags &= ~IP_FWARG_F_CONT; 3492 3493 /* This rule is being/has been flushed */ 3494 if (ipfw_flushing) 3495 return IP_FW_DENY; 3496 3497 KASSERT(args->rule->cpuid == cpuid, 3498 ("rule used on cpu%d", cpuid)); 3499 3500 /* This rule was deleted */ 3501 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 3502 return IP_FW_DENY; 3503 3504 if (args->xlat != NULL) { 3505 struct ipfw_xlat *x = args->xlat; 3506 3507 /* This xlat is being deleted. */ 3508 if (x->xlat_invalid) 3509 return IP_FW_DENY; 3510 3511 f = args->rule; 3512 3513 dyn_f = f; 3514 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ? 3515 MATCH_FORWARD : MATCH_REVERSE; 3516 3517 if (args->flags & IP_FWARG_F_XLATINS) { 3518 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE, 3519 ("not slave %u state", x->xlat_type)); 3520 s = ipfw_state_link(ctx, &x->xlat_st); 3521 if (s != NULL) { 3522 ctx->ipfw_xlate_conflicts++; 3523 if (IPFW_STATE_ISDEAD(s)) { 3524 ipfw_state_remove(ctx, s); 3525 s = ipfw_state_link(ctx, 3526 &x->xlat_st); 3527 } 3528 if (s != NULL) { 3529 if (bootverbose) { 3530 kprintf("ipfw: " 3531 "slave %u state " 3532 "conflicts %u state\n", 3533 x->xlat_type, 3534 s->st_type); 3535 } 3536 ipfw_xlat_invalidate(x); 3537 return IP_FW_DENY; 3538 } 3539 ctx->ipfw_xlate_cresolved++; 3540 } 3541 } else { 3542 ipfw_state_update(&args->f_id, dyn_dir, 3543 lc.tcp, &x->xlat_st); 3544 } 3545 } else { 3546 /* TODO: setup dyn_f, dyn_dir */ 3547 3548 f = args->rule->next_rule; 3549 if (f == NULL) 3550 f = lookup_next_rule(args->rule); 3551 } 3552 } else { 3553 /* 3554 * Find the starting rule. It can be either the first 3555 * one, or the one after divert_rule if asked so. 
3556 */ 3557 int skipto; 3558 3559 KKASSERT((args->flags & 3560 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0); 3561 KKASSERT(args->xlat == NULL); 3562 3563 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 3564 if (mtag != NULL) { 3565 divinfo = m_tag_data(mtag); 3566 skipto = divinfo->skipto; 3567 } else { 3568 skipto = 0; 3569 } 3570 3571 f = ctx->ipfw_layer3_chain; 3572 if (args->eh == NULL && skipto != 0) { 3573 /* No skipto during rule flushing */ 3574 if (ipfw_flushing) 3575 return IP_FW_DENY; 3576 3577 if (skipto >= IPFW_DEFAULT_RULE) 3578 return IP_FW_DENY; /* invalid */ 3579 3580 while (f && f->rulenum <= skipto) 3581 f = f->next; 3582 if (f == NULL) /* drop packet */ 3583 return IP_FW_DENY; 3584 } else if (ipfw_flushing) { 3585 /* Rules are being flushed; skip to default rule */ 3586 f = ctx->ipfw_default_rule; 3587 } 3588 } 3589 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 3590 m_tag_delete(m, mtag); 3591 3592 /* 3593 * Now scan the rules, and parse microinstructions for each rule. 3594 */ 3595 for (; f; f = f->next) { 3596 int l, cmdlen; 3597 ipfw_insn *cmd; 3598 int skip_or; /* skip rest of OR block */ 3599 3600 again: 3601 if (ctx->ipfw_set_disable & (1 << f->set)) { 3602 args->xlat = NULL; 3603 continue; 3604 } 3605 3606 if (args->xlat != NULL) { 3607 args->xlat = NULL; 3608 l = f->cmd_len - f->act_ofs; 3609 cmd = ACTION_PTR(f); 3610 } else { 3611 l = f->cmd_len; 3612 cmd = f->cmd; 3613 } 3614 3615 skip_or = 0; 3616 for (; l > 0; l -= cmdlen, cmd += cmdlen) { 3617 int match; 3618 3619 /* 3620 * check_body is a jump target used when we find a 3621 * CHECK_STATE, and need to jump to the body of 3622 * the target rule. 3623 */ 3624 check_body: 3625 cmdlen = F_LEN(cmd); 3626 /* 3627 * An OR block (insn_1 || .. || insn_n) has the 3628 * F_OR bit set in all but the last instruction. 3629 * The first match will set "skip_or", and cause 3630 * the following instructions to be skipped until 3631 * past the one with the F_OR bit clear. 3632 */ 3633 if (skip_or) { /* skip this instruction */ 3634 if ((cmd->len & F_OR) == 0) 3635 skip_or = 0; /* next one is good */ 3636 continue; 3637 } 3638 match = 0; /* set to 1 if we succeed */ 3639 3640 switch (cmd->opcode) { 3641 /* 3642 * The first set of opcodes compares the packet's 3643 * fields with some pattern, setting 'match' if a 3644 * match is found. At the end of the loop there is 3645 * logic to deal with F_NOT and F_OR flags associated 3646 * with the opcode. 3647 */ 3648 case O_NOP: 3649 match = 1; 3650 break; 3651 3652 case O_FORWARD_MAC: 3653 kprintf("ipfw: opcode %d unimplemented\n", 3654 cmd->opcode); 3655 break; 3656 3657 case O_GID: 3658 case O_UID: 3659 /* 3660 * We only check offset == 0 && proto != 0, 3661 * as this ensures that we have an IPv4 3662 * packet with the ports info. 3663 */ 3664 if (lc.offset!=0) 3665 break; 3666 3667 match = ipfw_match_uid(&args->f_id, oif, 3668 cmd->opcode, 3669 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 3670 break; 3671 3672 case O_RECV: 3673 match = iface_match(m->m_pkthdr.rcvif, 3674 (ipfw_insn_if *)cmd); 3675 break; 3676 3677 case O_XMIT: 3678 match = iface_match(oif, (ipfw_insn_if *)cmd); 3679 break; 3680 3681 case O_VIA: 3682 match = iface_match(oif ? 
oif : 3683 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 3684 break; 3685 3686 case O_MACADDR2: 3687 if (args->eh != NULL) { /* have MAC header */ 3688 uint32_t *want = (uint32_t *) 3689 ((ipfw_insn_mac *)cmd)->addr; 3690 uint32_t *mask = (uint32_t *) 3691 ((ipfw_insn_mac *)cmd)->mask; 3692 uint32_t *hdr = (uint32_t *)args->eh; 3693 3694 match = 3695 (want[0] == (hdr[0] & mask[0]) && 3696 want[1] == (hdr[1] & mask[1]) && 3697 want[2] == (hdr[2] & mask[2])); 3698 } 3699 break; 3700 3701 case O_MAC_TYPE: 3702 if (args->eh != NULL) { 3703 uint16_t t = 3704 ntohs(args->eh->ether_type); 3705 uint16_t *p = 3706 ((ipfw_insn_u16 *)cmd)->ports; 3707 int i; 3708 3709 /* Special vlan handling */ 3710 if (m->m_flags & M_VLANTAG) 3711 t = ETHERTYPE_VLAN; 3712 3713 for (i = cmdlen - 1; !match && i > 0; 3714 i--, p += 2) { 3715 match = 3716 (t >= p[0] && t <= p[1]); 3717 } 3718 } 3719 break; 3720 3721 case O_FRAG: 3722 match = (hlen > 0 && lc.offset != 0); 3723 break; 3724 3725 case O_IPFRAG: 3726 if (hlen > 0) { 3727 uint16_t off; 3728 3729 off = ntohs(ip->ip_off); 3730 if (off & (IP_MF | IP_OFFMASK)) 3731 match = 1; 3732 } 3733 break; 3734 3735 case O_IN: /* "out" is "not in" */ 3736 match = (oif == NULL); 3737 break; 3738 3739 case O_LAYER2: 3740 match = (args->eh != NULL); 3741 break; 3742 3743 case O_PROTO: 3744 /* 3745 * We do not allow an arg of 0 so the 3746 * check of "proto" only suffices. 3747 */ 3748 match = (lc.proto == cmd->arg1); 3749 break; 3750 3751 case O_IP_SRC: 3752 match = (hlen > 0 && 3753 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3754 lc.src_ip.s_addr); 3755 break; 3756 3757 case O_IP_SRC_MASK: 3758 match = (hlen > 0 && 3759 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3760 (lc.src_ip.s_addr & 3761 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3762 break; 3763 3764 case O_IP_SRC_ME: 3765 if (hlen > 0) { 3766 struct ifnet *tif; 3767 3768 tif = INADDR_TO_IFP(&lc.src_ip); 3769 match = (tif != NULL); 3770 } 3771 break; 3772 3773 case O_IP_SRC_TABLE: 3774 match = ipfw_table_lookup(ctx, cmd->arg1, 3775 &lc.src_ip); 3776 break; 3777 3778 case O_IP_SRC_IFIP: 3779 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3780 &lc.src_ip); 3781 break; 3782 3783 case O_IP_DST_SET: 3784 case O_IP_SRC_SET: 3785 if (hlen > 0) { 3786 uint32_t *d = (uint32_t *)(cmd + 1); 3787 uint32_t addr = 3788 cmd->opcode == O_IP_DST_SET ? 3789 args->f_id.dst_ip : 3790 args->f_id.src_ip; 3791 3792 if (addr < d[0]) 3793 break; 3794 addr -= d[0]; /* subtract base */ 3795 match = 3796 (addr < cmd->arg1) && 3797 (d[1 + (addr >> 5)] & 3798 (1 << (addr & 0x1f))); 3799 } 3800 break; 3801 3802 case O_IP_DST: 3803 match = (hlen > 0 && 3804 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3805 lc.dst_ip.s_addr); 3806 break; 3807 3808 case O_IP_DST_MASK: 3809 match = (hlen > 0) && 3810 (((ipfw_insn_ip *)cmd)->addr.s_addr == 3811 (lc.dst_ip.s_addr & 3812 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3813 break; 3814 3815 case O_IP_DST_ME: 3816 if (hlen > 0) { 3817 struct ifnet *tif; 3818 3819 tif = INADDR_TO_IFP(&lc.dst_ip); 3820 match = (tif != NULL); 3821 } 3822 break; 3823 3824 case O_IP_DST_TABLE: 3825 match = ipfw_table_lookup(ctx, cmd->arg1, 3826 &lc.dst_ip); 3827 break; 3828 3829 case O_IP_DST_IFIP: 3830 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3831 &lc.dst_ip); 3832 break; 3833 3834 case O_IP_SRCPORT: 3835 case O_IP_DSTPORT: 3836 /* 3837 * offset == 0 && proto != 0 is enough 3838 * to guarantee that we have an IPv4 3839 * packet with port info. 
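 * Non-first fragments carry no TCP/UDP header at all, which is why
 * the offset check is needed before the ports are examined.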
3840 */ 3841 if ((lc.proto==IPPROTO_UDP || 3842 lc.proto==IPPROTO_TCP) 3843 && lc.offset == 0) { 3844 uint16_t x = 3845 (cmd->opcode == O_IP_SRCPORT) ? 3846 lc.src_port : lc.dst_port; 3847 uint16_t *p = 3848 ((ipfw_insn_u16 *)cmd)->ports; 3849 int i; 3850 3851 for (i = cmdlen - 1; !match && i > 0; 3852 i--, p += 2) { 3853 match = 3854 (x >= p[0] && x <= p[1]); 3855 } 3856 } 3857 break; 3858 3859 case O_ICMPCODE: 3860 match = (lc.offset == 0 && 3861 lc.proto==IPPROTO_ICMP && 3862 icmpcode_match(ip, (ipfw_insn_u32 *)cmd)); 3863 break; 3864 3865 case O_ICMPTYPE: 3866 match = (lc.offset == 0 && 3867 lc.proto==IPPROTO_ICMP && 3868 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 3869 break; 3870 3871 case O_IPOPT: 3872 match = (hlen > 0 && ipopts_match(ip, cmd)); 3873 break; 3874 3875 case O_IPVER: 3876 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 3877 break; 3878 3879 case O_IPTTL: 3880 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 3881 break; 3882 3883 case O_IPID: 3884 match = (hlen > 0 && 3885 cmd->arg1 == ntohs(ip->ip_id)); 3886 break; 3887 3888 case O_IPLEN: 3889 match = (hlen > 0 && cmd->arg1 == lc.ip_len); 3890 break; 3891 3892 case O_IPPRECEDENCE: 3893 match = (hlen > 0 && 3894 (cmd->arg1 == (ip->ip_tos & 0xe0))); 3895 break; 3896 3897 case O_IPTOS: 3898 match = (hlen > 0 && 3899 flags_match(cmd, ip->ip_tos)); 3900 break; 3901 3902 case O_TCPFLAGS: 3903 match = (lc.proto == IPPROTO_TCP && 3904 lc.offset == 0 && 3905 flags_match(cmd, 3906 L3HDR(struct tcphdr,ip)->th_flags)); 3907 break; 3908 3909 case O_TCPOPTS: 3910 match = (lc.proto == IPPROTO_TCP && 3911 lc.offset == 0 && tcpopts_match(ip, cmd)); 3912 break; 3913 3914 case O_TCPSEQ: 3915 match = (lc.proto == IPPROTO_TCP && 3916 lc.offset == 0 && 3917 ((ipfw_insn_u32 *)cmd)->d[0] == 3918 L3HDR(struct tcphdr,ip)->th_seq); 3919 break; 3920 3921 case O_TCPACK: 3922 match = (lc.proto == IPPROTO_TCP && 3923 lc.offset == 0 && 3924 ((ipfw_insn_u32 *)cmd)->d[0] == 3925 L3HDR(struct tcphdr,ip)->th_ack); 3926 break; 3927 3928 case O_TCPWIN: 3929 match = (lc.proto == IPPROTO_TCP && 3930 lc.offset == 0 && 3931 cmd->arg1 == 3932 L3HDR(struct tcphdr,ip)->th_win); 3933 break; 3934 3935 case O_ESTAB: 3936 /* reject packets which have SYN only */ 3937 /* XXX should i also check for TH_ACK ? */ 3938 match = (lc.proto == IPPROTO_TCP && 3939 lc.offset == 0 && 3940 (L3HDR(struct tcphdr,ip)->th_flags & 3941 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 3942 break; 3943 3944 case O_LOG: 3945 if (fw_verbose) { 3946 ipfw_log(ctx, f, hlen, args->eh, m, 3947 oif); 3948 } 3949 match = 1; 3950 break; 3951 3952 case O_PROB: 3953 match = (krandom() < 3954 ((ipfw_insn_u32 *)cmd)->d[0]); 3955 break; 3956 3957 /* 3958 * The second set of opcodes represents 'actions', 3959 * i.e. the terminal part of a rule once the packet 3960 * matches all previous patterns. 3961 * Typically there is only one action for each rule, 3962 * and the opcode is stored at the end of the rule 3963 * (but there are exceptions -- see below). 3964 * 3965 * In general, here we set retval and terminate the 3966 * outer loop (would be a 'break 3' in some language, 3967 * but we need to do a 'goto done'). 3968 * 3969 * Exceptions: 3970 * O_COUNT and O_SKIPTO actions: 3971 * instead of terminating, we jump to the next rule 3972 * ('goto next_rule', equivalent to a 'break 2'), 3973 * or to the SKIPTO target ('goto again' after 3974 * having set f, cmd and l), respectively. 
3975 * 3976 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes 3977 * are not real 'actions', and are stored right 3978 * before the 'action' part of the rule. 3979 * These opcodes try to install an entry in the 3980 * state tables; if successful, we continue with 3981 * the next opcode (match=1; break;), otherwise 3982 * the packet must be dropped ('goto done' after 3983 * setting retval). If static rules are changed 3984 * during the state installation, the packet will 3985 * be dropped and rule's stats will not beupdated 3986 * ('return IP_FW_DENY'). 3987 * 3988 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 3989 * cause a lookup of the state table, and a jump 3990 * to the 'action' part of the parent rule 3991 * ('goto check_body') if an entry is found, or 3992 * (CHECK_STATE only) a jump to the next rule if 3993 * the entry is not found ('goto next_rule'). 3994 * The result of the lookup is cached to make 3995 * further instances of these opcodes are 3996 * effectively NOPs. If static rules are changed 3997 * during the state looking up, the packet will 3998 * be dropped and rule's stats will not be updated 3999 * ('return IP_FW_DENY'). 4000 */ 4001 case O_REDIRECT: 4002 if (f->cross_rules == NULL) { 4003 /* 4004 * This rule was not completely setup; 4005 * move on to the next rule. 4006 */ 4007 goto next_rule; 4008 } 4009 /* 4010 * Apply redirect only on input path and 4011 * only to non-fragment TCP segments or 4012 * UDP datagrams. 4013 * 4014 * Does _not_ work with layer2 filtering. 4015 */ 4016 if (oif != NULL || args->eh != NULL || 4017 (ip->ip_off & htons(IP_MF | IP_OFFMASK)) || 4018 (lc.proto != IPPROTO_TCP && 4019 lc.proto != IPPROTO_UDP)) 4020 break; 4021 /* FALL THROUGH */ 4022 case O_LIMIT: 4023 case O_KEEP_STATE: 4024 if (hlen == 0) 4025 break; 4026 s = ipfw_state_install(ctx, f, 4027 (ipfw_insn_limit *)cmd, args, lc.tcp); 4028 if (s == NULL) { 4029 retval = IP_FW_DENY; 4030 goto done; /* error/limit violation */ 4031 } 4032 s->st_pcnt++; 4033 s->st_bcnt += lc.ip_len; 4034 4035 if (s->st_type == O_REDIRECT) { 4036 struct in_addr oaddr; 4037 uint16_t oport; 4038 struct ipfw_xlat *slave_x, *x; 4039 struct ipfw_state *dup; 4040 4041 x = (struct ipfw_xlat *)s; 4042 ipfw_xlate(x, m, &oaddr, &oport); 4043 m = ipfw_rehashm(m, hlen, args, &lc, 4044 &ip); 4045 if (m == NULL) { 4046 ipfw_state_del(ctx, s); 4047 goto pullup_failed; 4048 } 4049 4050 cpuid = netisr_hashcpu( 4051 m->m_pkthdr.hash); 4052 4053 slave_x = (struct ipfw_xlat *) 4054 ipfw_state_alloc(ctx, &args->f_id, 4055 O_REDIRECT, f->cross_rules[cpuid], 4056 lc.tcp); 4057 if (slave_x == NULL) { 4058 ipfw_state_del(ctx, s); 4059 retval = IP_FW_DENY; 4060 goto done; 4061 } 4062 slave_x->xlat_addr = oaddr.s_addr; 4063 slave_x->xlat_port = oport; 4064 slave_x->xlat_dir = MATCH_REVERSE; 4065 slave_x->xlat_flags |= 4066 IPFW_STATE_F_XLATSRC | 4067 IPFW_STATE_F_XLATSLAVE; 4068 4069 slave_x->xlat_pair = x; 4070 slave_x->xlat_pcpu = mycpuid; 4071 x->xlat_pair = slave_x; 4072 x->xlat_pcpu = cpuid; 4073 4074 ctx->ipfw_xlated++; 4075 if (cpuid != mycpuid) { 4076 ctx->ipfw_xlate_split++; 4077 ipfw_xlate_redispatch( 4078 m, cpuid, x, 4079 IPFW_XLATE_INSERT | 4080 IPFW_XLATE_FORWARD); 4081 args->m = NULL; 4082 return (IP_FW_REDISPATCH); 4083 } 4084 4085 dup = ipfw_state_link(ctx, 4086 &slave_x->xlat_st); 4087 if (dup != NULL) { 4088 ctx->ipfw_xlate_conflicts++; 4089 if (IPFW_STATE_ISDEAD(dup)) { 4090 ipfw_state_remove(ctx, 4091 dup); 4092 dup = ipfw_state_link( 4093 ctx, &slave_x->xlat_st); 4094 } 4095 if (dup != NULL) { 4096 if 
(bootverbose) { 4097 kprintf("ipfw: " 4098 "slave %u state " 4099 "conflicts " 4100 "%u state\n", 4101 x->xlat_type, 4102 s->st_type); 4103 } 4104 ipfw_state_del(ctx, s); 4105 return (IP_FW_DENY); 4106 } 4107 ctx->ipfw_xlate_cresolved++; 4108 } 4109 } 4110 match = 1; 4111 break; 4112 4113 case O_PROBE_STATE: 4114 case O_CHECK_STATE: 4115 /* 4116 * States are checked at the first keep-state 4117 * check-state occurrence, with the result 4118 * being stored in dyn_dir. The compiler 4119 * introduces a PROBE_STATE instruction for 4120 * us when we have a KEEP_STATE/LIMIT/RDR 4121 * (because PROBE_STATE needs to be run first). 4122 */ 4123 s = NULL; 4124 if (dyn_dir == MATCH_UNKNOWN) { 4125 s = ipfw_state_lookup(ctx, 4126 &args->f_id, &dyn_dir, lc.tcp); 4127 } 4128 if (s == NULL || 4129 (s->st_type == O_REDIRECT && 4130 (args->eh != NULL || 4131 (ip->ip_off & htons(IP_MF | IP_OFFMASK)) || 4132 (lc.proto != IPPROTO_TCP && 4133 lc.proto != IPPROTO_UDP)))) { 4134 /* 4135 * State not found. If CHECK_STATE, 4136 * skip to next rule, if PROBE_STATE 4137 * just ignore and continue with next 4138 * opcode. 4139 */ 4140 if (cmd->opcode == O_CHECK_STATE) 4141 goto next_rule; 4142 match = 1; 4143 break; 4144 } 4145 4146 s->st_pcnt++; 4147 s->st_bcnt += lc.ip_len; 4148 4149 if (s->st_type == O_REDIRECT) { 4150 struct ipfw_xlat *x = 4151 (struct ipfw_xlat *)s; 4152 4153 if (oif != NULL && 4154 x->xlat_ifp == NULL) { 4155 KASSERT(x->xlat_flags & 4156 IPFW_STATE_F_XLATSLAVE, 4157 ("master rdr state " 4158 "missing ifp")); 4159 x->xlat_ifp = oif; 4160 } else if ( 4161 (oif != NULL && x->xlat_ifp!=oif) || 4162 (oif == NULL && 4163 x->xlat_ifp!=m->m_pkthdr.rcvif)) { 4164 retval = IP_FW_DENY; 4165 goto done; 4166 } 4167 if (x->xlat_dir != dyn_dir) 4168 goto skip_xlate; 4169 4170 ipfw_xlate(x, m, NULL, NULL); 4171 m = ipfw_rehashm(m, hlen, args, &lc, 4172 &ip); 4173 if (m == NULL) 4174 goto pullup_failed; 4175 4176 cpuid = netisr_hashcpu( 4177 m->m_pkthdr.hash); 4178 if (cpuid != mycpuid) { 4179 uint32_t xlate = 0; 4180 4181 if (oif != NULL) { 4182 xlate |= 4183 IPFW_XLATE_OUTPUT; 4184 } 4185 if (dyn_dir == MATCH_FORWARD) { 4186 xlate |= 4187 IPFW_XLATE_FORWARD; 4188 } 4189 ipfw_xlate_redispatch(m, cpuid, 4190 x, xlate); 4191 args->m = NULL; 4192 return (IP_FW_REDISPATCH); 4193 } 4194 4195 KKASSERT(x->xlat_pcpu == mycpuid); 4196 ipfw_state_update(&args->f_id, dyn_dir, 4197 lc.tcp, &x->xlat_pair->xlat_st); 4198 } 4199 skip_xlate: 4200 /* 4201 * Found a rule from a state; jump to the 4202 * 'action' part of the rule. 4203 */ 4204 f = s->st_rule; 4205 KKASSERT(f->cpuid == mycpuid); 4206 4207 cmd = ACTION_PTR(f); 4208 l = f->cmd_len - f->act_ofs; 4209 dyn_f = f; 4210 goto check_body; 4211 4212 case O_ACCEPT: 4213 retval = IP_FW_PASS; /* accept */ 4214 goto done; 4215 4216 case O_DEFRAG: 4217 if (f->cross_rules == NULL) { 4218 /* 4219 * This rule was not completely setup; 4220 * move on to the next rule. 4221 */ 4222 goto next_rule; 4223 } 4224 4225 /* 4226 * Don't defrag for l2 packets, output packets 4227 * or non-fragments. 
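 * After reassembly the packet is re-hashed; if the new hash
 * maps to another netisr the mbuf is redispatched there
 * (IP_FW_REDISPATCH), otherwise matching simply continues
 * with the next rule.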
4228 */ 4229 if (oif != NULL || args->eh != NULL || 4230 (ip->ip_off & htons(IP_MF | IP_OFFMASK)) == 0) 4231 goto next_rule; 4232 4233 ctx->ipfw_frags++; 4234 m = ip_reass(m); 4235 args->m = m; 4236 if (m == NULL) { 4237 retval = IP_FW_PASS; 4238 goto done; 4239 } 4240 ctx->ipfw_defraged++; 4241 KASSERT((m->m_flags & M_HASH) == 0, 4242 ("hash not cleared")); 4243 4244 /* Update statistics */ 4245 f->pcnt++; 4246 f->bcnt += lc.ip_len; 4247 f->timestamp = time_second; 4248 4249 ip = mtod(m, struct ip *); 4250 hlen = ip->ip_hl << 2; 4251 ip->ip_len = htons(ntohs(ip->ip_len) + hlen); 4252 4253 ip_hashfn(&m, 0); 4254 args->m = m; 4255 if (m == NULL) 4256 goto pullup_failed; 4257 4258 KASSERT(m->m_flags & M_HASH, ("no hash")); 4259 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 4260 if (cpuid != mycpuid) { 4261 ctx->ipfw_defrag_remote++; 4262 ipfw_defrag_redispatch(m, cpuid, f); 4263 args->m = NULL; 4264 return (IP_FW_REDISPATCH); 4265 } 4266 4267 /* 'm' might be changed by ip_hashfn(). */ 4268 ip = mtod(m, struct ip *); 4269 4270 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 4271 if (m == NULL) 4272 goto pullup_failed; 4273 4274 /* Move on. */ 4275 goto next_rule; 4276 4277 case O_PIPE: 4278 case O_QUEUE: 4279 args->rule = f; /* report matching rule */ 4280 args->cookie = cmd->arg1; 4281 retval = IP_FW_DUMMYNET; 4282 goto done; 4283 4284 case O_DIVERT: 4285 case O_TEE: 4286 if (args->eh) /* not on layer 2 */ 4287 break; 4288 4289 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 4290 sizeof(*divinfo), M_INTWAIT | M_NULLOK); 4291 if (mtag == NULL) { 4292 retval = IP_FW_DENY; 4293 goto done; 4294 } 4295 divinfo = m_tag_data(mtag); 4296 4297 divinfo->skipto = f->rulenum; 4298 divinfo->port = cmd->arg1; 4299 divinfo->tee = (cmd->opcode == O_TEE); 4300 m_tag_prepend(m, mtag); 4301 4302 args->cookie = cmd->arg1; 4303 retval = (cmd->opcode == O_DIVERT) ? 4304 IP_FW_DIVERT : IP_FW_TEE; 4305 goto done; 4306 4307 case O_COUNT: 4308 case O_SKIPTO: 4309 f->pcnt++; /* update stats */ 4310 f->bcnt += lc.ip_len; 4311 f->timestamp = time_second; 4312 if (cmd->opcode == O_COUNT) 4313 goto next_rule; 4314 /* handle skipto */ 4315 if (f->next_rule == NULL) 4316 lookup_next_rule(f); 4317 f = f->next_rule; 4318 goto again; 4319 4320 case O_REJECT: 4321 /* 4322 * Drop the packet and send a reject notice 4323 * if the packet is not ICMP (or is an ICMP 4324 * query), and it is not multicast/broadcast. 
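 * Multicast/broadcast destinations never get a reply (that
 * could trigger reply storms); such packets simply fall
 * through to O_DENY below.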
4325 */ 4326 if (hlen > 0 && 4327 (lc.proto != IPPROTO_ICMP || 4328 is_icmp_query(ip)) && 4329 !(m->m_flags & (M_BCAST|M_MCAST)) && 4330 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) { 4331 send_reject(args, cmd->arg1, 4332 lc.offset, lc.ip_len); 4333 retval = IP_FW_DENY; 4334 goto done; 4335 } 4336 /* FALLTHROUGH */ 4337 case O_DENY: 4338 retval = IP_FW_DENY; 4339 goto done; 4340 4341 case O_FORWARD_IP: 4342 if (args->eh) /* not valid on layer2 pkts */ 4343 break; 4344 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 4345 struct sockaddr_in *sin; 4346 4347 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 4348 sizeof(*sin), M_INTWAIT | M_NULLOK); 4349 if (mtag == NULL) { 4350 retval = IP_FW_DENY; 4351 goto done; 4352 } 4353 sin = m_tag_data(mtag); 4354 4355 /* Structure copy */ 4356 *sin = ((ipfw_insn_sa *)cmd)->sa; 4357 4358 m_tag_prepend(m, mtag); 4359 m->m_pkthdr.fw_flags |= 4360 IPFORWARD_MBUF_TAGGED; 4361 m->m_pkthdr.fw_flags &= 4362 ~BRIDGE_MBUF_TAGGED; 4363 } 4364 retval = IP_FW_PASS; 4365 goto done; 4366 4367 default: 4368 panic("-- unknown opcode %d", cmd->opcode); 4369 } /* end of switch() on opcodes */ 4370 4371 if (cmd->len & F_NOT) 4372 match = !match; 4373 4374 if (match) { 4375 if (cmd->len & F_OR) 4376 skip_or = 1; 4377 } else { 4378 if (!(cmd->len & F_OR)) /* not an OR block, */ 4379 break; /* try next rule */ 4380 } 4381 4382 } /* end of inner for, scan opcodes */ 4383 4384 next_rule:; /* try next rule */ 4385 4386 } /* end of outer for, scan rules */ 4387 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 4388 return IP_FW_DENY; 4389 4390 done: 4391 /* Update statistics */ 4392 f->pcnt++; 4393 f->bcnt += lc.ip_len; 4394 f->timestamp = time_second; 4395 return retval; 4396 4397 pullup_failed: 4398 if (fw_verbose) 4399 kprintf("pullup failed\n"); 4400 return IP_FW_DENY; 4401 } 4402 4403 static struct mbuf * 4404 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 4405 { 4406 struct m_tag *mtag; 4407 struct dn_pkt *pkt; 4408 ipfw_insn *cmd; 4409 const struct ipfw_flow_id *id; 4410 struct dn_flow_id *fid; 4411 4412 M_ASSERTPKTHDR(m); 4413 4414 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), 4415 M_INTWAIT | M_NULLOK); 4416 if (mtag == NULL) { 4417 m_freem(m); 4418 return (NULL); 4419 } 4420 m_tag_prepend(m, mtag); 4421 4422 pkt = m_tag_data(mtag); 4423 bzero(pkt, sizeof(*pkt)); 4424 4425 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 4426 if (cmd->opcode == O_LOG) 4427 cmd += F_LEN(cmd); 4428 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 4429 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode)); 4430 4431 pkt->dn_m = m; 4432 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 4433 pkt->ifp = fwa->oif; 4434 pkt->pipe_nr = pipe_nr; 4435 4436 pkt->cpuid = mycpuid; 4437 pkt->msgport = netisr_curport(); 4438 4439 id = &fwa->f_id; 4440 fid = &pkt->id; 4441 fid->fid_dst_ip = id->dst_ip; 4442 fid->fid_src_ip = id->src_ip; 4443 fid->fid_dst_port = id->dst_port; 4444 fid->fid_src_port = id->src_port; 4445 fid->fid_proto = id->proto; 4446 fid->fid_flags = id->flags; 4447 4448 ipfw_ref_rule(fwa->rule); 4449 pkt->dn_priv = fwa->rule; 4450 pkt->dn_unref_priv = ipfw_unref_rule; 4451 4452 if (cmd->opcode == O_PIPE) 4453 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 4454 4455 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 4456 return (m); 4457 } 4458 4459 /* 4460 * When a rule is added/deleted, clear the next_rule pointers in all rules. 4461 * These will be reconstructed on the fly as packets are matched. 
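 * Only the cached skipto targets are cleared here; the rules
 * themselves and their per-cpu sibling links stay intact.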
4462 */ 4463 static void 4464 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 4465 { 4466 struct ip_fw *rule; 4467 4468 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 4469 rule->next_rule = NULL; 4470 } 4471 4472 static void 4473 ipfw_inc_static_count(struct ip_fw *rule) 4474 { 4475 /* Static rule's counts are updated only on CPU0 */ 4476 KKASSERT(mycpuid == 0); 4477 4478 static_count++; 4479 static_ioc_len += IOC_RULESIZE(rule); 4480 } 4481 4482 static void 4483 ipfw_dec_static_count(struct ip_fw *rule) 4484 { 4485 int l = IOC_RULESIZE(rule); 4486 4487 /* Static rule's counts are updated only on CPU0 */ 4488 KKASSERT(mycpuid == 0); 4489 4490 KASSERT(static_count > 0, ("invalid static count %u", static_count)); 4491 static_count--; 4492 4493 KASSERT(static_ioc_len >= l, 4494 ("invalid static len %u", static_ioc_len)); 4495 static_ioc_len -= l; 4496 } 4497 4498 static void 4499 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 4500 { 4501 if (fwmsg->sibling != NULL) { 4502 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 4503 fwmsg->sibling->sibling = rule; 4504 } 4505 fwmsg->sibling = rule; 4506 } 4507 4508 static struct ip_fw * 4509 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4510 { 4511 struct ip_fw *rule; 4512 4513 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 4514 4515 rule->act_ofs = ioc_rule->act_ofs; 4516 rule->cmd_len = ioc_rule->cmd_len; 4517 rule->rulenum = ioc_rule->rulenum; 4518 rule->set = ioc_rule->set; 4519 rule->usr_flags = ioc_rule->usr_flags; 4520 4521 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 4522 4523 rule->refcnt = 1; 4524 rule->cpuid = mycpuid; 4525 rule->rule_flags = rule_flags; 4526 4527 return rule; 4528 } 4529 4530 static void 4531 ipfw_add_rule_dispatch(netmsg_t nmsg) 4532 { 4533 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4534 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4535 struct ip_fw *rule; 4536 4537 ASSERT_NETISR_NCPUS(mycpuid); 4538 4539 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags); 4540 4541 /* 4542 * Insert rule into the pre-determined position 4543 */ 4544 if (fwmsg->prev_rule != NULL) { 4545 struct ip_fw *prev, *next; 4546 4547 prev = fwmsg->prev_rule; 4548 KKASSERT(prev->cpuid == mycpuid); 4549 4550 next = fwmsg->next_rule; 4551 KKASSERT(next->cpuid == mycpuid); 4552 4553 rule->next = next; 4554 prev->next = rule; 4555 4556 /* 4557 * Move to the position on the next CPU 4558 * before the msg is forwarded. 4559 */ 4560 fwmsg->prev_rule = prev->sibling; 4561 fwmsg->next_rule = next->sibling; 4562 } else { 4563 KKASSERT(fwmsg->next_rule == NULL); 4564 rule->next = ctx->ipfw_layer3_chain; 4565 ctx->ipfw_layer3_chain = rule; 4566 } 4567 4568 /* Link rule CPU sibling */ 4569 ipfw_link_sibling(fwmsg, rule); 4570 4571 ipfw_flush_rule_ptrs(ctx); 4572 4573 if (mycpuid == 0) { 4574 /* Statistics only need to be updated once */ 4575 ipfw_inc_static_count(rule); 4576 4577 /* Return the rule on CPU0 */ 4578 nmsg->lmsg.u.ms_resultp = rule; 4579 } 4580 4581 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) 4582 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp; 4583 4584 if (fwmsg->cross_rules != NULL) { 4585 /* Save rules for later use. 
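 * Each cpu records its duplicate here; ipfw_crossref_rule_dispatch()
 * later copies the completed array into every sibling's cross_rules.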
*/ 4586 fwmsg->cross_rules[mycpuid] = rule; 4587 } 4588 4589 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4590 } 4591 4592 static void 4593 ipfw_crossref_rule_dispatch(netmsg_t nmsg) 4594 { 4595 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4596 struct ip_fw *rule = fwmsg->sibling; 4597 int sz = sizeof(struct ip_fw *) * netisr_ncpus; 4598 4599 ASSERT_NETISR_NCPUS(mycpuid); 4600 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF, 4601 ("not crossref rule")); 4602 4603 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK); 4604 memcpy(rule->cross_rules, fwmsg->cross_rules, sz); 4605 4606 fwmsg->sibling = rule->sibling; 4607 netisr_forwardmsg(&fwmsg->base, mycpuid + 1); 4608 } 4609 4610 /* 4611 * Add a new rule to the list. Copy the rule into a malloc'ed area, 4612 * then possibly create a rule number and add the rule to the list. 4613 * Update the rule_number in the input struct so the caller knows 4614 * it as well. 4615 */ 4616 static void 4617 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4618 { 4619 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4620 struct netmsg_ipfw fwmsg; 4621 struct ip_fw *f, *prev, *rule; 4622 4623 ASSERT_NETISR0; 4624 4625 /* 4626 * If rulenum is 0, find highest numbered rule before the 4627 * default rule, and add rule number incremental step. 4628 */ 4629 if (ioc_rule->rulenum == 0) { 4630 int step = autoinc_step; 4631 4632 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN && 4633 step <= IPFW_AUTOINC_STEP_MAX); 4634 4635 /* 4636 * Locate the highest numbered rule before default 4637 */ 4638 for (f = ctx->ipfw_layer3_chain; f; f = f->next) { 4639 if (f->rulenum == IPFW_DEFAULT_RULE) 4640 break; 4641 ioc_rule->rulenum = f->rulenum; 4642 } 4643 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step) 4644 ioc_rule->rulenum += step; 4645 } 4646 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE && 4647 ioc_rule->rulenum != 0, 4648 ("invalid rule num %d", ioc_rule->rulenum)); 4649 4650 /* 4651 * Now find the right place for the new rule in the sorted list. 4652 */ 4653 for (prev = NULL, f = ctx->ipfw_layer3_chain; f; 4654 prev = f, f = f->next) { 4655 if (f->rulenum > ioc_rule->rulenum) { 4656 /* Found the location */ 4657 break; 4658 } 4659 } 4660 KASSERT(f != NULL, ("no default rule?!")); 4661 4662 /* 4663 * Duplicate the rule onto each CPU. 4664 * The rule duplicated on CPU0 will be returned. 4665 */ 4666 bzero(&fwmsg, sizeof(fwmsg)); 4667 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4668 ipfw_add_rule_dispatch); 4669 fwmsg.ioc_rule = ioc_rule; 4670 fwmsg.prev_rule = prev; 4671 fwmsg.next_rule = prev == NULL ? 
NULL : f; 4672 fwmsg.rule_flags = rule_flags; 4673 if (rule_flags & IPFW_RULE_F_CROSSREF) { 4674 fwmsg.cross_rules = kmalloc( 4675 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP, 4676 M_WAITOK | M_ZERO); 4677 } 4678 4679 netisr_domsg_global(&fwmsg.base); 4680 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); 4681 4682 rule = fwmsg.base.lmsg.u.ms_resultp; 4683 KKASSERT(rule != NULL && rule->cpuid == mycpuid); 4684 4685 if (fwmsg.cross_rules != NULL) { 4686 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, 4687 MSGF_PRIORITY, ipfw_crossref_rule_dispatch); 4688 fwmsg.sibling = rule; 4689 netisr_domsg_global(&fwmsg.base); 4690 KKASSERT(fwmsg.sibling == NULL); 4691 4692 kfree(fwmsg.cross_rules, M_TEMP); 4693 4694 #ifdef KLD_MODULE 4695 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 4696 #endif 4697 } 4698 4699 DPRINTF("++ installed rule %d, static count now %d\n", 4700 rule->rulenum, static_count); 4701 } 4702 4703 /* 4704 * Free storage associated with a static rule (including derived 4705 * states/tracks). 4706 * The caller is in charge of clearing rule pointers to avoid 4707 * dangling pointers. 4708 * @return a pointer to the next entry. 4709 * Arguments are not checked, so they better be correct. 4710 */ 4711 static struct ip_fw * 4712 ipfw_delete_rule(struct ipfw_context *ctx, 4713 struct ip_fw *prev, struct ip_fw *rule) 4714 { 4715 struct ip_fw *n; 4716 4717 n = rule->next; 4718 if (prev == NULL) 4719 ctx->ipfw_layer3_chain = n; 4720 else 4721 prev->next = n; 4722 4723 /* Mark the rule as invalid */ 4724 rule->rule_flags |= IPFW_RULE_F_INVALID; 4725 rule->next_rule = NULL; 4726 rule->sibling = NULL; 4727 #ifdef foo 4728 /* Don't reset cpuid here; keep various assertion working */ 4729 rule->cpuid = -1; 4730 #endif 4731 4732 /* Statistics only need to be updated once */ 4733 if (mycpuid == 0) 4734 ipfw_dec_static_count(rule); 4735 4736 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) { 4737 /* Try to free this rule */ 4738 ipfw_free_rule(rule); 4739 } else { 4740 /* TODO: check staging area. */ 4741 if (mycpuid == 0) { 4742 rule->next = ipfw_gd.ipfw_crossref_free; 4743 ipfw_gd.ipfw_crossref_free = rule; 4744 } 4745 } 4746 4747 /* Return the next rule */ 4748 return n; 4749 } 4750 4751 static void 4752 ipfw_flush_dispatch(netmsg_t nmsg) 4753 { 4754 int kill_default = nmsg->lmsg.u.ms_result; 4755 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4756 struct ip_fw *rule; 4757 4758 ASSERT_NETISR_NCPUS(mycpuid); 4759 4760 /* 4761 * Flush states. 4762 */ 4763 ipfw_state_flush(ctx, NULL); 4764 KASSERT(ctx->ipfw_state_cnt == 0, 4765 ("%d pcpu states remain", ctx->ipfw_state_cnt)); 4766 ctx->ipfw_state_loosecnt = 0; 4767 ctx->ipfw_state_lastexp = 0; 4768 4769 /* 4770 * Flush tracks. 4771 */ 4772 ipfw_track_flush(ctx, NULL); 4773 ctx->ipfw_track_lastexp = 0; 4774 if (ctx->ipfw_trkcnt_spare != NULL) { 4775 kfree(ctx->ipfw_trkcnt_spare, M_IPFW); 4776 ctx->ipfw_trkcnt_spare = NULL; 4777 } 4778 4779 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ 4780 4781 while ((rule = ctx->ipfw_layer3_chain) != NULL && 4782 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) 4783 ipfw_delete_rule(ctx, NULL, rule); 4784 4785 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4786 } 4787 4788 /* 4789 * Deletes all rules from a chain (including the default rule 4790 * if the second argument is set). 
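 * States and tracks are flushed first (see ipfw_flush_dispatch),
 * so nothing can be left pointing at a rule that is going away.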
4791 */ 4792 static void 4793 ipfw_flush(int kill_default) 4794 { 4795 struct netmsg_base nmsg; 4796 #ifdef INVARIANTS 4797 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4798 int state_cnt; 4799 #endif 4800 4801 ASSERT_NETISR0; 4802 4803 /* 4804 * If 'kill_default' then caller has done the necessary 4805 * msgport syncing; unnecessary to do it again. 4806 */ 4807 if (!kill_default) { 4808 /* 4809 * Let ipfw_chk() know the rules are going to 4810 * be flushed, so it could jump directly to 4811 * the default rule. 4812 */ 4813 ipfw_flushing = 1; 4814 /* XXX use priority sync */ 4815 netmsg_service_sync(); 4816 } 4817 4818 /* 4819 * Press the 'flush' button 4820 */ 4821 bzero(&nmsg, sizeof(nmsg)); 4822 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4823 ipfw_flush_dispatch); 4824 nmsg.lmsg.u.ms_result = kill_default; 4825 netisr_domsg_global(&nmsg); 4826 ipfw_gd.ipfw_state_loosecnt = 0; 4827 ipfw_gd.ipfw_state_globexp = 0; 4828 ipfw_gd.ipfw_track_globexp = 0; 4829 4830 #ifdef INVARIANTS 4831 state_cnt = ipfw_state_cntcoll(); 4832 KASSERT(state_cnt == 0, ("%d states remain", state_cnt)); 4833 4834 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0, 4835 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt)); 4836 4837 if (kill_default) { 4838 KASSERT(static_count == 0, 4839 ("%u static rules remain", static_count)); 4840 KASSERT(static_ioc_len == 0, 4841 ("%u bytes of static rules remain", static_ioc_len)); 4842 } else { 4843 KASSERT(static_count == 1, 4844 ("%u static rules remain", static_count)); 4845 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 4846 ("%u bytes of static rules remain, should be %lu", 4847 static_ioc_len, 4848 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule))); 4849 } 4850 #endif 4851 4852 /* Flush is done */ 4853 ipfw_flushing = 0; 4854 } 4855 4856 static void 4857 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg) 4858 { 4859 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4860 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4861 struct ip_fw *rule, *prev; 4862 4863 ASSERT_NETISR_NCPUS(mycpuid); 4864 4865 rule = dmsg->start_rule; 4866 KKASSERT(rule->cpuid == mycpuid); 4867 dmsg->start_rule = rule->sibling; 4868 4869 prev = dmsg->prev_rule; 4870 if (prev != NULL) { 4871 KKASSERT(prev->cpuid == mycpuid); 4872 4873 /* 4874 * Move to the position on the next CPU 4875 * before the msg is forwarded. 4876 */ 4877 dmsg->prev_rule = prev->sibling; 4878 } 4879 4880 /* 4881 * flush pointers outside the loop, then delete all matching 4882 * rules. 'prev' remains the same throughout the cycle. 4883 */ 4884 ipfw_flush_rule_ptrs(ctx); 4885 while (rule && rule->rulenum == dmsg->rulenum) { 4886 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4887 /* Flush states generated by this rule. */ 4888 ipfw_state_flush(ctx, rule); 4889 } 4890 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4891 /* Flush tracks generated by this rule. 
*/ 4892 ipfw_track_flush(ctx, rule); 4893 } 4894 rule = ipfw_delete_rule(ctx, prev, rule); 4895 } 4896 4897 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4898 } 4899 4900 static int 4901 ipfw_alt_delete_rule(uint16_t rulenum) 4902 { 4903 struct ip_fw *prev, *rule; 4904 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4905 struct netmsg_del dmsg; 4906 4907 ASSERT_NETISR0; 4908 4909 /* 4910 * Locate first rule to delete 4911 */ 4912 for (prev = NULL, rule = ctx->ipfw_layer3_chain; 4913 rule && rule->rulenum < rulenum; 4914 prev = rule, rule = rule->next) 4915 ; /* EMPTY */ 4916 if (rule->rulenum != rulenum) 4917 return EINVAL; 4918 4919 /* 4920 * Get rid of the rule duplications on all CPUs 4921 */ 4922 bzero(&dmsg, sizeof(dmsg)); 4923 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4924 ipfw_alt_delete_rule_dispatch); 4925 dmsg.prev_rule = prev; 4926 dmsg.start_rule = rule; 4927 dmsg.rulenum = rulenum; 4928 4929 netisr_domsg_global(&dmsg.base); 4930 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL); 4931 return 0; 4932 } 4933 4934 static void 4935 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg) 4936 { 4937 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4938 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4939 struct ip_fw *prev, *rule; 4940 #ifdef INVARIANTS 4941 int del = 0; 4942 #endif 4943 4944 ASSERT_NETISR_NCPUS(mycpuid); 4945 4946 ipfw_flush_rule_ptrs(ctx); 4947 4948 prev = NULL; 4949 rule = ctx->ipfw_layer3_chain; 4950 while (rule != NULL) { 4951 if (rule->set == dmsg->from_set) { 4952 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4953 /* Flush states generated by this rule. */ 4954 ipfw_state_flush(ctx, rule); 4955 } 4956 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4957 /* Flush tracks generated by this rule. */ 4958 ipfw_track_flush(ctx, rule); 4959 } 4960 rule = ipfw_delete_rule(ctx, prev, rule); 4961 #ifdef INVARIANTS 4962 del = 1; 4963 #endif 4964 } else { 4965 prev = rule; 4966 rule = rule->next; 4967 } 4968 } 4969 KASSERT(del, ("no match set?!")); 4970 4971 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4972 } 4973 4974 static int 4975 ipfw_alt_delete_ruleset(uint8_t set) 4976 { 4977 struct netmsg_del dmsg; 4978 int del; 4979 struct ip_fw *rule; 4980 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4981 4982 ASSERT_NETISR0; 4983 4984 /* 4985 * Check whether the 'set' exists. If it exists, 4986 * then check whether any rules within the set will 4987 * try to create states. 4988 */ 4989 del = 0; 4990 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 4991 if (rule->set == set) 4992 del = 1; 4993 } 4994 if (!del) 4995 return 0; /* XXX EINVAL? */ 4996 4997 /* 4998 * Delete this set 4999 */ 5000 bzero(&dmsg, sizeof(dmsg)); 5001 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5002 ipfw_alt_delete_ruleset_dispatch); 5003 dmsg.from_set = set; 5004 netisr_domsg_global(&dmsg.base); 5005 5006 return 0; 5007 } 5008 5009 static void 5010 ipfw_alt_move_rule_dispatch(netmsg_t nmsg) 5011 { 5012 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5013 struct ip_fw *rule; 5014 5015 ASSERT_NETISR_NCPUS(mycpuid); 5016 5017 rule = dmsg->start_rule; 5018 KKASSERT(rule->cpuid == mycpuid); 5019 5020 /* 5021 * Move to the position on the next CPU 5022 * before the msg is forwarded. 
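 * (start_rule->sibling is this very rule's duplicate on the
 * next cpu, so no list walk is needed there.)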
5023 */ 5024 dmsg->start_rule = rule->sibling; 5025 5026 while (rule && rule->rulenum <= dmsg->rulenum) { 5027 if (rule->rulenum == dmsg->rulenum) 5028 rule->set = dmsg->to_set; 5029 rule = rule->next; 5030 } 5031 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5032 } 5033 5034 static int 5035 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) 5036 { 5037 struct netmsg_del dmsg; 5038 struct netmsg_base *nmsg; 5039 struct ip_fw *rule; 5040 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5041 5042 ASSERT_NETISR0; 5043 5044 /* 5045 * Locate first rule to move 5046 */ 5047 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; 5048 rule = rule->next) { 5049 if (rule->rulenum == rulenum && rule->set != set) 5050 break; 5051 } 5052 if (rule == NULL || rule->rulenum > rulenum) 5053 return 0; /* XXX error? */ 5054 5055 bzero(&dmsg, sizeof(dmsg)); 5056 nmsg = &dmsg.base; 5057 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5058 ipfw_alt_move_rule_dispatch); 5059 dmsg.start_rule = rule; 5060 dmsg.rulenum = rulenum; 5061 dmsg.to_set = set; 5062 5063 netisr_domsg_global(nmsg); 5064 KKASSERT(dmsg.start_rule == NULL); 5065 return 0; 5066 } 5067 5068 static void 5069 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg) 5070 { 5071 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5072 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5073 struct ip_fw *rule; 5074 5075 ASSERT_NETISR_NCPUS(mycpuid); 5076 5077 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5078 if (rule->set == dmsg->from_set) 5079 rule->set = dmsg->to_set; 5080 } 5081 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5082 } 5083 5084 static int 5085 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) 5086 { 5087 struct netmsg_del dmsg; 5088 struct netmsg_base *nmsg; 5089 5090 ASSERT_NETISR0; 5091 5092 bzero(&dmsg, sizeof(dmsg)); 5093 nmsg = &dmsg.base; 5094 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5095 ipfw_alt_move_ruleset_dispatch); 5096 dmsg.from_set = from_set; 5097 dmsg.to_set = to_set; 5098 5099 netisr_domsg_global(nmsg); 5100 return 0; 5101 } 5102 5103 static void 5104 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg) 5105 { 5106 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5107 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5108 struct ip_fw *rule; 5109 5110 ASSERT_NETISR_NCPUS(mycpuid); 5111 5112 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5113 if (rule->set == dmsg->from_set) 5114 rule->set = dmsg->to_set; 5115 else if (rule->set == dmsg->to_set) 5116 rule->set = dmsg->from_set; 5117 } 5118 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5119 } 5120 5121 static int 5122 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) 5123 { 5124 struct netmsg_del dmsg; 5125 struct netmsg_base *nmsg; 5126 5127 ASSERT_NETISR0; 5128 5129 bzero(&dmsg, sizeof(dmsg)); 5130 nmsg = &dmsg.base; 5131 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5132 ipfw_alt_swap_ruleset_dispatch); 5133 dmsg.from_set = set1; 5134 dmsg.to_set = set2; 5135 5136 netisr_domsg_global(nmsg); 5137 return 0; 5138 } 5139 5140 /* 5141 * Remove all rules with given number, and also do set manipulation. 5142 * 5143 * The argument is an uint32_t. 
The low 16 bit are the rule or set number, 5144 * the next 8 bits are the new set, the top 8 bits are the command: 5145 * 5146 * 0 delete rules with given number 5147 * 1 delete rules with given set number 5148 * 2 move rules with given number to new set 5149 * 3 move rules with given set number to new set 5150 * 4 swap sets with given numbers 5151 */ 5152 static int 5153 ipfw_ctl_alter(uint32_t arg) 5154 { 5155 uint16_t rulenum; 5156 uint8_t cmd, new_set; 5157 int error = 0; 5158 5159 ASSERT_NETISR0; 5160 5161 rulenum = arg & 0xffff; 5162 cmd = (arg >> 24) & 0xff; 5163 new_set = (arg >> 16) & 0xff; 5164 5165 if (cmd > 4) 5166 return EINVAL; 5167 if (new_set >= IPFW_DEFAULT_SET) 5168 return EINVAL; 5169 if (cmd == 0 || cmd == 2) { 5170 if (rulenum == IPFW_DEFAULT_RULE) 5171 return EINVAL; 5172 } else { 5173 if (rulenum >= IPFW_DEFAULT_SET) 5174 return EINVAL; 5175 } 5176 5177 switch (cmd) { 5178 case 0: /* delete rules with given number */ 5179 error = ipfw_alt_delete_rule(rulenum); 5180 break; 5181 5182 case 1: /* delete all rules with given set number */ 5183 error = ipfw_alt_delete_ruleset(rulenum); 5184 break; 5185 5186 case 2: /* move rules with given number to new set */ 5187 error = ipfw_alt_move_rule(rulenum, new_set); 5188 break; 5189 5190 case 3: /* move rules with given set number to new set */ 5191 error = ipfw_alt_move_ruleset(rulenum, new_set); 5192 break; 5193 5194 case 4: /* swap two sets */ 5195 error = ipfw_alt_swap_ruleset(rulenum, new_set); 5196 break; 5197 } 5198 return error; 5199 } 5200 5201 /* 5202 * Clear counters for a specific rule. 5203 */ 5204 static void 5205 clear_counters(struct ip_fw *rule, int log_only) 5206 { 5207 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); 5208 5209 if (log_only == 0) { 5210 rule->bcnt = rule->pcnt = 0; 5211 rule->timestamp = 0; 5212 } 5213 if (l->o.opcode == O_LOG) 5214 l->log_left = l->max_log; 5215 } 5216 5217 static void 5218 ipfw_zero_entry_dispatch(netmsg_t nmsg) 5219 { 5220 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg; 5221 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5222 struct ip_fw *rule; 5223 5224 ASSERT_NETISR_NCPUS(mycpuid); 5225 5226 if (zmsg->rulenum == 0) { 5227 KKASSERT(zmsg->start_rule == NULL); 5228 5229 ctx->ipfw_norule_counter = 0; 5230 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5231 clear_counters(rule, zmsg->log_only); 5232 } else { 5233 struct ip_fw *start = zmsg->start_rule; 5234 5235 KKASSERT(start->cpuid == mycpuid); 5236 KKASSERT(start->rulenum == zmsg->rulenum); 5237 5238 /* 5239 * We can have multiple rules with the same number, so we 5240 * need to clear them all. 5241 */ 5242 for (rule = start; rule && rule->rulenum == zmsg->rulenum; 5243 rule = rule->next) 5244 clear_counters(rule, zmsg->log_only); 5245 5246 /* 5247 * Move to the position on the next CPU 5248 * before the msg is forwarded. 5249 */ 5250 zmsg->start_rule = start->sibling; 5251 } 5252 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5253 } 5254 5255 /* 5256 * Reset some or all counters on firewall rules. 5257 * @arg frwl is null to clear all entries, or contains a specific 5258 * rule number. 5259 * @arg log_only is 1 if we only want to reset logs, zero otherwise. 
5260 */ 5261 static int 5262 ipfw_ctl_zero_entry(int rulenum, int log_only) 5263 { 5264 struct netmsg_zent zmsg; 5265 struct netmsg_base *nmsg; 5266 const char *msg; 5267 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5268 5269 ASSERT_NETISR0; 5270 5271 bzero(&zmsg, sizeof(zmsg)); 5272 nmsg = &zmsg.base; 5273 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5274 ipfw_zero_entry_dispatch); 5275 zmsg.log_only = log_only; 5276 5277 if (rulenum == 0) { 5278 msg = log_only ? "ipfw: All logging counts reset.\n" 5279 : "ipfw: Accounting cleared.\n"; 5280 } else { 5281 struct ip_fw *rule; 5282 5283 /* 5284 * Locate the first rule with 'rulenum' 5285 */ 5286 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5287 if (rule->rulenum == rulenum) 5288 break; 5289 } 5290 if (rule == NULL) /* we did not find any matching rules */ 5291 return (EINVAL); 5292 zmsg.start_rule = rule; 5293 zmsg.rulenum = rulenum; 5294 5295 msg = log_only ? "ipfw: Entry %d logging count reset.\n" 5296 : "ipfw: Entry %d cleared.\n"; 5297 } 5298 netisr_domsg_global(nmsg); 5299 KKASSERT(zmsg.start_rule == NULL); 5300 5301 if (fw_verbose) 5302 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 5303 return (0); 5304 } 5305 5306 /* 5307 * Check validity of the structure before insert. 5308 * Fortunately rules are simple, so this mostly need to check rule sizes. 5309 */ 5310 static int 5311 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) 5312 { 5313 int l, cmdlen = 0; 5314 int have_action = 0; 5315 ipfw_insn *cmd; 5316 5317 *rule_flags = 0; 5318 5319 /* Check for valid size */ 5320 if (size < sizeof(*rule)) { 5321 kprintf("ipfw: rule too short\n"); 5322 return EINVAL; 5323 } 5324 l = IOC_RULESIZE(rule); 5325 if (l != size) { 5326 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 5327 return EINVAL; 5328 } 5329 5330 /* Check rule number */ 5331 if (rule->rulenum == IPFW_DEFAULT_RULE) { 5332 kprintf("ipfw: invalid rule number\n"); 5333 return EINVAL; 5334 } 5335 5336 /* 5337 * Now go for the individual checks. Very simple ones, basically only 5338 * instruction sizes. 5339 */ 5340 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 5341 l -= cmdlen, cmd += cmdlen) { 5342 cmdlen = F_LEN(cmd); 5343 if (cmdlen > l) { 5344 kprintf("ipfw: opcode %d size truncated\n", 5345 cmd->opcode); 5346 return EINVAL; 5347 } 5348 5349 DPRINTF("ipfw: opcode %d\n", cmd->opcode); 5350 5351 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT || 5352 IPFW_ISXLAT(cmd->opcode)) { 5353 /* This rule will generate states. 
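 * keep-state, limit and the xlat (redirect) opcodes all
 * install per-flow state at run time; limit additionally
 * needs a track, flagged just below.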
*/ 5354 *rule_flags |= IPFW_RULE_F_GENSTATE; 5355 if (cmd->opcode == O_LIMIT) 5356 *rule_flags |= IPFW_RULE_F_GENTRACK; 5357 } 5358 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode)) 5359 *rule_flags |= IPFW_RULE_F_CROSSREF; 5360 if (cmd->opcode == O_IP_SRC_IFIP || 5361 cmd->opcode == O_IP_DST_IFIP) { 5362 *rule_flags |= IPFW_RULE_F_DYNIFADDR; 5363 cmd->arg1 &= IPFW_IFIP_SETTINGS; 5364 } 5365 5366 switch (cmd->opcode) { 5367 case O_NOP: 5368 case O_PROBE_STATE: 5369 case O_KEEP_STATE: 5370 case O_PROTO: 5371 case O_IP_SRC_ME: 5372 case O_IP_DST_ME: 5373 case O_LAYER2: 5374 case O_IN: 5375 case O_FRAG: 5376 case O_IPFRAG: 5377 case O_IPOPT: 5378 case O_IPLEN: 5379 case O_IPID: 5380 case O_IPTOS: 5381 case O_IPPRECEDENCE: 5382 case O_IPTTL: 5383 case O_IPVER: 5384 case O_TCPWIN: 5385 case O_TCPFLAGS: 5386 case O_TCPOPTS: 5387 case O_ESTAB: 5388 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5389 goto bad_size; 5390 break; 5391 5392 case O_IP_SRC_TABLE: 5393 case O_IP_DST_TABLE: 5394 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5395 goto bad_size; 5396 if (cmd->arg1 >= ipfw_table_max) { 5397 kprintf("ipfw: invalid table id %u, max %d\n", 5398 cmd->arg1, ipfw_table_max); 5399 return EINVAL; 5400 } 5401 break; 5402 5403 case O_IP_SRC_IFIP: 5404 case O_IP_DST_IFIP: 5405 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip)) 5406 goto bad_size; 5407 break; 5408 5409 case O_ICMPCODE: 5410 case O_ICMPTYPE: 5411 if (cmdlen < F_INSN_SIZE(ipfw_insn_u32)) 5412 goto bad_size; 5413 break; 5414 5415 case O_UID: 5416 case O_GID: 5417 case O_IP_SRC: 5418 case O_IP_DST: 5419 case O_TCPSEQ: 5420 case O_TCPACK: 5421 case O_PROB: 5422 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 5423 goto bad_size; 5424 break; 5425 5426 case O_LIMIT: 5427 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 5428 goto bad_size; 5429 break; 5430 case O_REDIRECT: 5431 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr)) 5432 goto bad_size; 5433 break; 5434 5435 case O_LOG: 5436 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 5437 goto bad_size; 5438 5439 ((ipfw_insn_log *)cmd)->log_left = 5440 ((ipfw_insn_log *)cmd)->max_log; 5441 5442 break; 5443 5444 case O_IP_SRC_MASK: 5445 case O_IP_DST_MASK: 5446 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 5447 goto bad_size; 5448 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 5449 kprintf("ipfw: opcode %d, useless rule\n", 5450 cmd->opcode); 5451 return EINVAL; 5452 } 5453 break; 5454 5455 case O_IP_SRC_SET: 5456 case O_IP_DST_SET: 5457 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 5458 kprintf("ipfw: invalid set size %d\n", 5459 cmd->arg1); 5460 return EINVAL; 5461 } 5462 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 5463 (cmd->arg1+31)/32 ) 5464 goto bad_size; 5465 break; 5466 5467 case O_MACADDR2: 5468 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 5469 goto bad_size; 5470 break; 5471 5472 case O_MAC_TYPE: 5473 case O_IP_SRCPORT: 5474 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 5475 if (cmdlen < 2 || cmdlen > 31) 5476 goto bad_size; 5477 break; 5478 5479 case O_RECV: 5480 case O_XMIT: 5481 case O_VIA: 5482 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 5483 goto bad_size; 5484 break; 5485 5486 case O_PIPE: 5487 case O_QUEUE: 5488 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 5489 goto bad_size; 5490 goto check_action; 5491 5492 case O_FORWARD_IP: 5493 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 5494 goto bad_size; 5495 } else { 5496 in_addr_t fwd_addr; 5497 5498 fwd_addr = ((ipfw_insn_sa *)cmd)-> 5499 sa.sin_addr.s_addr; 5500 if (IN_MULTICAST(ntohl(fwd_addr))) { 5501 kprintf("ipfw: try forwarding to " 5502 "multicast address\n"); 5503 return 
EINVAL; 5504 } 5505 } 5506 goto check_action; 5507 5508 case O_FORWARD_MAC: /* XXX not implemented yet */ 5509 case O_CHECK_STATE: 5510 case O_COUNT: 5511 case O_ACCEPT: 5512 case O_DENY: 5513 case O_REJECT: 5514 case O_SKIPTO: 5515 case O_DIVERT: 5516 case O_TEE: 5517 case O_DEFRAG: 5518 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5519 goto bad_size; 5520 check_action: 5521 if (have_action) { 5522 kprintf("ipfw: opcode %d, multiple actions" 5523 " not allowed\n", 5524 cmd->opcode); 5525 return EINVAL; 5526 } 5527 have_action = 1; 5528 if (l != cmdlen) { 5529 kprintf("ipfw: opcode %d, action must be" 5530 " last opcode\n", 5531 cmd->opcode); 5532 return EINVAL; 5533 } 5534 break; 5535 default: 5536 kprintf("ipfw: opcode %d, unknown opcode\n", 5537 cmd->opcode); 5538 return EINVAL; 5539 } 5540 } 5541 if (have_action == 0) { 5542 kprintf("ipfw: missing action\n"); 5543 return EINVAL; 5544 } 5545 return 0; 5546 5547 bad_size: 5548 kprintf("ipfw: opcode %d size %d wrong\n", 5549 cmd->opcode, cmdlen); 5550 return EINVAL; 5551 } 5552 5553 static int 5554 ipfw_ctl_add_rule(struct sockopt *sopt) 5555 { 5556 struct ipfw_ioc_rule *ioc_rule; 5557 size_t size; 5558 uint32_t rule_flags; 5559 int error; 5560 5561 ASSERT_NETISR0; 5562 5563 size = sopt->sopt_valsize; 5564 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 5565 size < sizeof(*ioc_rule)) { 5566 return EINVAL; 5567 } 5568 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 5569 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 5570 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 5571 } 5572 ioc_rule = sopt->sopt_val; 5573 5574 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 5575 if (error) 5576 return error; 5577 5578 ipfw_add_rule(ioc_rule, rule_flags); 5579 5580 if (sopt->sopt_dir == SOPT_GET) 5581 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 5582 return 0; 5583 } 5584 5585 static void * 5586 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule, 5587 struct ipfw_ioc_rule *ioc_rule) 5588 { 5589 const struct ip_fw *sibling; 5590 #ifdef INVARIANTS 5591 int i; 5592 #endif 5593 5594 ASSERT_NETISR0; 5595 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0")); 5596 5597 ioc_rule->act_ofs = rule->act_ofs; 5598 ioc_rule->cmd_len = rule->cmd_len; 5599 ioc_rule->rulenum = rule->rulenum; 5600 ioc_rule->set = rule->set; 5601 ioc_rule->usr_flags = rule->usr_flags; 5602 5603 ioc_rule->set_disable = ctx->ipfw_set_disable; 5604 ioc_rule->static_count = static_count; 5605 ioc_rule->static_len = static_ioc_len; 5606 5607 /* 5608 * Visit (read-only) all of the rule's duplications to get 5609 * the necessary statistics 5610 */ 5611 #ifdef INVARIANTS 5612 i = 0; 5613 #endif 5614 ioc_rule->pcnt = 0; 5615 ioc_rule->bcnt = 0; 5616 ioc_rule->timestamp = 0; 5617 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 5618 ioc_rule->pcnt += sibling->pcnt; 5619 ioc_rule->bcnt += sibling->bcnt; 5620 if (sibling->timestamp > ioc_rule->timestamp) 5621 ioc_rule->timestamp = sibling->timestamp; 5622 #ifdef INVARIANTS 5623 ++i; 5624 #endif 5625 } 5626 KASSERT(i == netisr_ncpus, 5627 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus)); 5628 5629 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 5630 5631 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 5632 } 5633 5634 static boolean_t 5635 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state) 5636 { 5637 struct ipfw_ioc_flowid *ioc_id; 5638 5639 if (trk->tc_expire == 0) { 5640 /* Not a scanned one. 
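 * (tc_expire is only refreshed from the per-cpu tracks in
 * ipfw_state_copy_dispatch(); an entry that was never
 * refreshed has not been scanned and is skipped.)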
*/ 5641 return (FALSE); 5642 } 5643 5644 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ? 5645 0 : trk->tc_expire - time_uptime; 5646 ioc_state->pcnt = 0; 5647 ioc_state->bcnt = 0; 5648 5649 ioc_state->dyn_type = O_LIMIT_PARENT; 5650 ioc_state->count = trk->tc_count; 5651 5652 ioc_state->rulenum = trk->tc_rulenum; 5653 5654 ioc_id = &ioc_state->id; 5655 ioc_id->type = ETHERTYPE_IP; 5656 ioc_id->u.ip.proto = trk->tc_proto; 5657 ioc_id->u.ip.src_ip = trk->tc_saddr; 5658 ioc_id->u.ip.dst_ip = trk->tc_daddr; 5659 ioc_id->u.ip.src_port = trk->tc_sport; 5660 ioc_id->u.ip.dst_port = trk->tc_dport; 5661 5662 return (TRUE); 5663 } 5664 5665 static boolean_t 5666 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state) 5667 { 5668 struct ipfw_ioc_flowid *ioc_id; 5669 5670 if (IPFW_STATE_SCANSKIP(s)) 5671 return (FALSE); 5672 5673 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ? 5674 0 : s->st_expire - time_uptime; 5675 ioc_state->pcnt = s->st_pcnt; 5676 ioc_state->bcnt = s->st_bcnt; 5677 5678 ioc_state->dyn_type = s->st_type; 5679 ioc_state->count = 0; 5680 5681 ioc_state->rulenum = s->st_rule->rulenum; 5682 5683 ioc_id = &ioc_state->id; 5684 ioc_id->type = ETHERTYPE_IP; 5685 ioc_id->u.ip.proto = s->st_proto; 5686 ipfw_key_4tuple(&s->st_key, 5687 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port, 5688 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port); 5689 5690 if (IPFW_ISXLAT(s->st_type)) { 5691 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 5692 5693 if (x->xlat_port == 0) 5694 ioc_state->xlat_port = ioc_id->u.ip.dst_port; 5695 else 5696 ioc_state->xlat_port = ntohs(x->xlat_port); 5697 ioc_state->xlat_addr = ntohl(x->xlat_addr); 5698 5699 ioc_state->pcnt += x->xlat_pair->xlat_pcnt; 5700 ioc_state->bcnt += x->xlat_pair->xlat_bcnt; 5701 } 5702 5703 return (TRUE); 5704 } 5705 5706 static void 5707 ipfw_state_copy_dispatch(netmsg_t nmsg) 5708 { 5709 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg; 5710 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5711 const struct ipfw_state *s; 5712 const struct ipfw_track *t; 5713 5714 ASSERT_NETISR_NCPUS(mycpuid); 5715 KASSERT(nm->state_cnt < nm->state_cntmax, 5716 ("invalid state count %d, max %d", 5717 nm->state_cnt, nm->state_cntmax)); 5718 5719 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) { 5720 if (ipfw_state_copy(s, nm->ioc_state)) { 5721 nm->ioc_state++; 5722 nm->state_cnt++; 5723 if (nm->state_cnt == nm->state_cntmax) 5724 goto done; 5725 } 5726 } 5727 5728 /* 5729 * Prepare tracks in the global track tree for userland. 5730 */ 5731 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) { 5732 struct ipfw_trkcnt *trk; 5733 5734 if (t->t_count == NULL) /* anchor */ 5735 continue; 5736 trk = t->t_trkcnt; 5737 5738 /* 5739 * Only one netisr can run this function at 5740 * any time, and only this function accesses 5741 * trkcnt's tc_expire, so this is safe w/o 5742 * ipfw_gd.ipfw_trkcnt_token. 5743 */ 5744 if (trk->tc_expire > t->t_expire) 5745 continue; 5746 trk->tc_expire = t->t_expire; 5747 } 5748 5749 /* 5750 * Copy tracks in the global track tree to userland in 5751 * the last netisr. 
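 * Deferring the copy to the last netisr guarantees that every
 * cpu has already refreshed tc_expire in the loop above.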
5752 */ 5753 if (mycpuid == netisr_ncpus - 1) { 5754 struct ipfw_trkcnt *trk; 5755 5756 KASSERT(nm->state_cnt < nm->state_cntmax, 5757 ("invalid state count %d, max %d", 5758 nm->state_cnt, nm->state_cntmax)); 5759 5760 IPFW_TRKCNT_TOKGET; 5761 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) { 5762 if (ipfw_track_copy(trk, nm->ioc_state)) { 5763 nm->ioc_state++; 5764 nm->state_cnt++; 5765 if (nm->state_cnt == nm->state_cntmax) { 5766 IPFW_TRKCNT_TOKREL; 5767 goto done; 5768 } 5769 } 5770 } 5771 IPFW_TRKCNT_TOKREL; 5772 } 5773 done: 5774 if (nm->state_cnt == nm->state_cntmax) { 5775 /* No more space; done. */ 5776 netisr_replymsg(&nm->base, 0); 5777 } else { 5778 netisr_forwardmsg(&nm->base, mycpuid + 1); 5779 } 5780 } 5781 5782 static int 5783 ipfw_ctl_get_rules(struct sockopt *sopt) 5784 { 5785 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5786 struct ip_fw *rule; 5787 void *bp; 5788 size_t size; 5789 int state_cnt; 5790 5791 ASSERT_NETISR0; 5792 5793 /* 5794 * pass up a copy of the current rules. Static rules 5795 * come first (the last of which has number IPFW_DEFAULT_RULE), 5796 * followed by a possibly empty list of states. 5797 */ 5798 5799 size = static_ioc_len; /* size of static rules */ 5800 5801 /* 5802 * Size of the states. 5803 * XXX take tracks as state for userland compat. 5804 */ 5805 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt; 5806 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */ 5807 size += state_cnt * sizeof(struct ipfw_ioc_state); 5808 5809 if (sopt->sopt_valsize < size) { 5810 /* short length, no need to return incomplete rules */ 5811 /* XXX: if superuser, no need to zero buffer */ 5812 bzero(sopt->sopt_val, sopt->sopt_valsize); 5813 return 0; 5814 } 5815 bp = sopt->sopt_val; 5816 5817 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5818 bp = ipfw_copy_rule(ctx, rule, bp); 5819 5820 if (state_cnt) { 5821 struct netmsg_cpstate nm; 5822 #ifdef INVARIANTS 5823 size_t old_size = size; 5824 #endif 5825 5826 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5827 MSGF_PRIORITY, ipfw_state_copy_dispatch); 5828 nm.ioc_state = bp; 5829 nm.state_cntmax = state_cnt; 5830 nm.state_cnt = 0; 5831 netisr_domsg_global(&nm.base); 5832 5833 /* 5834 * The # of states may be shrinked after the snapshot 5835 * of the state count was taken. To give user a correct 5836 * state count, nm->state_cnt is used to recalculate 5837 * the actual size. 
5838 */ 5839 size = static_ioc_len + 5840 (nm.state_cnt * sizeof(struct ipfw_ioc_state)); 5841 KKASSERT(size <= old_size); 5842 } 5843 5844 sopt->sopt_valsize = size; 5845 return 0; 5846 } 5847 5848 static void 5849 ipfw_set_disable_dispatch(netmsg_t nmsg) 5850 { 5851 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5852 5853 ASSERT_NETISR_NCPUS(mycpuid); 5854 5855 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32; 5856 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5857 } 5858 5859 static void 5860 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 5861 { 5862 struct netmsg_base nmsg; 5863 uint32_t set_disable; 5864 5865 ASSERT_NETISR0; 5866 5867 /* IPFW_DEFAULT_SET is always enabled */ 5868 enable |= (1 << IPFW_DEFAULT_SET); 5869 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 5870 5871 bzero(&nmsg, sizeof(nmsg)); 5872 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5873 ipfw_set_disable_dispatch); 5874 nmsg.lmsg.u.ms_result32 = set_disable; 5875 5876 netisr_domsg_global(&nmsg); 5877 } 5878 5879 static void 5880 ipfw_table_create_dispatch(netmsg_t nm) 5881 { 5882 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5883 int tblid = nm->lmsg.u.ms_result; 5884 5885 ASSERT_NETISR_NCPUS(mycpuid); 5886 5887 if (!rn_inithead(&ctx->ipfw_tables[tblid], rn_cpumaskhead(mycpuid), 5888 offsetof(struct sockaddr_in, sin_addr))) 5889 panic("ipfw: create table%d failed", tblid); 5890 5891 netisr_forwardmsg(&nm->base, mycpuid + 1); 5892 } 5893 5894 static int 5895 ipfw_table_create(struct sockopt *sopt) 5896 { 5897 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5898 struct ipfw_ioc_table *tbl; 5899 struct netmsg_base nm; 5900 5901 ASSERT_NETISR0; 5902 5903 if (sopt->sopt_valsize != sizeof(*tbl)) 5904 return (EINVAL); 5905 5906 tbl = sopt->sopt_val; 5907 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5908 return (EINVAL); 5909 5910 if (ctx->ipfw_tables[tbl->tableid] != NULL) 5911 return (EEXIST); 5912 5913 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5914 ipfw_table_create_dispatch); 5915 nm.lmsg.u.ms_result = tbl->tableid; 5916 netisr_domsg_global(&nm); 5917 5918 return (0); 5919 } 5920 5921 static void 5922 ipfw_table_killent(struct radix_node *rn) 5923 { 5924 struct ipfw_tblent *te; 5925 5926 te = (struct ipfw_tblent *)rn; 5927 kfree(te, M_IPFW); 5928 } 5929 5930 static void 5931 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, 5932 int destroy) 5933 { 5934 struct radix_node_head *rnh; 5935 5936 ASSERT_NETISR_NCPUS(mycpuid); 5937 5938 rnh = ctx->ipfw_tables[tableid]; 5939 rn_flush(rnh, ipfw_table_killent); 5940 if (destroy) { 5941 rn_freehead(rnh); 5942 ctx->ipfw_tables[tableid] = NULL; 5943 } 5944 } 5945 5946 static void 5947 ipfw_table_flush_dispatch(netmsg_t nmsg) 5948 { 5949 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; 5950 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5951 5952 ASSERT_NETISR_NCPUS(mycpuid); 5953 5954 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); 5955 netisr_forwardmsg(&nm->base, mycpuid + 1); 5956 } 5957 5958 static void 5959 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) 5960 { 5961 int i; 5962 5963 ASSERT_NETISR_NCPUS(mycpuid); 5964 5965 for (i = 0; i < ipfw_table_max; ++i) { 5966 if (ctx->ipfw_tables[i] != NULL) 5967 ipfw_table_flush_oncpu(ctx, i, destroy); 5968 } 5969 } 5970 5971 static void 5972 ipfw_table_flushall_dispatch(netmsg_t nmsg) 5973 { 5974 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5975 5976 ASSERT_NETISR_NCPUS(mycpuid); 5977 5978 
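/* The flush-all netmsg only empties the tables (destroy == 0);
 * IP_FW_TBL_DESTROY always goes through the per-table path. */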
ipfw_table_flushall_oncpu(ctx, 0); 5979 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5980 } 5981 5982 static int 5983 ipfw_table_flush(struct sockopt *sopt) 5984 { 5985 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5986 struct ipfw_ioc_table *tbl; 5987 struct netmsg_tblflush nm; 5988 5989 ASSERT_NETISR0; 5990 5991 if (sopt->sopt_valsize != sizeof(*tbl)) 5992 return (EINVAL); 5993 5994 tbl = sopt->sopt_val; 5995 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { 5996 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5997 MSGF_PRIORITY, ipfw_table_flushall_dispatch); 5998 netisr_domsg_global(&nm.base); 5999 return (0); 6000 } 6001 6002 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6003 return (EINVAL); 6004 6005 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6006 return (ENOENT); 6007 6008 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6009 ipfw_table_flush_dispatch); 6010 nm.tableid = tbl->tableid; 6011 nm.destroy = 0; 6012 if (sopt->sopt_name == IP_FW_TBL_DESTROY) 6013 nm.destroy = 1; 6014 netisr_domsg_global(&nm.base); 6015 6016 return (0); 6017 } 6018 6019 static int 6020 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) 6021 { 6022 int *cnt = xcnt; 6023 6024 (*cnt)++; 6025 return (0); 6026 } 6027 6028 static int 6029 ipfw_table_cpent(struct radix_node *rn, void *xcp) 6030 { 6031 struct ipfw_table_cp *cp = xcp; 6032 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6033 struct ipfw_ioc_tblent *ioc_te; 6034 #ifdef INVARIANTS 6035 int cnt; 6036 #endif 6037 6038 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", 6039 cp->te_idx, cp->te_cnt)); 6040 ioc_te = &cp->te[cp->te_idx]; 6041 6042 if (te->te_nodes->rn_mask != NULL) { 6043 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, 6044 *te->te_nodes->rn_mask); 6045 } else { 6046 ioc_te->netmask.sin_len = 0; 6047 } 6048 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); 6049 6050 ioc_te->use = te->te_use; 6051 ioc_te->last_used = te->te_lastuse; 6052 #ifdef INVARIANTS 6053 cnt = 1; 6054 #endif 6055 6056 while ((te = te->te_sibling) != NULL) { 6057 #ifdef INVARIANTS 6058 ++cnt; 6059 #endif 6060 ioc_te->use += te->te_use; 6061 if (te->te_lastuse > ioc_te->last_used) 6062 ioc_te->last_used = te->te_lastuse; 6063 } 6064 KASSERT(cnt == netisr_ncpus, 6065 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); 6066 6067 cp->te_idx++; 6068 6069 return (0); 6070 } 6071 6072 static int 6073 ipfw_table_get(struct sockopt *sopt) 6074 { 6075 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6076 struct radix_node_head *rnh; 6077 struct ipfw_ioc_table *tbl; 6078 struct ipfw_ioc_tblcont *cont; 6079 struct ipfw_table_cp cp; 6080 int cnt = 0, sz; 6081 6082 ASSERT_NETISR0; 6083 6084 if (sopt->sopt_valsize < sizeof(*tbl)) 6085 return (EINVAL); 6086 6087 tbl = sopt->sopt_val; 6088 if (tbl->tableid < 0) { 6089 struct ipfw_ioc_tbllist *list; 6090 int i; 6091 6092 /* 6093 * List available table ids. 
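 * (a negative tableid requests the list of configured tables
 * instead of the entries of a single table)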
6094 */ 6095 for (i = 0; i < ipfw_table_max; ++i) { 6096 if (ctx->ipfw_tables[i] != NULL) 6097 ++cnt; 6098 } 6099 6100 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); 6101 if (sopt->sopt_valsize < sz) { 6102 bzero(sopt->sopt_val, sopt->sopt_valsize); 6103 return (E2BIG); 6104 } 6105 list = sopt->sopt_val; 6106 list->tablecnt = cnt; 6107 6108 cnt = 0; 6109 for (i = 0; i < ipfw_table_max; ++i) { 6110 if (ctx->ipfw_tables[i] != NULL) { 6111 KASSERT(cnt < list->tablecnt, 6112 ("invalid idx %d, cnt %d", 6113 cnt, list->tablecnt)); 6114 list->tables[cnt++] = i; 6115 } 6116 } 6117 sopt->sopt_valsize = sz; 6118 return (0); 6119 } else if (tbl->tableid >= ipfw_table_max) { 6120 return (EINVAL); 6121 } 6122 6123 rnh = ctx->ipfw_tables[tbl->tableid]; 6124 if (rnh == NULL) 6125 return (ENOENT); 6126 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); 6127 6128 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); 6129 if (sopt->sopt_valsize < sz) { 6130 bzero(sopt->sopt_val, sopt->sopt_valsize); 6131 return (E2BIG); 6132 } 6133 cont = sopt->sopt_val; 6134 cont->entcnt = cnt; 6135 6136 cp.te = cont->ent; 6137 cp.te_idx = 0; 6138 cp.te_cnt = cnt; 6139 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); 6140 6141 sopt->sopt_valsize = sz; 6142 return (0); 6143 } 6144 6145 static void 6146 ipfw_table_add_dispatch(netmsg_t nmsg) 6147 { 6148 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6149 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6150 struct radix_node_head *rnh; 6151 struct ipfw_tblent *te; 6152 6153 ASSERT_NETISR_NCPUS(mycpuid); 6154 6155 rnh = ctx->ipfw_tables[nm->tableid]; 6156 6157 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO); 6158 te->te_nodes->rn_key = (char *)&te->te_key; 6159 memcpy(&te->te_key, nm->key, sizeof(te->te_key)); 6160 6161 if (rnh->rnh_addaddr(&te->te_key, nm->netmask, rnh, te->te_nodes) 6162 == NULL) { 6163 if (mycpuid == 0) { 6164 kfree(te, M_IPFW); 6165 netisr_replymsg(&nm->base, EEXIST); 6166 return; 6167 } 6168 panic("rnh_addaddr failed"); 6169 } 6170 6171 /* Link siblings. 
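 * Entries for the same key are chained across cpus via
 * te_sibling, so ipfw_table_cpent() can sum the per-cpu use
 * counters when copying them to userland.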
*/ 6172 if (nm->sibling != NULL) 6173 nm->sibling->te_sibling = te; 6174 nm->sibling = te; 6175 6176 netisr_forwardmsg(&nm->base, mycpuid + 1); 6177 } 6178 6179 static void 6180 ipfw_table_del_dispatch(netmsg_t nmsg) 6181 { 6182 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6183 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6184 struct radix_node_head *rnh; 6185 struct radix_node *rn; 6186 6187 ASSERT_NETISR_NCPUS(mycpuid); 6188 6189 rnh = ctx->ipfw_tables[nm->tableid]; 6190 rn = rnh->rnh_deladdr(nm->key, nm->netmask, rnh); 6191 if (rn == NULL) { 6192 if (mycpuid == 0) { 6193 netisr_replymsg(&nm->base, ESRCH); 6194 return; 6195 } 6196 panic("rnh_deladdr failed"); 6197 } 6198 kfree(rn, M_IPFW); 6199 6200 netisr_forwardmsg(&nm->base, mycpuid + 1); 6201 } 6202 6203 static int 6204 ipfw_table_alt(struct sockopt *sopt) 6205 { 6206 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6207 struct ipfw_ioc_tblcont *tbl; 6208 struct ipfw_ioc_tblent *te; 6209 struct sockaddr_in key0; 6210 struct sockaddr *netmask = NULL, *key; 6211 struct netmsg_tblent nm; 6212 6213 ASSERT_NETISR0; 6214 6215 if (sopt->sopt_valsize != sizeof(*tbl)) 6216 return (EINVAL); 6217 tbl = sopt->sopt_val; 6218 6219 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6220 return (EINVAL); 6221 if (tbl->entcnt != 1) 6222 return (EINVAL); 6223 6224 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6225 return (ENOENT); 6226 te = &tbl->ent[0]; 6227 6228 if (te->key.sin_family != AF_INET || 6229 te->key.sin_port != 0 || 6230 te->key.sin_len != sizeof(struct sockaddr_in)) 6231 return (EINVAL); 6232 key = (struct sockaddr *)&te->key; 6233 6234 if (te->netmask.sin_len != 0) { 6235 if (te->netmask.sin_port != 0 || 6236 te->netmask.sin_len > sizeof(struct sockaddr_in)) 6237 return (EINVAL); 6238 netmask = (struct sockaddr *)&te->netmask; 6239 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask); 6240 key = (struct sockaddr *)&key0; 6241 } 6242 6243 if (sopt->sopt_name == IP_FW_TBL_ADD) { 6244 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6245 MSGF_PRIORITY, ipfw_table_add_dispatch); 6246 } else { 6247 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6248 MSGF_PRIORITY, ipfw_table_del_dispatch); 6249 } 6250 nm.key = key; 6251 nm.netmask = netmask; 6252 nm.tableid = tbl->tableid; 6253 nm.sibling = NULL; 6254 return (netisr_domsg_global(&nm.base)); 6255 } 6256 6257 static int 6258 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused) 6259 { 6260 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6261 6262 te->te_use = 0; 6263 te->te_lastuse = 0; 6264 return (0); 6265 } 6266 6267 static void 6268 ipfw_table_zero_dispatch(netmsg_t nmsg) 6269 { 6270 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6271 struct radix_node_head *rnh; 6272 6273 ASSERT_NETISR_NCPUS(mycpuid); 6274 6275 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result]; 6276 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6277 6278 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6279 } 6280 6281 static void 6282 ipfw_table_zeroall_dispatch(netmsg_t nmsg) 6283 { 6284 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6285 int i; 6286 6287 ASSERT_NETISR_NCPUS(mycpuid); 6288 6289 for (i = 0; i < ipfw_table_max; ++i) { 6290 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6291 6292 if (rnh != NULL) 6293 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6294 } 6295 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6296 } 6297 6298 static int 6299 ipfw_table_zero(struct sockopt *sopt) 6300 { 6301 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6302 struct netmsg_base nm; 6303 struct 
ipfw_ioc_table *tbl; 6304 6305 ASSERT_NETISR0; 6306 6307 if (sopt->sopt_valsize != sizeof(*tbl)) 6308 return (EINVAL); 6309 tbl = sopt->sopt_val; 6310 6311 if (tbl->tableid < 0) { 6312 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6313 ipfw_table_zeroall_dispatch); 6314 netisr_domsg_global(&nm); 6315 return (0); 6316 } else if (tbl->tableid >= ipfw_table_max) { 6317 return (EINVAL); 6318 } else if (ctx->ipfw_tables[tbl->tableid] == NULL) { 6319 return (ENOENT); 6320 } 6321 6322 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6323 ipfw_table_zero_dispatch); 6324 nm.lmsg.u.ms_result = tbl->tableid; 6325 netisr_domsg_global(&nm); 6326 6327 return (0); 6328 } 6329 6330 static int 6331 ipfw_table_killexp(struct radix_node *rn, void *xnm) 6332 { 6333 struct netmsg_tblexp *nm = xnm; 6334 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6335 struct radix_node *ret; 6336 6337 if (te->te_expired) { 6338 ret = nm->rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, nm->rnh); 6339 if (ret != rn) 6340 panic("deleted other table entry"); 6341 kfree(ret, M_IPFW); 6342 nm->expcnt++; 6343 } 6344 return (0); 6345 } 6346 6347 static void 6348 ipfw_table_expire_dispatch(netmsg_t nmsg) 6349 { 6350 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; 6351 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6352 struct radix_node_head *rnh; 6353 6354 ASSERT_NETISR_NCPUS(mycpuid); 6355 6356 rnh = ctx->ipfw_tables[nm->tableid]; 6357 nm->rnh = rnh; 6358 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); 6359 6360 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), 6361 ("not all expired addresses (%d) were deleted (%d)", 6362 nm->cnt * (mycpuid + 1), nm->expcnt)); 6363 6364 netisr_forwardmsg(&nm->base, mycpuid + 1); 6365 } 6366 6367 static void 6368 ipfw_table_expireall_dispatch(netmsg_t nmsg) 6369 { 6370 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; 6371 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6372 int i; 6373 6374 ASSERT_NETISR_NCPUS(mycpuid); 6375 6376 for (i = 0; i < ipfw_table_max; ++i) { 6377 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6378 6379 if (rnh == NULL) 6380 continue; 6381 nm->rnh = rnh; 6382 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); 6383 } 6384 6385 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), 6386 ("not all expired addresses (%d) were deleted (%d)", 6387 nm->cnt * (mycpuid + 1), nm->expcnt)); 6388 6389 netisr_forwardmsg(&nm->base, mycpuid + 1); 6390 } 6391 6392 static int 6393 ipfw_table_markexp(struct radix_node *rn, void *xnm) 6394 { 6395 struct netmsg_tblexp *nm = xnm; 6396 struct ipfw_tblent *te; 6397 time_t lastuse; 6398 6399 te = (struct ipfw_tblent *)rn; 6400 lastuse = te->te_lastuse; 6401 6402 while ((te = te->te_sibling) != NULL) { 6403 if (te->te_lastuse > lastuse) 6404 lastuse = te->te_lastuse; 6405 } 6406 if (!TIME_LEQ(lastuse + nm->expire, time_second)) { 6407 /* Not expired */ 6408 return (0); 6409 } 6410 6411 te = (struct ipfw_tblent *)rn; 6412 te->te_expired = 1; 6413 while ((te = te->te_sibling) != NULL) 6414 te->te_expired = 1; 6415 nm->cnt++; 6416 6417 return (0); 6418 } 6419 6420 static int 6421 ipfw_table_expire(struct sockopt *sopt) 6422 { 6423 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6424 struct netmsg_tblexp nm; 6425 struct ipfw_ioc_tblexp *tbl; 6426 struct radix_node_head *rnh; 6427 6428 ASSERT_NETISR0; 6429 6430 if (sopt->sopt_valsize != sizeof(*tbl)) 6431 return (EINVAL); 6432 tbl = sopt->sopt_val; 6433 tbl->expcnt = 0; 6434 6435 nm.expcnt = 0; 6436 nm.cnt = 0; 6437 nm.expire = tbl->expire; 6438 6439 if (tbl->tableid < 0) { 
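	/*
	 * Negative tableid: expire entries in all tables.
	 *
	 * This is done in two phases.  First netisr0 walks its own
	 * radix trees and marks the entries whose aggregated
	 * te_lastuse (the max over all cpus, gathered through
	 * te_sibling by ipfw_table_markexp()) is old enough; nm.cnt
	 * counts the marked entries.  Then the netmsg below runs
	 * ipfw_table_killexp() on every netisr cpu to delete the
	 * marked entries from the per-cpu trees.
	 */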
6440 int i; 6441 6442 for (i = 0; i < ipfw_table_max; ++i) { 6443 rnh = ctx->ipfw_tables[i]; 6444 if (rnh == NULL) 6445 continue; 6446 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); 6447 } 6448 if (nm.cnt == 0) { 6449 /* No addresses can be expired. */ 6450 return (0); 6451 } 6452 tbl->expcnt = nm.cnt; 6453 6454 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6455 MSGF_PRIORITY, ipfw_table_expireall_dispatch); 6456 nm.tableid = -1; 6457 netisr_domsg_global(&nm.base); 6458 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, 6459 ("not all expired addresses (%d) were deleted (%d)", 6460 nm.cnt * netisr_ncpus, nm.expcnt)); 6461 6462 return (0); 6463 } else if (tbl->tableid >= ipfw_table_max) { 6464 return (EINVAL); 6465 } 6466 6467 rnh = ctx->ipfw_tables[tbl->tableid]; 6468 if (rnh == NULL) 6469 return (ENOENT); 6470 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); 6471 if (nm.cnt == 0) { 6472 /* No addresses can be expired. */ 6473 return (0); 6474 } 6475 tbl->expcnt = nm.cnt; 6476 6477 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6478 ipfw_table_expire_dispatch); 6479 nm.tableid = tbl->tableid; 6480 netisr_domsg_global(&nm.base); 6481 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, 6482 ("not all expired addresses (%d) were deleted (%d)", 6483 nm.cnt * netisr_ncpus, nm.expcnt)); 6484 return (0); 6485 } 6486 6487 static void 6488 ipfw_crossref_free_dispatch(netmsg_t nmsg) 6489 { 6490 struct ip_fw *rule = nmsg->lmsg.u.ms_resultp; 6491 6492 KKASSERT((rule->rule_flags & 6493 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) == 6494 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)); 6495 ipfw_free_rule(rule); 6496 6497 netisr_replymsg(&nmsg->base, 0); 6498 } 6499 6500 static void 6501 ipfw_crossref_reap(void) 6502 { 6503 struct ip_fw *rule, *prev = NULL; 6504 6505 ASSERT_NETISR0; 6506 6507 rule = ipfw_gd.ipfw_crossref_free; 6508 while (rule != NULL) { 6509 uint64_t inflight = 0; 6510 int i; 6511 6512 for (i = 0; i < netisr_ncpus; ++i) 6513 inflight += rule->cross_rules[i]->cross_refs; 6514 if (inflight == 0) { 6515 struct ip_fw *f = rule; 6516 6517 /* 6518 * Unlink. 6519 */ 6520 rule = rule->next; 6521 if (prev != NULL) 6522 prev->next = rule; 6523 else 6524 ipfw_gd.ipfw_crossref_free = rule; 6525 6526 /* 6527 * Free. 6528 */ 6529 for (i = 1; i < netisr_ncpus; ++i) { 6530 struct netmsg_base nm; 6531 6532 netmsg_init(&nm, NULL, &curthread->td_msgport, 6533 MSGF_PRIORITY, ipfw_crossref_free_dispatch); 6534 nm.lmsg.u.ms_resultp = f->cross_rules[i]; 6535 netisr_domsg(&nm, i); 6536 } 6537 KKASSERT((f->rule_flags & 6538 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) == 6539 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)); 6540 ipfw_unref_rule(f); 6541 } else { 6542 prev = rule; 6543 rule = rule->next; 6544 } 6545 } 6546 6547 if (ipfw_gd.ipfw_crossref_free != NULL) { 6548 callout_reset(&ipfw_gd.ipfw_crossref_ch, hz, 6549 ipfw_crossref_timeo, NULL); 6550 } 6551 } 6552 6553 /* 6554 * {set|get}sockopt parser. 6555 */ 6556 static int 6557 ipfw_ctl(struct sockopt *sopt) 6558 { 6559 int error, rulenum; 6560 uint32_t *masks; 6561 size_t size; 6562 6563 ASSERT_NETISR0; 6564 6565 error = 0; 6566 6567 switch (sopt->sopt_name) { 6568 case IP_FW_GET: 6569 error = ipfw_ctl_get_rules(sopt); 6570 break; 6571 6572 case IP_FW_FLUSH: 6573 ipfw_flush(0 /* keep default rule */); 6574 break; 6575 6576 case IP_FW_ADD: 6577 error = ipfw_ctl_add_rule(sopt); 6578 break; 6579 6580 case IP_FW_DEL: 6581 /* 6582 * IP_FW_DEL is used for deleting single rules or sets, 6583 * and (ab)used to atomically manipulate sets. 
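 *
 * A rough userland sketch of the two forms (hypothetical snippet;
 * the raw IP socket setup and error handling are assumed, as done
 * by ipfw(8)):
 *
 *	uint32_t rule = 100;
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, &rule, sizeof(rule));
 *
 *	uint32_t masks[2] = { disable_sets, enable_sets };
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, masks, sizeof(masks));
 *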
6584 * Argument size is used to distinguish between the two: 6585 * sizeof(uint32_t) 6586 * delete single rule or set of rules, 6587 * or reassign rules (or sets) to a different set. 6588 * 2 * sizeof(uint32_t) 6589 * atomic disable/enable sets. 6590 * first uint32_t contains sets to be disabled, 6591 * second uint32_t contains sets to be enabled. 6592 */ 6593 masks = sopt->sopt_val; 6594 size = sopt->sopt_valsize; 6595 if (size == sizeof(*masks)) { 6596 /* 6597 * Delete or reassign static rule 6598 */ 6599 error = ipfw_ctl_alter(masks[0]); 6600 } else if (size == (2 * sizeof(*masks))) { 6601 /* 6602 * Set enable/disable 6603 */ 6604 ipfw_ctl_set_disable(masks[0], masks[1]); 6605 } else { 6606 error = EINVAL; 6607 } 6608 break; 6609 6610 case IP_FW_ZERO: 6611 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 6612 rulenum = 0; 6613 6614 if (sopt->sopt_val != 0) { 6615 error = soopt_to_kbuf(sopt, &rulenum, 6616 sizeof(int), sizeof(int)); 6617 if (error) 6618 break; 6619 } 6620 error = ipfw_ctl_zero_entry(rulenum, 6621 sopt->sopt_name == IP_FW_RESETLOG); 6622 break; 6623 6624 case IP_FW_TBL_CREATE: 6625 error = ipfw_table_create(sopt); 6626 break; 6627 6628 case IP_FW_TBL_ADD: 6629 case IP_FW_TBL_DEL: 6630 error = ipfw_table_alt(sopt); 6631 break; 6632 6633 case IP_FW_TBL_FLUSH: 6634 case IP_FW_TBL_DESTROY: 6635 error = ipfw_table_flush(sopt); 6636 break; 6637 6638 case IP_FW_TBL_GET: 6639 error = ipfw_table_get(sopt); 6640 break; 6641 6642 case IP_FW_TBL_ZERO: 6643 error = ipfw_table_zero(sopt); 6644 break; 6645 6646 case IP_FW_TBL_EXPIRE: 6647 error = ipfw_table_expire(sopt); 6648 break; 6649 6650 default: 6651 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name); 6652 error = EINVAL; 6653 } 6654 6655 ipfw_crossref_reap(); 6656 return error; 6657 } 6658 6659 static void 6660 ipfw_keepalive_done(struct ipfw_context *ctx) 6661 { 6662 6663 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6664 ("keepalive is not in progress")); 6665 ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE; 6666 callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz, 6667 ipfw_keepalive, NULL); 6668 } 6669 6670 static void 6671 ipfw_keepalive_more(struct ipfw_context *ctx) 6672 { 6673 struct netmsg_base *nm = &ctx->ipfw_keepalive_more; 6674 6675 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6676 ("keepalive is not in progress")); 6677 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 6678 ("keepalive more did not finish")); 6679 netisr_sendmsg_oncpu(nm); 6680 } 6681 6682 static void 6683 ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor) 6684 { 6685 struct ipfw_state *s; 6686 int scanned = 0, expired = 0, kept = 0; 6687 6688 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6689 ("keepalive is not in progress")); 6690 6691 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 6692 uint32_t ack_rev, ack_fwd; 6693 struct ipfw_flow_id id; 6694 uint8_t send_dir; 6695 6696 if (scanned++ >= ipfw_state_scan_max) { 6697 ipfw_keepalive_more(ctx); 6698 return; 6699 } 6700 6701 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6702 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 6703 6704 /* 6705 * NOTE: 6706 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive 6707 * on slave xlat. 
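 *
 * The walk itself is driven by 'anchor': each iteration moves the
 * anchor past the state just examined, so when one of the
 * ipfw_state_scan_max/ipfw_state_expire_max/ipfw_keepalive_max
 * limits is hit, ipfw_keepalive_more() can reschedule this loop and
 * it will resume right after the anchor instead of rescanning from
 * the head of ipfw_state_list.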
6708 */ 6709 if (s->st_type == O_ANCHOR) 6710 continue; 6711 6712 if (IPFW_STATE_ISDEAD(s)) { 6713 ipfw_state_remove(ctx, s); 6714 if (++expired >= ipfw_state_expire_max) { 6715 ipfw_keepalive_more(ctx); 6716 return; 6717 } 6718 continue; 6719 } 6720 6721 /* 6722 * Keep alive processing 6723 */ 6724 6725 if (s->st_proto != IPPROTO_TCP) 6726 continue; 6727 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN) 6728 continue; 6729 if (TIME_LEQ(time_uptime + dyn_keepalive_interval, 6730 s->st_expire)) 6731 continue; /* too early */ 6732 6733 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port, 6734 &id.dst_ip, &id.dst_port); 6735 ack_rev = s->st_ack_rev; 6736 ack_fwd = s->st_ack_fwd; 6737 6738 #define SEND_FWD 0x1 6739 #define SEND_REV 0x2 6740 6741 if (IPFW_ISXLAT(s->st_type)) { 6742 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 6743 6744 if (x->xlat_dir == MATCH_FORWARD) 6745 send_dir = SEND_FWD; 6746 else 6747 send_dir = SEND_REV; 6748 } else { 6749 send_dir = SEND_FWD | SEND_REV; 6750 } 6751 6752 if (send_dir & SEND_REV) 6753 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN); 6754 if (send_dir & SEND_FWD) 6755 send_pkt(&id, ack_fwd - 1, ack_rev, 0); 6756 6757 #undef SEND_FWD 6758 #undef SEND_REV 6759 6760 if (++kept >= ipfw_keepalive_max) { 6761 ipfw_keepalive_more(ctx); 6762 return; 6763 } 6764 } 6765 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6766 ipfw_keepalive_done(ctx); 6767 } 6768 6769 static void 6770 ipfw_keepalive_more_dispatch(netmsg_t nm) 6771 { 6772 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6773 struct ipfw_state *anchor; 6774 6775 ASSERT_NETISR_NCPUS(mycpuid); 6776 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6777 ("keepalive is not in progress")); 6778 6779 /* Reply ASAP */ 6780 netisr_replymsg(&nm->base, 0); 6781 6782 anchor = &ctx->ipfw_keepalive_anch; 6783 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6784 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6785 ipfw_keepalive_done(ctx); 6786 return; 6787 } 6788 ipfw_keepalive_loop(ctx, anchor); 6789 } 6790 6791 /* 6792 * This procedure is only used to handle keepalives. It is invoked 6793 * every dyn_keepalive_period 6794 */ 6795 static void 6796 ipfw_keepalive_dispatch(netmsg_t nm) 6797 { 6798 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6799 struct ipfw_state *anchor; 6800 6801 ASSERT_NETISR_NCPUS(mycpuid); 6802 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0, 6803 ("keepalive is in progress")); 6804 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE; 6805 6806 /* Reply ASAP */ 6807 crit_enter(); 6808 netisr_replymsg(&nm->base, 0); 6809 crit_exit(); 6810 6811 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6812 ipfw_keepalive_done(ctx); 6813 return; 6814 } 6815 6816 anchor = &ctx->ipfw_keepalive_anch; 6817 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 6818 ipfw_keepalive_loop(ctx, anchor); 6819 } 6820 6821 /* 6822 * This procedure is only used to handle keepalives. 
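 * It runs from the per-cpu ipfw_keepalive_ch callout: when the
 * previous round has completed (MSGF_DONE), it kicks the dropable
 * ipfw_keepalive_nm netmsg into the local netisr, and
 * ipfw_keepalive_dispatch() above does the real work.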
It is invoked 6823 * every dyn_keepalive_period 6824 */ 6825 static void 6826 ipfw_keepalive(void *dummy __unused) 6827 { 6828 struct netmsg_base *msg; 6829 6830 KKASSERT(mycpuid < netisr_ncpus); 6831 msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm; 6832 6833 crit_enter(); 6834 if (msg->lmsg.ms_flags & MSGF_DONE) 6835 netisr_sendmsg_oncpu(msg); 6836 crit_exit(); 6837 } 6838 6839 static void 6840 ipfw_ip_input_dispatch(netmsg_t nmsg) 6841 { 6842 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 6843 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6844 struct mbuf *m = nm->m; 6845 struct ip_fw *rule = nm->arg1; 6846 6847 ASSERT_NETISR_NCPUS(mycpuid); 6848 KASSERT(rule->cpuid == mycpuid, 6849 ("rule does not belong to cpu%d", mycpuid)); 6850 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 6851 ("mbuf does not have ipfw continue rule")); 6852 6853 KASSERT(ctx->ipfw_cont_rule == NULL, 6854 ("pending ipfw continue rule")); 6855 ctx->ipfw_cont_rule = rule; 6856 ip_input(m); 6857 6858 /* May not be cleared, if ipfw was unload/disabled. */ 6859 ctx->ipfw_cont_rule = NULL; 6860 6861 /* 6862 * This rule is no longer used; decrement its cross_refs, 6863 * so this rule can be deleted. 6864 */ 6865 rule->cross_refs--; 6866 } 6867 6868 static void 6869 ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule) 6870 { 6871 struct netmsg_genpkt *nm; 6872 6873 KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid)); 6874 6875 /* 6876 * NOTE: 6877 * Bump cross_refs to prevent this rule and its siblings 6878 * from being deleted, while this mbuf is inflight. The 6879 * cross_refs of the sibling rule on the target cpu will 6880 * be decremented, once this mbuf is going to be filtered 6881 * on the target cpu. 6882 */ 6883 rule->cross_refs++; 6884 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE; 6885 6886 nm = &m->m_hdr.mh_genmsg; 6887 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0, 6888 ipfw_ip_input_dispatch); 6889 nm->m = m; 6890 nm->arg1 = rule->cross_rules[cpuid]; 6891 netisr_sendmsg(&nm->base, cpuid); 6892 } 6893 6894 static void 6895 ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif) 6896 { 6897 6898 args->flags = 0; 6899 args->rule = NULL; 6900 args->xlat = NULL; 6901 6902 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 6903 struct m_tag *mtag; 6904 6905 /* Extract info from dummynet tag */ 6906 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 6907 KKASSERT(mtag != NULL); 6908 args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 6909 KKASSERT(args->rule != NULL); 6910 6911 m_tag_delete(m, mtag); 6912 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 6913 } else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) { 6914 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6915 6916 KKASSERT(ctx->ipfw_cont_rule != NULL); 6917 args->rule = ctx->ipfw_cont_rule; 6918 ctx->ipfw_cont_rule = NULL; 6919 6920 if (ctx->ipfw_cont_xlat != NULL) { 6921 args->xlat = ctx->ipfw_cont_xlat; 6922 ctx->ipfw_cont_xlat = NULL; 6923 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) { 6924 args->flags |= IP_FWARG_F_XLATINS; 6925 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS; 6926 } 6927 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) { 6928 args->flags |= IP_FWARG_F_XLATFWD; 6929 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD; 6930 } 6931 } 6932 KKASSERT((m->m_pkthdr.fw_flags & 6933 (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0); 6934 6935 args->flags |= IP_FWARG_F_CONT; 6936 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE; 6937 } 6938 6939 args->eh = NULL; 6940 args->oif = oif; 6941 args->m = m; 6942 
} 6943 6944 static int 6945 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 6946 { 6947 struct ip_fw_args args; 6948 struct mbuf *m = *m0; 6949 int tee = 0, error = 0, ret; 6950 6951 ipfw_init_args(&args, m, NULL); 6952 6953 ret = ipfw_chk(&args); 6954 m = args.m; 6955 if (m == NULL) { 6956 if (ret != IP_FW_REDISPATCH) 6957 error = EACCES; 6958 goto back; 6959 } 6960 6961 switch (ret) { 6962 case IP_FW_PASS: 6963 break; 6964 6965 case IP_FW_DENY: 6966 m_freem(m); 6967 m = NULL; 6968 error = EACCES; 6969 break; 6970 6971 case IP_FW_DUMMYNET: 6972 /* Send packet to the appropriate pipe */ 6973 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args); 6974 break; 6975 6976 case IP_FW_TEE: 6977 tee = 1; 6978 /* FALL THROUGH */ 6979 6980 case IP_FW_DIVERT: 6981 /* 6982 * Must clear bridge tag when changing 6983 */ 6984 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 6985 if (ip_divert_p != NULL) { 6986 m = ip_divert_p(m, tee, 1); 6987 } else { 6988 m_freem(m); 6989 m = NULL; 6990 /* not sure this is the right error msg */ 6991 error = EACCES; 6992 } 6993 break; 6994 6995 default: 6996 panic("unknown ipfw return value: %d", ret); 6997 } 6998 back: 6999 *m0 = m; 7000 return error; 7001 } 7002 7003 static int 7004 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 7005 { 7006 struct ip_fw_args args; 7007 struct mbuf *m = *m0; 7008 int tee = 0, error = 0, ret; 7009 7010 ipfw_init_args(&args, m, ifp); 7011 7012 ret = ipfw_chk(&args); 7013 m = args.m; 7014 if (m == NULL) { 7015 if (ret != IP_FW_REDISPATCH) 7016 error = EACCES; 7017 goto back; 7018 } 7019 7020 switch (ret) { 7021 case IP_FW_PASS: 7022 break; 7023 7024 case IP_FW_DENY: 7025 m_freem(m); 7026 m = NULL; 7027 error = EACCES; 7028 break; 7029 7030 case IP_FW_DUMMYNET: 7031 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args); 7032 break; 7033 7034 case IP_FW_TEE: 7035 tee = 1; 7036 /* FALL THROUGH */ 7037 7038 case IP_FW_DIVERT: 7039 if (ip_divert_p != NULL) { 7040 m = ip_divert_p(m, tee, 0); 7041 } else { 7042 m_freem(m); 7043 m = NULL; 7044 /* not sure this is the right error msg */ 7045 error = EACCES; 7046 } 7047 break; 7048 7049 default: 7050 panic("unknown ipfw return value: %d", ret); 7051 } 7052 back: 7053 *m0 = m; 7054 return error; 7055 } 7056 7057 static void 7058 ipfw_hook(void) 7059 { 7060 struct pfil_head *pfh; 7061 7062 ASSERT_NETISR0; 7063 7064 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 7065 if (pfh == NULL) 7066 return; 7067 7068 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 7069 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 7070 } 7071 7072 static void 7073 ipfw_dehook(void) 7074 { 7075 struct pfil_head *pfh; 7076 7077 ASSERT_NETISR0; 7078 7079 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 7080 if (pfh == NULL) 7081 return; 7082 7083 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 7084 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 7085 } 7086 7087 static int 7088 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS) 7089 { 7090 int dyn_cnt; 7091 7092 dyn_cnt = ipfw_state_cntcoll(); 7093 dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt; 7094 7095 return (sysctl_handle_int(oidp, &dyn_cnt, 0, req)); 7096 } 7097 7098 static int 7099 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS) 7100 { 7101 int state_cnt; 7102 7103 state_cnt = ipfw_state_cntcoll(); 7104 return (sysctl_handle_int(oidp, &state_cnt, 0, req)); 7105 } 7106 7107 static int 7108 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS) 7109 { 7110 int state_max, error; 7111 7112 state_max = ipfw_state_max; 7113 error = sysctl_handle_int(oidp, 
&state_max, 0, req); 7114 if (error || req->newptr == NULL) 7115 return (error); 7116 7117 if (state_max < 1) 7118 return (EINVAL); 7119 7120 ipfw_state_max_set(state_max); 7121 return (0); 7122 } 7123 7124 static int 7125 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS) 7126 { 7127 int dyn_max, error; 7128 7129 dyn_max = ipfw_state_max + ipfw_track_max; 7130 7131 error = sysctl_handle_int(oidp, &dyn_max, 0, req); 7132 if (error || req->newptr == NULL) 7133 return (error); 7134 7135 if (dyn_max < 2) 7136 return (EINVAL); 7137 7138 ipfw_state_max_set(dyn_max / 2); 7139 ipfw_track_max = dyn_max / 2; 7140 return (0); 7141 } 7142 7143 static void 7144 ipfw_sysctl_enable_dispatch(netmsg_t nmsg) 7145 { 7146 int enable = nmsg->lmsg.u.ms_result; 7147 7148 ASSERT_NETISR0; 7149 7150 if (fw_enable == enable) 7151 goto reply; 7152 7153 fw_enable = enable; 7154 if (fw_enable) 7155 ipfw_hook(); 7156 else 7157 ipfw_dehook(); 7158 reply: 7159 netisr_replymsg(&nmsg->base, 0); 7160 } 7161 7162 static int 7163 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS) 7164 { 7165 struct netmsg_base nmsg; 7166 int enable, error; 7167 7168 enable = fw_enable; 7169 error = sysctl_handle_int(oidp, &enable, 0, req); 7170 if (error || req->newptr == NULL) 7171 return error; 7172 7173 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7174 ipfw_sysctl_enable_dispatch); 7175 nmsg.lmsg.u.ms_result = enable; 7176 7177 return netisr_domsg(&nmsg, 0); 7178 } 7179 7180 static int 7181 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS) 7182 { 7183 return sysctl_int_range(oidp, arg1, arg2, req, 7184 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX); 7185 } 7186 7187 static int 7188 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS) 7189 { 7190 7191 return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX); 7192 } 7193 7194 static int 7195 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS) 7196 { 7197 u_long stat = 0; 7198 int cpu, error; 7199 7200 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7201 stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)); 7202 7203 error = sysctl_handle_long(oidp, &stat, 0, req); 7204 if (error || req->newptr == NULL) 7205 return (error); 7206 7207 /* Zero out this stat. 
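 *
 * arg2 is the byte offset of the u_long counter inside struct
 * ipfw_context; the read above summed it over all netisr cpus, and
 * any write through this sysctl clears it on every cpu.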
*/ 7208 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7209 *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0; 7210 return (0); 7211 } 7212 7213 static void 7214 ipfw_ctx_init_dispatch(netmsg_t nmsg) 7215 { 7216 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 7217 struct ipfw_context *ctx; 7218 struct ip_fw *def_rule; 7219 7220 ASSERT_NETISR_NCPUS(mycpuid); 7221 7222 ctx = kmalloc(__offsetof(struct ipfw_context, 7223 ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO); 7224 7225 RB_INIT(&ctx->ipfw_state_tree); 7226 TAILQ_INIT(&ctx->ipfw_state_list); 7227 7228 RB_INIT(&ctx->ipfw_track_tree); 7229 TAILQ_INIT(&ctx->ipfw_track_list); 7230 7231 callout_init_mp(&ctx->ipfw_stateto_ch); 7232 netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport, 7233 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch); 7234 ctx->ipfw_stateexp_anch.st_type = O_ANCHOR; 7235 netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport, 7236 MSGF_DROPABLE, ipfw_state_expire_more_dispatch); 7237 7238 callout_init_mp(&ctx->ipfw_trackto_ch); 7239 netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport, 7240 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch); 7241 netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport, 7242 MSGF_DROPABLE, ipfw_track_expire_more_dispatch); 7243 7244 callout_init_mp(&ctx->ipfw_keepalive_ch); 7245 netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport, 7246 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch); 7247 ctx->ipfw_keepalive_anch.st_type = O_ANCHOR; 7248 netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport, 7249 MSGF_DROPABLE, ipfw_keepalive_more_dispatch); 7250 7251 callout_init_mp(&ctx->ipfw_xlatreap_ch); 7252 netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport, 7253 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch); 7254 TAILQ_INIT(&ctx->ipfw_xlatreap); 7255 7256 ipfw_ctx[mycpuid] = ctx; 7257 7258 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 7259 7260 def_rule->act_ofs = 0; 7261 def_rule->rulenum = IPFW_DEFAULT_RULE; 7262 def_rule->cmd_len = 1; 7263 def_rule->set = IPFW_DEFAULT_SET; 7264 7265 def_rule->cmd[0].len = 1; 7266 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 7267 def_rule->cmd[0].opcode = O_ACCEPT; 7268 #else 7269 if (filters_default_to_accept) 7270 def_rule->cmd[0].opcode = O_ACCEPT; 7271 else 7272 def_rule->cmd[0].opcode = O_DENY; 7273 #endif 7274 7275 def_rule->refcnt = 1; 7276 def_rule->cpuid = mycpuid; 7277 7278 /* Install the default rule */ 7279 ctx->ipfw_default_rule = def_rule; 7280 ctx->ipfw_layer3_chain = def_rule; 7281 7282 /* Link rule CPU sibling */ 7283 ipfw_link_sibling(fwmsg, def_rule); 7284 7285 /* Statistics only need to be updated once */ 7286 if (mycpuid == 0) 7287 ipfw_inc_static_count(def_rule); 7288 7289 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7290 } 7291 7292 static void 7293 ipfw_crossref_reap_dispatch(netmsg_t nmsg) 7294 { 7295 7296 crit_enter(); 7297 /* Reply ASAP */ 7298 netisr_replymsg(&nmsg->base, 0); 7299 crit_exit(); 7300 ipfw_crossref_reap(); 7301 } 7302 7303 static void 7304 ipfw_crossref_timeo(void *dummy __unused) 7305 { 7306 struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm; 7307 7308 KKASSERT(mycpuid == 0); 7309 7310 crit_enter(); 7311 if (msg->lmsg.ms_flags & MSGF_DONE) 7312 netisr_sendmsg_oncpu(msg); 7313 crit_exit(); 7314 } 7315 7316 static void 7317 ipfw_ifaddr_dispatch(netmsg_t nmsg) 7318 { 7319 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 7320 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 7321 struct ip_fw *f; 7322 7323 
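	/*
	 * An address on 'ifp' has changed.  Walk this cpu's rule
	 * chain and, for every rule carrying IPFW_RULE_F_DYNIFADDR,
	 * clear IPFW_IFIP_VALID on each O_IP_SRC_IFIP/O_IP_DST_IFIP
	 * opcode that names this interface, so the interface address
	 * cached in the opcode will be looked up again on next use.
	 */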
ASSERT_NETISR_NCPUS(mycpuid); 7324 7325 for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) { 7326 int l, cmdlen; 7327 ipfw_insn *cmd; 7328 7329 if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0) 7330 continue; 7331 7332 for (l = f->cmd_len, cmd = f->cmd; l > 0; 7333 l -= cmdlen, cmd += cmdlen) { 7334 cmdlen = F_LEN(cmd); 7335 if (cmd->opcode == O_IP_SRC_IFIP || 7336 cmd->opcode == O_IP_DST_IFIP) { 7337 if (strncmp(ifp->if_xname, 7338 ((ipfw_insn_ifip *)cmd)->ifname, 7339 IFNAMSIZ) == 0) 7340 cmd->arg1 &= ~IPFW_IFIP_VALID; 7341 } 7342 } 7343 } 7344 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7345 } 7346 7347 static void 7348 ipfw_ifaddr(void *arg __unused, struct ifnet *ifp, 7349 enum ifaddr_event event __unused, struct ifaddr *ifa __unused) 7350 { 7351 struct netmsg_base nm; 7352 7353 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7354 ipfw_ifaddr_dispatch); 7355 nm.lmsg.u.ms_resultp = ifp; 7356 netisr_domsg_global(&nm); 7357 } 7358 7359 static void 7360 ipfw_init_dispatch(netmsg_t nmsg) 7361 { 7362 struct netmsg_ipfw fwmsg; 7363 int error = 0, cpu; 7364 7365 ASSERT_NETISR0; 7366 7367 if (IPFW_LOADED) { 7368 kprintf("IP firewall already loaded\n"); 7369 error = EEXIST; 7370 goto reply; 7371 } 7372 7373 if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0) 7374 ipfw_table_max = UINT16_MAX; 7375 7376 /* Initialize global track tree. */ 7377 RB_INIT(&ipfw_gd.ipfw_trkcnt_tree); 7378 IPFW_TRKCNT_TOKINIT; 7379 7380 /* GC for freed crossref rules. */ 7381 callout_init_mp(&ipfw_gd.ipfw_crossref_ch); 7382 netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport, 7383 MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch); 7384 7385 ipfw_state_max_set(ipfw_state_max); 7386 ipfw_state_headroom = 8 * netisr_ncpus; 7387 7388 bzero(&fwmsg, sizeof(fwmsg)); 7389 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7390 ipfw_ctx_init_dispatch); 7391 netisr_domsg_global(&fwmsg.base); 7392 7393 ip_fw_chk_ptr = ipfw_chk; 7394 ip_fw_ctl_ptr = ipfw_ctl; 7395 ip_fw_dn_io_ptr = ipfw_dummynet_io; 7396 7397 kprintf("ipfw2 initialized, default to %s, logging ", 7398 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode == 7399 O_ACCEPT ? 
"accept" : "deny"); 7400 7401 #ifdef IPFIREWALL_VERBOSE 7402 fw_verbose = 1; 7403 #endif 7404 #ifdef IPFIREWALL_VERBOSE_LIMIT 7405 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 7406 #endif 7407 if (fw_verbose == 0) { 7408 kprintf("disabled\n"); 7409 } else if (verbose_limit == 0) { 7410 kprintf("unlimited\n"); 7411 } else { 7412 kprintf("limited to %d packets/entry by default\n", 7413 verbose_limit); 7414 } 7415 7416 ip_fw_loaded = 1; 7417 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 7418 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz, 7419 ipfw_state_expire_ipifunc, NULL, cpu); 7420 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz, 7421 ipfw_track_expire_ipifunc, NULL, cpu); 7422 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz, 7423 ipfw_keepalive, NULL, cpu); 7424 } 7425 7426 if (fw_enable) 7427 ipfw_hook(); 7428 7429 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr, 7430 NULL, EVENTHANDLER_PRI_ANY); 7431 if (ipfw_ifaddr_event == NULL) 7432 kprintf("ipfw: ifaddr_event register failed\n"); 7433 7434 reply: 7435 netisr_replymsg(&nmsg->base, error); 7436 } 7437 7438 static int 7439 ipfw_init(void) 7440 { 7441 struct netmsg_base smsg; 7442 7443 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7444 ipfw_init_dispatch); 7445 return netisr_domsg(&smsg, 0); 7446 } 7447 7448 #ifdef KLD_MODULE 7449 7450 static void 7451 ipfw_ctx_fini_dispatch(netmsg_t nmsg) 7452 { 7453 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 7454 7455 ASSERT_NETISR_NCPUS(mycpuid); 7456 7457 callout_cancel(&ctx->ipfw_stateto_ch); 7458 callout_cancel(&ctx->ipfw_trackto_ch); 7459 callout_cancel(&ctx->ipfw_keepalive_ch); 7460 callout_cancel(&ctx->ipfw_xlatreap_ch); 7461 7462 crit_enter(); 7463 netisr_dropmsg(&ctx->ipfw_stateexp_more); 7464 netisr_dropmsg(&ctx->ipfw_stateexp_nm); 7465 netisr_dropmsg(&ctx->ipfw_trackexp_more); 7466 netisr_dropmsg(&ctx->ipfw_trackexp_nm); 7467 netisr_dropmsg(&ctx->ipfw_keepalive_more); 7468 netisr_dropmsg(&ctx->ipfw_keepalive_nm); 7469 netisr_dropmsg(&ctx->ipfw_xlatreap_nm); 7470 crit_exit(); 7471 7472 ipfw_table_flushall_oncpu(ctx, 1); 7473 7474 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7475 } 7476 7477 static void 7478 ipfw_fini_dispatch(netmsg_t nmsg) 7479 { 7480 struct netmsg_base nm; 7481 int error = 0, cpu; 7482 7483 ASSERT_NETISR0; 7484 7485 ipfw_crossref_reap(); 7486 7487 if (ipfw_gd.ipfw_refcnt != 0) { 7488 error = EBUSY; 7489 goto reply; 7490 } 7491 7492 ip_fw_loaded = 0; 7493 ipfw_dehook(); 7494 7495 /* Synchronize any inflight state/track expire IPIs. 
*/ 7496 lwkt_synchronize_ipiqs("ipfwfini"); 7497 7498 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7499 ipfw_ctx_fini_dispatch); 7500 netisr_domsg_global(&nm); 7501 7502 callout_cancel(&ipfw_gd.ipfw_crossref_ch); 7503 crit_enter(); 7504 netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm); 7505 crit_exit(); 7506 7507 if (ipfw_ifaddr_event != NULL) 7508 EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event); 7509 7510 ip_fw_chk_ptr = NULL; 7511 ip_fw_ctl_ptr = NULL; 7512 ip_fw_dn_io_ptr = NULL; 7513 ipfw_flush(1 /* kill default rule */); 7514 7515 /* Free per-cpu contexts */ 7516 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7517 kfree(ipfw_ctx[cpu], M_IPFW); 7518 7519 kprintf("IP firewall unloaded\n"); 7520 reply: 7521 netisr_replymsg(&nmsg->base, error); 7522 } 7523 7524 static void 7525 ipfw_fflush_dispatch(netmsg_t nmsg) 7526 { 7527 7528 ipfw_flush(0 /* keep default rule */); 7529 ipfw_crossref_reap(); 7530 netisr_replymsg(&nmsg->base, 0); 7531 } 7532 7533 static int 7534 ipfw_fini(void) 7535 { 7536 struct netmsg_base smsg; 7537 int i = 0; 7538 7539 for (;;) { 7540 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7541 ipfw_fflush_dispatch); 7542 netisr_domsg(&smsg, 0); 7543 7544 if (ipfw_gd.ipfw_refcnt == 0) 7545 break; 7546 kprintf("ipfw: flush pending %d\n", ++i); 7547 tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2); 7548 } 7549 7550 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7551 ipfw_fini_dispatch); 7552 return netisr_domsg(&smsg, 0); 7553 } 7554 7555 #endif /* KLD_MODULE */ 7556 7557 static int 7558 ipfw_modevent(module_t mod, int type, void *unused) 7559 { 7560 int err = 0; 7561 7562 switch (type) { 7563 case MOD_LOAD: 7564 err = ipfw_init(); 7565 break; 7566 7567 case MOD_UNLOAD: 7568 #ifndef KLD_MODULE 7569 kprintf("ipfw statically compiled, cannot unload\n"); 7570 err = EBUSY; 7571 #else 7572 err = ipfw_fini(); 7573 #endif 7574 break; 7575 default: 7576 break; 7577 } 7578 return err; 7579 } 7580 7581 static moduledata_t ipfwmod = { 7582 "ipfw", 7583 ipfw_modevent, 7584 0 7585 }; 7586 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 7587 MODULE_VERSION(ipfw, 1); 7588