1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 */ 27 28 /* 29 * Implement IP packet firewall (new version) 30 */ 31 32 #include "opt_ipfw.h" 33 #include "opt_inet.h" 34 #ifndef INET 35 #error IPFIREWALL requires INET. 36 #endif /* INET */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/in_cksum.h> 50 #include <sys/limits.h> 51 #include <sys/lock.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <net/pfil.h> 57 #include <net/dummynet/ip_dummynet.h> 58 59 #include <sys/thread2.h> 60 #include <sys/mplock2.h> 61 #include <net/netmsg2.h> 62 63 #include <netinet/in.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_pcb.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/tcp.h> 71 #include <netinet/tcp_seq.h> 72 #include <netinet/tcp_timer.h> 73 #include <netinet/tcp_var.h> 74 #include <netinet/tcpip.h> 75 #include <netinet/udp.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/ip_divert.h> 78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 79 80 #include <net/ipfw/ip_fw2.h> 81 82 #ifdef IPFIREWALL_DEBUG 83 #define DPRINTF(fmt, ...) \ 84 do { \ 85 if (fw_debug > 0) \ 86 kprintf(fmt, __VA_ARGS__); \ 87 } while (0) 88 #else 89 #define DPRINTF(fmt, ...) ((void)0) 90 #endif 91 92 /* 93 * Description about per-CPU rule duplication: 94 * 95 * Module loading/unloading and all ioctl operations are serialized 96 * by netisr0, so we don't have any ordering or locking problems. 97 * 98 * Following graph shows how operation on per-CPU rule list is 99 * performed [2 CPU case]: 100 * 101 * CPU0 CPU1 102 * 103 * netisr0 <------------------------------------+ 104 * domsg | 105 * : | 106 * :(delete/add...) 
| 107 * : | 108 * : netmsg | netmsg 109 * forwardmsg---------->netisr1 | 110 * : | 111 * :(delete/add...) | 112 * : | 113 * : | 114 * replymsg--------------+ 115 * 116 * 117 * 118 * Rule structure [2 CPU case] 119 * 120 * CPU0 CPU1 121 * 122 * layer3_chain layer3_chain 123 * | | 124 * V V 125 * +-------+ sibling +-------+ sibling 126 * | rule1 |--------->| rule1 |--------->NULL 127 * +-------+ +-------+ 128 * | | 129 * |next |next 130 * V V 131 * +-------+ sibling +-------+ sibling 132 * | rule2 |--------->| rule2 |--------->NULL 133 * +-------+ +-------+ 134 * 135 * ip_fw.sibling: 136 * 1) Ease statistics calculation during IP_FW_GET. We only need to 137 * iterate layer3_chain in netisr0; the current rule's duplication 138 * to the other CPUs could safely be read-only accessed through 139 * ip_fw.sibling. 140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion: 141 * a) In netisr0 rule3 is determined to be inserted between rule1 142 * and rule2. To make this decision we need to iterate the 143 * layer3_chain in netisr0. The netmsg, which is used to insert 144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2 145 * in netisr0 as next_rule. 146 * b) After the insertion in netisr0 is done, we will move on to 147 * netisr1. But instead of relocating the rule3's position in 148 * netisr1 by iterating the layer3_chain in netisr1, we set the 149 * netmsg's prev_rule to rule1->sibling and next_rule to 150 * rule2->sibling before the netmsg is forwarded to netisr1 from 151 * netisr0. 152 */ 153 154 /* 155 * Description of states and tracks. 156 * 157 * Both states and tracks are stored in per-cpu RB trees instead of 158 * per-cpu hash tables to avoid the worst case hash degeneration. 159 * 160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime, 161 * measured in seconds and depending on the flags. 162 * 163 * When a packet is received, its address fields are first masked with 164 * the mask defined for the rule, then matched against the entries in 165 * the per-cpu state RB tree. States are generated by 'keep-state' 166 * and 'limit' options. 167 * 168 * The max number of states is ipfw_state_max. When we reach the 169 * maximum number of states we do not create anymore. This is done to 170 * avoid consuming too much memory, but also too much time when 171 * searching on each packet. 172 * 173 * Each state holds a pointer to the parent ipfw rule of the current 174 * CPU so we know what action to perform. States are removed when the 175 * parent rule is deleted. XXX we should make them survive. 176 * 177 * There are some limitations with states -- we do not obey the 178 * 'randomized match', and we do not do multiple passes through the 179 * firewall. XXX check the latter!!! 180 * 181 * States grow independently on each CPU, e.g. 2 CPU case: 182 * 183 * CPU0 CPU1 184 * ................... ................... 185 * : state RB tree : : state RB tree : 186 * : : : : 187 * : state1 state2 : : state3 : 188 * : | | : : | : 189 * :.....|....|......: :........|........: 190 * | | | 191 * | | |st_rule 192 * | | | 193 * V V V 194 * +-------+ +-------+ 195 * | rule1 | | rule1 | 196 * +-------+ +-------+ 197 * 198 * Tracks are used to enforce limits on the number of sessions. Tracks 199 * are generated by 'limit' option. 200 * 201 * The max number of tracks is ipfw_track_max. When we reach the 202 * maximum number of tracks we do not create anymore. This is done to 203 * avoid consuming too much memory. 
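 *
 * For illustration (a rough sketch only; the helper and variable
 * names below are made up, the real check sits in the state-install
 * path), the 'limit' option is enforced against the shared counter
 * roughly like this:
 *
 *	t = track_lookup_or_create(ctx, &key, rule);
 *	if (*t->t_count >= conn_limit)		// too many sessions
 *		refuse to install a new state;	// packet handled by rule
 *	else
 *		atomic_add_int(t->t_count, 1);	// charge this session
 *
 * Since t_count points into the shared ipfw_trkcnt, every CPU sees
 * the same session count; ipfw_state_del() undoes the charge with
 * atomic_subtract_int() when a state linked to the track is deleted.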
204 * 205 * Tracks are organized into two layers, track counter RB tree is 206 * shared between CPUs, track RB tree is per-cpu. States generated by 207 * 'limit' option are linked to the track in addition to the per-cpu 208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case: 209 * 210 * .............................. 211 * : track counter RB tree : 212 * : : 213 * : +-----------+ : 214 * : | trkcnt1 | : 215 * : | | : 216 * : +--->counter<----+ : 217 * : | | | | : 218 * : | +-----------+ | : 219 * :......|................|....: 220 * | | 221 * CPU0 | | CPU1 222 * ................. |t_count | ................. 223 * : track RB tree : | | : track RB tree : 224 * : : | | : : 225 * : +-->track1-------+ +--------track2 : 226 * : | A : : : 227 * : | | : : : 228 * :.|.....|.......: :...............: 229 * | +----------------+ 230 * | .................... | 231 * | : state RB tree : |st_track 232 * | : : | 233 * +---state1 state2---+ 234 * : | | : 235 * :.....|.......|....: 236 * | | 237 * | |st_rule 238 * V V 239 * +----------+ 240 * | rule1 | 241 * +----------+ 242 */ 243 244 #define IPFW_AUTOINC_STEP_MIN 1 245 #define IPFW_AUTOINC_STEP_MAX 1000 246 #define IPFW_AUTOINC_STEP_DEF 100 247 248 #define IPFW_TABLE_MAX_DEF 64 249 250 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ 251 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ 252 253 #define MATCH_REVERSE 0 254 #define MATCH_FORWARD 1 255 #define MATCH_NONE 2 256 #define MATCH_UNKNOWN 3 257 258 #define TIME_LEQ(a, b) ((a) - (b) <= 0) 259 260 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST) 261 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \ 262 (IPFW_STATE_TCPFLAGS << 8)) 263 264 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 265 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 266 #define BOTH_RST (TH_RST | (TH_RST << 8)) 267 /* TH_ACK here means FIN was ACKed. 
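 * In st_state the low byte collects the TCP flags seen in the forward
 * direction and the high byte those seen in the reverse direction
 * (cf. IPFW_STATE_TCPSTATES above); the TH_ACK bits are special and
 * record that the FIN sent in that direction has been ACKed.  As a
 * worked example, once each side's FIN has been ACKed,
 * ipfw_state_update_tcp() below has set both TH_ACK bits, so
 *
 *	(st_state & BOTH_FINACK) == BOTH_FINACK
 *
 * holds, which is exactly what IPFW_STATE_TCPCLOSED() tests to treat
 * the connection as closed.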
*/ 268 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8)) 269 270 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \ 271 (((s)->st_state & BOTH_RST) || \ 272 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK)) 273 274 #define O_ANCHOR O_NOP 275 276 #define IPFW_ISXLAT(type) ((type) == O_REDIRECT) 277 #define IPFW_XLAT_INVALID(s) (IPFW_ISXLAT((s)->st_type) && \ 278 ((struct ipfw_xlat *)(s))->xlat_invalid) 279 280 #define IPFW_MBUF_XLATINS FW_MBUF_PRIVATE1 281 #define IPFW_MBUF_XLATFWD FW_MBUF_PRIVATE2 282 283 #define IPFW_XLATE_INSERT 0x0001 284 #define IPFW_XLATE_FORWARD 0x0002 285 #define IPFW_XLATE_OUTPUT 0x0004 286 287 struct netmsg_ipfw { 288 struct netmsg_base base; 289 const struct ipfw_ioc_rule *ioc_rule; 290 struct ip_fw *next_rule; 291 struct ip_fw *prev_rule; 292 struct ip_fw *sibling; 293 uint32_t rule_flags; 294 struct ip_fw **cross_rules; 295 }; 296 297 struct netmsg_del { 298 struct netmsg_base base; 299 struct ip_fw *start_rule; 300 struct ip_fw *prev_rule; 301 uint16_t rulenum; 302 uint8_t from_set; 303 uint8_t to_set; 304 }; 305 306 struct netmsg_zent { 307 struct netmsg_base base; 308 struct ip_fw *start_rule; 309 uint16_t rulenum; 310 uint16_t log_only; 311 }; 312 313 struct netmsg_cpstate { 314 struct netmsg_base base; 315 struct ipfw_ioc_state *ioc_state; 316 int state_cntmax; 317 int state_cnt; 318 }; 319 320 struct netmsg_tblent { 321 struct netmsg_base base; 322 struct sockaddr *key; 323 struct sockaddr *netmask; 324 struct ipfw_tblent *sibling; 325 int tableid; 326 }; 327 328 struct netmsg_tblflush { 329 struct netmsg_base base; 330 int tableid; 331 int destroy; 332 }; 333 334 struct netmsg_tblexp { 335 struct netmsg_base base; 336 time_t expire; 337 int tableid; 338 int cnt; 339 int expcnt; 340 struct radix_node_head *rnh; 341 }; 342 343 struct ipfw_table_cp { 344 struct ipfw_ioc_tblent *te; 345 int te_idx; 346 int te_cnt; 347 }; 348 349 struct ip_fw_local { 350 /* 351 * offset The offset of a fragment. offset != 0 means that 352 * we have a fragment at this offset of an IPv4 packet. 353 * offset == 0 means that (if this is an IPv4 packet) 354 * this is the first or only fragment. 355 */ 356 u_short offset; 357 358 /* 359 * Local copies of addresses. They are only valid if we have 360 * an IP packet. 361 * 362 * proto The protocol. Set to 0 for non-ip packets, 363 * or to the protocol read from the packet otherwise. 364 * proto != 0 means that we have an IPv4 packet. 365 * 366 * src_port, dst_port port numbers, in HOST format. Only 367 * valid for TCP and UDP packets. 368 * 369 * src_ip, dst_ip ip addresses, in NETWORK format. 370 * Only valid for IPv4 packets. 
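 *
 * For illustration, for a TCP packet these fields end up holding
 * roughly the following (sketch only; the actual assignments are
 * made while the packet is being examined):
 *
 *	proto    = ip->ip_p;
 *	src_ip   = ip->ip_src;			// network format
 *	dst_ip   = ip->ip_dst;			// network format
 *	src_port = ntohs(tcp->th_sport);	// host format
 *	dst_port = ntohs(tcp->th_dport);	// host format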
371 */ 372 uint8_t proto; 373 uint16_t src_port; /* NOTE: host format */ 374 uint16_t dst_port; /* NOTE: host format */ 375 struct in_addr src_ip; /* NOTE: network format */ 376 struct in_addr dst_ip; /* NOTE: network format */ 377 uint16_t ip_len; 378 struct tcphdr *tcp; 379 }; 380 381 struct ipfw_addrs { 382 uint32_t addr1; /* host byte order */ 383 uint32_t addr2; /* host byte order */ 384 }; 385 386 struct ipfw_ports { 387 uint16_t port1; /* host byte order */ 388 uint16_t port2; /* host byte order */ 389 }; 390 391 struct ipfw_key { 392 union { 393 struct ipfw_addrs addrs; 394 uint64_t value; 395 } addr_u; 396 union { 397 struct ipfw_ports ports; 398 uint32_t value; 399 } port_u; 400 uint8_t proto; 401 uint8_t swap; /* IPFW_KEY_SWAP_ */ 402 uint16_t rsvd2; 403 }; 404 405 #define IPFW_KEY_SWAP_ADDRS 0x1 406 #define IPFW_KEY_SWAP_PORTS 0x2 407 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS) 408 409 struct ipfw_trkcnt { 410 RB_ENTRY(ipfw_trkcnt) tc_rblink; 411 struct ipfw_key tc_key; 412 uintptr_t tc_ruleid; 413 int tc_refs; 414 int tc_count; 415 time_t tc_expire; /* userland get-only */ 416 uint16_t tc_rulenum; /* userland get-only */ 417 } __cachealign; 418 419 #define tc_addrs tc_key.addr_u.value 420 #define tc_ports tc_key.port_u.value 421 #define tc_proto tc_key.proto 422 #define tc_saddr tc_key.addr_u.addrs.addr1 423 #define tc_daddr tc_key.addr_u.addrs.addr2 424 #define tc_sport tc_key.port_u.ports.port1 425 #define tc_dport tc_key.port_u.ports.port2 426 427 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt); 428 429 struct ipfw_state; 430 431 struct ipfw_track { 432 RB_ENTRY(ipfw_track) t_rblink; 433 struct ipfw_key t_key; 434 struct ip_fw *t_rule; 435 time_t t_lastexp; 436 LIST_HEAD(, ipfw_state) t_state_list; 437 time_t t_expire; 438 volatile int *t_count; 439 struct ipfw_trkcnt *t_trkcnt; 440 TAILQ_ENTRY(ipfw_track) t_link; 441 }; 442 443 #define t_addrs t_key.addr_u.value 444 #define t_ports t_key.port_u.value 445 #define t_proto t_key.proto 446 #define t_saddr t_key.addr_u.addrs.addr1 447 #define t_daddr t_key.addr_u.addrs.addr2 448 #define t_sport t_key.port_u.ports.port1 449 #define t_dport t_key.port_u.ports.port2 450 451 RB_HEAD(ipfw_track_tree, ipfw_track); 452 TAILQ_HEAD(ipfw_track_list, ipfw_track); 453 454 struct ipfw_state { 455 RB_ENTRY(ipfw_state) st_rblink; 456 struct ipfw_key st_key; 457 458 time_t st_expire; /* expire time */ 459 struct ip_fw *st_rule; 460 461 uint64_t st_pcnt; /* packets */ 462 uint64_t st_bcnt; /* bytes */ 463 464 /* 465 * st_state: 466 * State of this rule, typically a combination of TCP flags. 467 * 468 * st_ack_fwd/st_ack_rev: 469 * Most recent ACKs in forward and reverse direction. They 470 * are used to generate keepalives. 
471 */ 472 uint32_t st_state; 473 uint32_t st_ack_fwd; /* host byte order */ 474 uint32_t st_seq_fwd; /* host byte order */ 475 uint32_t st_ack_rev; /* host byte order */ 476 uint32_t st_seq_rev; /* host byte order */ 477 478 uint16_t st_flags; /* IPFW_STATE_F_ */ 479 uint16_t st_type; /* KEEP_STATE/LIMIT/RDR */ 480 struct ipfw_track *st_track; 481 482 LIST_ENTRY(ipfw_state) st_trklink; 483 TAILQ_ENTRY(ipfw_state) st_link; 484 }; 485 486 #define st_addrs st_key.addr_u.value 487 #define st_ports st_key.port_u.value 488 #define st_proto st_key.proto 489 #define st_swap st_key.swap 490 491 #define IPFW_STATE_F_ACKFWD 0x0001 492 #define IPFW_STATE_F_SEQFWD 0x0002 493 #define IPFW_STATE_F_ACKREV 0x0004 494 #define IPFW_STATE_F_SEQREV 0x0008 495 #define IPFW_STATE_F_XLATSRC 0x0010 496 #define IPFW_STATE_F_XLATSLAVE 0x0020 497 #define IPFW_STATE_F_LINKED 0x0040 498 499 #define IPFW_STATE_SCANSKIP(s) ((s)->st_type == O_ANCHOR || \ 500 ((s)->st_flags & IPFW_STATE_F_XLATSLAVE)) 501 502 /* Expired or being deleted. */ 503 #define IPFW_STATE_ISDEAD(s) (TIME_LEQ((s)->st_expire, time_uptime) || \ 504 IPFW_XLAT_INVALID((s))) 505 506 TAILQ_HEAD(ipfw_state_list, ipfw_state); 507 RB_HEAD(ipfw_state_tree, ipfw_state); 508 509 struct ipfw_xlat { 510 struct ipfw_state xlat_st; /* MUST be the first field */ 511 uint32_t xlat_addr; /* network byte order */ 512 uint16_t xlat_port; /* network byte order */ 513 uint16_t xlat_dir; /* MATCH_ */ 514 struct ifnet *xlat_ifp; /* matching ifnet */ 515 struct ipfw_xlat *xlat_pair; /* paired state */ 516 int xlat_pcpu; /* paired cpu */ 517 volatile int xlat_invalid; /* invalid, but not dtor yet */ 518 volatile uint64_t xlat_crefs; /* cross references */ 519 struct netmsg_base xlat_freenm; /* for remote free */ 520 }; 521 522 #define xlat_type xlat_st.st_type 523 #define xlat_flags xlat_st.st_flags 524 #define xlat_rule xlat_st.st_rule 525 #define xlat_bcnt xlat_st.st_bcnt 526 #define xlat_pcnt xlat_st.st_pcnt 527 528 struct ipfw_tblent { 529 struct radix_node te_nodes[2]; 530 struct sockaddr_in te_key; 531 u_long te_use; 532 time_t te_lastuse; 533 struct ipfw_tblent *te_sibling; 534 volatile int te_expired; 535 }; 536 537 struct ipfw_context { 538 struct ip_fw *ipfw_layer3_chain; /* rules for layer3 */ 539 struct ip_fw *ipfw_default_rule; /* default rule */ 540 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/ 541 542 /* 543 * ipfw_set_disable contains one bit per set value (0..31). 544 * If the bit is set, all rules with the corresponding set 545 * are disabled. Set IPDW_DEFAULT_SET is reserved for the 546 * default rule and CANNOT be disabled. 
547 */ 548 uint32_t ipfw_set_disable; 549 550 uint8_t ipfw_flags; /* IPFW_FLAG_ */ 551 552 struct ip_fw *ipfw_cont_rule; 553 struct ipfw_xlat *ipfw_cont_xlat; 554 555 struct ipfw_state_tree ipfw_state_tree; 556 struct ipfw_state_list ipfw_state_list; 557 int ipfw_state_loosecnt; 558 int ipfw_state_cnt; 559 560 union { 561 struct ipfw_state state; 562 struct ipfw_track track; 563 struct ipfw_trkcnt trkcnt; 564 } ipfw_tmpkey; 565 566 struct ipfw_track_tree ipfw_track_tree; 567 struct ipfw_track_list ipfw_track_list; 568 struct ipfw_trkcnt *ipfw_trkcnt_spare; 569 570 struct callout ipfw_stateto_ch; 571 time_t ipfw_state_lastexp; 572 struct netmsg_base ipfw_stateexp_nm; 573 struct netmsg_base ipfw_stateexp_more; 574 struct ipfw_state ipfw_stateexp_anch; 575 576 struct callout ipfw_trackto_ch; 577 time_t ipfw_track_lastexp; 578 struct netmsg_base ipfw_trackexp_nm; 579 struct netmsg_base ipfw_trackexp_more; 580 struct ipfw_track ipfw_trackexp_anch; 581 582 struct callout ipfw_keepalive_ch; 583 struct netmsg_base ipfw_keepalive_nm; 584 struct netmsg_base ipfw_keepalive_more; 585 struct ipfw_state ipfw_keepalive_anch; 586 587 struct callout ipfw_xlatreap_ch; 588 struct netmsg_base ipfw_xlatreap_nm; 589 struct ipfw_state_list ipfw_xlatreap; 590 591 /* 592 * Statistics 593 */ 594 u_long ipfw_sts_reap; 595 u_long ipfw_sts_reapfailed; 596 u_long ipfw_sts_overflow; 597 u_long ipfw_sts_nomem; 598 u_long ipfw_sts_tcprecycled; 599 600 u_long ipfw_tks_nomem; 601 u_long ipfw_tks_reap; 602 u_long ipfw_tks_reapfailed; 603 u_long ipfw_tks_overflow; 604 u_long ipfw_tks_cntnomem; 605 606 u_long ipfw_frags; 607 u_long ipfw_defraged; 608 u_long ipfw_defrag_remote; 609 610 u_long ipfw_xlated; 611 u_long ipfw_xlate_split; 612 u_long ipfw_xlate_conflicts; 613 u_long ipfw_xlate_cresolved; 614 615 /* Last field */ 616 struct radix_node_head *ipfw_tables[]; 617 }; 618 619 #define IPFW_FLAG_KEEPALIVE 0x01 620 #define IPFW_FLAG_STATEEXP 0x02 621 #define IPFW_FLAG_TRACKEXP 0x04 622 #define IPFW_FLAG_STATEREAP 0x08 623 #define IPFW_FLAG_TRACKREAP 0x10 624 625 #define ipfw_state_tmpkey ipfw_tmpkey.state 626 #define ipfw_track_tmpkey ipfw_tmpkey.track 627 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt 628 629 struct ipfw_global { 630 int ipfw_state_loosecnt; /* cache aligned */ 631 time_t ipfw_state_globexp __cachealign; 632 633 struct lwkt_token ipfw_trkcnt_token __cachealign; 634 struct ipfw_trkcnt_tree ipfw_trkcnt_tree; 635 int ipfw_trkcnt_cnt; 636 time_t ipfw_track_globexp; 637 638 /* Accessed in netisr0. */ 639 struct ip_fw *ipfw_crossref_free __cachealign; 640 struct callout ipfw_crossref_ch; 641 struct netmsg_base ipfw_crossref_nm; 642 643 #ifdef KLD_MODULE 644 /* 645 * Module can not be unloaded, if there are references to 646 * certains rules of ipfw(4), e.g. dummynet(4) 647 */ 648 int ipfw_refcnt __cachealign; 649 #endif 650 } __cachealign; 651 652 static struct ipfw_context *ipfw_ctx[MAXCPU]; 653 654 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 655 656 /* 657 * Following two global variables are accessed and updated only 658 * in netisr0. 659 */ 660 static uint32_t static_count; /* # of static rules */ 661 static uint32_t static_ioc_len; /* bytes of static rules */ 662 663 /* 664 * If 1, then ipfw static rules are being flushed, 665 * ipfw_chk() will skip to the default rule. 
 */
static int ipfw_flushing;

static int fw_verbose;
static int verbose_limit;

static int fw_debug;
static int autoinc_step = IPFW_AUTOINC_STEP_DEF;

static int ipfw_table_max = IPFW_TABLE_MAX_DEF;

static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);

TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max);

SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0,
    "Firewall statistics");

SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
    &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
    &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
    "Rule number autoincrement step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_RW,
    &fw_one_pass, 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
    &fw_debug, 0, "Enable printing of debug ip_fw statements");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
    &fw_verbose, 0, "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
    &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD,
    &ipfw_table_max, 0, "Max # of tables");

static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS);
static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS);

/*
 * Timeouts for various events in handling states.
 *
 * NOTE:
 * 1 == 0~1 second.
 * 2 == 1~2 second(s).
 *
 * We use 2 seconds for FIN lifetime, so that the states will not be
 * ripped prematurely.
 */
static uint32_t dyn_ack_lifetime = 300;
static uint32_t dyn_syn_lifetime = 20;
static uint32_t dyn_finwait_lifetime = 20;
static uint32_t dyn_fin_lifetime = 2;
static uint32_t dyn_rst_lifetime = 2;
static uint32_t dyn_udp_lifetime = 10;
static uint32_t dyn_short_lifetime = 5;	/* used by tracks too */

/*
 * Keepalives are sent if dyn_keepalive is set. They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of lifetime of a rule.
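 *
 * E.g. with the defaults below (dyn_keepalive_interval = 20,
 * dyn_keepalive_period = 5, dyn_ack_lifetime = 300), a TCP state last
 * refreshed at time T expires at T + 300 and keepalives are generated
 * between T + 280 and T + 300, at most one every 5 seconds, giving
 * both peers a chance to refresh the state before it is ripped.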
732 */ 733 static uint32_t dyn_keepalive_interval = 20; 734 static uint32_t dyn_keepalive_period = 5; 735 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 736 737 static struct ipfw_global ipfw_gd; 738 static int ipfw_state_loosecnt_updthr; 739 static int ipfw_state_max = 4096; /* max # of states */ 740 static int ipfw_track_max = 4096; /* max # of tracks */ 741 742 static int ipfw_state_headroom; /* setup at module load time */ 743 static int ipfw_state_reap_min = 8; 744 static int ipfw_state_expire_max = 32; 745 static int ipfw_state_scan_max = 256; 746 static int ipfw_keepalive_max = 8; 747 static int ipfw_track_reap_max = 4; 748 static int ipfw_track_expire_max = 16; 749 static int ipfw_track_scan_max = 128; 750 751 static eventhandler_tag ipfw_ifaddr_event; 752 753 /* Compat */ 754 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, 755 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I", 756 "Number of states and tracks"); 757 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 758 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I", 759 "Max number of states and tracks"); 760 761 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt, 762 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I", 763 "Number of states"); 764 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max, 765 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I", 766 "Max number of states"); 767 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW, 768 &ipfw_state_headroom, 0, "headroom for state reap"); 769 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD, 770 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks"); 771 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW, 772 &ipfw_track_max, 0, "Max number of tracks"); 773 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 774 &static_count, 0, "Number of static rules"); 775 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 776 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 777 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 778 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 779 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 780 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 781 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW, 782 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait"); 783 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 784 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 785 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 786 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 787 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 788 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 789 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 790 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 791 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max, 792 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt, 793 "I", "# of states to scan for each expire iteration"); 794 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max, 795 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt, 796 "I", "# of states to expire for each expire iteration"); 797 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max, 798 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt, 799 "I", "# of states to expire for each expire iteration"); 800 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min, 801 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt, 802 "I", "# of states to reap for state shortage"); 803 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max, 804 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt, 805 "I", "# of tracks to scan for each expire iteration"); 806 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max, 807 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt, 808 "I", "# of tracks to expire for each expire iteration"); 809 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max, 810 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt, 811 "I", "# of tracks to reap for track shortage"); 812 813 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap, 814 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 815 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat, 816 "LU", "# of state reaps due to states shortage"); 817 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed, 818 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 819 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat, 820 "LU", "# of state reap failure"); 821 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow, 822 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 823 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat, 824 "LU", "# of state overflow"); 825 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem, 826 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 827 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat, 828 "LU", "# of state allocation failure"); 829 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled, 830 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 831 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat, 832 "LU", "# of state deleted due to fast TCP port recycling"); 833 834 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem, 835 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 836 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat, 837 "LU", "# of track allocation failure"); 838 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap, 839 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 840 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat, 841 "LU", "# of track reap due to tracks shortage"); 842 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed, 843 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 844 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat, 845 "LU", "# of track reap failure"); 846 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow, 847 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 848 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat, 849 "LU", "# of track overflow"); 850 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem, 851 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 852 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat, 853 "LU", "# of track 
counter allocation failure"); 854 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags, 855 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 856 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat, 857 "LU", "# of IP fragements defraged"); 858 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged, 859 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 860 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat, 861 "LU", "# of IP packets after defrag"); 862 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote, 863 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 864 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat, 865 "LU", "# of IP packets after defrag dispatched to remote cpus"); 866 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlated, 867 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 868 __offsetof(struct ipfw_context, ipfw_xlated), ipfw_sysctl_stat, 869 "LU", "# address/port translations"); 870 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_split, 871 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 872 __offsetof(struct ipfw_context, ipfw_xlate_split), ipfw_sysctl_stat, 873 "LU", "# address/port translations split between different cpus"); 874 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_conflicts, 875 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 876 __offsetof(struct ipfw_context, ipfw_xlate_conflicts), ipfw_sysctl_stat, 877 "LU", "# address/port translations conflicts on remote cpu"); 878 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, xlate_cresolved, 879 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 880 __offsetof(struct ipfw_context, ipfw_xlate_cresolved), ipfw_sysctl_stat, 881 "LU", "# address/port translations conflicts resolved on remote cpu"); 882 883 static int ipfw_state_cmp(struct ipfw_state *, 884 struct ipfw_state *); 885 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *, 886 struct ipfw_trkcnt *); 887 static int ipfw_track_cmp(struct ipfw_track *, 888 struct ipfw_track *); 889 890 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 891 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 892 893 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 894 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 895 896 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 897 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 898 899 static int ipfw_chk(struct ip_fw_args *); 900 static void ipfw_track_expire_ipifunc(void *); 901 static void ipfw_state_expire_ipifunc(void *); 902 static void ipfw_keepalive(void *); 903 static int ipfw_state_expire_start(struct ipfw_context *, 904 int, int); 905 static void ipfw_crossref_timeo(void *); 906 static void ipfw_state_remove(struct ipfw_context *, 907 struct ipfw_state *); 908 static void ipfw_xlat_reap_timeo(void *); 909 static void ipfw_defrag_redispatch(struct mbuf *, int, 910 struct ip_fw *); 911 912 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token) 913 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token) 914 #define IPFW_TRKCNT_TOKINIT \ 915 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); 916 917 static void 918 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 919 const struct sockaddr *netmask) 920 { 921 const u_char *cp1 = (const u_char *)src; 922 u_char *cp2 = (u_char *)dst; 923 const u_char *cp3 = (const u_char *)netmask; 924 u_char *cplim = cp2 + *cp3; 925 u_char *cplim2 = cp2 + *cp1; 926 927 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 928 cp3 += 2; 929 if (cplim > cplim2) 930 cplim = 
cplim2; 931 while (cp2 < cplim) 932 *cp2++ = *cp1++ & *cp3++; 933 if (cp2 < cplim2) 934 bzero(cp2, cplim2 - cp2); 935 } 936 937 static __inline uint16_t 938 pfil_cksum_fixup(uint16_t cksum, uint16_t old, uint16_t new, uint8_t udp) 939 { 940 uint32_t l; 941 942 if (udp && !cksum) 943 return (0x0000); 944 l = cksum + old - new; 945 l = (l >> 16) + (l & 65535); 946 l = l & 65535; 947 if (udp && !l) 948 return (0xFFFF); 949 return (l); 950 } 951 952 static __inline void 953 ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport, 954 in_addr_t daddr, uint16_t dport, uint8_t proto) 955 { 956 957 key->proto = proto; 958 key->swap = 0; 959 960 if (saddr < daddr) { 961 key->addr_u.addrs.addr1 = daddr; 962 key->addr_u.addrs.addr2 = saddr; 963 key->swap |= IPFW_KEY_SWAP_ADDRS; 964 } else { 965 key->addr_u.addrs.addr1 = saddr; 966 key->addr_u.addrs.addr2 = daddr; 967 } 968 969 if (sport < dport) { 970 key->port_u.ports.port1 = dport; 971 key->port_u.ports.port2 = sport; 972 key->swap |= IPFW_KEY_SWAP_PORTS; 973 } else { 974 key->port_u.ports.port1 = sport; 975 key->port_u.ports.port2 = dport; 976 } 977 978 if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS)) 979 key->swap |= IPFW_KEY_SWAP_PORTS; 980 if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS)) 981 key->swap |= IPFW_KEY_SWAP_ADDRS; 982 } 983 984 static __inline void 985 ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport, 986 in_addr_t *daddr, uint16_t *dport) 987 { 988 989 if (key->swap & IPFW_KEY_SWAP_ADDRS) { 990 *saddr = key->addr_u.addrs.addr2; 991 *daddr = key->addr_u.addrs.addr1; 992 } else { 993 *saddr = key->addr_u.addrs.addr1; 994 *daddr = key->addr_u.addrs.addr2; 995 } 996 997 if (key->swap & IPFW_KEY_SWAP_PORTS) { 998 *sport = key->port_u.ports.port2; 999 *dport = key->port_u.ports.port1; 1000 } else { 1001 *sport = key->port_u.ports.port1; 1002 *dport = key->port_u.ports.port2; 1003 } 1004 } 1005 1006 static int 1007 ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2) 1008 { 1009 1010 if (s1->st_proto > s2->st_proto) 1011 return (1); 1012 if (s1->st_proto < s2->st_proto) 1013 return (-1); 1014 1015 if (s1->st_addrs > s2->st_addrs) 1016 return (1); 1017 if (s1->st_addrs < s2->st_addrs) 1018 return (-1); 1019 1020 if (s1->st_ports > s2->st_ports) 1021 return (1); 1022 if (s1->st_ports < s2->st_ports) 1023 return (-1); 1024 1025 if (s1->st_swap == s2->st_swap || 1026 (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL) 1027 return (0); 1028 1029 if (s1->st_swap > s2->st_swap) 1030 return (1); 1031 else 1032 return (-1); 1033 } 1034 1035 static int 1036 ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2) 1037 { 1038 1039 if (t1->tc_proto > t2->tc_proto) 1040 return (1); 1041 if (t1->tc_proto < t2->tc_proto) 1042 return (-1); 1043 1044 if (t1->tc_addrs > t2->tc_addrs) 1045 return (1); 1046 if (t1->tc_addrs < t2->tc_addrs) 1047 return (-1); 1048 1049 if (t1->tc_ports > t2->tc_ports) 1050 return (1); 1051 if (t1->tc_ports < t2->tc_ports) 1052 return (-1); 1053 1054 if (t1->tc_ruleid > t2->tc_ruleid) 1055 return (1); 1056 if (t1->tc_ruleid < t2->tc_ruleid) 1057 return (-1); 1058 1059 return (0); 1060 } 1061 1062 static int 1063 ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2) 1064 { 1065 1066 if (t1->t_proto > t2->t_proto) 1067 return (1); 1068 if (t1->t_proto < t2->t_proto) 1069 return (-1); 1070 1071 if (t1->t_addrs > t2->t_addrs) 1072 return (1); 1073 if (t1->t_addrs < t2->t_addrs) 1074 return (-1); 1075 1076 if (t1->t_ports > t2->t_ports) 1077 return (1); 
1078 if (t1->t_ports < t2->t_ports) 1079 return (-1); 1080 1081 if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule) 1082 return (1); 1083 if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule) 1084 return (-1); 1085 1086 return (0); 1087 } 1088 1089 static __inline struct ipfw_state * 1090 ipfw_state_link(struct ipfw_context *ctx, struct ipfw_state *s) 1091 { 1092 struct ipfw_state *dup; 1093 1094 KASSERT((s->st_flags & IPFW_STATE_F_LINKED) == 0, 1095 ("state %p was linked", s)); 1096 dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1097 if (dup == NULL) { 1098 TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link); 1099 s->st_flags |= IPFW_STATE_F_LINKED; 1100 } 1101 return (dup); 1102 } 1103 1104 static __inline void 1105 ipfw_state_unlink(struct ipfw_context *ctx, struct ipfw_state *s) 1106 { 1107 1108 KASSERT(s->st_flags & IPFW_STATE_F_LINKED, 1109 ("state %p was not linked", s)); 1110 RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s); 1111 TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link); 1112 s->st_flags &= ~IPFW_STATE_F_LINKED; 1113 } 1114 1115 static void 1116 ipfw_state_max_set(int state_max) 1117 { 1118 1119 ipfw_state_max = state_max; 1120 /* Allow 5% states over-allocation. */ 1121 ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus; 1122 } 1123 1124 static __inline int 1125 ipfw_state_cntcoll(void) 1126 { 1127 int cpu, state_cnt = 0; 1128 1129 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 1130 state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt; 1131 return (state_cnt); 1132 } 1133 1134 static __inline int 1135 ipfw_state_cntsync(void) 1136 { 1137 int state_cnt; 1138 1139 state_cnt = ipfw_state_cntcoll(); 1140 ipfw_gd.ipfw_state_loosecnt = state_cnt; 1141 return (state_cnt); 1142 } 1143 1144 static __inline int 1145 ipfw_free_rule(struct ip_fw *rule) 1146 { 1147 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid)); 1148 KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt)); 1149 rule->refcnt--; 1150 if (rule->refcnt == 0) { 1151 if (rule->cross_rules != NULL) 1152 kfree(rule->cross_rules, M_IPFW); 1153 kfree(rule, M_IPFW); 1154 return 1; 1155 } 1156 return 0; 1157 } 1158 1159 static void 1160 ipfw_unref_rule(void *priv) 1161 { 1162 ipfw_free_rule(priv); 1163 #ifdef KLD_MODULE 1164 KASSERT(ipfw_gd.ipfw_refcnt > 0, 1165 ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt)); 1166 atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1); 1167 #endif 1168 } 1169 1170 static __inline void 1171 ipfw_ref_rule(struct ip_fw *rule) 1172 { 1173 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid)); 1174 #ifdef KLD_MODULE 1175 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 1176 #endif 1177 rule->refcnt++; 1178 } 1179 1180 /* 1181 * This macro maps an ip pointer into a layer3 header pointer of type T 1182 */ 1183 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 1184 1185 static __inline int 1186 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 1187 { 1188 int type = L3HDR(struct icmp,ip)->icmp_type; 1189 1190 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type))); 1191 } 1192 1193 #define TT ((1 << ICMP_ECHO) | \ 1194 (1 << ICMP_ROUTERSOLICIT) | \ 1195 (1 << ICMP_TSTAMP) | \ 1196 (1 << ICMP_IREQ) | \ 1197 (1 << ICMP_MASKREQ)) 1198 1199 static int 1200 is_icmp_query(struct ip *ip) 1201 { 1202 int type = L3HDR(struct icmp, ip)->icmp_type; 1203 1204 return (type <= ICMP_MAXTYPE && (TT & (1 << type))); 1205 } 1206 1207 #undef TT 1208 1209 /* 1210 * The following checks use two arrays of 8 or 16 bits to store the 1211 * bits that we want set or clear, respectively. 
They are in the 1212 * low and high half of cmd->arg1 or cmd->d[0]. 1213 * 1214 * We scan options and store the bits we find set. We succeed if 1215 * 1216 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 1217 * 1218 * The code is sometimes optimized not to store additional variables. 1219 */ 1220 static int 1221 flags_match(ipfw_insn *cmd, uint8_t bits) 1222 { 1223 u_char want_clear; 1224 bits = ~bits; 1225 1226 if (((cmd->arg1 & 0xff) & bits) != 0) 1227 return 0; /* some bits we want set were clear */ 1228 1229 want_clear = (cmd->arg1 >> 8) & 0xff; 1230 if ((want_clear & bits) != want_clear) 1231 return 0; /* some bits we want clear were set */ 1232 return 1; 1233 } 1234 1235 static int 1236 ipopts_match(struct ip *ip, ipfw_insn *cmd) 1237 { 1238 int optlen, bits = 0; 1239 u_char *cp = (u_char *)(ip + 1); 1240 int x = (ip->ip_hl << 2) - sizeof(struct ip); 1241 1242 for (; x > 0; x -= optlen, cp += optlen) { 1243 int opt = cp[IPOPT_OPTVAL]; 1244 1245 if (opt == IPOPT_EOL) 1246 break; 1247 1248 if (opt == IPOPT_NOP) { 1249 optlen = 1; 1250 } else { 1251 optlen = cp[IPOPT_OLEN]; 1252 if (optlen <= 0 || optlen > x) 1253 return 0; /* invalid or truncated */ 1254 } 1255 1256 switch (opt) { 1257 case IPOPT_LSRR: 1258 bits |= IP_FW_IPOPT_LSRR; 1259 break; 1260 1261 case IPOPT_SSRR: 1262 bits |= IP_FW_IPOPT_SSRR; 1263 break; 1264 1265 case IPOPT_RR: 1266 bits |= IP_FW_IPOPT_RR; 1267 break; 1268 1269 case IPOPT_TS: 1270 bits |= IP_FW_IPOPT_TS; 1271 break; 1272 1273 default: 1274 break; 1275 } 1276 } 1277 return (flags_match(cmd, bits)); 1278 } 1279 1280 static int 1281 tcpopts_match(struct ip *ip, ipfw_insn *cmd) 1282 { 1283 int optlen, bits = 0; 1284 struct tcphdr *tcp = L3HDR(struct tcphdr,ip); 1285 u_char *cp = (u_char *)(tcp + 1); 1286 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 1287 1288 for (; x > 0; x -= optlen, cp += optlen) { 1289 int opt = cp[0]; 1290 1291 if (opt == TCPOPT_EOL) 1292 break; 1293 1294 if (opt == TCPOPT_NOP) { 1295 optlen = 1; 1296 } else { 1297 optlen = cp[1]; 1298 if (optlen <= 0) 1299 break; 1300 } 1301 1302 switch (opt) { 1303 case TCPOPT_MAXSEG: 1304 bits |= IP_FW_TCPOPT_MSS; 1305 break; 1306 1307 case TCPOPT_WINDOW: 1308 bits |= IP_FW_TCPOPT_WINDOW; 1309 break; 1310 1311 case TCPOPT_SACK_PERMITTED: 1312 case TCPOPT_SACK: 1313 bits |= IP_FW_TCPOPT_SACK; 1314 break; 1315 1316 case TCPOPT_TIMESTAMP: 1317 bits |= IP_FW_TCPOPT_TS; 1318 break; 1319 1320 case TCPOPT_CC: 1321 case TCPOPT_CCNEW: 1322 case TCPOPT_CCECHO: 1323 bits |= IP_FW_TCPOPT_CC; 1324 break; 1325 1326 default: 1327 break; 1328 } 1329 } 1330 return (flags_match(cmd, bits)); 1331 } 1332 1333 static int 1334 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 1335 { 1336 if (ifp == NULL) /* no iface with this packet, match fails */ 1337 return 0; 1338 1339 /* Check by name or by IP address */ 1340 if (cmd->name[0] != '\0') { /* match by name */ 1341 /* Check name */ 1342 if (cmd->p.glob) { 1343 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) 1344 return(1); 1345 } else { 1346 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 1347 return(1); 1348 } 1349 } else { 1350 struct ifaddr_container *ifac; 1351 1352 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1353 struct ifaddr *ia = ifac->ifa; 1354 1355 if (ia->ifa_addr == NULL) 1356 continue; 1357 if (ia->ifa_addr->sa_family != AF_INET) 1358 continue; 1359 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 1360 (ia->ifa_addr))->sin_addr.s_addr) 1361 return(1); /* match */ 1362 } 1363 } 1364 return(0); /* no match, fail ... 
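 *
 * For reference, the three ways a packet can match above: an exact
 * name such as "em0" is compared with strncmp(), a pattern such as
 * "em*" (cmd->p.glob set) goes through kfnmatch(), and an interface
 * given by IPv4 address is matched against the addresses configured
 * on the interface the packet arrived on or leaves through.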
*/ 1365 } 1366 1367 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 1368 1369 /* 1370 * We enter here when we have a rule with O_LOG. 1371 * XXX this function alone takes about 2Kbytes of code! 1372 */ 1373 static void 1374 ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen, 1375 struct ether_header *eh, struct mbuf *m, struct ifnet *oif) 1376 { 1377 char *action; 1378 int limit_reached = 0; 1379 char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN]; 1380 1381 fragment[0] = '\0'; 1382 proto[0] = '\0'; 1383 1384 if (f == NULL) { /* bogus pkt */ 1385 if (verbose_limit != 0 && 1386 ctx->ipfw_norule_counter >= verbose_limit) 1387 return; 1388 ctx->ipfw_norule_counter++; 1389 if (ctx->ipfw_norule_counter == verbose_limit) 1390 limit_reached = verbose_limit; 1391 action = "Refuse"; 1392 } else { /* O_LOG is the first action, find the real one */ 1393 ipfw_insn *cmd = ACTION_PTR(f); 1394 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 1395 1396 if (l->max_log != 0 && l->log_left == 0) 1397 return; 1398 l->log_left--; 1399 if (l->log_left == 0) 1400 limit_reached = l->max_log; 1401 cmd += F_LEN(cmd); /* point to first action */ 1402 if (cmd->opcode == O_PROB) 1403 cmd += F_LEN(cmd); 1404 1405 action = action2; 1406 switch (cmd->opcode) { 1407 case O_DENY: 1408 action = "Deny"; 1409 break; 1410 1411 case O_REJECT: 1412 if (cmd->arg1==ICMP_REJECT_RST) { 1413 action = "Reset"; 1414 } else if (cmd->arg1==ICMP_UNREACH_HOST) { 1415 action = "Reject"; 1416 } else { 1417 ksnprintf(SNPARGS(action2, 0), "Unreach %d", 1418 cmd->arg1); 1419 } 1420 break; 1421 1422 case O_ACCEPT: 1423 action = "Accept"; 1424 break; 1425 1426 case O_COUNT: 1427 action = "Count"; 1428 break; 1429 1430 case O_DIVERT: 1431 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); 1432 break; 1433 1434 case O_TEE: 1435 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); 1436 break; 1437 1438 case O_SKIPTO: 1439 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); 1440 break; 1441 1442 case O_PIPE: 1443 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); 1444 break; 1445 1446 case O_QUEUE: 1447 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); 1448 break; 1449 1450 case O_FORWARD_IP: 1451 { 1452 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 1453 int len; 1454 1455 len = ksnprintf(SNPARGS(action2, 0), 1456 "Forward to %s", 1457 kinet_ntoa(sa->sa.sin_addr, abuf)); 1458 if (sa->sa.sin_port) { 1459 ksnprintf(SNPARGS(action2, len), ":%d", 1460 sa->sa.sin_port); 1461 } 1462 } 1463 break; 1464 1465 default: 1466 action = "UNKNOWN"; 1467 break; 1468 } 1469 } 1470 1471 if (hlen == 0) { /* non-ip */ 1472 ksnprintf(SNPARGS(proto, 0), "MAC"); 1473 } else { 1474 struct ip *ip = mtod(m, struct ip *); 1475 /* these three are all aliases to the same thing */ 1476 struct icmp *const icmp = L3HDR(struct icmp, ip); 1477 struct tcphdr *const tcp = (struct tcphdr *)icmp; 1478 struct udphdr *const udp = (struct udphdr *)icmp; 1479 1480 int ip_off, offset, ip_len; 1481 int len; 1482 1483 if (eh != NULL) { /* layer 2 packets are as on the wire */ 1484 ip_off = ntohs(ip->ip_off); 1485 ip_len = ntohs(ip->ip_len); 1486 } else { 1487 ip_off = ip->ip_off; 1488 ip_len = ip->ip_len; 1489 } 1490 offset = ip_off & IP_OFFMASK; 1491 switch (ip->ip_p) { 1492 case IPPROTO_TCP: 1493 len = ksnprintf(SNPARGS(proto, 0), "TCP %s", 1494 kinet_ntoa(ip->ip_src, abuf)); 1495 if (offset == 0) { 1496 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1497 ntohs(tcp->th_sport), 1498 kinet_ntoa(ip->ip_dst, abuf), 1499 
ntohs(tcp->th_dport)); 1500 } else { 1501 ksnprintf(SNPARGS(proto, len), " %s", 1502 kinet_ntoa(ip->ip_dst, abuf)); 1503 } 1504 break; 1505 1506 case IPPROTO_UDP: 1507 len = ksnprintf(SNPARGS(proto, 0), "UDP %s", 1508 kinet_ntoa(ip->ip_src, abuf)); 1509 if (offset == 0) { 1510 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 1511 ntohs(udp->uh_sport), 1512 kinet_ntoa(ip->ip_dst, abuf), 1513 ntohs(udp->uh_dport)); 1514 } else { 1515 ksnprintf(SNPARGS(proto, len), " %s", 1516 kinet_ntoa(ip->ip_dst, abuf)); 1517 } 1518 break; 1519 1520 case IPPROTO_ICMP: 1521 if (offset == 0) { 1522 len = ksnprintf(SNPARGS(proto, 0), 1523 "ICMP:%u.%u ", 1524 icmp->icmp_type, 1525 icmp->icmp_code); 1526 } else { 1527 len = ksnprintf(SNPARGS(proto, 0), "ICMP "); 1528 } 1529 len += ksnprintf(SNPARGS(proto, len), "%s", 1530 kinet_ntoa(ip->ip_src, abuf)); 1531 ksnprintf(SNPARGS(proto, len), " %s", 1532 kinet_ntoa(ip->ip_dst, abuf)); 1533 break; 1534 1535 default: 1536 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 1537 kinet_ntoa(ip->ip_src, abuf)); 1538 ksnprintf(SNPARGS(proto, len), " %s", 1539 kinet_ntoa(ip->ip_dst, abuf)); 1540 break; 1541 } 1542 1543 if (ip_off & (IP_MF | IP_OFFMASK)) { 1544 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 1545 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 1546 offset << 3, (ip_off & IP_MF) ? "+" : ""); 1547 } 1548 } 1549 1550 if (oif || m->m_pkthdr.rcvif) { 1551 log(LOG_SECURITY | LOG_INFO, 1552 "ipfw: %d %s %s %s via %s%s\n", 1553 f ? f->rulenum : -1, 1554 action, proto, oif ? "out" : "in", 1555 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 1556 fragment); 1557 } else { 1558 log(LOG_SECURITY | LOG_INFO, 1559 "ipfw: %d %s %s [no if info]%s\n", 1560 f ? f->rulenum : -1, 1561 action, proto, fragment); 1562 } 1563 1564 if (limit_reached) { 1565 log(LOG_SECURITY | LOG_NOTICE, 1566 "ipfw: limit %d reached on entry %d\n", 1567 limit_reached, f ? f->rulenum : -1); 1568 } 1569 } 1570 1571 #undef SNPARGS 1572 1573 static void 1574 ipfw_xlat_reap(struct ipfw_xlat *x, struct ipfw_xlat *slave_x) 1575 { 1576 struct ip_fw *rule = slave_x->xlat_rule; 1577 1578 KKASSERT(rule->cpuid == mycpuid); 1579 1580 /* No more cross references; free this pair now. */ 1581 kfree(x, M_IPFW); 1582 kfree(slave_x, M_IPFW); 1583 1584 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1585 rule->cross_refs--; 1586 } 1587 1588 static void 1589 ipfw_xlat_reap_dispatch(netmsg_t nm) 1590 { 1591 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1592 struct ipfw_state *s, *ns; 1593 1594 ASSERT_NETISR_NCPUS(mycpuid); 1595 1596 crit_enter(); 1597 /* Reply ASAP. 
*/ 1598 netisr_replymsg(&ctx->ipfw_xlatreap_nm, 0); 1599 crit_exit(); 1600 1601 /* TODO: limit scanning depth */ 1602 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_xlatreap, st_link, ns) { 1603 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 1604 struct ipfw_xlat *slave_x = x->xlat_pair; 1605 uint64_t crefs; 1606 1607 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1608 if (crefs == 0) { 1609 TAILQ_REMOVE(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1610 ipfw_xlat_reap(x, slave_x); 1611 } 1612 } 1613 if (!TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1614 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1615 &ctx->ipfw_xlatreap_nm); 1616 } 1617 } 1618 1619 static void 1620 ipfw_xlat_reap_timeo(void *xnm) 1621 { 1622 struct netmsg_base *nm = xnm; 1623 1624 KKASSERT(mycpuid < netisr_ncpus); 1625 1626 crit_enter(); 1627 if (nm->lmsg.ms_flags & MSGF_DONE) 1628 netisr_sendmsg_oncpu(nm); 1629 crit_exit(); 1630 } 1631 1632 static void 1633 ipfw_xlat_free_dispatch(netmsg_t nmsg) 1634 { 1635 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1636 struct ipfw_xlat *x = nmsg->lmsg.u.ms_resultp; 1637 struct ipfw_xlat *slave_x = x->xlat_pair; 1638 uint64_t crefs; 1639 1640 ASSERT_NETISR_NCPUS(mycpuid); 1641 1642 KKASSERT(slave_x != NULL); 1643 KKASSERT(slave_x->xlat_invalid && x->xlat_invalid); 1644 1645 KASSERT((x->xlat_flags & IPFW_STATE_F_LINKED) == 0, 1646 ("master xlat is still linked")); 1647 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1648 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1649 1650 /* See the comment in ipfw_ip_xlate_dispatch(). */ 1651 slave_x->xlat_crefs--; 1652 1653 crefs = slave_x->xlat_crefs + x->xlat_crefs; 1654 if (crefs == 0) { 1655 ipfw_xlat_reap(x, slave_x); 1656 return; 1657 } 1658 1659 if (TAILQ_EMPTY(&ctx->ipfw_xlatreap)) { 1660 callout_reset(&ctx->ipfw_xlatreap_ch, 2, ipfw_xlat_reap_timeo, 1661 &ctx->ipfw_xlatreap_nm); 1662 } 1663 1664 /* 1665 * This pair is still referenced; defer its destruction. 1666 * YYY reuse st_link. 1667 */ 1668 TAILQ_INSERT_TAIL(&ctx->ipfw_xlatreap, &x->xlat_st, st_link); 1669 } 1670 1671 static __inline void 1672 ipfw_xlat_invalidate(struct ipfw_xlat *x) 1673 { 1674 1675 x->xlat_invalid = 1; 1676 x->xlat_pair->xlat_invalid = 1; 1677 } 1678 1679 static void 1680 ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s) 1681 { 1682 struct ipfw_xlat *x, *slave_x; 1683 struct netmsg_base *nm; 1684 1685 KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT || 1686 IPFW_ISXLAT(s->st_type), ("invalid state type %u", s->st_type)); 1687 KASSERT((s->st_flags & IPFW_STATE_F_XLATSLAVE) == 0, 1688 ("delete slave xlat")); 1689 1690 KASSERT(ctx->ipfw_state_cnt > 0, 1691 ("invalid state count %d", ctx->ipfw_state_cnt)); 1692 ctx->ipfw_state_cnt--; 1693 if (ctx->ipfw_state_loosecnt > 0) 1694 ctx->ipfw_state_loosecnt--; 1695 1696 /* 1697 * Unhook this state. 1698 */ 1699 if (s->st_track != NULL) { 1700 struct ipfw_track *t = s->st_track; 1701 1702 KASSERT(!LIST_EMPTY(&t->t_state_list), 1703 ("track state list is empty")); 1704 LIST_REMOVE(s, st_trklink); 1705 1706 KASSERT(*t->t_count > 0, 1707 ("invalid track count %d", *t->t_count)); 1708 atomic_subtract_int(t->t_count, 1); 1709 } 1710 ipfw_state_unlink(ctx, s); 1711 1712 /* 1713 * Free this state. Xlat requires special processing, 1714 * since xlat are paired state and they could be on 1715 * different cpus. 1716 */ 1717 1718 if (!IPFW_ISXLAT(s->st_type)) { 1719 /* Not xlat; free now. */ 1720 kfree(s, M_IPFW); 1721 /* Done! 
*/ 1722 return; 1723 } 1724 x = (struct ipfw_xlat *)s; 1725 1726 if (x->xlat_pair == NULL) { 1727 /* Not setup yet; free now. */ 1728 kfree(x, M_IPFW); 1729 /* Done! */ 1730 return; 1731 } 1732 slave_x = x->xlat_pair; 1733 KKASSERT(slave_x->xlat_flags & IPFW_STATE_F_XLATSLAVE); 1734 1735 if (x->xlat_pcpu == mycpuid) { 1736 /* 1737 * Paired states are on the same cpu; delete this 1738 * pair now. 1739 */ 1740 KKASSERT(x->xlat_crefs == 0); 1741 KKASSERT(slave_x->xlat_crefs == 0); 1742 if (slave_x->xlat_flags & IPFW_STATE_F_LINKED) 1743 ipfw_state_unlink(ctx, &slave_x->xlat_st); 1744 kfree(x, M_IPFW); 1745 kfree(slave_x, M_IPFW); 1746 return; 1747 } 1748 1749 /* 1750 * Free the paired states on the cpu owning the slave xlat. 1751 */ 1752 1753 /* 1754 * Mark the state pair invalid; completely deleting them 1755 * may take some time. 1756 */ 1757 ipfw_xlat_invalidate(x); 1758 1759 nm = &x->xlat_freenm; 1760 netmsg_init(nm, NULL, &netisr_apanic_rport, MSGF_PRIORITY, 1761 ipfw_xlat_free_dispatch); 1762 nm->lmsg.u.ms_resultp = x; 1763 1764 /* See the comment in ipfw_xlate_redispatch(). */ 1765 x->xlat_rule->cross_refs++; 1766 x->xlat_crefs++; 1767 1768 netisr_sendmsg(nm, x->xlat_pcpu); 1769 } 1770 1771 static void 1772 ipfw_state_remove(struct ipfw_context *ctx, struct ipfw_state *s) 1773 { 1774 1775 if (s->st_flags & IPFW_STATE_F_XLATSLAVE) { 1776 KKASSERT(IPFW_ISXLAT(s->st_type)); 1777 ipfw_xlat_invalidate((struct ipfw_xlat *)s); 1778 ipfw_state_unlink(ctx, s); 1779 return; 1780 } 1781 ipfw_state_del(ctx, s); 1782 } 1783 1784 static int 1785 ipfw_state_reap(struct ipfw_context *ctx, int reap_max) 1786 { 1787 struct ipfw_state *s, *anchor; 1788 int expired; 1789 1790 if (reap_max < ipfw_state_reap_min) 1791 reap_max = ipfw_state_reap_min; 1792 1793 if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) { 1794 /* 1795 * Kick start state expiring. Ignore scan limit, 1796 * we are short of states. 1797 */ 1798 ctx->ipfw_flags |= IPFW_FLAG_STATEREAP; 1799 expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max); 1800 ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP; 1801 return (expired); 1802 } 1803 1804 /* 1805 * States are being expired. 1806 */ 1807 1808 if (ctx->ipfw_state_cnt == 0) 1809 return (0); 1810 1811 expired = 0; 1812 anchor = &ctx->ipfw_stateexp_anch; 1813 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1814 /* 1815 * Ignore scan limit; we are short of states. 1816 */ 1817 1818 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1819 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1820 1821 if (IPFW_STATE_SCANSKIP(s)) 1822 continue; 1823 1824 if (IPFW_STATE_ISDEAD(s) || IPFW_STATE_TCPCLOSED(s)) { 1825 ipfw_state_del(ctx, s); 1826 if (++expired >= reap_max) 1827 break; 1828 if ((expired & 0xff) == 0 && 1829 ipfw_state_cntcoll() + ipfw_state_headroom <= 1830 ipfw_state_max) 1831 break; 1832 } 1833 } 1834 /* 1835 * NOTE: 1836 * Leave the anchor on the list, even if the end of the list has 1837 * been reached. ipfw_state_expire_more_dispatch() will handle 1838 * the removal. 
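 *
 * The anchor is a dummy state that IPFW_STATE_SCANSKIP() ignores
 * (st_type O_ANCHOR); each iteration above walks it past the state
 * just examined with
 *
 *	TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
 *	TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);
 *
 * so a later pass can resume the scan right where this one stopped.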
1839 */ 1840 return (expired); 1841 } 1842 1843 static void 1844 ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule) 1845 { 1846 struct ipfw_state *s, *sn; 1847 1848 TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) { 1849 if (IPFW_STATE_SCANSKIP(s)) 1850 continue; 1851 if (rule != NULL && s->st_rule != rule) 1852 continue; 1853 ipfw_state_del(ctx, s); 1854 } 1855 } 1856 1857 static void 1858 ipfw_state_expire_done(struct ipfw_context *ctx) 1859 { 1860 1861 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1862 ("stateexp is not in progress")); 1863 ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP; 1864 callout_reset(&ctx->ipfw_stateto_ch, hz, 1865 ipfw_state_expire_ipifunc, NULL); 1866 } 1867 1868 static void 1869 ipfw_state_expire_more(struct ipfw_context *ctx) 1870 { 1871 struct netmsg_base *nm = &ctx->ipfw_stateexp_more; 1872 1873 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1874 ("stateexp is not in progress")); 1875 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 1876 ("stateexp more did not finish")); 1877 netisr_sendmsg_oncpu(nm); 1878 } 1879 1880 static int 1881 ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor, 1882 int scan_max, int expire_max) 1883 { 1884 struct ipfw_state *s; 1885 int scanned = 0, expired = 0; 1886 1887 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1888 ("stateexp is not in progress")); 1889 1890 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 1891 if (scanned++ >= scan_max) { 1892 ipfw_state_expire_more(ctx); 1893 return (expired); 1894 } 1895 1896 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1897 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 1898 1899 if (IPFW_STATE_SCANSKIP(s)) 1900 continue; 1901 1902 if (IPFW_STATE_ISDEAD(s) || 1903 ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1904 IPFW_STATE_TCPCLOSED(s))) { 1905 ipfw_state_del(ctx, s); 1906 if (++expired >= expire_max) { 1907 ipfw_state_expire_more(ctx); 1908 return (expired); 1909 } 1910 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) && 1911 (expired & 0xff) == 0 && 1912 ipfw_state_cntcoll() + ipfw_state_headroom <= 1913 ipfw_state_max) { 1914 ipfw_state_expire_more(ctx); 1915 return (expired); 1916 } 1917 } 1918 } 1919 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1920 ipfw_state_expire_done(ctx); 1921 return (expired); 1922 } 1923 1924 static void 1925 ipfw_state_expire_more_dispatch(netmsg_t nm) 1926 { 1927 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1928 struct ipfw_state *anchor; 1929 1930 ASSERT_NETISR_NCPUS(mycpuid); 1931 KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP, 1932 ("statexp is not in progress")); 1933 1934 /* Reply ASAP */ 1935 netisr_replymsg(&nm->base, 0); 1936 1937 anchor = &ctx->ipfw_stateexp_anch; 1938 if (ctx->ipfw_state_cnt == 0) { 1939 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 1940 ipfw_state_expire_done(ctx); 1941 return; 1942 } 1943 ipfw_state_expire_loop(ctx, anchor, 1944 ipfw_state_scan_max, ipfw_state_expire_max); 1945 } 1946 1947 static int 1948 ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 1949 { 1950 struct ipfw_state *anchor; 1951 1952 KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0, 1953 ("stateexp is in progress")); 1954 ctx->ipfw_flags |= IPFW_FLAG_STATEEXP; 1955 1956 if (ctx->ipfw_state_cnt == 0) { 1957 ipfw_state_expire_done(ctx); 1958 return (0); 1959 } 1960 1961 /* 1962 * Do not expire more than once per second, it is useless. 
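 * (time_uptime has one second granularity and all lifetimes are whole
 * seconds, so a second pass within the same second cannot expire
 * anything the first pass did not already handle.)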
1963 */ 1964 if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 && 1965 ctx->ipfw_state_lastexp == time_uptime) { 1966 ipfw_state_expire_done(ctx); 1967 return (0); 1968 } 1969 ctx->ipfw_state_lastexp = time_uptime; 1970 1971 anchor = &ctx->ipfw_stateexp_anch; 1972 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 1973 return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max)); 1974 } 1975 1976 static void 1977 ipfw_state_expire_dispatch(netmsg_t nm) 1978 { 1979 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1980 1981 ASSERT_NETISR_NCPUS(mycpuid); 1982 1983 /* Reply ASAP */ 1984 crit_enter(); 1985 netisr_replymsg(&nm->base, 0); 1986 crit_exit(); 1987 1988 if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) { 1989 /* Running; done. */ 1990 return; 1991 } 1992 ipfw_state_expire_start(ctx, 1993 ipfw_state_scan_max, ipfw_state_expire_max); 1994 } 1995 1996 static void 1997 ipfw_state_expire_ipifunc(void *dummy __unused) 1998 { 1999 struct netmsg_base *msg; 2000 2001 KKASSERT(mycpuid < netisr_ncpus); 2002 msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm; 2003 2004 crit_enter(); 2005 if (msg->lmsg.ms_flags & MSGF_DONE) 2006 netisr_sendmsg_oncpu(msg); 2007 crit_exit(); 2008 } 2009 2010 static boolean_t 2011 ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp) 2012 { 2013 uint32_t seq = ntohl(tcp->th_seq); 2014 uint32_t ack = ntohl(tcp->th_ack); 2015 2016 if (tcp->th_flags & TH_RST) 2017 return (TRUE); 2018 2019 if (dir == MATCH_FORWARD) { 2020 if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) { 2021 s->st_flags |= IPFW_STATE_F_SEQFWD; 2022 s->st_seq_fwd = seq; 2023 } else if (SEQ_GEQ(seq, s->st_seq_fwd)) { 2024 s->st_seq_fwd = seq; 2025 } else { 2026 /* Out-of-sequence; done. */ 2027 return (FALSE); 2028 } 2029 if (tcp->th_flags & TH_ACK) { 2030 if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) { 2031 s->st_flags |= IPFW_STATE_F_ACKFWD; 2032 s->st_ack_fwd = ack; 2033 } else if (SEQ_GEQ(ack, s->st_ack_fwd)) { 2034 s->st_ack_fwd = ack; 2035 } else { 2036 /* Out-of-sequence; done. */ 2037 return (FALSE); 2038 } 2039 2040 if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) == 2041 (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1) 2042 s->st_state |= (TH_ACK << 8); 2043 } 2044 } else { 2045 if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) { 2046 s->st_flags |= IPFW_STATE_F_SEQREV; 2047 s->st_seq_rev = seq; 2048 } else if (SEQ_GEQ(seq, s->st_seq_rev)) { 2049 s->st_seq_rev = seq; 2050 } else { 2051 /* Out-of-sequence; done. */ 2052 return (FALSE); 2053 } 2054 if (tcp->th_flags & TH_ACK) { 2055 if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) { 2056 s->st_flags |= IPFW_STATE_F_ACKREV; 2057 s->st_ack_rev= ack; 2058 } else if (SEQ_GEQ(ack, s->st_ack_rev)) { 2059 s->st_ack_rev = ack; 2060 } else { 2061 /* Out-of-sequence; done. */ 2062 return (FALSE); 2063 } 2064 2065 if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN && 2066 s->st_ack_rev == s->st_seq_fwd + 1) 2067 s->st_state |= TH_ACK; 2068 } 2069 } 2070 return (TRUE); 2071 } 2072 2073 static void 2074 ipfw_state_update(const struct ipfw_flow_id *pkt, int dir, 2075 const struct tcphdr *tcp, struct ipfw_state *s) 2076 { 2077 2078 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 2079 u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS; 2080 2081 if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp)) 2082 return; 2083 2084 s->st_state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); 2085 switch (s->st_state & IPFW_STATE_TCPSTATES) { 2086 case TH_SYN: /* opening */ 2087 s->st_expire = time_uptime + dyn_syn_lifetime; 2088 break; 2089 2090 case BOTH_SYN: /* move to established */ 2091 case BOTH_SYN | TH_FIN: /* one side tries to close */ 2092 case BOTH_SYN | (TH_FIN << 8): 2093 s->st_expire = time_uptime + dyn_ack_lifetime; 2094 break; 2095 2096 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 2097 if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) { 2098 /* And both FINs were ACKed. */ 2099 s->st_expire = time_uptime + dyn_fin_lifetime; 2100 } else { 2101 s->st_expire = time_uptime + 2102 dyn_finwait_lifetime; 2103 } 2104 break; 2105 2106 default: 2107 #if 0 2108 /* 2109 * reset or some invalid combination, but can also 2110 * occur if we use keep-state the wrong way. 2111 */ 2112 if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0) 2113 kprintf("invalid state: 0x%x\n", s->st_state); 2114 #endif 2115 s->st_expire = time_uptime + dyn_rst_lifetime; 2116 break; 2117 } 2118 } else if (pkt->proto == IPPROTO_UDP) { 2119 s->st_expire = time_uptime + dyn_udp_lifetime; 2120 } else { 2121 /* other protocols */ 2122 s->st_expire = time_uptime + dyn_short_lifetime; 2123 } 2124 } 2125 2126 /* 2127 * Lookup a state. 2128 */ 2129 static struct ipfw_state * 2130 ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt, 2131 int *match_direction, const struct tcphdr *tcp) 2132 { 2133 struct ipfw_state *key, *s; 2134 int dir = MATCH_NONE; 2135 2136 key = &ctx->ipfw_state_tmpkey; 2137 ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port, 2138 pkt->dst_ip, pkt->dst_port, pkt->proto); 2139 s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key); 2140 if (s == NULL) 2141 goto done; /* not found. */ 2142 if (IPFW_STATE_ISDEAD(s)) { 2143 ipfw_state_remove(ctx, s); 2144 s = NULL; 2145 goto done; 2146 } 2147 if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) { 2148 /* TCP ports recycling is too fast. */ 2149 ctx->ipfw_sts_tcprecycled++; 2150 ipfw_state_remove(ctx, s); 2151 s = NULL; 2152 goto done; 2153 } 2154 2155 if (s->st_swap == key->st_swap) { 2156 dir = MATCH_FORWARD; 2157 } else { 2158 KASSERT((s->st_swap & key->st_swap) == 0, 2159 ("found mismatch state")); 2160 dir = MATCH_REVERSE; 2161 } 2162 2163 /* Update this state. */ 2164 ipfw_state_update(pkt, dir, tcp, s); 2165 2166 if (s->st_track != NULL) { 2167 /* This track has been used. 
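 * Refresh the owning 'limit' track with the short lifetime as well, so the track outlives its states for as long as any of them keeps seeing traffic.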
*/ 2168 s->st_track->t_expire = time_uptime + dyn_short_lifetime; 2169 } 2170 done: 2171 if (match_direction) 2172 *match_direction = dir; 2173 return (s); 2174 } 2175 2176 static struct ipfw_state * 2177 ipfw_state_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2178 uint16_t type, struct ip_fw *rule, const struct tcphdr *tcp) 2179 { 2180 struct ipfw_state *s; 2181 size_t sz; 2182 2183 KASSERT(type == O_KEEP_STATE || type == O_LIMIT || IPFW_ISXLAT(type), 2184 ("invalid state type %u", type)); 2185 2186 sz = sizeof(struct ipfw_state); 2187 if (IPFW_ISXLAT(type)) 2188 sz = sizeof(struct ipfw_xlat); 2189 2190 s = kmalloc(sz, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 2191 if (s == NULL) { 2192 ctx->ipfw_sts_nomem++; 2193 return (NULL); 2194 } 2195 2196 ipfw_key_build(&s->st_key, id->src_ip, id->src_port, 2197 id->dst_ip, id->dst_port, id->proto); 2198 2199 s->st_rule = rule; 2200 s->st_type = type; 2201 if (IPFW_ISXLAT(type)) { 2202 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2203 2204 x->xlat_dir = MATCH_NONE; 2205 x->xlat_pcpu = -1; 2206 } 2207 2208 /* 2209 * Update this state: 2210 * Set st_expire and st_state. 2211 */ 2212 ipfw_state_update(id, MATCH_FORWARD, tcp, s); 2213 2214 return (s); 2215 } 2216 2217 static struct ipfw_state * 2218 ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2219 uint16_t type, struct ip_fw *rule, struct ipfw_track *t, 2220 const struct tcphdr *tcp) 2221 { 2222 struct ipfw_state *s, *dup; 2223 2224 s = ipfw_state_alloc(ctx, id, type, rule, tcp); 2225 if (s == NULL) 2226 return (NULL); 2227 2228 ctx->ipfw_state_cnt++; 2229 ctx->ipfw_state_loosecnt++; 2230 if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) { 2231 ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt; 2232 ctx->ipfw_state_loosecnt = 0; 2233 } 2234 2235 dup = ipfw_state_link(ctx, s); 2236 if (dup != NULL) 2237 panic("ipfw: %u state exists %p", type, dup); 2238 2239 if (t != NULL) { 2240 /* Keep the track referenced. */ 2241 LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink); 2242 s->st_track = t; 2243 } 2244 return (s); 2245 } 2246 2247 static boolean_t 2248 ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t) 2249 { 2250 struct ipfw_trkcnt *trk; 2251 boolean_t trk_freed = FALSE; 2252 2253 KASSERT(t->t_count != NULL, ("track anchor")); 2254 KASSERT(LIST_EMPTY(&t->t_state_list), 2255 ("invalid track is still referenced")); 2256 2257 trk = t->t_trkcnt; 2258 KASSERT(trk != NULL, ("track has no trkcnt")); 2259 2260 RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2261 TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link); 2262 kfree(t, M_IPFW); 2263 2264 /* 2265 * fdrop() style reference counting. 2266 * See kern/kern_descrip.c fdrop(). 2267 */ 2268 for (;;) { 2269 int refs = trk->tc_refs; 2270 2271 cpu_ccfence(); 2272 KASSERT(refs > 0, ("invalid trkcnt refs %d", refs)); 2273 if (refs == 1) { 2274 IPFW_TRKCNT_TOKGET; 2275 if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) { 2276 KASSERT(trk->tc_count == 0, 2277 ("%d states reference this trkcnt", 2278 trk->tc_count)); 2279 RB_REMOVE(ipfw_trkcnt_tree, 2280 &ipfw_gd.ipfw_trkcnt_tree, trk); 2281 2282 KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0, 2283 ("invalid trkcnt cnt %d", 2284 ipfw_gd.ipfw_trkcnt_cnt)); 2285 ipfw_gd.ipfw_trkcnt_cnt--; 2286 IPFW_TRKCNT_TOKREL; 2287 2288 if (ctx->ipfw_trkcnt_spare == NULL) 2289 ctx->ipfw_trkcnt_spare = trk; 2290 else 2291 kfree(trk, M_IPFW); 2292 trk_freed = TRUE; 2293 break; /* done! 
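 * The final 1->0 transition is performed while holding the global trkcnt token, so dropping the last reference, removing the trkcnt from the shared RB tree and recycling it into the per-cpu spare slot cannot race with concurrent lookups from other cpus.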
*/ 2294 } 2295 IPFW_TRKCNT_TOKREL; 2296 /* retry */ 2297 } else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) { 2298 break; /* done! */ 2299 } 2300 /* retry */ 2301 } 2302 return (trk_freed); 2303 } 2304 2305 static void 2306 ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule) 2307 { 2308 struct ipfw_track *t, *tn; 2309 2310 TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) { 2311 if (t->t_count == NULL) /* anchor */ 2312 continue; 2313 if (rule != NULL && t->t_rule != rule) 2314 continue; 2315 ipfw_track_free(ctx, t); 2316 } 2317 } 2318 2319 static boolean_t 2320 ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t, 2321 boolean_t reap) 2322 { 2323 struct ipfw_state *s, *sn; 2324 boolean_t ret = FALSE; 2325 2326 KASSERT(t->t_count != NULL, ("track anchor")); 2327 2328 if (LIST_EMPTY(&t->t_state_list)) 2329 return (FALSE); 2330 2331 /* 2332 * Do not expire more than once per second, it is useless. 2333 */ 2334 if (t->t_lastexp == time_uptime) 2335 return (FALSE); 2336 t->t_lastexp = time_uptime; 2337 2338 LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) { 2339 if (IPFW_STATE_ISDEAD(s) || (reap && IPFW_STATE_TCPCLOSED(s))) { 2340 KASSERT(s->st_track == t, 2341 ("state track %p does not match %p", 2342 s->st_track, t)); 2343 ipfw_state_del(ctx, s); 2344 ret = TRUE; 2345 } 2346 } 2347 return (ret); 2348 } 2349 2350 static __inline struct ipfw_trkcnt * 2351 ipfw_trkcnt_alloc(struct ipfw_context *ctx) 2352 { 2353 struct ipfw_trkcnt *trk; 2354 2355 if (ctx->ipfw_trkcnt_spare != NULL) { 2356 trk = ctx->ipfw_trkcnt_spare; 2357 ctx->ipfw_trkcnt_spare = NULL; 2358 } else { 2359 trk = kmalloc_cachealign(sizeof(*trk), M_IPFW, 2360 M_INTWAIT | M_NULLOK); 2361 } 2362 return (trk); 2363 } 2364 2365 static void 2366 ipfw_track_expire_done(struct ipfw_context *ctx) 2367 { 2368 2369 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2370 ("trackexp is not in progress")); 2371 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP; 2372 callout_reset(&ctx->ipfw_trackto_ch, hz, 2373 ipfw_track_expire_ipifunc, NULL); 2374 } 2375 2376 static void 2377 ipfw_track_expire_more(struct ipfw_context *ctx) 2378 { 2379 struct netmsg_base *nm = &ctx->ipfw_trackexp_more; 2380 2381 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2382 ("trackexp is not in progress")); 2383 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 2384 ("trackexp more did not finish")); 2385 netisr_sendmsg_oncpu(nm); 2386 } 2387 2388 static int 2389 ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor, 2390 int scan_max, int expire_max) 2391 { 2392 struct ipfw_track *t; 2393 int scanned = 0, expired = 0; 2394 boolean_t reap = FALSE; 2395 2396 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2397 ("trackexp is not in progress")); 2398 2399 if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) 2400 reap = TRUE; 2401 2402 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2403 if (scanned++ >= scan_max) { 2404 ipfw_track_expire_more(ctx); 2405 return (expired); 2406 } 2407 2408 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2409 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2410 2411 if (t->t_count == NULL) /* anchor */ 2412 continue; 2413 2414 ipfw_track_state_expire(ctx, t, reap); 2415 if (!LIST_EMPTY(&t->t_state_list)) { 2416 /* There are states referencing this track. */ 2417 continue; 2418 } 2419 2420 if (TIME_LEQ(t->t_expire, time_uptime) || reap) { 2421 /* Expired. 
*/ 2422 if (ipfw_track_free(ctx, t)) { 2423 if (++expired >= expire_max) { 2424 ipfw_track_expire_more(ctx); 2425 return (expired); 2426 } 2427 } 2428 } 2429 } 2430 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2431 ipfw_track_expire_done(ctx); 2432 return (expired); 2433 } 2434 2435 static int 2436 ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max) 2437 { 2438 struct ipfw_track *anchor; 2439 2440 KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0, 2441 ("trackexp is in progress")); 2442 ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP; 2443 2444 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2445 ipfw_track_expire_done(ctx); 2446 return (0); 2447 } 2448 2449 /* 2450 * Do not expire more than once per second, it is useless. 2451 */ 2452 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 && 2453 ctx->ipfw_track_lastexp == time_uptime) { 2454 ipfw_track_expire_done(ctx); 2455 return (0); 2456 } 2457 ctx->ipfw_track_lastexp = time_uptime; 2458 2459 anchor = &ctx->ipfw_trackexp_anch; 2460 TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link); 2461 return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max)); 2462 } 2463 2464 static void 2465 ipfw_track_expire_more_dispatch(netmsg_t nm) 2466 { 2467 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2468 struct ipfw_track *anchor; 2469 2470 ASSERT_NETISR_NCPUS(mycpuid); 2471 KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP, 2472 ("trackexp is not in progress")); 2473 2474 /* Reply ASAP */ 2475 netisr_replymsg(&nm->base, 0); 2476 2477 anchor = &ctx->ipfw_trackexp_anch; 2478 if (RB_EMPTY(&ctx->ipfw_track_tree)) { 2479 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2480 ipfw_track_expire_done(ctx); 2481 return; 2482 } 2483 ipfw_track_expire_loop(ctx, anchor, 2484 ipfw_track_scan_max, ipfw_track_expire_max); 2485 } 2486 2487 static void 2488 ipfw_track_expire_dispatch(netmsg_t nm) 2489 { 2490 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2491 2492 ASSERT_NETISR_NCPUS(mycpuid); 2493 2494 /* Reply ASAP */ 2495 crit_enter(); 2496 netisr_replymsg(&nm->base, 0); 2497 crit_exit(); 2498 2499 if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) { 2500 /* Running; done. */ 2501 return; 2502 } 2503 ipfw_track_expire_start(ctx, 2504 ipfw_track_scan_max, ipfw_track_expire_max); 2505 } 2506 2507 static void 2508 ipfw_track_expire_ipifunc(void *dummy __unused) 2509 { 2510 struct netmsg_base *msg; 2511 2512 KKASSERT(mycpuid < netisr_ncpus); 2513 msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm; 2514 2515 crit_enter(); 2516 if (msg->lmsg.ms_flags & MSGF_DONE) 2517 netisr_sendmsg_oncpu(msg); 2518 crit_exit(); 2519 } 2520 2521 static int 2522 ipfw_track_reap(struct ipfw_context *ctx) 2523 { 2524 struct ipfw_track *t, *anchor; 2525 int expired; 2526 2527 if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) { 2528 /* 2529 * Kick start track expiring. Ignore scan limit, 2530 * we are short of tracks. 2531 */ 2532 ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP; 2533 expired = ipfw_track_expire_start(ctx, INT_MAX, 2534 ipfw_track_reap_max); 2535 ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP; 2536 return (expired); 2537 } 2538 2539 /* 2540 * Tracks are being expired. 2541 */ 2542 2543 if (RB_EMPTY(&ctx->ipfw_track_tree)) 2544 return (0); 2545 2546 expired = 0; 2547 anchor = &ctx->ipfw_trackexp_anch; 2548 while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) { 2549 /* 2550 * Ignore scan limit; we are short of tracks. 
2551 */ 2552 2553 TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link); 2554 TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link); 2555 2556 if (t->t_count == NULL) /* anchor */ 2557 continue; 2558 2559 ipfw_track_state_expire(ctx, t, TRUE); 2560 if (!LIST_EMPTY(&t->t_state_list)) { 2561 /* There are states referencing this track. */ 2562 continue; 2563 } 2564 2565 if (ipfw_track_free(ctx, t)) { 2566 if (++expired >= ipfw_track_reap_max) { 2567 ipfw_track_expire_more(ctx); 2568 break; 2569 } 2570 } 2571 } 2572 /* 2573 * NOTE: 2574 * Leave the anchor on the list, even if the end of the list has 2575 * been reached. ipfw_track_expire_more_dispatch() will handle 2576 * the removal. 2577 */ 2578 return (expired); 2579 } 2580 2581 static struct ipfw_track * 2582 ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id, 2583 uint16_t limit_mask, struct ip_fw *rule) 2584 { 2585 struct ipfw_track *key, *t, *dup; 2586 struct ipfw_trkcnt *trk, *ret; 2587 boolean_t do_expire = FALSE; 2588 2589 KASSERT(rule->track_ruleid != 0, 2590 ("rule %u has no track ruleid", rule->rulenum)); 2591 2592 key = &ctx->ipfw_track_tmpkey; 2593 key->t_proto = id->proto; 2594 key->t_addrs = 0; 2595 key->t_ports = 0; 2596 key->t_rule = rule; 2597 if (limit_mask & DYN_SRC_ADDR) 2598 key->t_saddr = id->src_ip; 2599 if (limit_mask & DYN_DST_ADDR) 2600 key->t_daddr = id->dst_ip; 2601 if (limit_mask & DYN_SRC_PORT) 2602 key->t_sport = id->src_port; 2603 if (limit_mask & DYN_DST_PORT) 2604 key->t_dport = id->dst_port; 2605 2606 t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key); 2607 if (t != NULL) 2608 goto done; 2609 2610 t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK); 2611 if (t == NULL) { 2612 ctx->ipfw_tks_nomem++; 2613 return (NULL); 2614 } 2615 2616 t->t_key = key->t_key; 2617 t->t_rule = rule; 2618 t->t_lastexp = 0; 2619 LIST_INIT(&t->t_state_list); 2620 2621 if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) { 2622 time_t globexp, uptime; 2623 2624 trk = NULL; 2625 do_expire = TRUE; 2626 2627 /* 2628 * Do not expire globally more than once per second, 2629 * it is useless. 2630 */ 2631 uptime = time_uptime; 2632 globexp = ipfw_gd.ipfw_track_globexp; 2633 if (globexp != uptime && 2634 atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp, 2635 globexp, uptime)) { 2636 int cpu; 2637 2638 /* Expire tracks on other CPUs. 
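 * Only the cpu that successfully advanced ipfw_gd.ipfw_track_globexp above gets here, so at most one round of expire IPIs is broadcast per second no matter how many cpus hit the track limit at the same time.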
*/ 2639 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2640 if (cpu == mycpuid) 2641 continue; 2642 lwkt_send_ipiq(globaldata_find(cpu), 2643 ipfw_track_expire_ipifunc, NULL); 2644 } 2645 } 2646 } else { 2647 trk = ipfw_trkcnt_alloc(ctx); 2648 } 2649 if (trk == NULL) { 2650 struct ipfw_trkcnt *tkey; 2651 2652 tkey = &ctx->ipfw_trkcnt_tmpkey; 2653 key = NULL; /* tkey overlaps key */ 2654 2655 tkey->tc_key = t->t_key; 2656 tkey->tc_ruleid = rule->track_ruleid; 2657 2658 IPFW_TRKCNT_TOKGET; 2659 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2660 tkey); 2661 if (trk == NULL) { 2662 IPFW_TRKCNT_TOKREL; 2663 if (do_expire) { 2664 ctx->ipfw_tks_reap++; 2665 if (ipfw_track_reap(ctx) > 0) { 2666 if (ipfw_gd.ipfw_trkcnt_cnt < 2667 ipfw_track_max) { 2668 trk = ipfw_trkcnt_alloc(ctx); 2669 if (trk != NULL) 2670 goto install; 2671 ctx->ipfw_tks_cntnomem++; 2672 } else { 2673 ctx->ipfw_tks_overflow++; 2674 } 2675 } else { 2676 ctx->ipfw_tks_reapfailed++; 2677 ctx->ipfw_tks_overflow++; 2678 } 2679 } else { 2680 ctx->ipfw_tks_cntnomem++; 2681 } 2682 kfree(t, M_IPFW); 2683 return (NULL); 2684 } 2685 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus, 2686 ("invalid trkcnt refs %d", trk->tc_refs)); 2687 atomic_add_int(&trk->tc_refs, 1); 2688 IPFW_TRKCNT_TOKREL; 2689 } else { 2690 install: 2691 trk->tc_key = t->t_key; 2692 trk->tc_ruleid = rule->track_ruleid; 2693 trk->tc_refs = 0; 2694 trk->tc_count = 0; 2695 trk->tc_expire = 0; 2696 trk->tc_rulenum = rule->rulenum; 2697 2698 IPFW_TRKCNT_TOKGET; 2699 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2700 trk); 2701 if (ret != NULL) { 2702 KASSERT(ret->tc_refs > 0 && 2703 ret->tc_refs < netisr_ncpus, 2704 ("invalid trkcnt refs %d", ret->tc_refs)); 2705 KASSERT(ctx->ipfw_trkcnt_spare == NULL, 2706 ("trkcnt spare was installed")); 2707 ctx->ipfw_trkcnt_spare = trk; 2708 trk = ret; 2709 } else { 2710 ipfw_gd.ipfw_trkcnt_cnt++; 2711 } 2712 atomic_add_int(&trk->tc_refs, 1); 2713 IPFW_TRKCNT_TOKREL; 2714 } 2715 t->t_count = &trk->tc_count; 2716 t->t_trkcnt = trk; 2717 2718 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2719 if (dup != NULL) 2720 panic("ipfw: track exists"); 2721 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link); 2722 done: 2723 t->t_expire = time_uptime + dyn_short_lifetime; 2724 return (t); 2725 } 2726 2727 /* 2728 * Install state for rule type cmd->o.opcode 2729 * 2730 * Returns NULL if state is not installed because of errors or because 2731 * states limitations are enforced. 2732 */ 2733 static struct ipfw_state * 2734 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule, 2735 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp) 2736 { 2737 struct ipfw_state *s; 2738 struct ipfw_track *t; 2739 int count, diff; 2740 2741 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max && 2742 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) { 2743 boolean_t overflow = TRUE; 2744 2745 ctx->ipfw_sts_reap++; 2746 if (ipfw_state_reap(ctx, diff) == 0) 2747 ctx->ipfw_sts_reapfailed++; 2748 if (ipfw_state_cntsync() < ipfw_state_max) 2749 overflow = FALSE; 2750 2751 if (overflow) { 2752 time_t globexp, uptime; 2753 int cpu; 2754 2755 /* 2756 * Do not expire globally more than once per second, 2757 * it is useless. 
2758 */ 2759 uptime = time_uptime; 2760 globexp = ipfw_gd.ipfw_state_globexp; 2761 if (globexp == uptime || 2762 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp, 2763 globexp, uptime)) { 2764 ctx->ipfw_sts_overflow++; 2765 return (NULL); 2766 } 2767 2768 /* Expire states on other CPUs. */ 2769 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2770 if (cpu == mycpuid) 2771 continue; 2772 lwkt_send_ipiq(globaldata_find(cpu), 2773 ipfw_state_expire_ipifunc, NULL); 2774 } 2775 ctx->ipfw_sts_overflow++; 2776 return (NULL); 2777 } 2778 } 2779 2780 switch (cmd->o.opcode) { 2781 case O_KEEP_STATE: /* bidir rule */ 2782 case O_REDIRECT: 2783 s = ipfw_state_add(ctx, &args->f_id, cmd->o.opcode, rule, NULL, 2784 tcp); 2785 if (s == NULL) 2786 return (NULL); 2787 break; 2788 2789 case O_LIMIT: /* limit number of sessions */ 2790 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule); 2791 if (t == NULL) 2792 return (NULL); 2793 2794 if (*t->t_count >= cmd->conn_limit) { 2795 if (!ipfw_track_state_expire(ctx, t, TRUE)) 2796 return (NULL); 2797 } 2798 for (;;) { 2799 count = *t->t_count; 2800 if (count >= cmd->conn_limit) 2801 return (NULL); 2802 if (atomic_cmpset_int(t->t_count, count, count + 1)) 2803 break; 2804 } 2805 2806 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp); 2807 if (s == NULL) { 2808 /* Undo damage. */ 2809 atomic_subtract_int(t->t_count, 1); 2810 return (NULL); 2811 } 2812 break; 2813 2814 default: 2815 panic("unknown state type %u\n", cmd->o.opcode); 2816 } 2817 2818 if (s->st_type == O_REDIRECT) { 2819 struct ipfw_xlat *x = (struct ipfw_xlat *)s; 2820 ipfw_insn_rdr *r = (ipfw_insn_rdr *)cmd; 2821 2822 x->xlat_addr = r->addr.s_addr; 2823 x->xlat_port = r->port; 2824 x->xlat_ifp = args->m->m_pkthdr.rcvif; 2825 x->xlat_dir = MATCH_FORWARD; 2826 KKASSERT(x->xlat_ifp != NULL); 2827 } 2828 return (s); 2829 } 2830 2831 static int 2832 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid, 2833 const struct in_addr *in) 2834 { 2835 struct radix_node_head *rnh; 2836 struct sockaddr_in sin; 2837 struct ipfw_tblent *te; 2838 2839 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid)); 2840 rnh = ctx->ipfw_tables[tableid]; 2841 if (rnh == NULL) 2842 return (0); /* no match */ 2843 2844 memset(&sin, 0, sizeof(sin)); 2845 sin.sin_family = AF_INET; 2846 sin.sin_len = sizeof(sin); 2847 sin.sin_addr = *in; 2848 2849 te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh); 2850 if (te == NULL) 2851 return (0); /* no match */ 2852 2853 te->te_use++; 2854 te->te_lastuse = time_second; 2855 return (1); /* match */ 2856 } 2857 2858 /* 2859 * Transmit a TCP packet, containing either a RST or a keepalive. 2860 * When flags & TH_RST, we are sending a RST packet, because of a 2861 * "reset" action matched the packet. 2862 * Otherwise we are sending a keepalive, and flags & TH_ 2863 * 2864 * Only {src,dst}_{ip,port} of "id" are used. 
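 * For keepalives, flags & TH_SYN selects the direction: forward if set, reverse if clear (see the address/port swap in the body below).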
2865 */ 2866 static void 2867 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 2868 { 2869 struct mbuf *m; 2870 struct ip *ip; 2871 struct tcphdr *tcp; 2872 struct route sro; /* fake route */ 2873 2874 MGETHDR(m, M_NOWAIT, MT_HEADER); 2875 if (m == NULL) 2876 return; 2877 m->m_pkthdr.rcvif = NULL; 2878 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 2879 m->m_data += max_linkhdr; 2880 2881 ip = mtod(m, struct ip *); 2882 bzero(ip, m->m_len); 2883 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 2884 ip->ip_p = IPPROTO_TCP; 2885 tcp->th_off = 5; 2886 2887 /* 2888 * Assume we are sending a RST (or a keepalive in the reverse 2889 * direction), swap src and destination addresses and ports. 2890 */ 2891 ip->ip_src.s_addr = htonl(id->dst_ip); 2892 ip->ip_dst.s_addr = htonl(id->src_ip); 2893 tcp->th_sport = htons(id->dst_port); 2894 tcp->th_dport = htons(id->src_port); 2895 if (flags & TH_RST) { /* we are sending a RST */ 2896 if (flags & TH_ACK) { 2897 tcp->th_seq = htonl(ack); 2898 tcp->th_ack = htonl(0); 2899 tcp->th_flags = TH_RST; 2900 } else { 2901 if (flags & TH_SYN) 2902 seq++; 2903 tcp->th_seq = htonl(0); 2904 tcp->th_ack = htonl(seq); 2905 tcp->th_flags = TH_RST | TH_ACK; 2906 } 2907 } else { 2908 /* 2909 * We are sending a keepalive. flags & TH_SYN determines 2910 * the direction, forward if set, reverse if clear. 2911 * NOTE: seq and ack are always assumed to be correct 2912 * as set by the caller. This may be confusing... 2913 */ 2914 if (flags & TH_SYN) { 2915 /* 2916 * we have to rewrite the correct addresses! 2917 */ 2918 ip->ip_dst.s_addr = htonl(id->dst_ip); 2919 ip->ip_src.s_addr = htonl(id->src_ip); 2920 tcp->th_dport = htons(id->dst_port); 2921 tcp->th_sport = htons(id->src_port); 2922 } 2923 tcp->th_seq = htonl(seq); 2924 tcp->th_ack = htonl(ack); 2925 tcp->th_flags = TH_ACK; 2926 } 2927 2928 /* 2929 * set ip_len to the payload size so we can compute 2930 * the tcp checksum on the pseudoheader 2931 * XXX check this, could save a couple of words ? 2932 */ 2933 ip->ip_len = htons(sizeof(struct tcphdr)); 2934 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 2935 2936 /* 2937 * now fill fields left out earlier 2938 */ 2939 ip->ip_ttl = ip_defttl; 2940 ip->ip_len = m->m_pkthdr.len; 2941 2942 bzero(&sro, sizeof(sro)); 2943 ip_rtaddr(ip->ip_dst, &sro); 2944 2945 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 2946 ip_output(m, NULL, &sro, 0, NULL, NULL); 2947 if (sro.ro_rt) 2948 RTFREE(sro.ro_rt); 2949 } 2950 2951 /* 2952 * Send a reject message, consuming the mbuf passed as an argument. 2953 */ 2954 static void 2955 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 2956 { 2957 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 2958 /* We need the IP header in host order for icmp_error(). 
*/ 2959 if (args->eh != NULL) { 2960 struct ip *ip = mtod(args->m, struct ip *); 2961 2962 ip->ip_len = ntohs(ip->ip_len); 2963 ip->ip_off = ntohs(ip->ip_off); 2964 } 2965 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 2966 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 2967 struct tcphdr *const tcp = 2968 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 2969 2970 if ((tcp->th_flags & TH_RST) == 0) { 2971 send_pkt(&args->f_id, ntohl(tcp->th_seq), 2972 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 2973 } 2974 m_freem(args->m); 2975 } else { 2976 m_freem(args->m); 2977 } 2978 args->m = NULL; 2979 } 2980 2981 /* 2982 * Given an ip_fw *, lookup_next_rule will return a pointer 2983 * to the next rule, which can be either the jump 2984 * target (for skipto instructions) or the next one in the list (in 2985 * all other cases including a missing jump target). 2986 * The result is also written in the "next_rule" field of the rule. 2987 * Backward jumps are not allowed, so start looking from the next 2988 * rule... 2989 * 2990 * This never returns NULL -- in case we do not have an exact match, 2991 * the next rule is returned. When the ruleset is changed, 2992 * pointers are flushed so we are always correct. 2993 */ 2994 static struct ip_fw * 2995 lookup_next_rule(struct ip_fw *me) 2996 { 2997 struct ip_fw *rule = NULL; 2998 ipfw_insn *cmd; 2999 3000 /* look for action, in case it is a skipto */ 3001 cmd = ACTION_PTR(me); 3002 if (cmd->opcode == O_LOG) 3003 cmd += F_LEN(cmd); 3004 if (cmd->opcode == O_SKIPTO) { 3005 for (rule = me->next; rule; rule = rule->next) { 3006 if (rule->rulenum >= cmd->arg1) 3007 break; 3008 } 3009 } 3010 if (rule == NULL) /* failure or not a skipto */ 3011 rule = me->next; 3012 me->next_rule = rule; 3013 return rule; 3014 } 3015 3016 static int 3017 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 3018 enum ipfw_opcodes opcode, uid_t uid) 3019 { 3020 struct in_addr src_ip, dst_ip; 3021 struct inpcbinfo *pi; 3022 boolean_t wildcard; 3023 struct inpcb *pcb; 3024 3025 if (fid->proto == IPPROTO_TCP) { 3026 wildcard = FALSE; 3027 pi = &tcbinfo[mycpuid]; 3028 } else if (fid->proto == IPPROTO_UDP) { 3029 wildcard = TRUE; 3030 pi = &udbinfo[mycpuid]; 3031 } else { 3032 return 0; 3033 } 3034 3035 /* 3036 * Values in 'fid' are in host byte order 3037 */ 3038 dst_ip.s_addr = htonl(fid->dst_ip); 3039 src_ip.s_addr = htonl(fid->src_ip); 3040 if (oif) { 3041 pcb = in_pcblookup_hash(pi, 3042 dst_ip, htons(fid->dst_port), 3043 src_ip, htons(fid->src_port), 3044 wildcard, oif); 3045 } else { 3046 pcb = in_pcblookup_hash(pi, 3047 src_ip, htons(fid->src_port), 3048 dst_ip, htons(fid->dst_port), 3049 wildcard, NULL); 3050 } 3051 if (pcb == NULL || pcb->inp_socket == NULL) 3052 return 0; 3053 3054 if (opcode == O_UID) { 3055 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 3056 return !socheckuid(pcb->inp_socket, uid); 3057 #undef socheckuid 3058 } else { 3059 return groupmember(uid, pcb->inp_socket->so_cred); 3060 } 3061 } 3062 3063 static int 3064 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip) 3065 { 3066 3067 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) { 3068 struct ifaddr_container *ifac; 3069 struct ifnet *ifp; 3070 3071 ifp = ifunit_netisr(cmd->ifname); 3072 if (ifp == NULL) 3073 return (0); 3074 3075 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 3076 struct ifaddr *ia = ifac->ifa; 3077 3078 if (ia->ifa_addr == NULL) 3079 continue; 3080 if (ia->ifa_addr->sa_family != AF_INET) 3081 continue; 3082 3083 
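/* First match against this interface: cache its address (and netmask when IPFW_IFIP_NET is set) in the instruction and mark it IPFW_IFIP_VALID, so subsequent packets only need the single mask-and-compare at the end of this function. */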
cmd->mask.s_addr = INADDR_ANY; 3084 if (cmd->o.arg1 & IPFW_IFIP_NET) { 3085 cmd->mask = ((struct sockaddr_in *) 3086 ia->ifa_netmask)->sin_addr; 3087 } 3088 if (cmd->mask.s_addr == INADDR_ANY) 3089 cmd->mask.s_addr = INADDR_BROADCAST; 3090 3091 cmd->addr = 3092 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr; 3093 cmd->addr.s_addr &= cmd->mask.s_addr; 3094 3095 cmd->o.arg1 |= IPFW_IFIP_VALID; 3096 break; 3097 } 3098 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0) 3099 return (0); 3100 } 3101 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr); 3102 } 3103 3104 static void 3105 ipfw_xlate(const struct ipfw_xlat *x, struct mbuf *m, 3106 struct in_addr *old_addr, uint16_t *old_port) 3107 { 3108 struct ip *ip = mtod(m, struct ip *); 3109 struct in_addr *addr; 3110 uint16_t *port, *csum, dlen = 0; 3111 uint8_t udp = 0; 3112 boolean_t pseudo = FALSE; 3113 3114 if (x->xlat_flags & IPFW_STATE_F_XLATSRC) { 3115 addr = &ip->ip_src; 3116 switch (ip->ip_p) { 3117 case IPPROTO_TCP: 3118 port = &L3HDR(struct tcphdr, ip)->th_sport; 3119 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3120 break; 3121 case IPPROTO_UDP: 3122 port = &L3HDR(struct udphdr, ip)->uh_sport; 3123 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3124 udp = 1; 3125 break; 3126 default: 3127 panic("ipfw: unsupported src xlate proto %u", ip->ip_p); 3128 } 3129 } else { 3130 addr = &ip->ip_dst; 3131 switch (ip->ip_p) { 3132 case IPPROTO_TCP: 3133 port = &L3HDR(struct tcphdr, ip)->th_dport; 3134 csum = &L3HDR(struct tcphdr, ip)->th_sum; 3135 break; 3136 case IPPROTO_UDP: 3137 port = &L3HDR(struct udphdr, ip)->uh_dport; 3138 csum = &L3HDR(struct udphdr, ip)->uh_sum; 3139 udp = 1; 3140 break; 3141 default: 3142 panic("ipfw: unsupported dst xlate proto %u", ip->ip_p); 3143 } 3144 } 3145 if (old_addr != NULL) 3146 *old_addr = *addr; 3147 if (old_port != NULL) { 3148 if (x->xlat_port != 0) 3149 *old_port = *port; 3150 else 3151 *old_port = 0; 3152 } 3153 3154 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 3155 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) 3156 dlen = ip->ip_len - (ip->ip_hl << 2); 3157 pseudo = TRUE; 3158 } 3159 3160 if (!pseudo) { 3161 const uint16_t *oaddr, *naddr; 3162 3163 oaddr = (const uint16_t *)&addr->s_addr; 3164 naddr = (const uint16_t *)&x->xlat_addr; 3165 3166 ip->ip_sum = pfil_cksum_fixup(pfil_cksum_fixup(ip->ip_sum, 3167 oaddr[0], naddr[0], 0), oaddr[1], naddr[1], 0); 3168 *csum = pfil_cksum_fixup(pfil_cksum_fixup(*csum, 3169 oaddr[0], naddr[0], udp), oaddr[1], naddr[1], udp); 3170 } 3171 addr->s_addr = x->xlat_addr; 3172 3173 if (x->xlat_port != 0) { 3174 if (!pseudo) { 3175 *csum = pfil_cksum_fixup(*csum, *port, x->xlat_port, 3176 udp); 3177 } 3178 *port = x->xlat_port; 3179 } 3180 3181 if (pseudo) { 3182 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 3183 htons(dlen + ip->ip_p)); 3184 } 3185 } 3186 3187 static void 3188 ipfw_ip_xlate_dispatch(netmsg_t nmsg) 3189 { 3190 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 3191 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3192 struct mbuf *m = nm->m; 3193 struct ipfw_xlat *x = nm->arg1; 3194 struct ip_fw *rule = x->xlat_rule; 3195 3196 ASSERT_NETISR_NCPUS(mycpuid); 3197 KASSERT(rule->cpuid == mycpuid, 3198 ("rule does not belong to cpu%d", mycpuid)); 3199 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 3200 ("mbuf does not have ipfw continue rule")); 3201 3202 KASSERT(ctx->ipfw_cont_rule == NULL, 3203 ("pending ipfw continue rule")); 3204 KASSERT(ctx->ipfw_cont_xlat == NULL, 3205 ("pending ipfw continue xlat")); 3206 
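/* Stash the continue rule/xlat in the per-cpu context for the nested ipfw_chk() run triggered by ip_input()/ip_output() below; the mbuf is marked IPFW_MBUF_CONTINUE, so filtering resumes from this rule instead of rescanning the chain from the head. */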
ctx->ipfw_cont_rule = rule; 3207 ctx->ipfw_cont_xlat = x; 3208 3209 if (nm->arg2 == 0) 3210 ip_input(m); 3211 else 3212 ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); 3213 3214 /* May not have been cleared, if ipfw was unloaded/disabled. */ 3215 ctx->ipfw_cont_rule = NULL; 3216 ctx->ipfw_cont_xlat = NULL; 3217 3218 /* 3219 * This state is no longer used; decrement its xlat_crefs, 3220 * so this state can be deleted. 3221 */ 3222 x->xlat_crefs--; 3223 /* 3224 * This rule is no longer used; decrement its cross_refs, 3225 * so this rule can be deleted. 3226 * 3227 * NOTE: 3228 * Decrement cross_refs in the last step of this function, 3229 * so that the module could be unloaded safely. 3230 */ 3231 rule->cross_refs--; 3232 } 3233 3234 static void 3235 ipfw_xlate_redispatch(struct mbuf *m, int cpuid, struct ipfw_xlat *x, 3236 uint32_t flags) 3237 { 3238 struct netmsg_genpkt *nm; 3239 3240 KASSERT(x->xlat_pcpu == cpuid, ("xlat paired cpu%d, target cpu%d", 3241 x->xlat_pcpu, cpuid)); 3242 3243 /* 3244 * Bump cross_refs to prevent this rule and its siblings 3245 * from being deleted while this mbuf is in flight. The 3246 * cross_refs of the sibling rule on the target cpu will 3247 * be decremented once this mbuf is filtered 3248 * on the target cpu. 3249 */ 3250 x->xlat_rule->cross_refs++; 3251 /* 3252 * Bump xlat_crefs to prevent this state and its paired 3253 * state from being deleted while this mbuf is in flight. 3254 * The xlat_crefs of the paired state on the target cpu 3255 * will be decremented once this mbuf is 3256 * filtered on the target cpu. 3257 */ 3258 x->xlat_crefs++; 3259 3260 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE; 3261 if (flags & IPFW_XLATE_INSERT) 3262 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATINS; 3263 if (flags & IPFW_XLATE_FORWARD) 3264 m->m_pkthdr.fw_flags |= IPFW_MBUF_XLATFWD; 3265 3266 if ((flags & IPFW_XLATE_OUTPUT) == 0) { 3267 struct ip *ip = mtod(m, struct ip *); 3268 3269 /* 3270 * NOTE: 3271 * ip_input() expects ip_len/ip_off to be in network 3272 * byte order. 3273 */ 3274 ip->ip_len = htons(ip->ip_len); 3275 ip->ip_off = htons(ip->ip_off); 3276 } 3277 3278 nm = &m->m_hdr.mh_genmsg; 3279 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0, 3280 ipfw_ip_xlate_dispatch); 3281 nm->m = m; 3282 nm->arg1 = x->xlat_pair; 3283 nm->arg2 = 0; 3284 if (flags & IPFW_XLATE_OUTPUT) 3285 nm->arg2 = 1; 3286 netisr_sendmsg(&nm->base, cpuid); 3287 } 3288 3289 static struct mbuf * 3290 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3291 struct ip_fw_local *local, struct ip **ip0) 3292 { 3293 struct ip *ip = mtod(m, struct ip *); 3294 struct tcphdr *tcp; 3295 struct udphdr *udp; 3296 3297 /* 3298 * Collect parameters into local variables for faster matching.
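 * Note that args->f_id ends up entirely in host byte order (the state lookup and uid/gid matching depend on that), while local->src_ip/dst_ip keep the wire byte order used by the plain address-compare opcodes.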
3299 */ 3300 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 3301 local->proto = args->f_id.proto = 0; /* mark f_id invalid */ 3302 goto done; 3303 } 3304 3305 local->proto = args->f_id.proto = ip->ip_p; 3306 local->src_ip = ip->ip_src; 3307 local->dst_ip = ip->ip_dst; 3308 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 3309 local->offset = ntohs(ip->ip_off) & IP_OFFMASK; 3310 local->ip_len = ntohs(ip->ip_len); 3311 } else { 3312 local->offset = ip->ip_off & IP_OFFMASK; 3313 local->ip_len = ip->ip_len; 3314 } 3315 3316 #define PULLUP_TO(len) \ 3317 do { \ 3318 if (m->m_len < (len)) { \ 3319 args->m = m = m_pullup(m, (len)); \ 3320 if (m == NULL) { \ 3321 ip = NULL; \ 3322 goto done; \ 3323 } \ 3324 ip = mtod(m, struct ip *); \ 3325 } \ 3326 } while (0) 3327 3328 if (local->offset == 0) { 3329 switch (local->proto) { 3330 case IPPROTO_TCP: 3331 PULLUP_TO(hlen + sizeof(struct tcphdr)); 3332 local->tcp = tcp = L3HDR(struct tcphdr, ip); 3333 local->dst_port = tcp->th_dport; 3334 local->src_port = tcp->th_sport; 3335 args->f_id.flags = tcp->th_flags; 3336 break; 3337 3338 case IPPROTO_UDP: 3339 PULLUP_TO(hlen + sizeof(struct udphdr)); 3340 udp = L3HDR(struct udphdr, ip); 3341 local->dst_port = udp->uh_dport; 3342 local->src_port = udp->uh_sport; 3343 break; 3344 3345 case IPPROTO_ICMP: 3346 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 3347 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 3348 break; 3349 3350 default: 3351 break; 3352 } 3353 } 3354 3355 #undef PULLUP_TO 3356 3357 args->f_id.src_ip = ntohl(local->src_ip.s_addr); 3358 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr); 3359 args->f_id.src_port = local->src_port = ntohs(local->src_port); 3360 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port); 3361 done: 3362 *ip0 = ip; 3363 return (m); 3364 } 3365 3366 static struct mbuf * 3367 ipfw_rehashm(struct mbuf *m, const int hlen, struct ip_fw_args *args, 3368 struct ip_fw_local *local, struct ip **ip0) 3369 { 3370 struct ip *ip = mtod(m, struct ip *); 3371 3372 ip->ip_len = htons(ip->ip_len); 3373 ip->ip_off = htons(ip->ip_off); 3374 3375 m->m_flags &= ~M_HASH; 3376 ip_hashfn(&m, 0); 3377 args->m = m; 3378 if (m == NULL) { 3379 *ip0 = NULL; 3380 return (NULL); 3381 } 3382 KASSERT(m->m_flags & M_HASH, ("no hash")); 3383 3384 /* 'm' might be changed by ip_hashfn(). */ 3385 ip = mtod(m, struct ip *); 3386 ip->ip_len = ntohs(ip->ip_len); 3387 ip->ip_off = ntohs(ip->ip_off); 3388 3389 return (ipfw_setup_local(m, hlen, args, local, ip0)); 3390 } 3391 3392 /* 3393 * The main check routine for the firewall. 3394 * 3395 * All arguments are in args so we can modify them and return them 3396 * back to the caller. 3397 * 3398 * Parameters: 3399 * 3400 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 3401 * Starts with the IP header. 3402 * args->eh (in) Mac header if present, or NULL for layer3 packet. 3403 * args->oif Outgoing interface, or NULL if packet is incoming. 3404 * The incoming interface is in the mbuf. (in) 3405 * 3406 * args->rule Pointer to the last matching rule (in/out) 3407 * args->f_id Addresses grabbed from the packet (out) 3408 * 3409 * Return value: 3410 * 3411 * If the packet was denied/rejected and has been dropped, *m is equal 3412 * to NULL upon return. 3413 * 3414 * IP_FW_DENY the packet must be dropped. 3415 * IP_FW_PASS The packet is to be accepted and routed normally. 
3416 * IP_FW_DIVERT Divert the packet to port (args->cookie) 3417 * IP_FW_TEE Tee the packet to port (args->cookie) 3418 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie) 3419 * IP_FW_CONTINUE Continue processing on another cpu. 3420 */ 3421 static int 3422 ipfw_chk(struct ip_fw_args *args) 3423 { 3424 /* 3425 * Local variables hold state during the processing of a packet. 3426 * 3427 * IMPORTANT NOTE: to speed up the processing of rules, there 3428 * are some assumptions about the values of the variables, which 3429 * are documented here. Should you change them, please check 3430 * the implementation of the various instructions to make sure 3431 * that they still work. 3432 * 3433 * args->eh The MAC header. It is non-NULL for a layer-2 3434 * packet and NULL for a layer-3 packet. 3435 * 3436 * m | args->m Pointer to the mbuf, as received from the caller. 3437 * It may change if ipfw_chk() does an m_pullup, or if it 3438 * consumes the packet because it calls send_reject(). 3439 * XXX This has to change, so that ipfw_chk() never modifies 3440 * or consumes the buffer. 3441 * ip is simply an alias of the value of m, and it is kept 3442 * in sync with it (the packet is supposed to start with 3443 * the ip header). 3444 */ 3445 struct mbuf *m = args->m; 3446 struct ip *ip = mtod(m, struct ip *); 3447 3448 /* 3449 * oif | args->oif If NULL, ipfw_chk has been called on the 3450 * inbound path (ether_input, ip_input). 3451 * If non-NULL, ipfw_chk has been called on the outbound path 3452 * (ether_output, ip_output). 3453 */ 3454 struct ifnet *oif = args->oif; 3455 3456 struct ip_fw *f = NULL; /* matching rule */ 3457 int retval = IP_FW_PASS; 3458 struct m_tag *mtag; 3459 struct divert_info *divinfo; 3460 struct ipfw_state *s; 3461 3462 /* 3463 * hlen The length of the IPv4 header. 3464 * hlen >0 means we have an IPv4 packet. 3465 */ 3466 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 3467 3468 struct ip_fw_local lc; 3469 3470 /* 3471 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 3472 * MATCH_NONE when checked and not matched (dyn_f = NULL), 3473 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL) 3474 */ 3475 int dyn_dir = MATCH_UNKNOWN; 3476 struct ip_fw *dyn_f = NULL; 3477 int cpuid = mycpuid; 3478 struct ipfw_context *ctx; 3479 3480 ASSERT_NETISR_NCPUS(cpuid); 3481 ctx = ipfw_ctx[cpuid]; 3482 3483 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) 3484 return IP_FW_PASS; /* accept */ 3485 3486 if (args->eh == NULL || /* layer 3 packet */ 3487 (m->m_pkthdr.len >= sizeof(struct ip) && 3488 ntohs(args->eh->ether_type) == ETHERTYPE_IP)) 3489 hlen = ip->ip_hl << 2; 3490 3491 memset(&lc, 0, sizeof(lc)); 3492 3493 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 3494 if (m == NULL) 3495 goto pullup_failed; 3496 3497 if (args->rule) { 3498 /* 3499 * Packet has already been tagged. Look for the next rule 3500 * to restart processing. 3501 * 3502 * If fw_one_pass != 0 then just accept it. 3503 * XXX should not happen here, but optimized out in 3504 * the caller.
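 * args->rule is only non-NULL for packets that re-enter the firewall after an earlier pass, typically returning from dummynet or continued on this cpu for xlat/defrag; in that case processing resumes at that rule instead of rescanning the whole chain.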
3505 */ 3506 if (fw_one_pass && (args->flags & IP_FWARG_F_CONT) == 0) 3507 return IP_FW_PASS; 3508 args->flags &= ~IP_FWARG_F_CONT; 3509 3510 /* This rule is being/has been flushed */ 3511 if (ipfw_flushing) 3512 return IP_FW_DENY; 3513 3514 KASSERT(args->rule->cpuid == cpuid, 3515 ("rule used on cpu%d", cpuid)); 3516 3517 /* This rule was deleted */ 3518 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 3519 return IP_FW_DENY; 3520 3521 if (args->xlat != NULL) { 3522 struct ipfw_xlat *x = args->xlat; 3523 3524 /* This xlat is being deleted. */ 3525 if (x->xlat_invalid) 3526 return IP_FW_DENY; 3527 3528 f = args->rule; 3529 3530 dyn_f = f; 3531 dyn_dir = (args->flags & IP_FWARG_F_XLATFWD) ? 3532 MATCH_FORWARD : MATCH_REVERSE; 3533 3534 if (args->flags & IP_FWARG_F_XLATINS) { 3535 KASSERT(x->xlat_flags & IPFW_STATE_F_XLATSLAVE, 3536 ("not slave %u state", x->xlat_type)); 3537 s = ipfw_state_link(ctx, &x->xlat_st); 3538 if (s != NULL) { 3539 ctx->ipfw_xlate_conflicts++; 3540 if (IPFW_STATE_ISDEAD(s)) { 3541 ipfw_state_remove(ctx, s); 3542 s = ipfw_state_link(ctx, 3543 &x->xlat_st); 3544 } 3545 if (s != NULL) { 3546 if (bootverbose) { 3547 kprintf("ipfw: " 3548 "slave %u state " 3549 "conflicts %u state\n", 3550 x->xlat_type, 3551 s->st_type); 3552 } 3553 ipfw_xlat_invalidate(x); 3554 return IP_FW_DENY; 3555 } 3556 ctx->ipfw_xlate_cresolved++; 3557 } 3558 } else { 3559 ipfw_state_update(&args->f_id, dyn_dir, 3560 lc.tcp, &x->xlat_st); 3561 } 3562 } else { 3563 /* TODO: setup dyn_f, dyn_dir */ 3564 3565 f = args->rule->next_rule; 3566 if (f == NULL) 3567 f = lookup_next_rule(args->rule); 3568 } 3569 } else { 3570 /* 3571 * Find the starting rule. It can be either the first 3572 * one, or the one after divert_rule if asked so. 3573 */ 3574 int skipto; 3575 3576 KKASSERT((args->flags & 3577 (IP_FWARG_F_XLATINS | IP_FWARG_F_CONT)) == 0); 3578 KKASSERT(args->xlat == NULL); 3579 3580 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 3581 if (mtag != NULL) { 3582 divinfo = m_tag_data(mtag); 3583 skipto = divinfo->skipto; 3584 } else { 3585 skipto = 0; 3586 } 3587 3588 f = ctx->ipfw_layer3_chain; 3589 if (args->eh == NULL && skipto != 0) { 3590 /* No skipto during rule flushing */ 3591 if (ipfw_flushing) 3592 return IP_FW_DENY; 3593 3594 if (skipto >= IPFW_DEFAULT_RULE) 3595 return IP_FW_DENY; /* invalid */ 3596 3597 while (f && f->rulenum <= skipto) 3598 f = f->next; 3599 if (f == NULL) /* drop packet */ 3600 return IP_FW_DENY; 3601 } else if (ipfw_flushing) { 3602 /* Rules are being flushed; skip to default rule */ 3603 f = ctx->ipfw_default_rule; 3604 } 3605 } 3606 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 3607 m_tag_delete(m, mtag); 3608 3609 /* 3610 * Now scan the rules, and parse microinstructions for each rule. 3611 */ 3612 for (; f; f = f->next) { 3613 int l, cmdlen; 3614 ipfw_insn *cmd; 3615 int skip_or; /* skip rest of OR block */ 3616 3617 again: 3618 if (ctx->ipfw_set_disable & (1 << f->set)) { 3619 args->xlat = NULL; 3620 continue; 3621 } 3622 3623 if (args->xlat != NULL) { 3624 args->xlat = NULL; 3625 l = f->cmd_len - f->act_ofs; 3626 cmd = ACTION_PTR(f); 3627 } else { 3628 l = f->cmd_len; 3629 cmd = f->cmd; 3630 } 3631 3632 skip_or = 0; 3633 for (; l > 0; l -= cmdlen, cmd += cmdlen) { 3634 int match; 3635 3636 /* 3637 * check_body is a jump target used when we find a 3638 * CHECK_STATE, and need to jump to the body of 3639 * the target rule. 3640 */ 3641 check_body: 3642 cmdlen = F_LEN(cmd); 3643 /* 3644 * An OR block (insn_1 || .. 
|| insn_n) has the 3645 * F_OR bit set in all but the last instruction. 3646 * The first match will set "skip_or", and cause 3647 * the following instructions to be skipped until 3648 * past the one with the F_OR bit clear. 3649 */ 3650 if (skip_or) { /* skip this instruction */ 3651 if ((cmd->len & F_OR) == 0) 3652 skip_or = 0; /* next one is good */ 3653 continue; 3654 } 3655 match = 0; /* set to 1 if we succeed */ 3656 3657 switch (cmd->opcode) { 3658 /* 3659 * The first set of opcodes compares the packet's 3660 * fields with some pattern, setting 'match' if a 3661 * match is found. At the end of the loop there is 3662 * logic to deal with F_NOT and F_OR flags associated 3663 * with the opcode. 3664 */ 3665 case O_NOP: 3666 match = 1; 3667 break; 3668 3669 case O_FORWARD_MAC: 3670 kprintf("ipfw: opcode %d unimplemented\n", 3671 cmd->opcode); 3672 break; 3673 3674 case O_GID: 3675 case O_UID: 3676 /* 3677 * We only check offset == 0 && proto != 0, 3678 * as this ensures that we have an IPv4 3679 * packet with the ports info. 3680 */ 3681 if (lc.offset!=0) 3682 break; 3683 3684 match = ipfw_match_uid(&args->f_id, oif, 3685 cmd->opcode, 3686 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 3687 break; 3688 3689 case O_RECV: 3690 match = iface_match(m->m_pkthdr.rcvif, 3691 (ipfw_insn_if *)cmd); 3692 break; 3693 3694 case O_XMIT: 3695 match = iface_match(oif, (ipfw_insn_if *)cmd); 3696 break; 3697 3698 case O_VIA: 3699 match = iface_match(oif ? oif : 3700 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 3701 break; 3702 3703 case O_MACADDR2: 3704 if (args->eh != NULL) { /* have MAC header */ 3705 uint32_t *want = (uint32_t *) 3706 ((ipfw_insn_mac *)cmd)->addr; 3707 uint32_t *mask = (uint32_t *) 3708 ((ipfw_insn_mac *)cmd)->mask; 3709 uint32_t *hdr = (uint32_t *)args->eh; 3710 3711 match = 3712 (want[0] == (hdr[0] & mask[0]) && 3713 want[1] == (hdr[1] & mask[1]) && 3714 want[2] == (hdr[2] & mask[2])); 3715 } 3716 break; 3717 3718 case O_MAC_TYPE: 3719 if (args->eh != NULL) { 3720 uint16_t t = 3721 ntohs(args->eh->ether_type); 3722 uint16_t *p = 3723 ((ipfw_insn_u16 *)cmd)->ports; 3724 int i; 3725 3726 /* Special vlan handling */ 3727 if (m->m_flags & M_VLANTAG) 3728 t = ETHERTYPE_VLAN; 3729 3730 for (i = cmdlen - 1; !match && i > 0; 3731 i--, p += 2) { 3732 match = 3733 (t >= p[0] && t <= p[1]); 3734 } 3735 } 3736 break; 3737 3738 case O_FRAG: 3739 match = (hlen > 0 && lc.offset != 0); 3740 break; 3741 3742 case O_IPFRAG: 3743 if (hlen > 0) { 3744 uint16_t off; 3745 3746 if (args->eh != NULL) 3747 off = ntohs(ip->ip_off); 3748 else 3749 off = ip->ip_off; 3750 if (off & (IP_MF | IP_OFFMASK)) 3751 match = 1; 3752 } 3753 break; 3754 3755 case O_IN: /* "out" is "not in" */ 3756 match = (oif == NULL); 3757 break; 3758 3759 case O_LAYER2: 3760 match = (args->eh != NULL); 3761 break; 3762 3763 case O_PROTO: 3764 /* 3765 * We do not allow an arg of 0 so the 3766 * check of "proto" only suffices. 
3767 */ 3768 match = (lc.proto == cmd->arg1); 3769 break; 3770 3771 case O_IP_SRC: 3772 match = (hlen > 0 && 3773 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3774 lc.src_ip.s_addr); 3775 break; 3776 3777 case O_IP_SRC_MASK: 3778 match = (hlen > 0 && 3779 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3780 (lc.src_ip.s_addr & 3781 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3782 break; 3783 3784 case O_IP_SRC_ME: 3785 if (hlen > 0) { 3786 struct ifnet *tif; 3787 3788 tif = INADDR_TO_IFP(&lc.src_ip); 3789 match = (tif != NULL); 3790 } 3791 break; 3792 3793 case O_IP_SRC_TABLE: 3794 match = ipfw_table_lookup(ctx, cmd->arg1, 3795 &lc.src_ip); 3796 break; 3797 3798 case O_IP_SRC_IFIP: 3799 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3800 &lc.src_ip); 3801 break; 3802 3803 case O_IP_DST_SET: 3804 case O_IP_SRC_SET: 3805 if (hlen > 0) { 3806 uint32_t *d = (uint32_t *)(cmd + 1); 3807 uint32_t addr = 3808 cmd->opcode == O_IP_DST_SET ? 3809 args->f_id.dst_ip : 3810 args->f_id.src_ip; 3811 3812 if (addr < d[0]) 3813 break; 3814 addr -= d[0]; /* subtract base */ 3815 match = 3816 (addr < cmd->arg1) && 3817 (d[1 + (addr >> 5)] & 3818 (1 << (addr & 0x1f))); 3819 } 3820 break; 3821 3822 case O_IP_DST: 3823 match = (hlen > 0 && 3824 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3825 lc.dst_ip.s_addr); 3826 break; 3827 3828 case O_IP_DST_MASK: 3829 match = (hlen > 0) && 3830 (((ipfw_insn_ip *)cmd)->addr.s_addr == 3831 (lc.dst_ip.s_addr & 3832 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3833 break; 3834 3835 case O_IP_DST_ME: 3836 if (hlen > 0) { 3837 struct ifnet *tif; 3838 3839 tif = INADDR_TO_IFP(&lc.dst_ip); 3840 match = (tif != NULL); 3841 } 3842 break; 3843 3844 case O_IP_DST_TABLE: 3845 match = ipfw_table_lookup(ctx, cmd->arg1, 3846 &lc.dst_ip); 3847 break; 3848 3849 case O_IP_DST_IFIP: 3850 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3851 &lc.dst_ip); 3852 break; 3853 3854 case O_IP_SRCPORT: 3855 case O_IP_DSTPORT: 3856 /* 3857 * offset == 0 && proto != 0 is enough 3858 * to guarantee that we have an IPv4 3859 * packet with port info. 3860 */ 3861 if ((lc.proto==IPPROTO_UDP || 3862 lc.proto==IPPROTO_TCP) 3863 && lc.offset == 0) { 3864 uint16_t x = 3865 (cmd->opcode == O_IP_SRCPORT) ? 
3866 lc.src_port : lc.dst_port; 3867 uint16_t *p = 3868 ((ipfw_insn_u16 *)cmd)->ports; 3869 int i; 3870 3871 for (i = cmdlen - 1; !match && i > 0; 3872 i--, p += 2) { 3873 match = 3874 (x >= p[0] && x <= p[1]); 3875 } 3876 } 3877 break; 3878 3879 case O_ICMPTYPE: 3880 match = (lc.offset == 0 && 3881 lc.proto==IPPROTO_ICMP && 3882 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 3883 break; 3884 3885 case O_IPOPT: 3886 match = (hlen > 0 && ipopts_match(ip, cmd)); 3887 break; 3888 3889 case O_IPVER: 3890 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 3891 break; 3892 3893 case O_IPTTL: 3894 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 3895 break; 3896 3897 case O_IPID: 3898 match = (hlen > 0 && 3899 cmd->arg1 == ntohs(ip->ip_id)); 3900 break; 3901 3902 case O_IPLEN: 3903 match = (hlen > 0 && cmd->arg1 == lc.ip_len); 3904 break; 3905 3906 case O_IPPRECEDENCE: 3907 match = (hlen > 0 && 3908 (cmd->arg1 == (ip->ip_tos & 0xe0))); 3909 break; 3910 3911 case O_IPTOS: 3912 match = (hlen > 0 && 3913 flags_match(cmd, ip->ip_tos)); 3914 break; 3915 3916 case O_TCPFLAGS: 3917 match = (lc.proto == IPPROTO_TCP && 3918 lc.offset == 0 && 3919 flags_match(cmd, 3920 L3HDR(struct tcphdr,ip)->th_flags)); 3921 break; 3922 3923 case O_TCPOPTS: 3924 match = (lc.proto == IPPROTO_TCP && 3925 lc.offset == 0 && tcpopts_match(ip, cmd)); 3926 break; 3927 3928 case O_TCPSEQ: 3929 match = (lc.proto == IPPROTO_TCP && 3930 lc.offset == 0 && 3931 ((ipfw_insn_u32 *)cmd)->d[0] == 3932 L3HDR(struct tcphdr,ip)->th_seq); 3933 break; 3934 3935 case O_TCPACK: 3936 match = (lc.proto == IPPROTO_TCP && 3937 lc.offset == 0 && 3938 ((ipfw_insn_u32 *)cmd)->d[0] == 3939 L3HDR(struct tcphdr,ip)->th_ack); 3940 break; 3941 3942 case O_TCPWIN: 3943 match = (lc.proto == IPPROTO_TCP && 3944 lc.offset == 0 && 3945 cmd->arg1 == 3946 L3HDR(struct tcphdr,ip)->th_win); 3947 break; 3948 3949 case O_ESTAB: 3950 /* reject packets which have SYN only */ 3951 /* XXX should i also check for TH_ACK ? */ 3952 match = (lc.proto == IPPROTO_TCP && 3953 lc.offset == 0 && 3954 (L3HDR(struct tcphdr,ip)->th_flags & 3955 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 3956 break; 3957 3958 case O_LOG: 3959 if (fw_verbose) { 3960 ipfw_log(ctx, f, hlen, args->eh, m, 3961 oif); 3962 } 3963 match = 1; 3964 break; 3965 3966 case O_PROB: 3967 match = (krandom() < 3968 ((ipfw_insn_u32 *)cmd)->d[0]); 3969 break; 3970 3971 /* 3972 * The second set of opcodes represents 'actions', 3973 * i.e. the terminal part of a rule once the packet 3974 * matches all previous patterns. 3975 * Typically there is only one action for each rule, 3976 * and the opcode is stored at the end of the rule 3977 * (but there are exceptions -- see below). 3978 * 3979 * In general, here we set retval and terminate the 3980 * outer loop (would be a 'break 3' in some language, 3981 * but we need to do a 'goto done'). 3982 * 3983 * Exceptions: 3984 * O_COUNT and O_SKIPTO actions: 3985 * instead of terminating, we jump to the next rule 3986 * ('goto next_rule', equivalent to a 'break 2'), 3987 * or to the SKIPTO target ('goto again' after 3988 * having set f, cmd and l), respectively. 3989 * 3990 * O_LIMIT and O_KEEP_STATE, O_REDIRECT: these opcodes 3991 * are not real 'actions', and are stored right 3992 * before the 'action' part of the rule. 3993 * These opcodes try to install an entry in the 3994 * state tables; if successful, we continue with 3995 * the next opcode (match=1; break;), otherwise 3996 * the packet must be dropped ('goto done' after 3997 * setting retval). 
If static rules are changed 3998 * during the state installation, the packet will 3999 * be dropped and rule's stats will not be updated 4000 * ('return IP_FW_DENY'). 4001 * 4002 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 4003 * cause a lookup of the state table, and a jump 4004 * to the 'action' part of the parent rule 4005 * ('goto check_body') if an entry is found, or 4006 * (CHECK_STATE only) a jump to the next rule if 4007 * the entry is not found ('goto next_rule'). 4008 * The result of the lookup is cached so that 4009 * further instances of these opcodes are 4010 * effectively NOPs. If static rules are changed 4011 * during the state lookup, the packet will 4012 * be dropped and rule's stats will not be updated 4013 * ('return IP_FW_DENY'). 4014 */ 4015 case O_REDIRECT: 4016 if (f->cross_rules == NULL) { 4017 /* 4018 * This rule was not completely setup; 4019 * move on to the next rule. 4020 */ 4021 goto next_rule; 4022 } 4023 /* 4024 * Apply redirect only on input path and 4025 * only to non-fragment TCP segments or 4026 * UDP datagrams. 4027 * 4028 * Does _not_ work with layer2 filtering. 4029 */ 4030 if (oif != NULL || args->eh != NULL || 4031 (ip->ip_off & (IP_MF | IP_OFFMASK)) || 4032 (lc.proto != IPPROTO_TCP && 4033 lc.proto != IPPROTO_UDP)) 4034 break; 4035 /* FALL THROUGH */ 4036 case O_LIMIT: 4037 case O_KEEP_STATE: 4038 if (hlen == 0) 4039 break; 4040 s = ipfw_state_install(ctx, f, 4041 (ipfw_insn_limit *)cmd, args, lc.tcp); 4042 if (s == NULL) { 4043 retval = IP_FW_DENY; 4044 goto done; /* error/limit violation */ 4045 } 4046 s->st_pcnt++; 4047 s->st_bcnt += lc.ip_len; 4048 4049 if (s->st_type == O_REDIRECT) { 4050 struct in_addr oaddr; 4051 uint16_t oport; 4052 struct ipfw_xlat *slave_x, *x; 4053 struct ipfw_state *dup; 4054 4055 x = (struct ipfw_xlat *)s; 4056 ipfw_xlate(x, m, &oaddr, &oport); 4057 m = ipfw_rehashm(m, hlen, args, &lc, 4058 &ip); 4059 if (m == NULL) { 4060 ipfw_state_del(ctx, s); 4061 goto pullup_failed; 4062 } 4063 4064 cpuid = netisr_hashcpu( 4065 m->m_pkthdr.hash); 4066 4067 slave_x = (struct ipfw_xlat *) 4068 ipfw_state_alloc(ctx, &args->f_id, 4069 O_REDIRECT, f->cross_rules[cpuid], 4070 lc.tcp); 4071 if (slave_x == NULL) { 4072 ipfw_state_del(ctx, s); 4073 retval = IP_FW_DENY; 4074 goto done; 4075 } 4076 slave_x->xlat_addr = oaddr.s_addr; 4077 slave_x->xlat_port = oport; 4078 slave_x->xlat_dir = MATCH_REVERSE; 4079 slave_x->xlat_flags |= 4080 IPFW_STATE_F_XLATSRC | 4081 IPFW_STATE_F_XLATSLAVE; 4082 4083 slave_x->xlat_pair = x; 4084 slave_x->xlat_pcpu = mycpuid; 4085 x->xlat_pair = slave_x; 4086 x->xlat_pcpu = cpuid; 4087 4088 ctx->ipfw_xlated++; 4089 if (cpuid != mycpuid) { 4090 ctx->ipfw_xlate_split++; 4091 ipfw_xlate_redispatch( 4092 m, cpuid, x, 4093 IPFW_XLATE_INSERT | 4094 IPFW_XLATE_FORWARD); 4095 args->m = NULL; 4096 return (IP_FW_REDISPATCH); 4097 } 4098 4099 dup = ipfw_state_link(ctx, 4100 &slave_x->xlat_st); 4101 if (dup != NULL) { 4102 ctx->ipfw_xlate_conflicts++; 4103 if (IPFW_STATE_ISDEAD(dup)) { 4104 ipfw_state_remove(ctx, 4105 dup); 4106 dup = ipfw_state_link( 4107 ctx, &slave_x->xlat_st); 4108 } 4109 if (dup != NULL) { 4110 if (bootverbose) { 4111 kprintf("ipfw: " 4112 "slave %u state " 4113 "conflicts " 4114 "%u state\n", 4115 x->xlat_type, 4116 s->st_type); 4117 } 4118 ipfw_state_del(ctx, s); 4119 return (IP_FW_DENY); 4120 } 4121 ctx->ipfw_xlate_cresolved++; 4122 } 4123 } 4124 match = 1; 4125 break; 4126 4127 case O_PROBE_STATE: 4128 case O_CHECK_STATE: 4129 /* 4130 * States are checked at the first keep-state 4131 *
check-state occurrence, with the result 4132 * being stored in dyn_dir. The compiler 4133 * introduces a PROBE_STATE instruction for 4134 * us when we have a KEEP_STATE/LIMIT/RDR 4135 * (because PROBE_STATE needs to be run first). 4136 */ 4137 s = NULL; 4138 if (dyn_dir == MATCH_UNKNOWN) { 4139 s = ipfw_state_lookup(ctx, 4140 &args->f_id, &dyn_dir, lc.tcp); 4141 } 4142 if (s == NULL || 4143 (s->st_type == O_REDIRECT && 4144 (args->eh != NULL || 4145 (ip->ip_off & (IP_MF | IP_OFFMASK)) || 4146 (lc.proto != IPPROTO_TCP && 4147 lc.proto != IPPROTO_UDP)))) { 4148 /* 4149 * State not found. If CHECK_STATE, 4150 * skip to next rule, if PROBE_STATE 4151 * just ignore and continue with next 4152 * opcode. 4153 */ 4154 if (cmd->opcode == O_CHECK_STATE) 4155 goto next_rule; 4156 match = 1; 4157 break; 4158 } 4159 4160 s->st_pcnt++; 4161 s->st_bcnt += lc.ip_len; 4162 4163 if (s->st_type == O_REDIRECT) { 4164 struct ipfw_xlat *x = 4165 (struct ipfw_xlat *)s; 4166 4167 if (oif != NULL && 4168 x->xlat_ifp == NULL) { 4169 KASSERT(x->xlat_flags & 4170 IPFW_STATE_F_XLATSLAVE, 4171 ("master rdr state " 4172 "missing ifp")); 4173 x->xlat_ifp = oif; 4174 } else if ( 4175 (oif != NULL && x->xlat_ifp!=oif) || 4176 (oif == NULL && 4177 x->xlat_ifp!=m->m_pkthdr.rcvif)) { 4178 retval = IP_FW_DENY; 4179 goto done; 4180 } 4181 if (x->xlat_dir != dyn_dir) 4182 goto skip_xlate; 4183 4184 ipfw_xlate(x, m, NULL, NULL); 4185 m = ipfw_rehashm(m, hlen, args, &lc, 4186 &ip); 4187 if (m == NULL) 4188 goto pullup_failed; 4189 4190 cpuid = netisr_hashcpu( 4191 m->m_pkthdr.hash); 4192 if (cpuid != mycpuid) { 4193 uint32_t xlate = 0; 4194 4195 if (oif != NULL) { 4196 xlate |= 4197 IPFW_XLATE_OUTPUT; 4198 } 4199 if (dyn_dir == MATCH_FORWARD) { 4200 xlate |= 4201 IPFW_XLATE_FORWARD; 4202 } 4203 ipfw_xlate_redispatch(m, cpuid, 4204 x, xlate); 4205 args->m = NULL; 4206 return (IP_FW_REDISPATCH); 4207 } 4208 4209 KKASSERT(x->xlat_pcpu == mycpuid); 4210 ipfw_state_update(&args->f_id, dyn_dir, 4211 lc.tcp, &x->xlat_pair->xlat_st); 4212 } 4213 skip_xlate: 4214 /* 4215 * Found a rule from a state; jump to the 4216 * 'action' part of the rule. 4217 */ 4218 f = s->st_rule; 4219 KKASSERT(f->cpuid == mycpuid); 4220 4221 cmd = ACTION_PTR(f); 4222 l = f->cmd_len - f->act_ofs; 4223 dyn_f = f; 4224 goto check_body; 4225 4226 case O_ACCEPT: 4227 retval = IP_FW_PASS; /* accept */ 4228 goto done; 4229 4230 case O_DEFRAG: 4231 if (f->cross_rules == NULL) { 4232 /* 4233 * This rule was not completely setup; 4234 * move on to the next rule. 4235 */ 4236 goto next_rule; 4237 } 4238 4239 /* 4240 * Don't defrag for l2 packets, output packets 4241 * or non-fragments. 
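 * A packet counts as a fragment here when IP_MF is set or the
 * fragment offset is non-zero, i.e. when
 * (ip->ip_off & (IP_MF | IP_OFFMASK)) != 0; only such packets
 * are handed to ip_reass() below.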
4242 */ 4243 if (oif != NULL || args->eh != NULL || 4244 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0) 4245 goto next_rule; 4246 4247 ctx->ipfw_frags++; 4248 m = ip_reass(m); 4249 args->m = m; 4250 if (m == NULL) { 4251 retval = IP_FW_PASS; 4252 goto done; 4253 } 4254 ctx->ipfw_defraged++; 4255 KASSERT((m->m_flags & M_HASH) == 0, 4256 ("hash not cleared")); 4257 4258 /* Update statistics */ 4259 f->pcnt++; 4260 f->bcnt += lc.ip_len; 4261 f->timestamp = time_second; 4262 4263 ip = mtod(m, struct ip *); 4264 hlen = ip->ip_hl << 2; 4265 ip->ip_len += hlen; 4266 4267 ip->ip_len = htons(ip->ip_len); 4268 ip->ip_off = htons(ip->ip_off); 4269 4270 ip_hashfn(&m, 0); 4271 args->m = m; 4272 if (m == NULL) 4273 goto pullup_failed; 4274 4275 KASSERT(m->m_flags & M_HASH, ("no hash")); 4276 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 4277 if (cpuid != mycpuid) { 4278 /* 4279 * NOTE: 4280 * ip_len/ip_off are in network byte 4281 * order. 4282 */ 4283 ctx->ipfw_defrag_remote++; 4284 ipfw_defrag_redispatch(m, cpuid, f); 4285 args->m = NULL; 4286 return (IP_FW_REDISPATCH); 4287 } 4288 4289 /* 'm' might be changed by ip_hashfn(). */ 4290 ip = mtod(m, struct ip *); 4291 ip->ip_len = ntohs(ip->ip_len); 4292 ip->ip_off = ntohs(ip->ip_off); 4293 4294 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 4295 if (m == NULL) 4296 goto pullup_failed; 4297 4298 /* Move on. */ 4299 goto next_rule; 4300 4301 case O_PIPE: 4302 case O_QUEUE: 4303 args->rule = f; /* report matching rule */ 4304 args->cookie = cmd->arg1; 4305 retval = IP_FW_DUMMYNET; 4306 goto done; 4307 4308 case O_DIVERT: 4309 case O_TEE: 4310 if (args->eh) /* not on layer 2 */ 4311 break; 4312 4313 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 4314 sizeof(*divinfo), M_INTWAIT | M_NULLOK); 4315 if (mtag == NULL) { 4316 retval = IP_FW_DENY; 4317 goto done; 4318 } 4319 divinfo = m_tag_data(mtag); 4320 4321 divinfo->skipto = f->rulenum; 4322 divinfo->port = cmd->arg1; 4323 divinfo->tee = (cmd->opcode == O_TEE); 4324 m_tag_prepend(m, mtag); 4325 4326 args->cookie = cmd->arg1; 4327 retval = (cmd->opcode == O_DIVERT) ? 4328 IP_FW_DIVERT : IP_FW_TEE; 4329 goto done; 4330 4331 case O_COUNT: 4332 case O_SKIPTO: 4333 f->pcnt++; /* update stats */ 4334 f->bcnt += lc.ip_len; 4335 f->timestamp = time_second; 4336 if (cmd->opcode == O_COUNT) 4337 goto next_rule; 4338 /* handle skipto */ 4339 if (f->next_rule == NULL) 4340 lookup_next_rule(f); 4341 f = f->next_rule; 4342 goto again; 4343 4344 case O_REJECT: 4345 /* 4346 * Drop the packet and send a reject notice 4347 * if the packet is not ICMP (or is an ICMP 4348 * query), and it is not multicast/broadcast. 
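 * The reject code is taken from cmd->arg1 and passed to
 * send_reject() along with the fragment offset and packet
 * length kept in the local context (lc).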
4349 */ 4350 if (hlen > 0 && 4351 (lc.proto != IPPROTO_ICMP || 4352 is_icmp_query(ip)) && 4353 !(m->m_flags & (M_BCAST|M_MCAST)) && 4354 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) { 4355 send_reject(args, cmd->arg1, 4356 lc.offset, lc.ip_len); 4357 retval = IP_FW_DENY; 4358 goto done; 4359 } 4360 /* FALLTHROUGH */ 4361 case O_DENY: 4362 retval = IP_FW_DENY; 4363 goto done; 4364 4365 case O_FORWARD_IP: 4366 if (args->eh) /* not valid on layer2 pkts */ 4367 break; 4368 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 4369 struct sockaddr_in *sin; 4370 4371 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 4372 sizeof(*sin), M_INTWAIT | M_NULLOK); 4373 if (mtag == NULL) { 4374 retval = IP_FW_DENY; 4375 goto done; 4376 } 4377 sin = m_tag_data(mtag); 4378 4379 /* Structure copy */ 4380 *sin = ((ipfw_insn_sa *)cmd)->sa; 4381 4382 m_tag_prepend(m, mtag); 4383 m->m_pkthdr.fw_flags |= 4384 IPFORWARD_MBUF_TAGGED; 4385 m->m_pkthdr.fw_flags &= 4386 ~BRIDGE_MBUF_TAGGED; 4387 } 4388 retval = IP_FW_PASS; 4389 goto done; 4390 4391 default: 4392 panic("-- unknown opcode %d", cmd->opcode); 4393 } /* end of switch() on opcodes */ 4394 4395 if (cmd->len & F_NOT) 4396 match = !match; 4397 4398 if (match) { 4399 if (cmd->len & F_OR) 4400 skip_or = 1; 4401 } else { 4402 if (!(cmd->len & F_OR)) /* not an OR block, */ 4403 break; /* try next rule */ 4404 } 4405 4406 } /* end of inner for, scan opcodes */ 4407 4408 next_rule:; /* try next rule */ 4409 4410 } /* end of outer for, scan rules */ 4411 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 4412 return IP_FW_DENY; 4413 4414 done: 4415 /* Update statistics */ 4416 f->pcnt++; 4417 f->bcnt += lc.ip_len; 4418 f->timestamp = time_second; 4419 return retval; 4420 4421 pullup_failed: 4422 if (fw_verbose) 4423 kprintf("pullup failed\n"); 4424 return IP_FW_DENY; 4425 } 4426 4427 static struct mbuf * 4428 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 4429 { 4430 struct m_tag *mtag; 4431 struct dn_pkt *pkt; 4432 ipfw_insn *cmd; 4433 const struct ipfw_flow_id *id; 4434 struct dn_flow_id *fid; 4435 4436 M_ASSERTPKTHDR(m); 4437 4438 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), 4439 M_INTWAIT | M_NULLOK); 4440 if (mtag == NULL) { 4441 m_freem(m); 4442 return (NULL); 4443 } 4444 m_tag_prepend(m, mtag); 4445 4446 pkt = m_tag_data(mtag); 4447 bzero(pkt, sizeof(*pkt)); 4448 4449 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 4450 if (cmd->opcode == O_LOG) 4451 cmd += F_LEN(cmd); 4452 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 4453 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode)); 4454 4455 pkt->dn_m = m; 4456 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 4457 pkt->ifp = fwa->oif; 4458 pkt->pipe_nr = pipe_nr; 4459 4460 pkt->cpuid = mycpuid; 4461 pkt->msgport = netisr_curport(); 4462 4463 id = &fwa->f_id; 4464 fid = &pkt->id; 4465 fid->fid_dst_ip = id->dst_ip; 4466 fid->fid_src_ip = id->src_ip; 4467 fid->fid_dst_port = id->dst_port; 4468 fid->fid_src_port = id->src_port; 4469 fid->fid_proto = id->proto; 4470 fid->fid_flags = id->flags; 4471 4472 ipfw_ref_rule(fwa->rule); 4473 pkt->dn_priv = fwa->rule; 4474 pkt->dn_unref_priv = ipfw_unref_rule; 4475 4476 if (cmd->opcode == O_PIPE) 4477 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 4478 4479 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 4480 return (m); 4481 } 4482 4483 /* 4484 * When a rule is added/deleted, clear the next_rule pointers in all rules. 4485 * These will be reconstructed on the fly as packets are matched. 
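 * For example, the first packet that hits an O_SKIPTO rule pays
 * for lookup_next_rule() to locate the target rule; the result
 * is cached in f->next_rule until the ruleset changes and this
 * function clears the cache again.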
4486 */ 4487 static void 4488 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 4489 { 4490 struct ip_fw *rule; 4491 4492 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 4493 rule->next_rule = NULL; 4494 } 4495 4496 static void 4497 ipfw_inc_static_count(struct ip_fw *rule) 4498 { 4499 /* Static rule's counts are updated only on CPU0 */ 4500 KKASSERT(mycpuid == 0); 4501 4502 static_count++; 4503 static_ioc_len += IOC_RULESIZE(rule); 4504 } 4505 4506 static void 4507 ipfw_dec_static_count(struct ip_fw *rule) 4508 { 4509 int l = IOC_RULESIZE(rule); 4510 4511 /* Static rule's counts are updated only on CPU0 */ 4512 KKASSERT(mycpuid == 0); 4513 4514 KASSERT(static_count > 0, ("invalid static count %u", static_count)); 4515 static_count--; 4516 4517 KASSERT(static_ioc_len >= l, 4518 ("invalid static len %u", static_ioc_len)); 4519 static_ioc_len -= l; 4520 } 4521 4522 static void 4523 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 4524 { 4525 if (fwmsg->sibling != NULL) { 4526 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 4527 fwmsg->sibling->sibling = rule; 4528 } 4529 fwmsg->sibling = rule; 4530 } 4531 4532 static struct ip_fw * 4533 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4534 { 4535 struct ip_fw *rule; 4536 4537 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 4538 4539 rule->act_ofs = ioc_rule->act_ofs; 4540 rule->cmd_len = ioc_rule->cmd_len; 4541 rule->rulenum = ioc_rule->rulenum; 4542 rule->set = ioc_rule->set; 4543 rule->usr_flags = ioc_rule->usr_flags; 4544 4545 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 4546 4547 rule->refcnt = 1; 4548 rule->cpuid = mycpuid; 4549 rule->rule_flags = rule_flags; 4550 4551 return rule; 4552 } 4553 4554 static void 4555 ipfw_add_rule_dispatch(netmsg_t nmsg) 4556 { 4557 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4558 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4559 struct ip_fw *rule; 4560 4561 ASSERT_NETISR_NCPUS(mycpuid); 4562 4563 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags); 4564 4565 /* 4566 * Insert rule into the pre-determined position 4567 */ 4568 if (fwmsg->prev_rule != NULL) { 4569 struct ip_fw *prev, *next; 4570 4571 prev = fwmsg->prev_rule; 4572 KKASSERT(prev->cpuid == mycpuid); 4573 4574 next = fwmsg->next_rule; 4575 KKASSERT(next->cpuid == mycpuid); 4576 4577 rule->next = next; 4578 prev->next = rule; 4579 4580 /* 4581 * Move to the position on the next CPU 4582 * before the msg is forwarded. 4583 */ 4584 fwmsg->prev_rule = prev->sibling; 4585 fwmsg->next_rule = next->sibling; 4586 } else { 4587 KKASSERT(fwmsg->next_rule == NULL); 4588 rule->next = ctx->ipfw_layer3_chain; 4589 ctx->ipfw_layer3_chain = rule; 4590 } 4591 4592 /* Link rule CPU sibling */ 4593 ipfw_link_sibling(fwmsg, rule); 4594 4595 ipfw_flush_rule_ptrs(ctx); 4596 4597 if (mycpuid == 0) { 4598 /* Statistics only need to be updated once */ 4599 ipfw_inc_static_count(rule); 4600 4601 /* Return the rule on CPU0 */ 4602 nmsg->lmsg.u.ms_resultp = rule; 4603 } 4604 4605 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) 4606 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp; 4607 4608 if (fwmsg->cross_rules != NULL) { 4609 /* Save rules for later use. 
*/ 4610 fwmsg->cross_rules[mycpuid] = rule; 4611 } 4612 4613 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4614 } 4615 4616 static void 4617 ipfw_crossref_rule_dispatch(netmsg_t nmsg) 4618 { 4619 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4620 struct ip_fw *rule = fwmsg->sibling; 4621 int sz = sizeof(struct ip_fw *) * netisr_ncpus; 4622 4623 ASSERT_NETISR_NCPUS(mycpuid); 4624 KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF, 4625 ("not crossref rule")); 4626 4627 rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK); 4628 memcpy(rule->cross_rules, fwmsg->cross_rules, sz); 4629 4630 fwmsg->sibling = rule->sibling; 4631 netisr_forwardmsg(&fwmsg->base, mycpuid + 1); 4632 } 4633 4634 /* 4635 * Add a new rule to the list. Copy the rule into a malloc'ed area, 4636 * then possibly create a rule number and add the rule to the list. 4637 * Update the rule_number in the input struct so the caller knows 4638 * it as well. 4639 */ 4640 static void 4641 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 4642 { 4643 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4644 struct netmsg_ipfw fwmsg; 4645 struct ip_fw *f, *prev, *rule; 4646 4647 ASSERT_NETISR0; 4648 4649 /* 4650 * If rulenum is 0, find highest numbered rule before the 4651 * default rule, and add rule number incremental step. 4652 */ 4653 if (ioc_rule->rulenum == 0) { 4654 int step = autoinc_step; 4655 4656 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN && 4657 step <= IPFW_AUTOINC_STEP_MAX); 4658 4659 /* 4660 * Locate the highest numbered rule before default 4661 */ 4662 for (f = ctx->ipfw_layer3_chain; f; f = f->next) { 4663 if (f->rulenum == IPFW_DEFAULT_RULE) 4664 break; 4665 ioc_rule->rulenum = f->rulenum; 4666 } 4667 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step) 4668 ioc_rule->rulenum += step; 4669 } 4670 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE && 4671 ioc_rule->rulenum != 0, 4672 ("invalid rule num %d", ioc_rule->rulenum)); 4673 4674 /* 4675 * Now find the right place for the new rule in the sorted list. 4676 */ 4677 for (prev = NULL, f = ctx->ipfw_layer3_chain; f; 4678 prev = f, f = f->next) { 4679 if (f->rulenum > ioc_rule->rulenum) { 4680 /* Found the location */ 4681 break; 4682 } 4683 } 4684 KASSERT(f != NULL, ("no default rule?!")); 4685 4686 /* 4687 * Duplicate the rule onto each CPU. 4688 * The rule duplicated on CPU0 will be returned. 4689 */ 4690 bzero(&fwmsg, sizeof(fwmsg)); 4691 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4692 ipfw_add_rule_dispatch); 4693 fwmsg.ioc_rule = ioc_rule; 4694 fwmsg.prev_rule = prev; 4695 fwmsg.next_rule = prev == NULL ? 
NULL : f; 4696 fwmsg.rule_flags = rule_flags; 4697 if (rule_flags & IPFW_RULE_F_CROSSREF) { 4698 fwmsg.cross_rules = kmalloc( 4699 sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP, 4700 M_WAITOK | M_ZERO); 4701 } 4702 4703 netisr_domsg_global(&fwmsg.base); 4704 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); 4705 4706 rule = fwmsg.base.lmsg.u.ms_resultp; 4707 KKASSERT(rule != NULL && rule->cpuid == mycpuid); 4708 4709 if (fwmsg.cross_rules != NULL) { 4710 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, 4711 MSGF_PRIORITY, ipfw_crossref_rule_dispatch); 4712 fwmsg.sibling = rule; 4713 netisr_domsg_global(&fwmsg.base); 4714 KKASSERT(fwmsg.sibling == NULL); 4715 4716 kfree(fwmsg.cross_rules, M_TEMP); 4717 4718 #ifdef KLD_MODULE 4719 atomic_add_int(&ipfw_gd.ipfw_refcnt, 1); 4720 #endif 4721 } 4722 4723 DPRINTF("++ installed rule %d, static count now %d\n", 4724 rule->rulenum, static_count); 4725 } 4726 4727 /* 4728 * Free storage associated with a static rule (including derived 4729 * states/tracks). 4730 * The caller is in charge of clearing rule pointers to avoid 4731 * dangling pointers. 4732 * @return a pointer to the next entry. 4733 * Arguments are not checked, so they better be correct. 4734 */ 4735 static struct ip_fw * 4736 ipfw_delete_rule(struct ipfw_context *ctx, 4737 struct ip_fw *prev, struct ip_fw *rule) 4738 { 4739 struct ip_fw *n; 4740 4741 n = rule->next; 4742 if (prev == NULL) 4743 ctx->ipfw_layer3_chain = n; 4744 else 4745 prev->next = n; 4746 4747 /* Mark the rule as invalid */ 4748 rule->rule_flags |= IPFW_RULE_F_INVALID; 4749 rule->next_rule = NULL; 4750 rule->sibling = NULL; 4751 #ifdef foo 4752 /* Don't reset cpuid here; keep various assertion working */ 4753 rule->cpuid = -1; 4754 #endif 4755 4756 /* Statistics only need to be updated once */ 4757 if (mycpuid == 0) 4758 ipfw_dec_static_count(rule); 4759 4760 if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) { 4761 /* Try to free this rule */ 4762 ipfw_free_rule(rule); 4763 } else { 4764 /* TODO: check staging area. */ 4765 if (mycpuid == 0) { 4766 rule->next = ipfw_gd.ipfw_crossref_free; 4767 ipfw_gd.ipfw_crossref_free = rule; 4768 } 4769 } 4770 4771 /* Return the next rule */ 4772 return n; 4773 } 4774 4775 static void 4776 ipfw_flush_dispatch(netmsg_t nmsg) 4777 { 4778 int kill_default = nmsg->lmsg.u.ms_result; 4779 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4780 struct ip_fw *rule; 4781 4782 ASSERT_NETISR_NCPUS(mycpuid); 4783 4784 /* 4785 * Flush states. 4786 */ 4787 ipfw_state_flush(ctx, NULL); 4788 KASSERT(ctx->ipfw_state_cnt == 0, 4789 ("%d pcpu states remain", ctx->ipfw_state_cnt)); 4790 ctx->ipfw_state_loosecnt = 0; 4791 ctx->ipfw_state_lastexp = 0; 4792 4793 /* 4794 * Flush tracks. 4795 */ 4796 ipfw_track_flush(ctx, NULL); 4797 ctx->ipfw_track_lastexp = 0; 4798 if (ctx->ipfw_trkcnt_spare != NULL) { 4799 kfree(ctx->ipfw_trkcnt_spare, M_IPFW); 4800 ctx->ipfw_trkcnt_spare = NULL; 4801 } 4802 4803 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ 4804 4805 while ((rule = ctx->ipfw_layer3_chain) != NULL && 4806 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) 4807 ipfw_delete_rule(ctx, NULL, rule); 4808 4809 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4810 } 4811 4812 /* 4813 * Deletes all rules from a chain (including the default rule 4814 * if the second argument is set). 
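 * Here kill_default is that argument: the IP_FW_FLUSH ioctl
 * calls ipfw_flush(0) so the default rule survives, while a
 * non-zero value removes everything, as the INVARIANTS checks
 * at the end of ipfw_flush() assume.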
4815 */ 4816 static void 4817 ipfw_flush(int kill_default) 4818 { 4819 struct netmsg_base nmsg; 4820 #ifdef INVARIANTS 4821 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4822 int state_cnt; 4823 #endif 4824 4825 ASSERT_NETISR0; 4826 4827 /* 4828 * If 'kill_default' then caller has done the necessary 4829 * msgport syncing; unnecessary to do it again. 4830 */ 4831 if (!kill_default) { 4832 /* 4833 * Let ipfw_chk() know the rules are going to 4834 * be flushed, so it could jump directly to 4835 * the default rule. 4836 */ 4837 ipfw_flushing = 1; 4838 /* XXX use priority sync */ 4839 netmsg_service_sync(); 4840 } 4841 4842 /* 4843 * Press the 'flush' button 4844 */ 4845 bzero(&nmsg, sizeof(nmsg)); 4846 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4847 ipfw_flush_dispatch); 4848 nmsg.lmsg.u.ms_result = kill_default; 4849 netisr_domsg_global(&nmsg); 4850 ipfw_gd.ipfw_state_loosecnt = 0; 4851 ipfw_gd.ipfw_state_globexp = 0; 4852 ipfw_gd.ipfw_track_globexp = 0; 4853 4854 #ifdef INVARIANTS 4855 state_cnt = ipfw_state_cntcoll(); 4856 KASSERT(state_cnt == 0, ("%d states remain", state_cnt)); 4857 4858 KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0, 4859 ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt)); 4860 4861 if (kill_default) { 4862 KASSERT(static_count == 0, 4863 ("%u static rules remain", static_count)); 4864 KASSERT(static_ioc_len == 0, 4865 ("%u bytes of static rules remain", static_ioc_len)); 4866 } else { 4867 KASSERT(static_count == 1, 4868 ("%u static rules remain", static_count)); 4869 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 4870 ("%u bytes of static rules remain, should be %lu", 4871 static_ioc_len, 4872 (u_long)IOC_RULESIZE(ctx->ipfw_default_rule))); 4873 } 4874 #endif 4875 4876 /* Flush is done */ 4877 ipfw_flushing = 0; 4878 } 4879 4880 static void 4881 ipfw_alt_delete_rule_dispatch(netmsg_t nmsg) 4882 { 4883 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4884 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4885 struct ip_fw *rule, *prev; 4886 4887 ASSERT_NETISR_NCPUS(mycpuid); 4888 4889 rule = dmsg->start_rule; 4890 KKASSERT(rule->cpuid == mycpuid); 4891 dmsg->start_rule = rule->sibling; 4892 4893 prev = dmsg->prev_rule; 4894 if (prev != NULL) { 4895 KKASSERT(prev->cpuid == mycpuid); 4896 4897 /* 4898 * Move to the position on the next CPU 4899 * before the msg is forwarded. 4900 */ 4901 dmsg->prev_rule = prev->sibling; 4902 } 4903 4904 /* 4905 * flush pointers outside the loop, then delete all matching 4906 * rules. 'prev' remains the same throughout the cycle. 4907 */ 4908 ipfw_flush_rule_ptrs(ctx); 4909 while (rule && rule->rulenum == dmsg->rulenum) { 4910 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4911 /* Flush states generated by this rule. */ 4912 ipfw_state_flush(ctx, rule); 4913 } 4914 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4915 /* Flush tracks generated by this rule. 
*/ 4916 ipfw_track_flush(ctx, rule); 4917 } 4918 rule = ipfw_delete_rule(ctx, prev, rule); 4919 } 4920 4921 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4922 } 4923 4924 static int 4925 ipfw_alt_delete_rule(uint16_t rulenum) 4926 { 4927 struct ip_fw *prev, *rule; 4928 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4929 struct netmsg_del dmsg; 4930 4931 ASSERT_NETISR0; 4932 4933 /* 4934 * Locate first rule to delete 4935 */ 4936 for (prev = NULL, rule = ctx->ipfw_layer3_chain; 4937 rule && rule->rulenum < rulenum; 4938 prev = rule, rule = rule->next) 4939 ; /* EMPTY */ 4940 if (rule->rulenum != rulenum) 4941 return EINVAL; 4942 4943 /* 4944 * Get rid of the rule duplications on all CPUs 4945 */ 4946 bzero(&dmsg, sizeof(dmsg)); 4947 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4948 ipfw_alt_delete_rule_dispatch); 4949 dmsg.prev_rule = prev; 4950 dmsg.start_rule = rule; 4951 dmsg.rulenum = rulenum; 4952 4953 netisr_domsg_global(&dmsg.base); 4954 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL); 4955 return 0; 4956 } 4957 4958 static void 4959 ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg) 4960 { 4961 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4962 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4963 struct ip_fw *prev, *rule; 4964 #ifdef INVARIANTS 4965 int del = 0; 4966 #endif 4967 4968 ASSERT_NETISR_NCPUS(mycpuid); 4969 4970 ipfw_flush_rule_ptrs(ctx); 4971 4972 prev = NULL; 4973 rule = ctx->ipfw_layer3_chain; 4974 while (rule != NULL) { 4975 if (rule->set == dmsg->from_set) { 4976 if (rule->rule_flags & IPFW_RULE_F_GENSTATE) { 4977 /* Flush states generated by this rule. */ 4978 ipfw_state_flush(ctx, rule); 4979 } 4980 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) { 4981 /* Flush tracks generated by this rule. */ 4982 ipfw_track_flush(ctx, rule); 4983 } 4984 rule = ipfw_delete_rule(ctx, prev, rule); 4985 #ifdef INVARIANTS 4986 del = 1; 4987 #endif 4988 } else { 4989 prev = rule; 4990 rule = rule->next; 4991 } 4992 } 4993 KASSERT(del, ("no match set?!")); 4994 4995 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4996 } 4997 4998 static int 4999 ipfw_alt_delete_ruleset(uint8_t set) 5000 { 5001 struct netmsg_del dmsg; 5002 int del; 5003 struct ip_fw *rule; 5004 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5005 5006 ASSERT_NETISR0; 5007 5008 /* 5009 * Check whether the 'set' exists. If it exists, 5010 * then check whether any rules within the set will 5011 * try to create states. 5012 */ 5013 del = 0; 5014 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5015 if (rule->set == set) 5016 del = 1; 5017 } 5018 if (!del) 5019 return 0; /* XXX EINVAL? */ 5020 5021 /* 5022 * Delete this set 5023 */ 5024 bzero(&dmsg, sizeof(dmsg)); 5025 netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5026 ipfw_alt_delete_ruleset_dispatch); 5027 dmsg.from_set = set; 5028 netisr_domsg_global(&dmsg.base); 5029 5030 return 0; 5031 } 5032 5033 static void 5034 ipfw_alt_move_rule_dispatch(netmsg_t nmsg) 5035 { 5036 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5037 struct ip_fw *rule; 5038 5039 ASSERT_NETISR_NCPUS(mycpuid); 5040 5041 rule = dmsg->start_rule; 5042 KKASSERT(rule->cpuid == mycpuid); 5043 5044 /* 5045 * Move to the position on the next CPU 5046 * before the msg is forwarded. 
5047 */ 5048 dmsg->start_rule = rule->sibling; 5049 5050 while (rule && rule->rulenum <= dmsg->rulenum) { 5051 if (rule->rulenum == dmsg->rulenum) 5052 rule->set = dmsg->to_set; 5053 rule = rule->next; 5054 } 5055 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5056 } 5057 5058 static int 5059 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) 5060 { 5061 struct netmsg_del dmsg; 5062 struct netmsg_base *nmsg; 5063 struct ip_fw *rule; 5064 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5065 5066 ASSERT_NETISR0; 5067 5068 /* 5069 * Locate first rule to move 5070 */ 5071 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; 5072 rule = rule->next) { 5073 if (rule->rulenum == rulenum && rule->set != set) 5074 break; 5075 } 5076 if (rule == NULL || rule->rulenum > rulenum) 5077 return 0; /* XXX error? */ 5078 5079 bzero(&dmsg, sizeof(dmsg)); 5080 nmsg = &dmsg.base; 5081 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5082 ipfw_alt_move_rule_dispatch); 5083 dmsg.start_rule = rule; 5084 dmsg.rulenum = rulenum; 5085 dmsg.to_set = set; 5086 5087 netisr_domsg_global(nmsg); 5088 KKASSERT(dmsg.start_rule == NULL); 5089 return 0; 5090 } 5091 5092 static void 5093 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg) 5094 { 5095 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5096 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5097 struct ip_fw *rule; 5098 5099 ASSERT_NETISR_NCPUS(mycpuid); 5100 5101 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5102 if (rule->set == dmsg->from_set) 5103 rule->set = dmsg->to_set; 5104 } 5105 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5106 } 5107 5108 static int 5109 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) 5110 { 5111 struct netmsg_del dmsg; 5112 struct netmsg_base *nmsg; 5113 5114 ASSERT_NETISR0; 5115 5116 bzero(&dmsg, sizeof(dmsg)); 5117 nmsg = &dmsg.base; 5118 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5119 ipfw_alt_move_ruleset_dispatch); 5120 dmsg.from_set = from_set; 5121 dmsg.to_set = to_set; 5122 5123 netisr_domsg_global(nmsg); 5124 return 0; 5125 } 5126 5127 static void 5128 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg) 5129 { 5130 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 5131 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5132 struct ip_fw *rule; 5133 5134 ASSERT_NETISR_NCPUS(mycpuid); 5135 5136 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5137 if (rule->set == dmsg->from_set) 5138 rule->set = dmsg->to_set; 5139 else if (rule->set == dmsg->to_set) 5140 rule->set = dmsg->from_set; 5141 } 5142 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5143 } 5144 5145 static int 5146 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) 5147 { 5148 struct netmsg_del dmsg; 5149 struct netmsg_base *nmsg; 5150 5151 ASSERT_NETISR0; 5152 5153 bzero(&dmsg, sizeof(dmsg)); 5154 nmsg = &dmsg.base; 5155 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5156 ipfw_alt_swap_ruleset_dispatch); 5157 dmsg.from_set = set1; 5158 dmsg.to_set = set2; 5159 5160 netisr_domsg_global(nmsg); 5161 return 0; 5162 } 5163 5164 /* 5165 * Remove all rules with given number, and also do set manipulation. 5166 * 5167 * The argument is an uint32_t. 
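 * Illustrative example (values invented): with the layout
 * described below, "move rule 1000 to set 5" is encoded as
 * arg = (2 << 24) | (5 << 16) | 1000, which the code decodes
 * back into cmd = 2, new_set = 5 and rulenum = 1000.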
The low 16 bits are the rule or set number,
5168 * the next 8 bits are the new set, the top 8 bits are the command:
5169 *
5170 * 0 delete rules with given number
5171 * 1 delete rules with given set number
5172 * 2 move rules with given number to new set
5173 * 3 move rules with given set number to new set
5174 * 4 swap sets with given numbers
5175 */
5176 static int
5177 ipfw_ctl_alter(uint32_t arg)
5178 {
5179 uint16_t rulenum;
5180 uint8_t cmd, new_set;
5181 int error = 0;
5182
5183 ASSERT_NETISR0;
5184
5185 rulenum = arg & 0xffff;
5186 cmd = (arg >> 24) & 0xff;
5187 new_set = (arg >> 16) & 0xff;
5188
5189 if (cmd > 4)
5190 return EINVAL;
5191 if (new_set >= IPFW_DEFAULT_SET)
5192 return EINVAL;
5193 if (cmd == 0 || cmd == 2) {
5194 if (rulenum == IPFW_DEFAULT_RULE)
5195 return EINVAL;
5196 } else {
5197 if (rulenum >= IPFW_DEFAULT_SET)
5198 return EINVAL;
5199 }
5200
5201 switch (cmd) {
5202 case 0: /* delete rules with given number */
5203 error = ipfw_alt_delete_rule(rulenum);
5204 break;
5205
5206 case 1: /* delete all rules with given set number */
5207 error = ipfw_alt_delete_ruleset(rulenum);
5208 break;
5209
5210 case 2: /* move rules with given number to new set */
5211 error = ipfw_alt_move_rule(rulenum, new_set);
5212 break;
5213
5214 case 3: /* move rules with given set number to new set */
5215 error = ipfw_alt_move_ruleset(rulenum, new_set);
5216 break;
5217
5218 case 4: /* swap two sets */
5219 error = ipfw_alt_swap_ruleset(rulenum, new_set);
5220 break;
5221 }
5222 return error;
5223 }
5224
5225 /*
5226 * Clear counters for a specific rule.
5227 */
5228 static void
5229 clear_counters(struct ip_fw *rule, int log_only)
5230 {
5231 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);
5232
5233 if (log_only == 0) {
5234 rule->bcnt = rule->pcnt = 0;
5235 rule->timestamp = 0;
5236 }
5237 if (l->o.opcode == O_LOG)
5238 l->log_left = l->max_log;
5239 }
5240
5241 static void
5242 ipfw_zero_entry_dispatch(netmsg_t nmsg)
5243 {
5244 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
5245 struct ipfw_context *ctx = ipfw_ctx[mycpuid];
5246 struct ip_fw *rule;
5247
5248 ASSERT_NETISR_NCPUS(mycpuid);
5249
5250 if (zmsg->rulenum == 0) {
5251 KKASSERT(zmsg->start_rule == NULL);
5252
5253 ctx->ipfw_norule_counter = 0;
5254 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
5255 clear_counters(rule, zmsg->log_only);
5256 } else {
5257 struct ip_fw *start = zmsg->start_rule;
5258
5259 KKASSERT(start->cpuid == mycpuid);
5260 KKASSERT(start->rulenum == zmsg->rulenum);
5261
5262 /*
5263 * We can have multiple rules with the same number, so we
5264 * need to clear them all.
5265 */
5266 for (rule = start; rule && rule->rulenum == zmsg->rulenum;
5267 rule = rule->next)
5268 clear_counters(rule, zmsg->log_only);
5269
5270 /*
5271 * Move to the position on the next CPU
5272 * before the msg is forwarded.
5273 */
5274 zmsg->start_rule = start->sibling;
5275 }
5276 netisr_forwardmsg(&nmsg->base, mycpuid + 1);
5277 }
5278
5279 /*
5280 * Reset some or all counters on firewall rules.
5281 * @arg rulenum is 0 to clear all entries, or contains a specific
5282 * rule number.
5283 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
5284 */ 5285 static int 5286 ipfw_ctl_zero_entry(int rulenum, int log_only) 5287 { 5288 struct netmsg_zent zmsg; 5289 struct netmsg_base *nmsg; 5290 const char *msg; 5291 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5292 5293 ASSERT_NETISR0; 5294 5295 bzero(&zmsg, sizeof(zmsg)); 5296 nmsg = &zmsg.base; 5297 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5298 ipfw_zero_entry_dispatch); 5299 zmsg.log_only = log_only; 5300 5301 if (rulenum == 0) { 5302 msg = log_only ? "ipfw: All logging counts reset.\n" 5303 : "ipfw: Accounting cleared.\n"; 5304 } else { 5305 struct ip_fw *rule; 5306 5307 /* 5308 * Locate the first rule with 'rulenum' 5309 */ 5310 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 5311 if (rule->rulenum == rulenum) 5312 break; 5313 } 5314 if (rule == NULL) /* we did not find any matching rules */ 5315 return (EINVAL); 5316 zmsg.start_rule = rule; 5317 zmsg.rulenum = rulenum; 5318 5319 msg = log_only ? "ipfw: Entry %d logging count reset.\n" 5320 : "ipfw: Entry %d cleared.\n"; 5321 } 5322 netisr_domsg_global(nmsg); 5323 KKASSERT(zmsg.start_rule == NULL); 5324 5325 if (fw_verbose) 5326 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 5327 return (0); 5328 } 5329 5330 /* 5331 * Check validity of the structure before insert. 5332 * Fortunately rules are simple, so this mostly need to check rule sizes. 5333 */ 5334 static int 5335 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) 5336 { 5337 int l, cmdlen = 0; 5338 int have_action = 0; 5339 ipfw_insn *cmd; 5340 5341 *rule_flags = 0; 5342 5343 /* Check for valid size */ 5344 if (size < sizeof(*rule)) { 5345 kprintf("ipfw: rule too short\n"); 5346 return EINVAL; 5347 } 5348 l = IOC_RULESIZE(rule); 5349 if (l != size) { 5350 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 5351 return EINVAL; 5352 } 5353 5354 /* Check rule number */ 5355 if (rule->rulenum == IPFW_DEFAULT_RULE) { 5356 kprintf("ipfw: invalid rule number\n"); 5357 return EINVAL; 5358 } 5359 5360 /* 5361 * Now go for the individual checks. Very simple ones, basically only 5362 * instruction sizes. 5363 */ 5364 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 5365 l -= cmdlen, cmd += cmdlen) { 5366 cmdlen = F_LEN(cmd); 5367 if (cmdlen > l) { 5368 kprintf("ipfw: opcode %d size truncated\n", 5369 cmd->opcode); 5370 return EINVAL; 5371 } 5372 5373 DPRINTF("ipfw: opcode %d\n", cmd->opcode); 5374 5375 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT || 5376 IPFW_ISXLAT(cmd->opcode)) { 5377 /* This rule will generate states. 
*/ 5378 *rule_flags |= IPFW_RULE_F_GENSTATE; 5379 if (cmd->opcode == O_LIMIT) 5380 *rule_flags |= IPFW_RULE_F_GENTRACK; 5381 } 5382 if (cmd->opcode == O_DEFRAG || IPFW_ISXLAT(cmd->opcode)) 5383 *rule_flags |= IPFW_RULE_F_CROSSREF; 5384 if (cmd->opcode == O_IP_SRC_IFIP || 5385 cmd->opcode == O_IP_DST_IFIP) { 5386 *rule_flags |= IPFW_RULE_F_DYNIFADDR; 5387 cmd->arg1 &= IPFW_IFIP_SETTINGS; 5388 } 5389 5390 switch (cmd->opcode) { 5391 case O_NOP: 5392 case O_PROBE_STATE: 5393 case O_KEEP_STATE: 5394 case O_PROTO: 5395 case O_IP_SRC_ME: 5396 case O_IP_DST_ME: 5397 case O_LAYER2: 5398 case O_IN: 5399 case O_FRAG: 5400 case O_IPFRAG: 5401 case O_IPOPT: 5402 case O_IPLEN: 5403 case O_IPID: 5404 case O_IPTOS: 5405 case O_IPPRECEDENCE: 5406 case O_IPTTL: 5407 case O_IPVER: 5408 case O_TCPWIN: 5409 case O_TCPFLAGS: 5410 case O_TCPOPTS: 5411 case O_ESTAB: 5412 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5413 goto bad_size; 5414 break; 5415 5416 case O_IP_SRC_TABLE: 5417 case O_IP_DST_TABLE: 5418 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5419 goto bad_size; 5420 if (cmd->arg1 >= ipfw_table_max) { 5421 kprintf("ipfw: invalid table id %u, max %d\n", 5422 cmd->arg1, ipfw_table_max); 5423 return EINVAL; 5424 } 5425 break; 5426 5427 case O_IP_SRC_IFIP: 5428 case O_IP_DST_IFIP: 5429 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip)) 5430 goto bad_size; 5431 break; 5432 5433 case O_UID: 5434 case O_GID: 5435 case O_IP_SRC: 5436 case O_IP_DST: 5437 case O_TCPSEQ: 5438 case O_TCPACK: 5439 case O_PROB: 5440 case O_ICMPTYPE: 5441 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 5442 goto bad_size; 5443 break; 5444 5445 case O_LIMIT: 5446 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 5447 goto bad_size; 5448 break; 5449 case O_REDIRECT: 5450 if (cmdlen != F_INSN_SIZE(ipfw_insn_rdr)) 5451 goto bad_size; 5452 break; 5453 5454 case O_LOG: 5455 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 5456 goto bad_size; 5457 5458 ((ipfw_insn_log *)cmd)->log_left = 5459 ((ipfw_insn_log *)cmd)->max_log; 5460 5461 break; 5462 5463 case O_IP_SRC_MASK: 5464 case O_IP_DST_MASK: 5465 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 5466 goto bad_size; 5467 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 5468 kprintf("ipfw: opcode %d, useless rule\n", 5469 cmd->opcode); 5470 return EINVAL; 5471 } 5472 break; 5473 5474 case O_IP_SRC_SET: 5475 case O_IP_DST_SET: 5476 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 5477 kprintf("ipfw: invalid set size %d\n", 5478 cmd->arg1); 5479 return EINVAL; 5480 } 5481 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 5482 (cmd->arg1+31)/32 ) 5483 goto bad_size; 5484 break; 5485 5486 case O_MACADDR2: 5487 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 5488 goto bad_size; 5489 break; 5490 5491 case O_MAC_TYPE: 5492 case O_IP_SRCPORT: 5493 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 5494 if (cmdlen < 2 || cmdlen > 31) 5495 goto bad_size; 5496 break; 5497 5498 case O_RECV: 5499 case O_XMIT: 5500 case O_VIA: 5501 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 5502 goto bad_size; 5503 break; 5504 5505 case O_PIPE: 5506 case O_QUEUE: 5507 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 5508 goto bad_size; 5509 goto check_action; 5510 5511 case O_FORWARD_IP: 5512 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 5513 goto bad_size; 5514 } else { 5515 in_addr_t fwd_addr; 5516 5517 fwd_addr = ((ipfw_insn_sa *)cmd)-> 5518 sa.sin_addr.s_addr; 5519 if (IN_MULTICAST(ntohl(fwd_addr))) { 5520 kprintf("ipfw: try forwarding to " 5521 "multicast address\n"); 5522 return EINVAL; 5523 } 5524 } 5525 goto check_action; 5526 5527 case O_FORWARD_MAC: /* XXX not implemented yet */ 5528 
case O_CHECK_STATE: 5529 case O_COUNT: 5530 case O_ACCEPT: 5531 case O_DENY: 5532 case O_REJECT: 5533 case O_SKIPTO: 5534 case O_DIVERT: 5535 case O_TEE: 5536 case O_DEFRAG: 5537 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 5538 goto bad_size; 5539 check_action: 5540 if (have_action) { 5541 kprintf("ipfw: opcode %d, multiple actions" 5542 " not allowed\n", 5543 cmd->opcode); 5544 return EINVAL; 5545 } 5546 have_action = 1; 5547 if (l != cmdlen) { 5548 kprintf("ipfw: opcode %d, action must be" 5549 " last opcode\n", 5550 cmd->opcode); 5551 return EINVAL; 5552 } 5553 break; 5554 default: 5555 kprintf("ipfw: opcode %d, unknown opcode\n", 5556 cmd->opcode); 5557 return EINVAL; 5558 } 5559 } 5560 if (have_action == 0) { 5561 kprintf("ipfw: missing action\n"); 5562 return EINVAL; 5563 } 5564 return 0; 5565 5566 bad_size: 5567 kprintf("ipfw: opcode %d size %d wrong\n", 5568 cmd->opcode, cmdlen); 5569 return EINVAL; 5570 } 5571 5572 static int 5573 ipfw_ctl_add_rule(struct sockopt *sopt) 5574 { 5575 struct ipfw_ioc_rule *ioc_rule; 5576 size_t size; 5577 uint32_t rule_flags; 5578 int error; 5579 5580 ASSERT_NETISR0; 5581 5582 size = sopt->sopt_valsize; 5583 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 5584 size < sizeof(*ioc_rule)) { 5585 return EINVAL; 5586 } 5587 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 5588 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 5589 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 5590 } 5591 ioc_rule = sopt->sopt_val; 5592 5593 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 5594 if (error) 5595 return error; 5596 5597 ipfw_add_rule(ioc_rule, rule_flags); 5598 5599 if (sopt->sopt_dir == SOPT_GET) 5600 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 5601 return 0; 5602 } 5603 5604 static void * 5605 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule, 5606 struct ipfw_ioc_rule *ioc_rule) 5607 { 5608 const struct ip_fw *sibling; 5609 #ifdef INVARIANTS 5610 int i; 5611 #endif 5612 5613 ASSERT_NETISR0; 5614 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0")); 5615 5616 ioc_rule->act_ofs = rule->act_ofs; 5617 ioc_rule->cmd_len = rule->cmd_len; 5618 ioc_rule->rulenum = rule->rulenum; 5619 ioc_rule->set = rule->set; 5620 ioc_rule->usr_flags = rule->usr_flags; 5621 5622 ioc_rule->set_disable = ctx->ipfw_set_disable; 5623 ioc_rule->static_count = static_count; 5624 ioc_rule->static_len = static_ioc_len; 5625 5626 /* 5627 * Visit (read-only) all of the rule's duplications to get 5628 * the necessary statistics 5629 */ 5630 #ifdef INVARIANTS 5631 i = 0; 5632 #endif 5633 ioc_rule->pcnt = 0; 5634 ioc_rule->bcnt = 0; 5635 ioc_rule->timestamp = 0; 5636 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 5637 ioc_rule->pcnt += sibling->pcnt; 5638 ioc_rule->bcnt += sibling->bcnt; 5639 if (sibling->timestamp > ioc_rule->timestamp) 5640 ioc_rule->timestamp = sibling->timestamp; 5641 #ifdef INVARIANTS 5642 ++i; 5643 #endif 5644 } 5645 KASSERT(i == netisr_ncpus, 5646 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus)); 5647 5648 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 5649 5650 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 5651 } 5652 5653 static boolean_t 5654 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state) 5655 { 5656 struct ipfw_ioc_flowid *ioc_id; 5657 5658 if (trk->tc_expire == 0) { 5659 /* Not a scanned one. */ 5660 return (FALSE); 5661 } 5662 5663 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ? 
5664 0 : trk->tc_expire - time_uptime; 5665 ioc_state->pcnt = 0; 5666 ioc_state->bcnt = 0; 5667 5668 ioc_state->dyn_type = O_LIMIT_PARENT; 5669 ioc_state->count = trk->tc_count; 5670 5671 ioc_state->rulenum = trk->tc_rulenum; 5672 5673 ioc_id = &ioc_state->id; 5674 ioc_id->type = ETHERTYPE_IP; 5675 ioc_id->u.ip.proto = trk->tc_proto; 5676 ioc_id->u.ip.src_ip = trk->tc_saddr; 5677 ioc_id->u.ip.dst_ip = trk->tc_daddr; 5678 ioc_id->u.ip.src_port = trk->tc_sport; 5679 ioc_id->u.ip.dst_port = trk->tc_dport; 5680 5681 return (TRUE); 5682 } 5683 5684 static boolean_t 5685 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state) 5686 { 5687 struct ipfw_ioc_flowid *ioc_id; 5688 5689 if (IPFW_STATE_SCANSKIP(s)) 5690 return (FALSE); 5691 5692 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ? 5693 0 : s->st_expire - time_uptime; 5694 ioc_state->pcnt = s->st_pcnt; 5695 ioc_state->bcnt = s->st_bcnt; 5696 5697 ioc_state->dyn_type = s->st_type; 5698 ioc_state->count = 0; 5699 5700 ioc_state->rulenum = s->st_rule->rulenum; 5701 5702 ioc_id = &ioc_state->id; 5703 ioc_id->type = ETHERTYPE_IP; 5704 ioc_id->u.ip.proto = s->st_proto; 5705 ipfw_key_4tuple(&s->st_key, 5706 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port, 5707 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port); 5708 5709 if (IPFW_ISXLAT(s->st_type)) { 5710 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 5711 5712 if (x->xlat_port == 0) 5713 ioc_state->xlat_port = ioc_id->u.ip.dst_port; 5714 else 5715 ioc_state->xlat_port = ntohs(x->xlat_port); 5716 ioc_state->xlat_addr = ntohl(x->xlat_addr); 5717 5718 ioc_state->pcnt += x->xlat_pair->xlat_pcnt; 5719 ioc_state->bcnt += x->xlat_pair->xlat_bcnt; 5720 } 5721 5722 return (TRUE); 5723 } 5724 5725 static void 5726 ipfw_state_copy_dispatch(netmsg_t nmsg) 5727 { 5728 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg; 5729 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5730 const struct ipfw_state *s; 5731 const struct ipfw_track *t; 5732 5733 ASSERT_NETISR_NCPUS(mycpuid); 5734 KASSERT(nm->state_cnt < nm->state_cntmax, 5735 ("invalid state count %d, max %d", 5736 nm->state_cnt, nm->state_cntmax)); 5737 5738 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) { 5739 if (ipfw_state_copy(s, nm->ioc_state)) { 5740 nm->ioc_state++; 5741 nm->state_cnt++; 5742 if (nm->state_cnt == nm->state_cntmax) 5743 goto done; 5744 } 5745 } 5746 5747 /* 5748 * Prepare tracks in the global track tree for userland. 5749 */ 5750 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) { 5751 struct ipfw_trkcnt *trk; 5752 5753 if (t->t_count == NULL) /* anchor */ 5754 continue; 5755 trk = t->t_trkcnt; 5756 5757 /* 5758 * Only one netisr can run this function at 5759 * any time, and only this function accesses 5760 * trkcnt's tc_expire, so this is safe w/o 5761 * ipfw_gd.ipfw_trkcnt_token. 5762 */ 5763 if (trk->tc_expire > t->t_expire) 5764 continue; 5765 trk->tc_expire = t->t_expire; 5766 } 5767 5768 /* 5769 * Copy tracks in the global track tree to userland in 5770 * the last netisr. 
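 * Doing the copy only once, after every CPU has refreshed
 * tc_expire in the loop above, avoids reporting the same
 * (global) track once per CPU.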
5771 */ 5772 if (mycpuid == netisr_ncpus - 1) { 5773 struct ipfw_trkcnt *trk; 5774 5775 KASSERT(nm->state_cnt < nm->state_cntmax, 5776 ("invalid state count %d, max %d", 5777 nm->state_cnt, nm->state_cntmax)); 5778 5779 IPFW_TRKCNT_TOKGET; 5780 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) { 5781 if (ipfw_track_copy(trk, nm->ioc_state)) { 5782 nm->ioc_state++; 5783 nm->state_cnt++; 5784 if (nm->state_cnt == nm->state_cntmax) { 5785 IPFW_TRKCNT_TOKREL; 5786 goto done; 5787 } 5788 } 5789 } 5790 IPFW_TRKCNT_TOKREL; 5791 } 5792 done: 5793 if (nm->state_cnt == nm->state_cntmax) { 5794 /* No more space; done. */ 5795 netisr_replymsg(&nm->base, 0); 5796 } else { 5797 netisr_forwardmsg(&nm->base, mycpuid + 1); 5798 } 5799 } 5800 5801 static int 5802 ipfw_ctl_get_rules(struct sockopt *sopt) 5803 { 5804 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5805 struct ip_fw *rule; 5806 void *bp; 5807 size_t size; 5808 int state_cnt; 5809 5810 ASSERT_NETISR0; 5811 5812 /* 5813 * pass up a copy of the current rules. Static rules 5814 * come first (the last of which has number IPFW_DEFAULT_RULE), 5815 * followed by a possibly empty list of states. 5816 */ 5817 5818 size = static_ioc_len; /* size of static rules */ 5819 5820 /* 5821 * Size of the states. 5822 * XXX take tracks as state for userland compat. 5823 */ 5824 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt; 5825 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */ 5826 size += state_cnt * sizeof(struct ipfw_ioc_state); 5827 5828 if (sopt->sopt_valsize < size) { 5829 /* short length, no need to return incomplete rules */ 5830 /* XXX: if superuser, no need to zero buffer */ 5831 bzero(sopt->sopt_val, sopt->sopt_valsize); 5832 return 0; 5833 } 5834 bp = sopt->sopt_val; 5835 5836 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5837 bp = ipfw_copy_rule(ctx, rule, bp); 5838 5839 if (state_cnt) { 5840 struct netmsg_cpstate nm; 5841 #ifdef INVARIANTS 5842 size_t old_size = size; 5843 #endif 5844 5845 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5846 MSGF_PRIORITY, ipfw_state_copy_dispatch); 5847 nm.ioc_state = bp; 5848 nm.state_cntmax = state_cnt; 5849 nm.state_cnt = 0; 5850 netisr_domsg_global(&nm.base); 5851 5852 /* 5853 * The # of states may be shrinked after the snapshot 5854 * of the state count was taken. To give user a correct 5855 * state count, nm->state_cnt is used to recalculate 5856 * the actual size. 
5857 */ 5858 size = static_ioc_len + 5859 (nm.state_cnt * sizeof(struct ipfw_ioc_state)); 5860 KKASSERT(size <= old_size); 5861 } 5862 5863 sopt->sopt_valsize = size; 5864 return 0; 5865 } 5866 5867 static void 5868 ipfw_set_disable_dispatch(netmsg_t nmsg) 5869 { 5870 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5871 5872 ASSERT_NETISR_NCPUS(mycpuid); 5873 5874 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32; 5875 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5876 } 5877 5878 static void 5879 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 5880 { 5881 struct netmsg_base nmsg; 5882 uint32_t set_disable; 5883 5884 ASSERT_NETISR0; 5885 5886 /* IPFW_DEFAULT_SET is always enabled */ 5887 enable |= (1 << IPFW_DEFAULT_SET); 5888 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 5889 5890 bzero(&nmsg, sizeof(nmsg)); 5891 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5892 ipfw_set_disable_dispatch); 5893 nmsg.lmsg.u.ms_result32 = set_disable; 5894 5895 netisr_domsg_global(&nmsg); 5896 } 5897 5898 static void 5899 ipfw_table_create_dispatch(netmsg_t nm) 5900 { 5901 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5902 int tblid = nm->lmsg.u.ms_result; 5903 5904 ASSERT_NETISR_NCPUS(mycpuid); 5905 5906 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid], 5907 rn_cpumaskhead(mycpuid), 32)) 5908 panic("ipfw: create table%d failed", tblid); 5909 5910 netisr_forwardmsg(&nm->base, mycpuid + 1); 5911 } 5912 5913 static int 5914 ipfw_table_create(struct sockopt *sopt) 5915 { 5916 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5917 struct ipfw_ioc_table *tbl; 5918 struct netmsg_base nm; 5919 5920 ASSERT_NETISR0; 5921 5922 if (sopt->sopt_valsize != sizeof(*tbl)) 5923 return (EINVAL); 5924 5925 tbl = sopt->sopt_val; 5926 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5927 return (EINVAL); 5928 5929 if (ctx->ipfw_tables[tbl->tableid] != NULL) 5930 return (EEXIST); 5931 5932 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5933 ipfw_table_create_dispatch); 5934 nm.lmsg.u.ms_result = tbl->tableid; 5935 netisr_domsg_global(&nm); 5936 5937 return (0); 5938 } 5939 5940 static void 5941 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn) 5942 { 5943 struct radix_node *ret; 5944 5945 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 5946 if (ret != rn) 5947 panic("deleted other table entry"); 5948 kfree(ret, M_IPFW); 5949 } 5950 5951 static int 5952 ipfw_table_killent(struct radix_node *rn, void *xrnh) 5953 { 5954 5955 ipfw_table_killrn(xrnh, rn); 5956 return (0); 5957 } 5958 5959 static void 5960 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, 5961 int destroy) 5962 { 5963 struct radix_node_head *rnh; 5964 5965 ASSERT_NETISR_NCPUS(mycpuid); 5966 5967 rnh = ctx->ipfw_tables[tableid]; 5968 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh); 5969 if (destroy) { 5970 Free(rnh); 5971 ctx->ipfw_tables[tableid] = NULL; 5972 } 5973 } 5974 5975 static void 5976 ipfw_table_flush_dispatch(netmsg_t nmsg) 5977 { 5978 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; 5979 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5980 5981 ASSERT_NETISR_NCPUS(mycpuid); 5982 5983 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); 5984 netisr_forwardmsg(&nm->base, mycpuid + 1); 5985 } 5986 5987 static void 5988 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) 5989 { 5990 int i; 5991 5992 ASSERT_NETISR_NCPUS(mycpuid); 5993 5994 for (i = 0; i < ipfw_table_max; ++i) { 5995 if (ctx->ipfw_tables[i] != NULL) 5996 
ipfw_table_flush_oncpu(ctx, i, destroy); 5997 } 5998 } 5999 6000 static void 6001 ipfw_table_flushall_dispatch(netmsg_t nmsg) 6002 { 6003 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6004 6005 ASSERT_NETISR_NCPUS(mycpuid); 6006 6007 ipfw_table_flushall_oncpu(ctx, 0); 6008 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6009 } 6010 6011 static int 6012 ipfw_table_flush(struct sockopt *sopt) 6013 { 6014 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6015 struct ipfw_ioc_table *tbl; 6016 struct netmsg_tblflush nm; 6017 6018 ASSERT_NETISR0; 6019 6020 if (sopt->sopt_valsize != sizeof(*tbl)) 6021 return (EINVAL); 6022 6023 tbl = sopt->sopt_val; 6024 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { 6025 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6026 MSGF_PRIORITY, ipfw_table_flushall_dispatch); 6027 netisr_domsg_global(&nm.base); 6028 return (0); 6029 } 6030 6031 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6032 return (EINVAL); 6033 6034 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6035 return (ENOENT); 6036 6037 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6038 ipfw_table_flush_dispatch); 6039 nm.tableid = tbl->tableid; 6040 nm.destroy = 0; 6041 if (sopt->sopt_name == IP_FW_TBL_DESTROY) 6042 nm.destroy = 1; 6043 netisr_domsg_global(&nm.base); 6044 6045 return (0); 6046 } 6047 6048 static int 6049 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) 6050 { 6051 int *cnt = xcnt; 6052 6053 (*cnt)++; 6054 return (0); 6055 } 6056 6057 static int 6058 ipfw_table_cpent(struct radix_node *rn, void *xcp) 6059 { 6060 struct ipfw_table_cp *cp = xcp; 6061 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6062 struct ipfw_ioc_tblent *ioc_te; 6063 #ifdef INVARIANTS 6064 int cnt; 6065 #endif 6066 6067 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", 6068 cp->te_idx, cp->te_cnt)); 6069 ioc_te = &cp->te[cp->te_idx]; 6070 6071 if (te->te_nodes->rn_mask != NULL) { 6072 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, 6073 *te->te_nodes->rn_mask); 6074 } else { 6075 ioc_te->netmask.sin_len = 0; 6076 } 6077 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); 6078 6079 ioc_te->use = te->te_use; 6080 ioc_te->last_used = te->te_lastuse; 6081 #ifdef INVARIANTS 6082 cnt = 1; 6083 #endif 6084 6085 while ((te = te->te_sibling) != NULL) { 6086 #ifdef INVARIANTS 6087 ++cnt; 6088 #endif 6089 ioc_te->use += te->te_use; 6090 if (te->te_lastuse > ioc_te->last_used) 6091 ioc_te->last_used = te->te_lastuse; 6092 } 6093 KASSERT(cnt == netisr_ncpus, 6094 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); 6095 6096 cp->te_idx++; 6097 6098 return (0); 6099 } 6100 6101 static int 6102 ipfw_table_get(struct sockopt *sopt) 6103 { 6104 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6105 struct radix_node_head *rnh; 6106 struct ipfw_ioc_table *tbl; 6107 struct ipfw_ioc_tblcont *cont; 6108 struct ipfw_table_cp cp; 6109 int cnt = 0, sz; 6110 6111 ASSERT_NETISR0; 6112 6113 if (sopt->sopt_valsize < sizeof(*tbl)) 6114 return (EINVAL); 6115 6116 tbl = sopt->sopt_val; 6117 if (tbl->tableid < 0) { 6118 struct ipfw_ioc_tbllist *list; 6119 int i; 6120 6121 /* 6122 * List available table ids. 
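 * That is, a negative tableid asks for an ipfw_ioc_tbllist
 * holding the ids of all existing tables rather than the
 * contents of one table.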
6123 */ 6124 for (i = 0; i < ipfw_table_max; ++i) { 6125 if (ctx->ipfw_tables[i] != NULL) 6126 ++cnt; 6127 } 6128 6129 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); 6130 if (sopt->sopt_valsize < sz) { 6131 bzero(sopt->sopt_val, sopt->sopt_valsize); 6132 return (E2BIG); 6133 } 6134 list = sopt->sopt_val; 6135 list->tablecnt = cnt; 6136 6137 cnt = 0; 6138 for (i = 0; i < ipfw_table_max; ++i) { 6139 if (ctx->ipfw_tables[i] != NULL) { 6140 KASSERT(cnt < list->tablecnt, 6141 ("invalid idx %d, cnt %d", 6142 cnt, list->tablecnt)); 6143 list->tables[cnt++] = i; 6144 } 6145 } 6146 sopt->sopt_valsize = sz; 6147 return (0); 6148 } else if (tbl->tableid >= ipfw_table_max) { 6149 return (EINVAL); 6150 } 6151 6152 rnh = ctx->ipfw_tables[tbl->tableid]; 6153 if (rnh == NULL) 6154 return (ENOENT); 6155 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); 6156 6157 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); 6158 if (sopt->sopt_valsize < sz) { 6159 bzero(sopt->sopt_val, sopt->sopt_valsize); 6160 return (E2BIG); 6161 } 6162 cont = sopt->sopt_val; 6163 cont->entcnt = cnt; 6164 6165 cp.te = cont->ent; 6166 cp.te_idx = 0; 6167 cp.te_cnt = cnt; 6168 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); 6169 6170 sopt->sopt_valsize = sz; 6171 return (0); 6172 } 6173 6174 static void 6175 ipfw_table_add_dispatch(netmsg_t nmsg) 6176 { 6177 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6178 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6179 struct radix_node_head *rnh; 6180 struct ipfw_tblent *te; 6181 6182 ASSERT_NETISR_NCPUS(mycpuid); 6183 6184 rnh = ctx->ipfw_tables[nm->tableid]; 6185 6186 te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO); 6187 te->te_nodes->rn_key = (char *)&te->te_key; 6188 memcpy(&te->te_key, nm->key, sizeof(te->te_key)); 6189 6190 if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh, 6191 te->te_nodes) == NULL) { 6192 if (mycpuid == 0) { 6193 kfree(te, M_IPFW); 6194 netisr_replymsg(&nm->base, EEXIST); 6195 return; 6196 } 6197 panic("rnh_addaddr failed"); 6198 } 6199 6200 /* Link siblings. 
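 * The per-CPU copies of a table entry are chained through
 * te_sibling in CPU order, much like ipfw_link_sibling() does
 * for rules; ipfw_table_cpent() later walks this chain to sum
 * the per-CPU use counters.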
*/ 6201 if (nm->sibling != NULL) 6202 nm->sibling->te_sibling = te; 6203 nm->sibling = te; 6204 6205 netisr_forwardmsg(&nm->base, mycpuid + 1); 6206 } 6207 6208 static void 6209 ipfw_table_del_dispatch(netmsg_t nmsg) 6210 { 6211 struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg; 6212 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6213 struct radix_node_head *rnh; 6214 struct radix_node *rn; 6215 6216 ASSERT_NETISR_NCPUS(mycpuid); 6217 6218 rnh = ctx->ipfw_tables[nm->tableid]; 6219 rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh); 6220 if (rn == NULL) { 6221 if (mycpuid == 0) { 6222 netisr_replymsg(&nm->base, ESRCH); 6223 return; 6224 } 6225 panic("rnh_deladdr failed"); 6226 } 6227 kfree(rn, M_IPFW); 6228 6229 netisr_forwardmsg(&nm->base, mycpuid + 1); 6230 } 6231 6232 static int 6233 ipfw_table_alt(struct sockopt *sopt) 6234 { 6235 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6236 struct ipfw_ioc_tblcont *tbl; 6237 struct ipfw_ioc_tblent *te; 6238 struct sockaddr_in key0; 6239 struct sockaddr *netmask = NULL, *key; 6240 struct netmsg_tblent nm; 6241 6242 ASSERT_NETISR0; 6243 6244 if (sopt->sopt_valsize != sizeof(*tbl)) 6245 return (EINVAL); 6246 tbl = sopt->sopt_val; 6247 6248 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 6249 return (EINVAL); 6250 if (tbl->entcnt != 1) 6251 return (EINVAL); 6252 6253 if (ctx->ipfw_tables[tbl->tableid] == NULL) 6254 return (ENOENT); 6255 te = &tbl->ent[0]; 6256 6257 if (te->key.sin_family != AF_INET || 6258 te->key.sin_port != 0 || 6259 te->key.sin_len != sizeof(struct sockaddr_in)) 6260 return (EINVAL); 6261 key = (struct sockaddr *)&te->key; 6262 6263 if (te->netmask.sin_len != 0) { 6264 if (te->netmask.sin_port != 0 || 6265 te->netmask.sin_len > sizeof(struct sockaddr_in)) 6266 return (EINVAL); 6267 netmask = (struct sockaddr *)&te->netmask; 6268 sa_maskedcopy(key, (struct sockaddr *)&key0, netmask); 6269 key = (struct sockaddr *)&key0; 6270 } 6271 6272 if (sopt->sopt_name == IP_FW_TBL_ADD) { 6273 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6274 MSGF_PRIORITY, ipfw_table_add_dispatch); 6275 } else { 6276 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6277 MSGF_PRIORITY, ipfw_table_del_dispatch); 6278 } 6279 nm.key = key; 6280 nm.netmask = netmask; 6281 nm.tableid = tbl->tableid; 6282 nm.sibling = NULL; 6283 return (netisr_domsg_global(&nm.base)); 6284 } 6285 6286 static int 6287 ipfw_table_zeroent(struct radix_node *rn, void *arg __unused) 6288 { 6289 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6290 6291 te->te_use = 0; 6292 te->te_lastuse = 0; 6293 return (0); 6294 } 6295 6296 static void 6297 ipfw_table_zero_dispatch(netmsg_t nmsg) 6298 { 6299 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6300 struct radix_node_head *rnh; 6301 6302 ASSERT_NETISR_NCPUS(mycpuid); 6303 6304 rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result]; 6305 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6306 6307 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6308 } 6309 6310 static void 6311 ipfw_table_zeroall_dispatch(netmsg_t nmsg) 6312 { 6313 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6314 int i; 6315 6316 ASSERT_NETISR_NCPUS(mycpuid); 6317 6318 for (i = 0; i < ipfw_table_max; ++i) { 6319 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6320 6321 if (rnh != NULL) 6322 rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL); 6323 } 6324 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6325 } 6326 6327 static int 6328 ipfw_table_zero(struct sockopt *sopt) 6329 { 6330 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6331 struct netmsg_base 
nm; 6332 struct ipfw_ioc_table *tbl; 6333 6334 ASSERT_NETISR0; 6335 6336 if (sopt->sopt_valsize != sizeof(*tbl)) 6337 return (EINVAL); 6338 tbl = sopt->sopt_val; 6339 6340 if (tbl->tableid < 0) { 6341 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6342 ipfw_table_zeroall_dispatch); 6343 netisr_domsg_global(&nm); 6344 return (0); 6345 } else if (tbl->tableid >= ipfw_table_max) { 6346 return (EINVAL); 6347 } else if (ctx->ipfw_tables[tbl->tableid] == NULL) { 6348 return (ENOENT); 6349 } 6350 6351 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6352 ipfw_table_zero_dispatch); 6353 nm.lmsg.u.ms_result = tbl->tableid; 6354 netisr_domsg_global(&nm); 6355 6356 return (0); 6357 } 6358 6359 static int 6360 ipfw_table_killexp(struct radix_node *rn, void *xnm) 6361 { 6362 struct netmsg_tblexp *nm = xnm; 6363 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 6364 6365 if (te->te_expired) { 6366 ipfw_table_killrn(nm->rnh, rn); 6367 nm->expcnt++; 6368 } 6369 return (0); 6370 } 6371 6372 static void 6373 ipfw_table_expire_dispatch(netmsg_t nmsg) 6374 { 6375 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; 6376 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6377 struct radix_node_head *rnh; 6378 6379 ASSERT_NETISR_NCPUS(mycpuid); 6380 6381 rnh = ctx->ipfw_tables[nm->tableid]; 6382 nm->rnh = rnh; 6383 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); 6384 6385 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), 6386 ("not all expired addresses (%d) were deleted (%d)", 6387 nm->cnt * (mycpuid + 1), nm->expcnt)); 6388 6389 netisr_forwardmsg(&nm->base, mycpuid + 1); 6390 } 6391 6392 static void 6393 ipfw_table_expireall_dispatch(netmsg_t nmsg) 6394 { 6395 struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg; 6396 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6397 int i; 6398 6399 ASSERT_NETISR_NCPUS(mycpuid); 6400 6401 for (i = 0; i < ipfw_table_max; ++i) { 6402 struct radix_node_head *rnh = ctx->ipfw_tables[i]; 6403 6404 if (rnh == NULL) 6405 continue; 6406 nm->rnh = rnh; 6407 rnh->rnh_walktree(rnh, ipfw_table_killexp, nm); 6408 } 6409 6410 KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1), 6411 ("not all expired addresses (%d) were deleted (%d)", 6412 nm->cnt * (mycpuid + 1), nm->expcnt)); 6413 6414 netisr_forwardmsg(&nm->base, mycpuid + 1); 6415 } 6416 6417 static int 6418 ipfw_table_markexp(struct radix_node *rn, void *xnm) 6419 { 6420 struct netmsg_tblexp *nm = xnm; 6421 struct ipfw_tblent *te; 6422 time_t lastuse; 6423 6424 te = (struct ipfw_tblent *)rn; 6425 lastuse = te->te_lastuse; 6426 6427 while ((te = te->te_sibling) != NULL) { 6428 if (te->te_lastuse > lastuse) 6429 lastuse = te->te_lastuse; 6430 } 6431 if (!TIME_LEQ(lastuse + nm->expire, time_second)) { 6432 /* Not expired */ 6433 return (0); 6434 } 6435 6436 te = (struct ipfw_tblent *)rn; 6437 te->te_expired = 1; 6438 while ((te = te->te_sibling) != NULL) 6439 te->te_expired = 1; 6440 nm->cnt++; 6441 6442 return (0); 6443 } 6444 6445 static int 6446 ipfw_table_expire(struct sockopt *sopt) 6447 { 6448 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6449 struct netmsg_tblexp nm; 6450 struct ipfw_ioc_tblexp *tbl; 6451 struct radix_node_head *rnh; 6452 6453 ASSERT_NETISR0; 6454 6455 if (sopt->sopt_valsize != sizeof(*tbl)) 6456 return (EINVAL); 6457 tbl = sopt->sopt_val; 6458 tbl->expcnt = 0; 6459 6460 nm.expcnt = 0; 6461 nm.cnt = 0; 6462 nm.expire = tbl->expire; 6463 6464 if (tbl->tableid < 0) { 6465 int i; 6466 6467 for (i = 0; i < ipfw_table_max; ++i) { 6468 rnh = ctx->ipfw_tables[i]; 6469 if (rnh == NULL) 6470 
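 /* Skip table ids that were never created. */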
continue; 6471 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); 6472 } 6473 if (nm.cnt == 0) { 6474 /* No addresses can be expired. */ 6475 return (0); 6476 } 6477 tbl->expcnt = nm.cnt; 6478 6479 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 6480 MSGF_PRIORITY, ipfw_table_expireall_dispatch); 6481 nm.tableid = -1; 6482 netisr_domsg_global(&nm.base); 6483 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, 6484 ("not all expired addresses (%d) were deleted (%d)", 6485 nm.cnt * netisr_ncpus, nm.expcnt)); 6486 6487 return (0); 6488 } else if (tbl->tableid >= ipfw_table_max) { 6489 return (EINVAL); 6490 } 6491 6492 rnh = ctx->ipfw_tables[tbl->tableid]; 6493 if (rnh == NULL) 6494 return (ENOENT); 6495 rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm); 6496 if (nm.cnt == 0) { 6497 /* No addresses can be expired. */ 6498 return (0); 6499 } 6500 tbl->expcnt = nm.cnt; 6501 6502 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6503 ipfw_table_expire_dispatch); 6504 nm.tableid = tbl->tableid; 6505 netisr_domsg_global(&nm.base); 6506 KASSERT(nm.expcnt == nm.cnt * netisr_ncpus, 6507 ("not all expired addresses (%d) were deleted (%d)", 6508 nm.cnt * netisr_ncpus, nm.expcnt)); 6509 return (0); 6510 } 6511 6512 static void 6513 ipfw_crossref_free_dispatch(netmsg_t nmsg) 6514 { 6515 struct ip_fw *rule = nmsg->lmsg.u.ms_resultp; 6516 6517 KKASSERT((rule->rule_flags & 6518 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) == 6519 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)); 6520 ipfw_free_rule(rule); 6521 6522 netisr_replymsg(&nmsg->base, 0); 6523 } 6524 6525 static void 6526 ipfw_crossref_reap(void) 6527 { 6528 struct ip_fw *rule, *prev = NULL; 6529 6530 ASSERT_NETISR0; 6531 6532 rule = ipfw_gd.ipfw_crossref_free; 6533 while (rule != NULL) { 6534 uint64_t inflight = 0; 6535 int i; 6536 6537 for (i = 0; i < netisr_ncpus; ++i) 6538 inflight += rule->cross_rules[i]->cross_refs; 6539 if (inflight == 0) { 6540 struct ip_fw *f = rule; 6541 6542 /* 6543 * Unlink. 6544 */ 6545 rule = rule->next; 6546 if (prev != NULL) 6547 prev->next = rule; 6548 else 6549 ipfw_gd.ipfw_crossref_free = rule; 6550 6551 /* 6552 * Free. 6553 */ 6554 for (i = 1; i < netisr_ncpus; ++i) { 6555 struct netmsg_base nm; 6556 6557 netmsg_init(&nm, NULL, &curthread->td_msgport, 6558 MSGF_PRIORITY, ipfw_crossref_free_dispatch); 6559 nm.lmsg.u.ms_resultp = f->cross_rules[i]; 6560 netisr_domsg(&nm, i); 6561 } 6562 KKASSERT((f->rule_flags & 6563 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) == 6564 (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)); 6565 ipfw_unref_rule(f); 6566 } else { 6567 prev = rule; 6568 rule = rule->next; 6569 } 6570 } 6571 6572 if (ipfw_gd.ipfw_crossref_free != NULL) { 6573 callout_reset(&ipfw_gd.ipfw_crossref_ch, hz, 6574 ipfw_crossref_timeo, NULL); 6575 } 6576 } 6577 6578 /* 6579 * {set|get}sockopt parser. 6580 */ 6581 static int 6582 ipfw_ctl(struct sockopt *sopt) 6583 { 6584 int error, rulenum; 6585 uint32_t *masks; 6586 size_t size; 6587 6588 ASSERT_NETISR0; 6589 6590 error = 0; 6591 6592 switch (sopt->sopt_name) { 6593 case IP_FW_GET: 6594 error = ipfw_ctl_get_rules(sopt); 6595 break; 6596 6597 case IP_FW_FLUSH: 6598 ipfw_flush(0 /* keep default rule */); 6599 break; 6600 6601 case IP_FW_ADD: 6602 error = ipfw_ctl_add_rule(sopt); 6603 break; 6604 6605 case IP_FW_DEL: 6606 /* 6607 * IP_FW_DEL is used for deleting single rules or sets, 6608 * and (ab)used to atomically manipulate sets. 
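 * (e.g. ipfw(8)'s 'set disable'/'set enable' goes through the
 * two-mask form.)  A rough userland sketch, per the size rules
 * below; the names and values are illustrative only and 's' is
 * assumed to be a raw IP socket:
 *   uint32_t rule = 100;                       /* delete rule 100 */
 *   setsockopt(s, IPPROTO_IP, IP_FW_DEL, &rule, sizeof(rule));
 *   uint32_t masks[2] = { disable_sets, enable_sets };
 *   setsockopt(s, IPPROTO_IP, IP_FW_DEL, masks, sizeof(masks));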
6609 * Argument size is used to distinguish between the two: 6610 * sizeof(uint32_t) 6611 * delete single rule or set of rules, 6612 * or reassign rules (or sets) to a different set. 6613 * 2 * sizeof(uint32_t) 6614 * atomic disable/enable sets. 6615 * first uint32_t contains sets to be disabled, 6616 * second uint32_t contains sets to be enabled. 6617 */ 6618 masks = sopt->sopt_val; 6619 size = sopt->sopt_valsize; 6620 if (size == sizeof(*masks)) { 6621 /* 6622 * Delete or reassign static rule 6623 */ 6624 error = ipfw_ctl_alter(masks[0]); 6625 } else if (size == (2 * sizeof(*masks))) { 6626 /* 6627 * Set enable/disable 6628 */ 6629 ipfw_ctl_set_disable(masks[0], masks[1]); 6630 } else { 6631 error = EINVAL; 6632 } 6633 break; 6634 6635 case IP_FW_ZERO: 6636 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 6637 rulenum = 0; 6638 6639 if (sopt->sopt_val != 0) { 6640 error = soopt_to_kbuf(sopt, &rulenum, 6641 sizeof(int), sizeof(int)); 6642 if (error) 6643 break; 6644 } 6645 error = ipfw_ctl_zero_entry(rulenum, 6646 sopt->sopt_name == IP_FW_RESETLOG); 6647 break; 6648 6649 case IP_FW_TBL_CREATE: 6650 error = ipfw_table_create(sopt); 6651 break; 6652 6653 case IP_FW_TBL_ADD: 6654 case IP_FW_TBL_DEL: 6655 error = ipfw_table_alt(sopt); 6656 break; 6657 6658 case IP_FW_TBL_FLUSH: 6659 case IP_FW_TBL_DESTROY: 6660 error = ipfw_table_flush(sopt); 6661 break; 6662 6663 case IP_FW_TBL_GET: 6664 error = ipfw_table_get(sopt); 6665 break; 6666 6667 case IP_FW_TBL_ZERO: 6668 error = ipfw_table_zero(sopt); 6669 break; 6670 6671 case IP_FW_TBL_EXPIRE: 6672 error = ipfw_table_expire(sopt); 6673 break; 6674 6675 default: 6676 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name); 6677 error = EINVAL; 6678 } 6679 6680 ipfw_crossref_reap(); 6681 return error; 6682 } 6683 6684 static void 6685 ipfw_keepalive_done(struct ipfw_context *ctx) 6686 { 6687 6688 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6689 ("keepalive is not in progress")); 6690 ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE; 6691 callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz, 6692 ipfw_keepalive, NULL); 6693 } 6694 6695 static void 6696 ipfw_keepalive_more(struct ipfw_context *ctx) 6697 { 6698 struct netmsg_base *nm = &ctx->ipfw_keepalive_more; 6699 6700 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6701 ("keepalive is not in progress")); 6702 KASSERT(nm->lmsg.ms_flags & MSGF_DONE, 6703 ("keepalive more did not finish")); 6704 netisr_sendmsg_oncpu(nm); 6705 } 6706 6707 static void 6708 ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor) 6709 { 6710 struct ipfw_state *s; 6711 int scanned = 0, expired = 0, kept = 0; 6712 6713 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6714 ("keepalive is not in progress")); 6715 6716 while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) { 6717 uint32_t ack_rev, ack_fwd; 6718 struct ipfw_flow_id id; 6719 uint8_t send_dir; 6720 6721 if (scanned++ >= ipfw_state_scan_max) { 6722 ipfw_keepalive_more(ctx); 6723 return; 6724 } 6725 6726 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6727 TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link); 6728 6729 /* 6730 * NOTE: 6731 * Don't use IPFW_STATE_SCANSKIP; need to perform keepalive 6732 * on slave xlat. 
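 * Keepalives for a xlat state are generated only in that state's
 * own direction (see the SEND_FWD/SEND_REV selection below), so
 * the slave xlat must be visited here as well.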
6733 */ 6734 if (s->st_type == O_ANCHOR) 6735 continue; 6736 6737 if (IPFW_STATE_ISDEAD(s)) { 6738 ipfw_state_remove(ctx, s); 6739 if (++expired >= ipfw_state_expire_max) { 6740 ipfw_keepalive_more(ctx); 6741 return; 6742 } 6743 continue; 6744 } 6745 6746 /* 6747 * Keep alive processing 6748 */ 6749 6750 if (s->st_proto != IPPROTO_TCP) 6751 continue; 6752 if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN) 6753 continue; 6754 if (TIME_LEQ(time_uptime + dyn_keepalive_interval, 6755 s->st_expire)) 6756 continue; /* too early */ 6757 6758 ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port, 6759 &id.dst_ip, &id.dst_port); 6760 ack_rev = s->st_ack_rev; 6761 ack_fwd = s->st_ack_fwd; 6762 6763 #define SEND_FWD 0x1 6764 #define SEND_REV 0x2 6765 6766 if (IPFW_ISXLAT(s->st_type)) { 6767 const struct ipfw_xlat *x = (const struct ipfw_xlat *)s; 6768 6769 if (x->xlat_dir == MATCH_FORWARD) 6770 send_dir = SEND_FWD; 6771 else 6772 send_dir = SEND_REV; 6773 } else { 6774 send_dir = SEND_FWD | SEND_REV; 6775 } 6776 6777 if (send_dir & SEND_REV) 6778 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN); 6779 if (send_dir & SEND_FWD) 6780 send_pkt(&id, ack_fwd - 1, ack_rev, 0); 6781 6782 #undef SEND_FWD 6783 #undef SEND_REV 6784 6785 if (++kept >= ipfw_keepalive_max) { 6786 ipfw_keepalive_more(ctx); 6787 return; 6788 } 6789 } 6790 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6791 ipfw_keepalive_done(ctx); 6792 } 6793 6794 static void 6795 ipfw_keepalive_more_dispatch(netmsg_t nm) 6796 { 6797 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6798 struct ipfw_state *anchor; 6799 6800 ASSERT_NETISR_NCPUS(mycpuid); 6801 KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE, 6802 ("keepalive is not in progress")); 6803 6804 /* Reply ASAP */ 6805 netisr_replymsg(&nm->base, 0); 6806 6807 anchor = &ctx->ipfw_keepalive_anch; 6808 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6809 TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link); 6810 ipfw_keepalive_done(ctx); 6811 return; 6812 } 6813 ipfw_keepalive_loop(ctx, anchor); 6814 } 6815 6816 /* 6817 * This procedure is only used to handle keepalives. It is invoked 6818 * every dyn_keepalive_period 6819 */ 6820 static void 6821 ipfw_keepalive_dispatch(netmsg_t nm) 6822 { 6823 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6824 struct ipfw_state *anchor; 6825 6826 ASSERT_NETISR_NCPUS(mycpuid); 6827 KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0, 6828 ("keepalive is in progress")); 6829 ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE; 6830 6831 /* Reply ASAP */ 6832 crit_enter(); 6833 netisr_replymsg(&nm->base, 0); 6834 crit_exit(); 6835 6836 if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) { 6837 ipfw_keepalive_done(ctx); 6838 return; 6839 } 6840 6841 anchor = &ctx->ipfw_keepalive_anch; 6842 TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link); 6843 ipfw_keepalive_loop(ctx, anchor); 6844 } 6845 6846 /* 6847 * This procedure is only used to handle keepalives. 
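(It simply resends ipfw_keepalive_nm to the local netisr; the actual scanning is done by ipfw_keepalive_dispatch().)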
It is invoked 6848 * every dyn_keepalive_period seconds. 6849 */ 6850 static void 6851 ipfw_keepalive(void *dummy __unused) 6852 { 6853 struct netmsg_base *msg; 6854 6855 KKASSERT(mycpuid < netisr_ncpus); 6856 msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm; 6857 6858 crit_enter(); 6859 if (msg->lmsg.ms_flags & MSGF_DONE) 6860 netisr_sendmsg_oncpu(msg); 6861 crit_exit(); 6862 } 6863 6864 static void 6865 ipfw_ip_input_dispatch(netmsg_t nmsg) 6866 { 6867 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 6868 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6869 struct mbuf *m = nm->m; 6870 struct ip_fw *rule = nm->arg1; 6871 6872 ASSERT_NETISR_NCPUS(mycpuid); 6873 KASSERT(rule->cpuid == mycpuid, 6874 ("rule does not belong to cpu%d", mycpuid)); 6875 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 6876 ("mbuf does not have ipfw continue rule")); 6877 6878 KASSERT(ctx->ipfw_cont_rule == NULL, 6879 ("pending ipfw continue rule")); 6880 ctx->ipfw_cont_rule = rule; 6881 ip_input(m); 6882 6883 /* May not have been cleared, if ipfw was unloaded/disabled. */ 6884 ctx->ipfw_cont_rule = NULL; 6885 6886 /* 6887 * This rule is no longer used; decrement its cross_refs 6888 * so that it can be deleted. 6889 */ 6890 rule->cross_refs--; 6891 } 6892 6893 static void 6894 ipfw_defrag_redispatch(struct mbuf *m, int cpuid, struct ip_fw *rule) 6895 { 6896 struct netmsg_genpkt *nm; 6897 6898 KASSERT(cpuid != mycpuid, ("continue on the same cpu%d", cpuid)); 6899 6900 /* 6901 * NOTE: 6902 * Bump cross_refs to prevent this rule and its siblings 6903 * from being deleted while this mbuf is in flight. The 6904 * cross_refs of the sibling rule on the target cpu will 6905 * be decremented once this mbuf has been filtered on 6906 * the target cpu. 6907 */ 6908 rule->cross_refs++; 6909 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE; 6910 6911 nm = &m->m_hdr.mh_genmsg; 6912 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0, 6913 ipfw_ip_input_dispatch); 6914 nm->m = m; 6915 nm->arg1 = rule->cross_rules[cpuid]; 6916 netisr_sendmsg(&nm->base, cpuid); 6917 } 6918 6919 static void 6920 ipfw_init_args(struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif) 6921 { 6922 6923 args->flags = 0; 6924 args->rule = NULL; 6925 args->xlat = NULL; 6926 6927 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 6928 struct m_tag *mtag; 6929 6930 /* Extract info from dummynet tag */ 6931 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 6932 KKASSERT(mtag != NULL); 6933 args->rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 6934 KKASSERT(args->rule != NULL); 6935 6936 m_tag_delete(m, mtag); 6937 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 6938 } else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) { 6939 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6940 6941 KKASSERT(ctx->ipfw_cont_rule != NULL); 6942 args->rule = ctx->ipfw_cont_rule; 6943 ctx->ipfw_cont_rule = NULL; 6944 6945 if (ctx->ipfw_cont_xlat != NULL) { 6946 args->xlat = ctx->ipfw_cont_xlat; 6947 ctx->ipfw_cont_xlat = NULL; 6948 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATINS) { 6949 args->flags |= IP_FWARG_F_XLATINS; 6950 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATINS; 6951 } 6952 if (m->m_pkthdr.fw_flags & IPFW_MBUF_XLATFWD) { 6953 args->flags |= IP_FWARG_F_XLATFWD; 6954 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_XLATFWD; 6955 } 6956 } 6957 KKASSERT((m->m_pkthdr.fw_flags & 6958 (IPFW_MBUF_XLATINS | IPFW_MBUF_XLATFWD)) == 0); 6959 6960 args->flags |= IP_FWARG_F_CONT; 6961 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE; 6962 } 6963 6964 args->eh = NULL; 6965 args->oif = oif; 6966 args->m = m; 6967
} 6968 6969 static int 6970 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 6971 { 6972 struct ip_fw_args args; 6973 struct mbuf *m = *m0; 6974 int tee = 0, error = 0, ret; 6975 6976 ipfw_init_args(&args, m, NULL); 6977 6978 ret = ipfw_chk(&args); 6979 m = args.m; 6980 if (m == NULL) { 6981 if (ret != IP_FW_REDISPATCH) 6982 error = EACCES; 6983 goto back; 6984 } 6985 6986 switch (ret) { 6987 case IP_FW_PASS: 6988 break; 6989 6990 case IP_FW_DENY: 6991 m_freem(m); 6992 m = NULL; 6993 error = EACCES; 6994 break; 6995 6996 case IP_FW_DUMMYNET: 6997 /* Send packet to the appropriate pipe */ 6998 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args); 6999 break; 7000 7001 case IP_FW_TEE: 7002 tee = 1; 7003 /* FALL THROUGH */ 7004 7005 case IP_FW_DIVERT: 7006 /* 7007 * Must clear bridge tag when changing 7008 */ 7009 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 7010 if (ip_divert_p != NULL) { 7011 m = ip_divert_p(m, tee, 1); 7012 } else { 7013 m_freem(m); 7014 m = NULL; 7015 /* not sure this is the right error msg */ 7016 error = EACCES; 7017 } 7018 break; 7019 7020 default: 7021 panic("unknown ipfw return value: %d", ret); 7022 } 7023 back: 7024 *m0 = m; 7025 return error; 7026 } 7027 7028 static int 7029 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 7030 { 7031 struct ip_fw_args args; 7032 struct mbuf *m = *m0; 7033 int tee = 0, error = 0, ret; 7034 7035 ipfw_init_args(&args, m, ifp); 7036 7037 ret = ipfw_chk(&args); 7038 m = args.m; 7039 if (m == NULL) { 7040 if (ret != IP_FW_REDISPATCH) 7041 error = EACCES; 7042 goto back; 7043 } 7044 7045 switch (ret) { 7046 case IP_FW_PASS: 7047 break; 7048 7049 case IP_FW_DENY: 7050 m_freem(m); 7051 m = NULL; 7052 error = EACCES; 7053 break; 7054 7055 case IP_FW_DUMMYNET: 7056 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args); 7057 break; 7058 7059 case IP_FW_TEE: 7060 tee = 1; 7061 /* FALL THROUGH */ 7062 7063 case IP_FW_DIVERT: 7064 if (ip_divert_p != NULL) { 7065 m = ip_divert_p(m, tee, 0); 7066 } else { 7067 m_freem(m); 7068 m = NULL; 7069 /* not sure this is the right error msg */ 7070 error = EACCES; 7071 } 7072 break; 7073 7074 default: 7075 panic("unknown ipfw return value: %d", ret); 7076 } 7077 back: 7078 *m0 = m; 7079 return error; 7080 } 7081 7082 static void 7083 ipfw_hook(void) 7084 { 7085 struct pfil_head *pfh; 7086 7087 ASSERT_NETISR0; 7088 7089 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 7090 if (pfh == NULL) 7091 return; 7092 7093 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 7094 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 7095 } 7096 7097 static void 7098 ipfw_dehook(void) 7099 { 7100 struct pfil_head *pfh; 7101 7102 ASSERT_NETISR0; 7103 7104 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 7105 if (pfh == NULL) 7106 return; 7107 7108 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 7109 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 7110 } 7111 7112 static int 7113 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS) 7114 { 7115 int dyn_cnt; 7116 7117 dyn_cnt = ipfw_state_cntcoll(); 7118 dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt; 7119 7120 return (sysctl_handle_int(oidp, &dyn_cnt, 0, req)); 7121 } 7122 7123 static int 7124 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS) 7125 { 7126 int state_cnt; 7127 7128 state_cnt = ipfw_state_cntcoll(); 7129 return (sysctl_handle_int(oidp, &state_cnt, 0, req)); 7130 } 7131 7132 static int 7133 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS) 7134 { 7135 int state_max, error; 7136 7137 state_max = ipfw_state_max; 7138 error = sysctl_handle_int(oidp, 
&state_max, 0, req); 7139 if (error || req->newptr == NULL) 7140 return (error); 7141 7142 if (state_max < 1) 7143 return (EINVAL); 7144 7145 ipfw_state_max_set(state_max); 7146 return (0); 7147 } 7148 7149 static int 7150 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS) 7151 { 7152 int dyn_max, error; 7153 7154 dyn_max = ipfw_state_max + ipfw_track_max; 7155 7156 error = sysctl_handle_int(oidp, &dyn_max, 0, req); 7157 if (error || req->newptr == NULL) 7158 return (error); 7159 7160 if (dyn_max < 2) 7161 return (EINVAL); 7162 7163 ipfw_state_max_set(dyn_max / 2); 7164 ipfw_track_max = dyn_max / 2; 7165 return (0); 7166 } 7167 7168 static void 7169 ipfw_sysctl_enable_dispatch(netmsg_t nmsg) 7170 { 7171 int enable = nmsg->lmsg.u.ms_result; 7172 7173 ASSERT_NETISR0; 7174 7175 if (fw_enable == enable) 7176 goto reply; 7177 7178 fw_enable = enable; 7179 if (fw_enable) 7180 ipfw_hook(); 7181 else 7182 ipfw_dehook(); 7183 reply: 7184 netisr_replymsg(&nmsg->base, 0); 7185 } 7186 7187 static int 7188 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS) 7189 { 7190 struct netmsg_base nmsg; 7191 int enable, error; 7192 7193 enable = fw_enable; 7194 error = sysctl_handle_int(oidp, &enable, 0, req); 7195 if (error || req->newptr == NULL) 7196 return error; 7197 7198 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7199 ipfw_sysctl_enable_dispatch); 7200 nmsg.lmsg.u.ms_result = enable; 7201 7202 return netisr_domsg(&nmsg, 0); 7203 } 7204 7205 static int 7206 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS) 7207 { 7208 return sysctl_int_range(oidp, arg1, arg2, req, 7209 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX); 7210 } 7211 7212 static int 7213 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS) 7214 { 7215 7216 return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX); 7217 } 7218 7219 static int 7220 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS) 7221 { 7222 u_long stat = 0; 7223 int cpu, error; 7224 7225 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7226 stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)); 7227 7228 error = sysctl_handle_long(oidp, &stat, 0, req); 7229 if (error || req->newptr == NULL) 7230 return (error); 7231 7232 /* Zero out this stat. 
*/ 7233 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7234 *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0; 7235 return (0); 7236 } 7237 7238 static void 7239 ipfw_ctx_init_dispatch(netmsg_t nmsg) 7240 { 7241 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 7242 struct ipfw_context *ctx; 7243 struct ip_fw *def_rule; 7244 7245 ASSERT_NETISR_NCPUS(mycpuid); 7246 7247 ctx = kmalloc(__offsetof(struct ipfw_context, 7248 ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO); 7249 7250 RB_INIT(&ctx->ipfw_state_tree); 7251 TAILQ_INIT(&ctx->ipfw_state_list); 7252 7253 RB_INIT(&ctx->ipfw_track_tree); 7254 TAILQ_INIT(&ctx->ipfw_track_list); 7255 7256 callout_init_mp(&ctx->ipfw_stateto_ch); 7257 netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport, 7258 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch); 7259 ctx->ipfw_stateexp_anch.st_type = O_ANCHOR; 7260 netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport, 7261 MSGF_DROPABLE, ipfw_state_expire_more_dispatch); 7262 7263 callout_init_mp(&ctx->ipfw_trackto_ch); 7264 netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport, 7265 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch); 7266 netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport, 7267 MSGF_DROPABLE, ipfw_track_expire_more_dispatch); 7268 7269 callout_init_mp(&ctx->ipfw_keepalive_ch); 7270 netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport, 7271 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch); 7272 ctx->ipfw_keepalive_anch.st_type = O_ANCHOR; 7273 netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport, 7274 MSGF_DROPABLE, ipfw_keepalive_more_dispatch); 7275 7276 callout_init_mp(&ctx->ipfw_xlatreap_ch); 7277 netmsg_init(&ctx->ipfw_xlatreap_nm, NULL, &netisr_adone_rport, 7278 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_xlat_reap_dispatch); 7279 TAILQ_INIT(&ctx->ipfw_xlatreap); 7280 7281 ipfw_ctx[mycpuid] = ctx; 7282 7283 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 7284 7285 def_rule->act_ofs = 0; 7286 def_rule->rulenum = IPFW_DEFAULT_RULE; 7287 def_rule->cmd_len = 1; 7288 def_rule->set = IPFW_DEFAULT_SET; 7289 7290 def_rule->cmd[0].len = 1; 7291 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 7292 def_rule->cmd[0].opcode = O_ACCEPT; 7293 #else 7294 if (filters_default_to_accept) 7295 def_rule->cmd[0].opcode = O_ACCEPT; 7296 else 7297 def_rule->cmd[0].opcode = O_DENY; 7298 #endif 7299 7300 def_rule->refcnt = 1; 7301 def_rule->cpuid = mycpuid; 7302 7303 /* Install the default rule */ 7304 ctx->ipfw_default_rule = def_rule; 7305 ctx->ipfw_layer3_chain = def_rule; 7306 7307 /* Link rule CPU sibling */ 7308 ipfw_link_sibling(fwmsg, def_rule); 7309 7310 /* Statistics only need to be updated once */ 7311 if (mycpuid == 0) 7312 ipfw_inc_static_count(def_rule); 7313 7314 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7315 } 7316 7317 static void 7318 ipfw_crossref_reap_dispatch(netmsg_t nmsg) 7319 { 7320 7321 crit_enter(); 7322 /* Reply ASAP */ 7323 netisr_replymsg(&nmsg->base, 0); 7324 crit_exit(); 7325 ipfw_crossref_reap(); 7326 } 7327 7328 static void 7329 ipfw_crossref_timeo(void *dummy __unused) 7330 { 7331 struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm; 7332 7333 KKASSERT(mycpuid == 0); 7334 7335 crit_enter(); 7336 if (msg->lmsg.ms_flags & MSGF_DONE) 7337 netisr_sendmsg_oncpu(msg); 7338 crit_exit(); 7339 } 7340 7341 static void 7342 ipfw_ifaddr_dispatch(netmsg_t nmsg) 7343 { 7344 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 7345 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 7346 struct ip_fw *f; 7347 7348 
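 /* An address on 'ifp' changed: invalidate the cached interface IP of every O_IP_SRC_IFIP/O_IP_DST_IFIP instruction that names this interface, so it is looked up again on the next match. */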
ASSERT_NETISR_NCPUS(mycpuid); 7349 7350 for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) { 7351 int l, cmdlen; 7352 ipfw_insn *cmd; 7353 7354 if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0) 7355 continue; 7356 7357 for (l = f->cmd_len, cmd = f->cmd; l > 0; 7358 l -= cmdlen, cmd += cmdlen) { 7359 cmdlen = F_LEN(cmd); 7360 if (cmd->opcode == O_IP_SRC_IFIP || 7361 cmd->opcode == O_IP_DST_IFIP) { 7362 if (strncmp(ifp->if_xname, 7363 ((ipfw_insn_ifip *)cmd)->ifname, 7364 IFNAMSIZ) == 0) 7365 cmd->arg1 &= ~IPFW_IFIP_VALID; 7366 } 7367 } 7368 } 7369 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7370 } 7371 7372 static void 7373 ipfw_ifaddr(void *arg __unused, struct ifnet *ifp, 7374 enum ifaddr_event event __unused, struct ifaddr *ifa __unused) 7375 { 7376 struct netmsg_base nm; 7377 7378 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7379 ipfw_ifaddr_dispatch); 7380 nm.lmsg.u.ms_resultp = ifp; 7381 netisr_domsg_global(&nm); 7382 } 7383 7384 static void 7385 ipfw_init_dispatch(netmsg_t nmsg) 7386 { 7387 struct netmsg_ipfw fwmsg; 7388 int error = 0, cpu; 7389 7390 ASSERT_NETISR0; 7391 7392 if (IPFW_LOADED) { 7393 kprintf("IP firewall already loaded\n"); 7394 error = EEXIST; 7395 goto reply; 7396 } 7397 7398 if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0) 7399 ipfw_table_max = UINT16_MAX; 7400 7401 /* Initialize global track tree. */ 7402 RB_INIT(&ipfw_gd.ipfw_trkcnt_tree); 7403 IPFW_TRKCNT_TOKINIT; 7404 7405 /* GC for freed crossref rules. */ 7406 callout_init_mp(&ipfw_gd.ipfw_crossref_ch); 7407 netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport, 7408 MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch); 7409 7410 ipfw_state_max_set(ipfw_state_max); 7411 ipfw_state_headroom = 8 * netisr_ncpus; 7412 7413 bzero(&fwmsg, sizeof(fwmsg)); 7414 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7415 ipfw_ctx_init_dispatch); 7416 netisr_domsg_global(&fwmsg.base); 7417 7418 ip_fw_chk_ptr = ipfw_chk; 7419 ip_fw_ctl_ptr = ipfw_ctl; 7420 ip_fw_dn_io_ptr = ipfw_dummynet_io; 7421 7422 kprintf("ipfw2 initialized, default to %s, logging ", 7423 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode == 7424 O_ACCEPT ? 
"accept" : "deny"); 7425 7426 #ifdef IPFIREWALL_VERBOSE 7427 fw_verbose = 1; 7428 #endif 7429 #ifdef IPFIREWALL_VERBOSE_LIMIT 7430 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 7431 #endif 7432 if (fw_verbose == 0) { 7433 kprintf("disabled\n"); 7434 } else if (verbose_limit == 0) { 7435 kprintf("unlimited\n"); 7436 } else { 7437 kprintf("limited to %d packets/entry by default\n", 7438 verbose_limit); 7439 } 7440 7441 ip_fw_loaded = 1; 7442 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 7443 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz, 7444 ipfw_state_expire_ipifunc, NULL, cpu); 7445 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz, 7446 ipfw_track_expire_ipifunc, NULL, cpu); 7447 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz, 7448 ipfw_keepalive, NULL, cpu); 7449 } 7450 7451 if (fw_enable) 7452 ipfw_hook(); 7453 7454 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr, 7455 NULL, EVENTHANDLER_PRI_ANY); 7456 if (ipfw_ifaddr_event == NULL) 7457 kprintf("ipfw: ifaddr_event register failed\n"); 7458 7459 reply: 7460 netisr_replymsg(&nmsg->base, error); 7461 } 7462 7463 static int 7464 ipfw_init(void) 7465 { 7466 struct netmsg_base smsg; 7467 7468 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7469 ipfw_init_dispatch); 7470 return netisr_domsg(&smsg, 0); 7471 } 7472 7473 #ifdef KLD_MODULE 7474 7475 static void 7476 ipfw_ctx_fini_dispatch(netmsg_t nmsg) 7477 { 7478 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 7479 7480 ASSERT_NETISR_NCPUS(mycpuid); 7481 7482 callout_stop_sync(&ctx->ipfw_stateto_ch); 7483 callout_stop_sync(&ctx->ipfw_trackto_ch); 7484 callout_stop_sync(&ctx->ipfw_keepalive_ch); 7485 callout_stop_sync(&ctx->ipfw_xlatreap_ch); 7486 7487 crit_enter(); 7488 netisr_dropmsg(&ctx->ipfw_stateexp_more); 7489 netisr_dropmsg(&ctx->ipfw_stateexp_nm); 7490 netisr_dropmsg(&ctx->ipfw_trackexp_more); 7491 netisr_dropmsg(&ctx->ipfw_trackexp_nm); 7492 netisr_dropmsg(&ctx->ipfw_keepalive_more); 7493 netisr_dropmsg(&ctx->ipfw_keepalive_nm); 7494 netisr_dropmsg(&ctx->ipfw_xlatreap_nm); 7495 crit_exit(); 7496 7497 ipfw_table_flushall_oncpu(ctx, 1); 7498 7499 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 7500 } 7501 7502 static void 7503 ipfw_fini_dispatch(netmsg_t nmsg) 7504 { 7505 struct netmsg_base nm; 7506 int error = 0, cpu; 7507 7508 ASSERT_NETISR0; 7509 7510 ipfw_crossref_reap(); 7511 7512 if (ipfw_gd.ipfw_refcnt != 0) { 7513 error = EBUSY; 7514 goto reply; 7515 } 7516 7517 ip_fw_loaded = 0; 7518 ipfw_dehook(); 7519 7520 /* Synchronize any inflight state/track expire IPIs. 
*/ 7521 lwkt_synchronize_ipiqs("ipfwfini"); 7522 7523 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7524 ipfw_ctx_fini_dispatch); 7525 netisr_domsg_global(&nm); 7526 7527 callout_stop_sync(&ipfw_gd.ipfw_crossref_ch); 7528 crit_enter(); 7529 netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm); 7530 crit_exit(); 7531 7532 if (ipfw_ifaddr_event != NULL) 7533 EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event); 7534 7535 ip_fw_chk_ptr = NULL; 7536 ip_fw_ctl_ptr = NULL; 7537 ip_fw_dn_io_ptr = NULL; 7538 ipfw_flush(1 /* kill default rule */); 7539 7540 /* Free per-cpu contexts */ 7541 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 7542 kfree(ipfw_ctx[cpu], M_IPFW); 7543 7544 kprintf("IP firewall unloaded\n"); 7545 reply: 7546 netisr_replymsg(&nmsg->base, error); 7547 } 7548 7549 static void 7550 ipfw_fflush_dispatch(netmsg_t nmsg) 7551 { 7552 7553 ipfw_flush(0 /* keep default rule */); 7554 ipfw_crossref_reap(); 7555 netisr_replymsg(&nmsg->base, 0); 7556 } 7557 7558 static int 7559 ipfw_fini(void) 7560 { 7561 struct netmsg_base smsg; 7562 int i = 0; 7563 7564 for (;;) { 7565 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7566 ipfw_fflush_dispatch); 7567 netisr_domsg(&smsg, 0); 7568 7569 if (ipfw_gd.ipfw_refcnt == 0) 7570 break; 7571 kprintf("ipfw: flush pending %d\n", ++i); 7572 tsleep(&smsg, 0, "ipfwff", (3 * hz) / 2); 7573 } 7574 7575 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 7576 ipfw_fini_dispatch); 7577 return netisr_domsg(&smsg, 0); 7578 } 7579 7580 #endif /* KLD_MODULE */ 7581 7582 static int 7583 ipfw_modevent(module_t mod, int type, void *unused) 7584 { 7585 int err = 0; 7586 7587 switch (type) { 7588 case MOD_LOAD: 7589 err = ipfw_init(); 7590 break; 7591 7592 case MOD_UNLOAD: 7593 #ifndef KLD_MODULE 7594 kprintf("ipfw statically compiled, cannot unload\n"); 7595 err = EBUSY; 7596 #else 7597 err = ipfw_fini(); 7598 #endif 7599 break; 7600 default: 7601 break; 7602 } 7603 return err; 7604 } 7605 7606 static moduledata_t ipfwmod = { 7607 "ipfw", 7608 ipfw_modevent, 7609 0 7610 }; 7611 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 7612 MODULE_VERSION(ipfw, 1); 7613