1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 */ 27 28 /* 29 * Implement IP packet firewall (new version) 30 */ 31 32 #include "opt_ipfw.h" 33 #include "opt_inet.h" 34 #ifndef INET 35 #error IPFIREWALL requires INET. 
36 #endif /* INET */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/in_cksum.h> 50 #include <sys/limits.h> 51 #include <sys/lock.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <net/pfil.h> 57 #include <net/dummynet/ip_dummynet.h> 58 59 #include <sys/thread2.h> 60 #include <sys/mplock2.h> 61 #include <net/netmsg2.h> 62 63 #include <netinet/in.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/in_pcb.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/tcp.h> 71 #include <netinet/tcp_seq.h> 72 #include <netinet/tcp_timer.h> 73 #include <netinet/tcp_var.h> 74 #include <netinet/tcpip.h> 75 #include <netinet/udp.h> 76 #include <netinet/udp_var.h> 77 #include <netinet/ip_divert.h> 78 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 79 80 #include <net/ipfw/ip_fw2.h> 81 82 #ifdef IPFIREWALL_DEBUG 83 #define DPRINTF(fmt, ...) \ 84 do { \ 85 if (fw_debug > 0) \ 86 kprintf(fmt, __VA_ARGS__); \ 87 } while (0) 88 #else 89 #define DPRINTF(fmt, ...) ((void)0) 90 #endif 91 92 /* 93 * Description about per-CPU rule duplication: 94 * 95 * Module loading/unloading and all ioctl operations are serialized 96 * by netisr0, so we don't have any ordering or locking problems. 97 * 98 * Following graph shows how operation on per-CPU rule list is 99 * performed [2 CPU case]: 100 * 101 * CPU0 CPU1 102 * 103 * netisr0 <------------------------------------+ 104 * domsg | 105 * : | 106 * :(delete/add...) | 107 * : | 108 * : netmsg | netmsg 109 * forwardmsg---------->netisr1 | 110 * : | 111 * :(delete/add...) 
| 112 * : | 113 * : | 114 * replymsg--------------+ 115 * 116 * 117 * 118 * Rule structure [2 CPU case] 119 * 120 * CPU0 CPU1 121 * 122 * layer3_chain layer3_chain 123 * | | 124 * V V 125 * +-------+ sibling +-------+ sibling 126 * | rule1 |--------->| rule1 |--------->NULL 127 * +-------+ +-------+ 128 * | | 129 * |next |next 130 * V V 131 * +-------+ sibling +-------+ sibling 132 * | rule2 |--------->| rule2 |--------->NULL 133 * +-------+ +-------+ 134 * 135 * ip_fw.sibling: 136 * 1) Ease statistics calculation during IP_FW_GET. We only need to 137 * iterate layer3_chain in netisr0; the current rule's duplication 138 * to the other CPUs could safely be read-only accessed through 139 * ip_fw.sibling. 140 * 2) Accelerate rule insertion and deletion, e.g. rule insertion: 141 * a) In netisr0 rule3 is determined to be inserted between rule1 142 * and rule2. To make this decision we need to iterate the 143 * layer3_chain in netisr0. The netmsg, which is used to insert 144 * the rule, will contain rule1 in netisr0 as prev_rule and rule2 145 * in netisr0 as next_rule. 146 * b) After the insertion in netisr0 is done, we will move on to 147 * netisr1. But instead of relocating the rule3's position in 148 * netisr1 by iterating the layer3_chain in netisr1, we set the 149 * netmsg's prev_rule to rule1->sibling and next_rule to 150 * rule2->sibling before the netmsg is forwarded to netisr1 from 151 * netisr0. 152 */ 153 154 /* 155 * Description of states and tracks. 156 * 157 * Both states and tracks are stored in per-cpu RB trees instead of 158 * per-cpu hash tables to avoid the worst case hash degeneration. 159 * 160 * The lifetimes of states and tracks are regulated by dyn_*_lifetime, 161 * measured in seconds and depending on the flags. 162 * 163 * When a packet is received, its address fields are first masked with 164 * the mask defined for the rule, then matched against the entries in 165 * the per-cpu state RB tree. 
States are generated by 'keep-state' 166 * and 'limit' options. 167 * 168 * The max number of states is ipfw_state_max. When we reach the 169 * maximum number of states we do not create anymore. This is done to 170 * avoid consuming too much memory, but also too much time when 171 * searching on each packet. 172 * 173 * Each state holds a pointer to the parent ipfw rule of the current 174 * CPU so we know what action to perform. States are removed when the 175 * parent rule is deleted. XXX we should make them survive. 176 * 177 * There are some limitations with states -- we do not obey the 178 * 'randomized match', and we do not do multiple passes through the 179 * firewall. XXX check the latter!!! 180 * 181 * States grow independently on each CPU, e.g. 2 CPU case: 182 * 183 * CPU0 CPU1 184 * ................... ................... 185 * : state RB tree : : state RB tree : 186 * : : : : 187 * : state1 state2 : : state3 : 188 * : | | : : | : 189 * :.....|....|......: :........|........: 190 * | | | 191 * | | |st_rule 192 * | | | 193 * V V V 194 * +-------+ +-------+ 195 * | rule1 | | rule1 | 196 * +-------+ +-------+ 197 * 198 * Tracks are used to enforce limits on the number of sessions. Tracks 199 * are generated by 'limit' option. 200 * 201 * The max number of tracks is ipfw_track_max. When we reach the 202 * maximum number of tracks we do not create anymore. This is done to 203 * avoid consuming too much memory. 204 * 205 * Tracks are organized into two layers, track counter RB tree is 206 * shared between CPUs, track RB tree is per-cpu. States generated by 207 * 'limit' option are linked to the track in addition to the per-cpu 208 * state RB tree; mainly to ease expiration. e.g. 2 CPU case: 209 * 210 * .............................. 
211 * : track counter RB tree : 212 * : : 213 * : +-----------+ : 214 * : | trkcnt1 | : 215 * : | | : 216 * : +--->counter<----+ : 217 * : | | | | : 218 * : | +-----------+ | : 219 * :......|................|....: 220 * | | 221 * CPU0 | | CPU1 222 * ................. |t_count | ................. 223 * : track RB tree : | | : track RB tree : 224 * : : | | : : 225 * : +-->track1-------+ +--------track2 : 226 * : | A : : : 227 * : | | : : : 228 * :.|.....|.......: :...............: 229 * | +----------------+ 230 * | .................... | 231 * | : state RB tree : |st_track 232 * | : : | 233 * +---state1 state2---+ 234 * : | | : 235 * :.....|.......|....: 236 * | | 237 * | |st_rule 238 * V V 239 * +----------+ 240 * | rule1 | 241 * +----------+ 242 */ 243 244 #define IPFW_AUTOINC_STEP_MIN 1 245 #define IPFW_AUTOINC_STEP_MAX 1000 246 #define IPFW_AUTOINC_STEP_DEF 100 247 248 #define IPFW_TABLE_MAX_DEF 64 249 250 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ 251 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ 252 253 #define MATCH_REVERSE 0 254 #define MATCH_FORWARD 1 255 #define MATCH_NONE 2 256 #define MATCH_UNKNOWN 3 257 258 #define IPFW_STATE_TCPFLAGS (TH_SYN | TH_FIN | TH_RST) 259 #define IPFW_STATE_TCPSTATES (IPFW_STATE_TCPFLAGS | \ 260 (IPFW_STATE_TCPFLAGS << 8)) 261 262 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 263 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 264 #define BOTH_RST (TH_RST | (TH_RST << 8)) 265 /* TH_ACK here means FIN was ACKed. 
*/ 266 #define BOTH_FINACK (TH_ACK | (TH_ACK << 8)) 267 268 #define IPFW_STATE_TCPCLOSED(s) ((s)->st_proto == IPPROTO_TCP && \ 269 (((s)->st_state & BOTH_RST) || \ 270 ((s)->st_state & BOTH_FINACK) == BOTH_FINACK)) 271 272 #define O_ANCHOR O_NOP 273 274 struct netmsg_ipfw { 275 struct netmsg_base base; 276 const struct ipfw_ioc_rule *ioc_rule; 277 struct ip_fw *next_rule; 278 struct ip_fw *prev_rule; 279 struct ip_fw *sibling; 280 uint32_t rule_flags; 281 struct ip_fw **cross_rules; 282 }; 283 284 struct netmsg_del { 285 struct netmsg_base base; 286 struct ip_fw *start_rule; 287 struct ip_fw *prev_rule; 288 uint16_t rulenum; 289 uint8_t from_set; 290 uint8_t to_set; 291 }; 292 293 struct netmsg_zent { 294 struct netmsg_base base; 295 struct ip_fw *start_rule; 296 uint16_t rulenum; 297 uint16_t log_only; 298 }; 299 300 struct netmsg_cpstate { 301 struct netmsg_base base; 302 struct ipfw_ioc_state *ioc_state; 303 int state_cntmax; 304 int state_cnt; 305 }; 306 307 struct netmsg_tblent { 308 struct netmsg_base base; 309 struct sockaddr *key; 310 struct sockaddr *netmask; 311 struct ipfw_tblent *sibling; 312 int tableid; 313 }; 314 315 struct netmsg_tblflush { 316 struct netmsg_base base; 317 int tableid; 318 int destroy; 319 }; 320 321 struct netmsg_tblexp { 322 struct netmsg_base base; 323 time_t expire; 324 int tableid; 325 int cnt; 326 int expcnt; 327 struct radix_node_head *rnh; 328 }; 329 330 struct ipfw_table_cp { 331 struct ipfw_ioc_tblent *te; 332 int te_idx; 333 int te_cnt; 334 }; 335 336 struct ip_fw_local { 337 /* 338 * offset The offset of a fragment. offset != 0 means that 339 * we have a fragment at this offset of an IPv4 packet. 340 * offset == 0 means that (if this is an IPv4 packet) 341 * this is the first or only fragment. 342 */ 343 u_short offset; 344 345 /* 346 * Local copies of addresses. They are only valid if we have 347 * an IP packet. 348 * 349 * proto The protocol. 
Set to 0 for non-ip packets, 350 * or to the protocol read from the packet otherwise. 351 * proto != 0 means that we have an IPv4 packet. 352 * 353 * src_port, dst_port port numbers, in HOST format. Only 354 * valid for TCP and UDP packets. 355 * 356 * src_ip, dst_ip ip addresses, in NETWORK format. 357 * Only valid for IPv4 packets. 358 */ 359 uint8_t proto; 360 uint16_t src_port; /* NOTE: host format */ 361 uint16_t dst_port; /* NOTE: host format */ 362 struct in_addr src_ip; /* NOTE: network format */ 363 struct in_addr dst_ip; /* NOTE: network format */ 364 uint16_t ip_len; 365 }; 366 367 struct ipfw_addrs { 368 uint32_t addr1; 369 uint32_t addr2; 370 }; 371 372 struct ipfw_ports { 373 uint16_t port1; 374 uint16_t port2; 375 }; 376 377 struct ipfw_key { 378 union { 379 struct ipfw_addrs addrs; 380 uint64_t value; 381 } addr_u; 382 union { 383 struct ipfw_ports ports; 384 uint32_t value; 385 } port_u; 386 uint8_t proto; 387 uint8_t swap; /* IPFW_KEY_SWAP_ */ 388 uint16_t rsvd2; 389 }; 390 391 #define IPFW_KEY_SWAP_ADDRS 0x1 392 #define IPFW_KEY_SWAP_PORTS 0x2 393 #define IPFW_KEY_SWAP_ALL (IPFW_KEY_SWAP_ADDRS | IPFW_KEY_SWAP_PORTS) 394 395 struct ipfw_trkcnt { 396 RB_ENTRY(ipfw_trkcnt) tc_rblink; 397 struct ipfw_key tc_key; 398 uintptr_t tc_ruleid; 399 int tc_refs; 400 int tc_count; 401 time_t tc_expire; /* userland get-only */ 402 uint16_t tc_rulenum; /* userland get-only */ 403 } __cachealign; 404 405 #define tc_addrs tc_key.addr_u.value 406 #define tc_ports tc_key.port_u.value 407 #define tc_proto tc_key.proto 408 #define tc_saddr tc_key.addr_u.addrs.addr1 409 #define tc_daddr tc_key.addr_u.addrs.addr2 410 #define tc_sport tc_key.port_u.ports.port1 411 #define tc_dport tc_key.port_u.ports.port2 412 413 RB_HEAD(ipfw_trkcnt_tree, ipfw_trkcnt); 414 415 struct ipfw_state; 416 417 struct ipfw_track { 418 RB_ENTRY(ipfw_track) t_rblink; 419 struct ipfw_key t_key; 420 struct ip_fw *t_rule; 421 time_t t_lastexp; 422 LIST_HEAD(, ipfw_state) t_state_list; 423 time_t 
t_expire; 424 volatile int *t_count; 425 struct ipfw_trkcnt *t_trkcnt; 426 TAILQ_ENTRY(ipfw_track) t_link; 427 }; 428 429 #define t_addrs t_key.addr_u.value 430 #define t_ports t_key.port_u.value 431 #define t_proto t_key.proto 432 #define t_saddr t_key.addr_u.addrs.addr1 433 #define t_daddr t_key.addr_u.addrs.addr2 434 #define t_sport t_key.port_u.ports.port1 435 #define t_dport t_key.port_u.ports.port2 436 437 RB_HEAD(ipfw_track_tree, ipfw_track); 438 TAILQ_HEAD(ipfw_track_list, ipfw_track); 439 440 struct ipfw_state { 441 RB_ENTRY(ipfw_state) st_rblink; 442 struct ipfw_key st_key; 443 444 time_t st_expire; /* expire time */ 445 struct ip_fw *st_rule; 446 447 uint64_t st_pcnt; /* packets */ 448 uint64_t st_bcnt; /* bytes */ 449 450 /* 451 * st_state: 452 * State of this rule, typically a combination of TCP flags. 453 * 454 * st_ack_fwd/st_ack_rev: 455 * Most recent ACKs in forward and reverse direction. They 456 * are used to generate keepalives. 457 */ 458 uint32_t st_state; 459 uint32_t st_ack_fwd; 460 uint32_t st_seq_fwd; 461 uint32_t st_ack_rev; 462 uint32_t st_seq_rev; 463 464 uint16_t st_flags; /* IPFW_STATE_F_ */ 465 uint16_t st_type; /* O_KEEP_STATE/O_LIMIT */ 466 struct ipfw_track *st_track; 467 468 LIST_ENTRY(ipfw_state) st_trklink; 469 TAILQ_ENTRY(ipfw_state) st_link; 470 }; 471 472 #define st_addrs st_key.addr_u.value 473 #define st_ports st_key.port_u.value 474 #define st_proto st_key.proto 475 #define st_swap st_key.swap 476 477 #define IPFW_STATE_F_ACKFWD 0x0001 478 #define IPFW_STATE_F_SEQFWD 0x0002 479 #define IPFW_STATE_F_ACKREV 0x0004 480 #define IPFW_STATE_F_SEQREV 0x0008 481 482 TAILQ_HEAD(ipfw_state_list, ipfw_state); 483 RB_HEAD(ipfw_state_tree, ipfw_state); 484 485 struct ipfw_tblent { 486 struct radix_node te_nodes[2]; 487 struct sockaddr_in te_key; 488 u_long te_use; 489 time_t te_lastuse; 490 struct ipfw_tblent *te_sibling; 491 volatile int te_expired; 492 }; 493 494 struct ipfw_context { 495 struct ip_fw *ipfw_layer3_chain; /* rules 
for layer3 */ 496 struct ip_fw *ipfw_default_rule; /* default rule */ 497 uint64_t ipfw_norule_counter; /* ipfw_log(NULL) stat*/ 498 499 /* 500 * ipfw_set_disable contains one bit per set value (0..31). 501 * If the bit is set, all rules with the corresponding set 502 * are disabled. Set IPDW_DEFAULT_SET is reserved for the 503 * default rule and CANNOT be disabled. 504 */ 505 uint32_t ipfw_set_disable; 506 507 uint8_t ipfw_flags; /* IPFW_FLAG_ */ 508 509 struct ip_fw *ipfw_cont_rule; 510 511 struct ipfw_state_tree ipfw_state_tree; 512 struct ipfw_state_list ipfw_state_list; 513 int ipfw_state_loosecnt; 514 int ipfw_state_cnt; 515 516 union { 517 struct ipfw_state state; 518 struct ipfw_track track; 519 struct ipfw_trkcnt trkcnt; 520 } ipfw_tmpkey; 521 522 struct ipfw_track_tree ipfw_track_tree; 523 struct ipfw_track_list ipfw_track_list; 524 struct ipfw_trkcnt *ipfw_trkcnt_spare; 525 526 struct callout ipfw_stateto_ch; 527 time_t ipfw_state_lastexp; 528 struct netmsg_base ipfw_stateexp_nm; 529 struct netmsg_base ipfw_stateexp_more; 530 struct ipfw_state ipfw_stateexp_anch; 531 532 struct callout ipfw_trackto_ch; 533 time_t ipfw_track_lastexp; 534 struct netmsg_base ipfw_trackexp_nm; 535 struct netmsg_base ipfw_trackexp_more; 536 struct ipfw_track ipfw_trackexp_anch; 537 538 struct callout ipfw_keepalive_ch; 539 struct netmsg_base ipfw_keepalive_nm; 540 struct netmsg_base ipfw_keepalive_more; 541 struct ipfw_state ipfw_keepalive_anch; 542 543 /* 544 * Statistics 545 */ 546 u_long ipfw_sts_reap; 547 u_long ipfw_sts_reapfailed; 548 u_long ipfw_sts_overflow; 549 u_long ipfw_sts_nomem; 550 u_long ipfw_sts_tcprecycled; 551 552 u_long ipfw_tks_nomem; 553 u_long ipfw_tks_reap; 554 u_long ipfw_tks_reapfailed; 555 u_long ipfw_tks_overflow; 556 u_long ipfw_tks_cntnomem; 557 558 u_long ipfw_frags; 559 u_long ipfw_defraged; 560 u_long ipfw_defrag_remote; 561 562 /* Last field */ 563 struct radix_node_head *ipfw_tables[]; 564 }; 565 566 #define IPFW_FLAG_KEEPALIVE 0x01 567 
#define IPFW_FLAG_STATEEXP 0x02 568 #define IPFW_FLAG_TRACKEXP 0x04 569 #define IPFW_FLAG_STATEREAP 0x08 570 #define IPFW_FLAG_TRACKREAP 0x10 571 572 #define ipfw_state_tmpkey ipfw_tmpkey.state 573 #define ipfw_track_tmpkey ipfw_tmpkey.track 574 #define ipfw_trkcnt_tmpkey ipfw_tmpkey.trkcnt 575 576 struct ipfw_global { 577 int ipfw_state_loosecnt; /* cache aligned */ 578 time_t ipfw_state_globexp __cachealign; 579 580 struct lwkt_token ipfw_trkcnt_token __cachealign; 581 struct ipfw_trkcnt_tree ipfw_trkcnt_tree; 582 int ipfw_trkcnt_cnt; 583 time_t ipfw_track_globexp; 584 585 /* Accessed in netisr0. */ 586 struct ip_fw *ipfw_crossref_free __cachealign; 587 struct callout ipfw_crossref_ch; 588 struct netmsg_base ipfw_crossref_nm; 589 590 #ifdef KLD_MODULE 591 /* 592 * Module can not be unloaded, if there are references to 593 * certains rules of ipfw(4), e.g. dummynet(4) 594 */ 595 int ipfw_refcnt __cachealign; 596 #endif 597 } __cachealign; 598 599 static struct ipfw_context *ipfw_ctx[MAXCPU]; 600 601 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 602 603 /* 604 * Following two global variables are accessed and updated only 605 * in netisr0. 606 */ 607 static uint32_t static_count; /* # of static rules */ 608 static uint32_t static_ioc_len; /* bytes of static rules */ 609 610 /* 611 * If 1, then ipfw static rules are being flushed, 612 * ipfw_chk() will skip to the default rule. 
613 */ 614 static int ipfw_flushing; 615 616 static int fw_verbose; 617 static int verbose_limit; 618 619 static int fw_debug; 620 static int autoinc_step = IPFW_AUTOINC_STEP_DEF; 621 622 static int ipfw_table_max = IPFW_TABLE_MAX_DEF; 623 624 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); 625 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); 626 627 TUNABLE_INT("net.inet.ip.fw.table_max", &ipfw_table_max); 628 629 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 630 SYSCTL_NODE(_net_inet_ip_fw, OID_AUTO, stats, CTLFLAG_RW, 0, 631 "Firewall statistics"); 632 633 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, 634 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw"); 635 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW, 636 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I", 637 "Rule number autincrement step"); 638 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, 639 &fw_one_pass, 0, 640 "Only do a single pass through ipfw when using dummynet(4)"); 641 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 642 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 643 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, 644 &fw_verbose, 0, "Log matches to ipfw rules"); 645 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 646 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 647 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, table_max, CTLFLAG_RD, 648 &ipfw_table_max, 0, "Max # of tables"); 649 650 static int ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS); 651 static int ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS); 652 static int ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS); 653 static int ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS); 654 static int ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS); 655 static int ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS); 656 657 /* 658 * Timeouts for various events in handing states. 659 * 660 * NOTE: 661 * 1 == 0~1 second. 
662 * 2 == 1~2 second(s). 663 * 664 * We use 2 seconds for FIN lifetime, so that the states will not be 665 * ripped prematurely. 666 */ 667 static uint32_t dyn_ack_lifetime = 300; 668 static uint32_t dyn_syn_lifetime = 20; 669 static uint32_t dyn_finwait_lifetime = 20; 670 static uint32_t dyn_fin_lifetime = 2; 671 static uint32_t dyn_rst_lifetime = 2; 672 static uint32_t dyn_udp_lifetime = 10; 673 static uint32_t dyn_short_lifetime = 5; /* used by tracks too */ 674 675 /* 676 * Keepalives are sent if dyn_keepalive is set. They are sent every 677 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 678 * seconds of lifetime of a rule. 679 */ 680 static uint32_t dyn_keepalive_interval = 20; 681 static uint32_t dyn_keepalive_period = 5; 682 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 683 684 static struct ipfw_global ipfw_gd; 685 static int ipfw_state_loosecnt_updthr; 686 static int ipfw_state_max = 4096; /* max # of states */ 687 static int ipfw_track_max = 4096; /* max # of tracks */ 688 689 static int ipfw_state_headroom; /* setup at module load time */ 690 static int ipfw_state_reap_min = 8; 691 static int ipfw_state_expire_max = 32; 692 static int ipfw_state_scan_max = 256; 693 static int ipfw_keepalive_max = 8; 694 static int ipfw_track_reap_max = 4; 695 static int ipfw_track_expire_max = 16; 696 static int ipfw_track_scan_max = 128; 697 698 static eventhandler_tag ipfw_ifaddr_event; 699 700 /* Compat */ 701 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, 702 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_dyncnt, "I", 703 "Number of states and tracks"); 704 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, 705 CTLTYPE_INT | CTLFLAG_RW, NULL, 0, ipfw_sysctl_dynmax, "I", 706 "Max number of states and tracks"); 707 708 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_cnt, 709 CTLTYPE_INT | CTLFLAG_RD, NULL, 0, ipfw_sysctl_statecnt, "I", 710 "Number of states"); 711 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_max, 712 CTLTYPE_INT | 
CTLFLAG_RW, NULL, 0, ipfw_sysctl_statemax, "I", 713 "Max number of states"); 714 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, state_headroom, CTLFLAG_RW, 715 &ipfw_state_headroom, 0, "headroom for state reap"); 716 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_cnt, CTLFLAG_RD, 717 &ipfw_gd.ipfw_trkcnt_cnt, 0, "Number of tracks"); 718 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, track_max, CTLFLAG_RW, 719 &ipfw_track_max, 0, "Max number of tracks"); 720 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 721 &static_count, 0, "Number of static rules"); 722 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 723 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 724 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 725 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 726 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 727 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 728 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_finwait_lifetime, CTLFLAG_RW, 729 &dyn_finwait_lifetime, 0, "Lifetime of dyn. rules for fin wait"); 730 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 731 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 732 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 733 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 734 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 735 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 736 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 737 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 738 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_scan_max, 739 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_scan_max, 0, ipfw_sysctl_scancnt, 740 "I", "# of states to scan for each expire iteration"); 741 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_expire_max, 742 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_expire_max, 0, ipfw_sysctl_scancnt, 743 "I", "# of states to expire for each expire iteration"); 744 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, keepalive_max, 745 CTLTYPE_INT | CTLFLAG_RW, &ipfw_keepalive_max, 0, ipfw_sysctl_scancnt, 746 "I", "# of states to expire for each expire iteration"); 747 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, state_reap_min, 748 CTLTYPE_INT | CTLFLAG_RW, &ipfw_state_reap_min, 0, ipfw_sysctl_scancnt, 749 "I", "# of states to reap for state shortage"); 750 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_scan_max, 751 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_scan_max, 0, ipfw_sysctl_scancnt, 752 "I", "# of tracks to scan for each expire iteration"); 753 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_expire_max, 754 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_expire_max, 0, ipfw_sysctl_scancnt, 755 "I", "# of tracks to expire for each expire iteration"); 756 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, track_reap_max, 757 CTLTYPE_INT | CTLFLAG_RW, &ipfw_track_reap_max, 0, ipfw_sysctl_scancnt, 758 "I", "# of tracks to reap for track shortage"); 759 760 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reap, 761 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 762 __offsetof(struct ipfw_context, ipfw_sts_reap), ipfw_sysctl_stat, 763 "LU", "# of state reaps due to states shortage"); 764 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_reapfailed, 765 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 766 __offsetof(struct ipfw_context, ipfw_sts_reapfailed), ipfw_sysctl_stat, 767 "LU", "# of state reap failure"); 768 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_overflow, 769 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 770 __offsetof(struct ipfw_context, ipfw_sts_overflow), ipfw_sysctl_stat, 771 "LU", 
"# of state overflow"); 772 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_nomem, 773 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 774 __offsetof(struct ipfw_context, ipfw_sts_nomem), ipfw_sysctl_stat, 775 "LU", "# of state allocation failure"); 776 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, state_tcprecycled, 777 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 778 __offsetof(struct ipfw_context, ipfw_sts_tcprecycled), ipfw_sysctl_stat, 779 "LU", "# of state deleted due to fast TCP port recycling"); 780 781 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_nomem, 782 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 783 __offsetof(struct ipfw_context, ipfw_tks_nomem), ipfw_sysctl_stat, 784 "LU", "# of track allocation failure"); 785 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reap, 786 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 787 __offsetof(struct ipfw_context, ipfw_tks_reap), ipfw_sysctl_stat, 788 "LU", "# of track reap due to tracks shortage"); 789 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_reapfailed, 790 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 791 __offsetof(struct ipfw_context, ipfw_tks_reapfailed), ipfw_sysctl_stat, 792 "LU", "# of track reap failure"); 793 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_overflow, 794 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 795 __offsetof(struct ipfw_context, ipfw_tks_overflow), ipfw_sysctl_stat, 796 "LU", "# of track overflow"); 797 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, track_cntnomem, 798 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 799 __offsetof(struct ipfw_context, ipfw_tks_cntnomem), ipfw_sysctl_stat, 800 "LU", "# of track counter allocation failure"); 801 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, frags, 802 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 803 __offsetof(struct ipfw_context, ipfw_frags), ipfw_sysctl_stat, 804 "LU", "# of IP fragements defraged"); 805 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defraged, 806 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 807 __offsetof(struct ipfw_context, ipfw_defraged), ipfw_sysctl_stat, 808 "LU", "# of IP packets after defrag"); 
809 SYSCTL_PROC(_net_inet_ip_fw_stats, OID_AUTO, defrag_remote, 810 CTLTYPE_ULONG | CTLFLAG_RW, NULL, 811 __offsetof(struct ipfw_context, ipfw_defrag_remote), ipfw_sysctl_stat, 812 "LU", "# of IP packets after defrag dispatched to remote cpus"); 813 814 static int ipfw_state_cmp(struct ipfw_state *, 815 struct ipfw_state *); 816 static int ipfw_trkcnt_cmp(struct ipfw_trkcnt *, 817 struct ipfw_trkcnt *); 818 static int ipfw_track_cmp(struct ipfw_track *, 819 struct ipfw_track *); 820 821 RB_PROTOTYPE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 822 RB_GENERATE(ipfw_state_tree, ipfw_state, st_rblink, ipfw_state_cmp); 823 824 RB_PROTOTYPE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 825 RB_GENERATE(ipfw_trkcnt_tree, ipfw_trkcnt, tc_rblink, ipfw_trkcnt_cmp); 826 827 RB_PROTOTYPE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 828 RB_GENERATE(ipfw_track_tree, ipfw_track, t_rblink, ipfw_track_cmp); 829 830 static ip_fw_chk_t ipfw_chk; 831 static void ipfw_track_expire_ipifunc(void *); 832 static void ipfw_state_expire_ipifunc(void *); 833 static void ipfw_keepalive(void *); 834 static int ipfw_state_expire_start(struct ipfw_context *, 835 int, int); 836 static void ipfw_crossref_timeo(void *); 837 838 #define IPFW_TRKCNT_TOKGET lwkt_gettoken(&ipfw_gd.ipfw_trkcnt_token) 839 #define IPFW_TRKCNT_TOKREL lwkt_reltoken(&ipfw_gd.ipfw_trkcnt_token) 840 #define IPFW_TRKCNT_TOKINIT \ 841 lwkt_token_init(&ipfw_gd.ipfw_trkcnt_token, "ipfw_trkcnt"); 842 843 static void 844 sa_maskedcopy(const struct sockaddr *src, struct sockaddr *dst, 845 const struct sockaddr *netmask) 846 { 847 const u_char *cp1 = (const u_char *)src; 848 u_char *cp2 = (u_char *)dst; 849 const u_char *cp3 = (const u_char *)netmask; 850 u_char *cplim = cp2 + *cp3; 851 u_char *cplim2 = cp2 + *cp1; 852 853 *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 854 cp3 += 2; 855 if (cplim > cplim2) 856 cplim = cplim2; 857 while (cp2 < cplim) 858 *cp2++ = *cp1++ & *cp3++; 
	if (cp2 < cplim2)
		bzero(cp2, cplim2 - cp2);
}

/*
 * Canonicalize a flow (saddr:sport <-> daddr:dport, proto) into an
 * ipfw_key.  The numerically larger address is stored in addr1 and the
 * larger port in port1; key->swap records which fields were exchanged,
 * so both directions of the same flow map to the same addr/port words
 * and the direction can be recovered from the swap mask.
 */
static __inline void
ipfw_key_build(struct ipfw_key *key, in_addr_t saddr, uint16_t sport,
    in_addr_t daddr, uint16_t dport, uint8_t proto)
{

	key->proto = proto;
	key->swap = 0;

	if (saddr < daddr) {
		key->addr_u.addrs.addr1 = daddr;
		key->addr_u.addrs.addr2 = saddr;
		key->swap |= IPFW_KEY_SWAP_ADDRS;
	} else {
		key->addr_u.addrs.addr1 = saddr;
		key->addr_u.addrs.addr2 = daddr;
	}

	if (sport < dport) {
		key->port_u.ports.port1 = dport;
		key->port_u.ports.port2 = sport;
		key->swap |= IPFW_KEY_SWAP_PORTS;
	} else {
		key->port_u.ports.port1 = sport;
		key->port_u.ports.port2 = dport;
	}

	/*
	 * If one half of the tuple is symmetric (equal ports or equal
	 * addresses), mirror the other half's swap bit.  This makes the
	 * swap masks of the two directions of one flow exact complements
	 * of each other, which ipfw_state_cmp relies on (it treats
	 * swap masks differing by IPFW_KEY_SWAP_ALL as equal).
	 */
	if (sport == dport && (key->swap & IPFW_KEY_SWAP_ADDRS))
		key->swap |= IPFW_KEY_SWAP_PORTS;
	if (saddr == daddr && (key->swap & IPFW_KEY_SWAP_PORTS))
		key->swap |= IPFW_KEY_SWAP_ADDRS;
}

/*
 * Recover the original (unswapped) 4-tuple from a canonicalized key
 * by undoing the swaps recorded in key->swap.
 */
static __inline void
ipfw_key_4tuple(const struct ipfw_key *key, in_addr_t *saddr, uint16_t *sport,
    in_addr_t *daddr, uint16_t *dport)
{

	if (key->swap & IPFW_KEY_SWAP_ADDRS) {
		*saddr = key->addr_u.addrs.addr2;
		*daddr = key->addr_u.addrs.addr1;
	} else {
		*saddr = key->addr_u.addrs.addr1;
		*daddr = key->addr_u.addrs.addr2;
	}

	if (key->swap & IPFW_KEY_SWAP_PORTS) {
		*sport = key->port_u.ports.port2;
		*dport = key->port_u.ports.port1;
	} else {
		*sport = key->port_u.ports.port1;
		*dport = key->port_u.ports.port2;
	}
}

/*
 * RB-tree ordering function for the per-CPU state tree.  Compares the
 * canonicalized keys; two states whose swap masks are equal, or are
 * exact complements (differ by IPFW_KEY_SWAP_ALL), represent the two
 * directions of the same flow and compare equal.
 */
static int
ipfw_state_cmp(struct ipfw_state *s1, struct ipfw_state *s2)
{

	if (s1->st_proto > s2->st_proto)
		return (1);
	if (s1->st_proto < s2->st_proto)
		return (-1);

	if (s1->st_addrs > s2->st_addrs)
		return (1);
	if (s1->st_addrs < s2->st_addrs)
		return (-1);

	if (s1->st_ports > s2->st_ports)
		return (1);
	if (s1->st_ports < s2->st_ports)
		return (-1);

	if (s1->st_swap == s2->st_swap ||
	    (s1->st_swap ^ s2->st_swap) == IPFW_KEY_SWAP_ALL)
		return (0);

	if (s1->st_swap > s2->st_swap)
		return (1);
	else
		return (-1);
}

/*
 * RB-tree ordering function for the global track-counter tree.
 * Discriminates on the key fields plus the owning rule id, so the same
 * tuple tracked by different rules gets distinct counters.
 */
static int
ipfw_trkcnt_cmp(struct ipfw_trkcnt *t1, struct ipfw_trkcnt *t2)
{

	if (t1->tc_proto > t2->tc_proto)
		return (1);
	if (t1->tc_proto < t2->tc_proto)
		return (-1);

	if (t1->tc_addrs > t2->tc_addrs)
		return (1);
	if (t1->tc_addrs < t2->tc_addrs)
		return (-1);

	if (t1->tc_ports > t2->tc_ports)
		return (1);
	if (t1->tc_ports < t2->tc_ports)
		return (-1);

	if (t1->tc_ruleid > t2->tc_ruleid)
		return (1);
	if (t1->tc_ruleid < t2->tc_ruleid)
		return (-1);

	return (0);
}

/*
 * RB-tree ordering function for the per-CPU track tree.  Like
 * ipfw_trkcnt_cmp but ties tracks to the per-CPU rule pointer
 * (compared by address) instead of the global rule id.
 */
static int
ipfw_track_cmp(struct ipfw_track *t1, struct ipfw_track *t2)
{

	if (t1->t_proto > t2->t_proto)
		return (1);
	if (t1->t_proto < t2->t_proto)
		return (-1);

	if (t1->t_addrs > t2->t_addrs)
		return (1);
	if (t1->t_addrs < t2->t_addrs)
		return (-1);

	if (t1->t_ports > t2->t_ports)
		return (1);
	if (t1->t_ports < t2->t_ports)
		return (-1);

	if ((uintptr_t)t1->t_rule > (uintptr_t)t2->t_rule)
		return (1);
	if ((uintptr_t)t1->t_rule < (uintptr_t)t2->t_rule)
		return (-1);

	return (0);
}

/*
 * Set the global state limit and derive the per-CPU loose-count update
 * threshold from it (5% of the limit, split across the netisr CPUs).
 */
static void
ipfw_state_max_set(int state_max)
{

	ipfw_state_max = state_max;
	/* Allow 5% states over-allocation. */
	ipfw_state_loosecnt_updthr = (state_max / 20) / netisr_ncpus;
}

/*
 * Sum the per-CPU state counts into one (approximate) global count.
 * NOTE(review): the per-CPU counters are read without synchronization,
 * so the result is only a snapshot.
 */
static __inline int
ipfw_state_cntcoll(void)
{
	int cpu, state_cnt = 0;

	for (cpu = 0; cpu < netisr_ncpus; ++cpu)
		state_cnt += ipfw_ctx[cpu]->ipfw_state_cnt;
	return (state_cnt);
}

/*
 * Collect the global state count and publish it as the new global
 * loose count.
 */
static __inline int
ipfw_state_cntsync(void)
{
	int state_cnt;

	state_cnt = ipfw_state_cntcoll();
	ipfw_gd.ipfw_state_loosecnt = state_cnt;
	return (state_cnt);
}

/*
 * Drop one reference on a rule; free it (including its cross-CPU rule
 * array) when the last reference goes away.  Must run on the rule's
 * owning CPU.  Returns 1 if the rule was freed, 0 otherwise.
 */
static __inline int
ipfw_free_rule(struct ip_fw *rule)
{
	KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d", mycpuid));
	KASSERT(rule->refcnt > 0, ("invalid refcnt %u", rule->refcnt));
	rule->refcnt--;
	if (rule->refcnt == 0) {
		if (rule->cross_rules != NULL)
			kfree(rule->cross_rules, M_IPFW);
		kfree(rule, M_IPFW);
		return 1;
	}
	return 0;
}

/*
 * Callback-style rule unreference; also drops the module-wide refcnt
 * taken in ipfw_ref_rule() when built as a KLD, so the module cannot
 * be unloaded while rules are still referenced.
 */
static void
ipfw_unref_rule(void *priv)
{
	ipfw_free_rule(priv);
#ifdef KLD_MODULE
	KASSERT(ipfw_gd.ipfw_refcnt > 0,
	    ("invalid ipfw_refcnt %d", ipfw_gd.ipfw_refcnt));
	atomic_subtract_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
}

/*
 * Take a reference on a rule (and on the module, for KLD builds).
 * Must run on the rule's owning CPU; paired with ipfw_unref_rule().
 */
static __inline void
ipfw_ref_rule(struct ip_fw *rule)
{
	KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d", mycpuid));
#ifdef KLD_MODULE
	atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
	rule->refcnt++;
}

/*
 * This macro maps an ip pointer into a layer3 header pointer of type T
 */
#define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl))

/*
 * Match the packet's ICMP type against the bitmap in cmd->d[0]
 * (one bit per ICMP type, types > ICMP_MAXTYPE never match).
 */
static __inline int
icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
{
	int type = L3HDR(struct icmp,ip)->icmp_type;

	return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type)));
}

/* Bitmap of the ICMP "query" types, used by is_icmp_query() below. */
#define TT	((1 << ICMP_ECHO) | \
		 (1 << ICMP_ROUTERSOLICIT) | \
		 (1 << ICMP_TSTAMP) | \
		 (1 << ICMP_IREQ) | \
		 (1 << ICMP_MASKREQ))

/* Return non-zero if the packet is an ICMP query (see TT above). */
static int
is_icmp_query(struct ip *ip)
{
	int type = L3HDR(struct icmp, ip)->icmp_type;

	return (type <= ICMP_MAXTYPE && (TT & (1 << type)));
}

#undef TT

/*
 * The following checks use two arrays of 8 or 16 bits to store the
 * bits that we want set or clear, respectively. They are in the
 * low and high half of cmd->arg1 or cmd->d[0].
 *
 * We scan options and store the bits we find set. We succeed if
 *
 *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
 *
 * The code is sometimes optimized not to store additional variables.
 */
static int
flags_match(ipfw_insn *cmd, uint8_t bits)
{
	u_char want_clear;
	bits = ~bits;

	if (((cmd->arg1 & 0xff) & bits) != 0)
		return 0; /* some bits we want set were clear */

	want_clear = (cmd->arg1 >> 8) & 0xff;
	if ((want_clear & bits) != want_clear)
		return 0; /* some bits we want clear were set */
	return 1;
}

/*
 * Walk the IP options of the packet, collect the option kinds present
 * into a bitmap and match it against the instruction via flags_match().
 * Returns 0 on malformed (zero-length or truncated) options.
 */
static int
ipopts_match(struct ip *ip, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	u_char *cp = (u_char *)(ip + 1);
	int x = (ip->ip_hl << 2) - sizeof(struct ip);

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[IPOPT_OPTVAL];

		if (opt == IPOPT_EOL)
			break;

		if (opt == IPOPT_NOP) {
			optlen = 1;
		} else {
			optlen = cp[IPOPT_OLEN];
			if (optlen <= 0 || optlen > x)
				return 0; /* invalid or truncated */
		}

		switch (opt) {
		case IPOPT_LSRR:
			bits |= IP_FW_IPOPT_LSRR;
			break;

		case IPOPT_SSRR:
			bits |= IP_FW_IPOPT_SSRR;
			break;

		case IPOPT_RR:
			bits |= IP_FW_IPOPT_RR;
			break;

		case IPOPT_TS:
			bits |= IP_FW_IPOPT_TS;
			break;

		default:
			break;
		}
	}
	return (flags_match(cmd, bits));
}

/*
 * Walk the TCP options, collect the option kinds present into a bitmap
 * and match it against the instruction via flags_match().
 * NOTE(review): unlike ipopts_match(), a bad option length only stops
 * the scan (break) instead of failing the match — mirrors the original
 * upstream behavior.
 */
static int
tcpopts_match(struct ip *ip, ipfw_insn *cmd)
{
	int optlen, bits = 0;
	struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
	u_char *cp = (u_char *)(tcp + 1);
	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);

	for (; x > 0; x -= optlen, cp += optlen) {
		int opt = cp[0];

		if (opt == TCPOPT_EOL)
			break;

		if (opt == TCPOPT_NOP) {
			optlen = 1;
		} else {
			optlen = cp[1];
			if (optlen <= 0)
				break;
		}

		switch (opt) {
		case TCPOPT_MAXSEG:
			bits |= IP_FW_TCPOPT_MSS;
			break;

		case TCPOPT_WINDOW:
			bits |= IP_FW_TCPOPT_WINDOW;
			break;

		case TCPOPT_SACK_PERMITTED:
		case TCPOPT_SACK:
			bits |= IP_FW_TCPOPT_SACK;
			break;

		case TCPOPT_TIMESTAMP:
			bits |= IP_FW_TCPOPT_TS;
			break;

		case TCPOPT_CC:
		case TCPOPT_CCNEW:
		case TCPOPT_CCECHO:
			bits |= IP_FW_TCPOPT_CC;
			break;

		default:
			break;
		}
	}
	return (flags_match(cmd, bits));
}

/*
 * Match an interface instruction against ifp: either by name (exact or
 * glob via kfnmatch) or, when no name is given, by comparing the
 * instruction's IPv4 address against the interface's addresses.
 */
static int
iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
{
	if (ifp == NULL)	/* no iface with this packet, match fails */
		return 0;

	/* Check by name or by IP address */
	if (cmd->name[0] != '\0') { /* match by name */
		/* Check name */
		if (cmd->p.glob) {
			if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
				return(1);
		} else {
			if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
				return(1);
		}
	} else {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ia = ifac->ifa;

			if (ia->ifa_addr == NULL)
				continue;
			if (ia->ifa_addr->sa_family != AF_INET)
				continue;
			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
			    (ia->ifa_addr))->sin_addr.s_addr)
				return(1);	/* match */
		}
	}
	return(0);	/* no match, fail ... */
}

/*
 * Helper for the ksnprintf() calls below: append at offset 'len' into
 * 'buf', clamping the remaining size to 0 once the buffer is full.
 */
#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0

/*
 * We enter here when we have a rule with O_LOG.
 * XXX this function alone takes about 2Kbytes of code!
 */
static void
ipfw_log(struct ipfw_context *ctx, struct ip_fw *f, u_int hlen,
    struct ether_header *eh, struct mbuf *m, struct ifnet *oif)
{
	char *action;
	int limit_reached = 0;
	char action2[40], proto[48], fragment[28], abuf[INET_ADDRSTRLEN];

	fragment[0] = '\0';
	proto[0] = '\0';

	if (f == NULL) {	/* bogus pkt */
		/* Rate-limit "Refuse" logging via the per-CPU counter. */
		if (verbose_limit != 0 &&
		    ctx->ipfw_norule_counter >= verbose_limit)
			return;
		ctx->ipfw_norule_counter++;
		if (ctx->ipfw_norule_counter == verbose_limit)
			limit_reached = verbose_limit;
		action = "Refuse";
	} else {	/* O_LOG is the first action, find the real one */
		ipfw_insn *cmd = ACTION_PTR(f);
		ipfw_insn_log *l = (ipfw_insn_log *)cmd;

		if (l->max_log != 0 && l->log_left == 0)
			return;
		l->log_left--;
		if (l->log_left == 0)
			limit_reached = l->max_log;
		cmd += F_LEN(cmd);	/* point to first action */
		if (cmd->opcode == O_PROB)
			cmd += F_LEN(cmd);

		action = action2;
		switch (cmd->opcode) {
		case O_DENY:
			action = "Deny";
			break;

		case O_REJECT:
			if (cmd->arg1==ICMP_REJECT_RST) {
				action = "Reset";
			} else if (cmd->arg1==ICMP_UNREACH_HOST) {
				action = "Reject";
			} else {
				ksnprintf(SNPARGS(action2, 0), "Unreach %d",
					  cmd->arg1);
			}
			break;

		case O_ACCEPT:
			action = "Accept";
			break;

		case O_COUNT:
			action = "Count";
			break;

		case O_DIVERT:
			ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
			break;

		case O_TEE:
			ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
			break;

		case O_SKIPTO:
			ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
			break;

		case O_PIPE:
			ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
			break;

		case O_QUEUE:
			ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
			break;

		case O_FORWARD_IP:
			{
				ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
				int len;

				len = ksnprintf(SNPARGS(action2, 0),
						"Forward to %s",
						kinet_ntoa(sa->sa.sin_addr, abuf));
				if (sa->sa.sin_port) {
					ksnprintf(SNPARGS(action2, len), ":%d",
						  sa->sa.sin_port);
				}
			}
			break;

		default:
			action = "UNKNOWN";
			break;
		}
	}

	if (hlen == 0) {	/* non-ip */
		ksnprintf(SNPARGS(proto, 0), "MAC");
	} else {
		struct ip *ip = mtod(m, struct ip *);
		/* these three are all aliases to the same thing */
		struct icmp *const icmp = L3HDR(struct icmp, ip);
		struct tcphdr *const tcp = (struct tcphdr *)icmp;
		struct udphdr *const udp = (struct udphdr *)icmp;

		int ip_off, offset, ip_len;
		int len;

		if (eh != NULL) { /* layer 2 packets are as on the wire */
			ip_off = ntohs(ip->ip_off);
			ip_len = ntohs(ip->ip_len);
		} else {
			ip_off = ip->ip_off;
			ip_len = ip->ip_len;
		}
		offset = ip_off & IP_OFFMASK;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
					kinet_ntoa(ip->ip_src, abuf));
			if (offset == 0) {
				ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
					  ntohs(tcp->th_sport),
					  kinet_ntoa(ip->ip_dst, abuf),
					  ntohs(tcp->th_dport));
			} else {
				ksnprintf(SNPARGS(proto, len), " %s",
					  kinet_ntoa(ip->ip_dst, abuf));
			}
			break;

		case IPPROTO_UDP:
			len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
					kinet_ntoa(ip->ip_src, abuf));
			if (offset == 0) {
				ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
					  ntohs(udp->uh_sport),
					  kinet_ntoa(ip->ip_dst, abuf),
					  ntohs(udp->uh_dport));
			} else {
				ksnprintf(SNPARGS(proto, len), " %s",
					  kinet_ntoa(ip->ip_dst, abuf));
			}
			break;

		case IPPROTO_ICMP:
			if (offset == 0) {
				len = ksnprintf(SNPARGS(proto, 0),
						"ICMP:%u.%u ",
						icmp->icmp_type,
						icmp->icmp_code);
			} else {
				len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
			}
			len += ksnprintf(SNPARGS(proto, len), "%s",
					 kinet_ntoa(ip->ip_src, abuf));
			ksnprintf(SNPARGS(proto, len), " %s",
				  kinet_ntoa(ip->ip_dst, abuf));
			break;

		default:
			len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
					kinet_ntoa(ip->ip_src, abuf));
			ksnprintf(SNPARGS(proto, len), " %s",
				  kinet_ntoa(ip->ip_dst, abuf));
			break;
		}

		if (ip_off & (IP_MF | IP_OFFMASK)) {
			ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
				  ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
				  offset << 3, (ip_off & IP_MF) ? "+" : "");
		}
	}

	if (oif || m->m_pkthdr.rcvif) {
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s %s via %s%s\n",
		    f ? f->rulenum : -1,
		    action, proto, oif ? "out" : "in",
		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
		    fragment);
	} else {
		log(LOG_SECURITY | LOG_INFO,
		    "ipfw: %d %s %s [no if info]%s\n",
		    f ? f->rulenum : -1,
		    action, proto, fragment);
	}

	if (limit_reached) {
		log(LOG_SECURITY | LOG_NOTICE,
		    "ipfw: limit %d reached on entry %d\n",
		    limit_reached, f ? f->rulenum : -1);
	}
}

#undef SNPARGS

/* Wrap-safe time comparison (same idiom as the TCP sequence macros). */
#define TIME_LEQ(a, b)	((a) - (b) <= 0)

/*
 * Remove a state from the per-CPU tree/list and free it, detaching it
 * from its track (if any) and dropping the track's shared state count.
 */
static void
ipfw_state_del(struct ipfw_context *ctx, struct ipfw_state *s)
{

	KASSERT(s->st_type == O_KEEP_STATE || s->st_type == O_LIMIT,
	    ("invalid state type %u", s->st_type));
	KASSERT(ctx->ipfw_state_cnt > 0,
	    ("invalid state count %d", ctx->ipfw_state_cnt));

	if (s->st_track != NULL) {
		struct ipfw_track *t = s->st_track;

		KASSERT(!LIST_EMPTY(&t->t_state_list),
		    ("track state list is empty"));
		LIST_REMOVE(s, st_trklink);

		KASSERT(*t->t_count > 0,
		    ("invalid track count %d", *t->t_count));
		atomic_subtract_int(t->t_count, 1);
	}

	TAILQ_REMOVE(&ctx->ipfw_state_list, s, st_link);
	RB_REMOVE(ipfw_state_tree, &ctx->ipfw_state_tree, s);
	kfree(s, M_IPFW);

	ctx->ipfw_state_cnt--;
	if (ctx->ipfw_state_loosecnt > 0)
		ctx->ipfw_state_loosecnt--;
}

/*
 * Aggressively reclaim states because we are running short of them.
 * If no expiry pass is active, start one in reap mode (closed TCP
 * states are also reclaimed).  If one is already active, continue it
 * from its anchor without a scan limit.  Returns the number of states
 * reclaimed.
 */
static int
ipfw_state_reap(struct ipfw_context *ctx, int reap_max)
{
	struct ipfw_state *s, *anchor;
	int expired;

	if (reap_max < ipfw_state_reap_min)
		reap_max = ipfw_state_reap_min;

	if ((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0) {
		/*
		 * Kick start state expiring.  Ignore scan limit,
		 * we are short of states.
		 */
		ctx->ipfw_flags |= IPFW_FLAG_STATEREAP;
		expired = ipfw_state_expire_start(ctx, INT_MAX, reap_max);
		ctx->ipfw_flags &= ~IPFW_FLAG_STATEREAP;
		return (expired);
	}

	/*
	 * States are being expired.
	 */

	if (ctx->ipfw_state_cnt == 0)
		return (0);

	expired = 0;
	anchor = &ctx->ipfw_stateexp_anch;
	while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
		/*
		 * Ignore scan limit; we are short of states.
		 */

		/* Advance the anchor past 's' to remember our position. */
		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

		if (s->st_type == O_ANCHOR)
			continue;

		if (IPFW_STATE_TCPCLOSED(s) ||
		    TIME_LEQ(s->st_expire, time_uptime)) {
			ipfw_state_del(ctx, s);
			if (++expired >= reap_max)
				break;
			if ((expired & 0xff) == 0 &&
			    ipfw_state_cntcoll() + ipfw_state_headroom <=
			    ipfw_state_max)
				break;
		}
	}
	/*
	 * NOTE:
	 * Leave the anchor on the list, even if the end of the list has
	 * been reached.  ipfw_state_expire_more_dispatch() will handle
	 * the removal.
	 */
	return (expired);
}

/*
 * Delete all states, or only those created by 'rule' when it is
 * non-NULL (used when rules are removed).
 */
static void
ipfw_state_flush(struct ipfw_context *ctx, const struct ip_fw *rule)
{
	struct ipfw_state *s, *sn;

	TAILQ_FOREACH_MUTABLE(s, &ctx->ipfw_state_list, st_link, sn) {
		if (s->st_type == O_ANCHOR)
			continue;
		if (rule != NULL && s->st_rule != rule)
			continue;
		ipfw_state_del(ctx, s);
	}
}

/*
 * Finish an expiry pass: clear the in-progress flag and rearm the
 * callout to start the next pass in one second.
 */
static void
ipfw_state_expire_done(struct ipfw_context *ctx)
{

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));
	ctx->ipfw_flags &= ~IPFW_FLAG_STATEEXP;
	callout_reset(&ctx->ipfw_stateto_ch, hz,
	    ipfw_state_expire_ipifunc, NULL);
}

/*
 * Post the "expire more" netmsg to this CPU so the current expiry pass
 * resumes from its anchor on a later netisr iteration (bounds the work
 * done per invocation).
 */
static void
ipfw_state_expire_more(struct ipfw_context *ctx)
{
	struct netmsg_base *nm = &ctx->ipfw_stateexp_more;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));
	KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
	    ("stateexp more did not finish"));
	netisr_sendmsg_oncpu(nm);
}

/*
 * Core of an expiry pass: walk the state list from 'anchor', deleting
 * expired states (and closed TCP states when reaping).  Stops after
 * scan_max entries scanned or expire_max entries deleted, scheduling a
 * continuation in that case; otherwise removes the anchor and finishes
 * the pass.  Returns the number of states deleted in this slice.
 */
static int
ipfw_state_expire_loop(struct ipfw_context *ctx, struct ipfw_state *anchor,
    int scan_max, int expire_max)
{
	struct ipfw_state *s;
	int scanned = 0, expired = 0;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("stateexp is not in progress"));

	while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
		if (scanned++ >= scan_max) {
			ipfw_state_expire_more(ctx);
			return (expired);
		}

		/* Advance the anchor past 's' to remember our position. */
		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

		if (s->st_type == O_ANCHOR)
			continue;

		if (TIME_LEQ(s->st_expire, time_uptime) ||
		    ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
		     IPFW_STATE_TCPCLOSED(s))) {
			ipfw_state_del(ctx, s);
			if (++expired >= expire_max) {
				ipfw_state_expire_more(ctx);
				return (expired);
			}
			if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) &&
			    (expired & 0xff) == 0 &&
			    ipfw_state_cntcoll() + ipfw_state_headroom <=
			    ipfw_state_max) {
				ipfw_state_expire_more(ctx);
				return (expired);
			}
		}
	}
	TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
	ipfw_state_expire_done(ctx);
	return (expired);
}

/*
 * Netmsg handler that continues an in-progress expiry pass from the
 * per-CPU anchor.
 */
static void
ipfw_state_expire_more_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_state *anchor;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT(ctx->ipfw_flags & IPFW_FLAG_STATEEXP,
	    ("statexp is not in progress"));

	/* Reply ASAP */
	netisr_replymsg(&nm->base, 0);

	anchor = &ctx->ipfw_stateexp_anch;
	if (ctx->ipfw_state_cnt == 0) {
		TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
		ipfw_state_expire_done(ctx);
		return;
	}
	ipfw_state_expire_loop(ctx, anchor,
	    ipfw_state_scan_max, ipfw_state_expire_max);
}

/*
 * Begin an expiry pass: mark it in progress, insert the anchor at the
 * head of the state list and run the first slice of the scan.  Skips
 * the pass (but still rearms the callout) when there is nothing to do
 * or a non-reap pass already ran this second.
 */
static int
ipfw_state_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
{
	struct ipfw_state *anchor;

	KASSERT((ctx->ipfw_flags & IPFW_FLAG_STATEEXP) == 0,
	    ("stateexp is in progress"));
	ctx->ipfw_flags |= IPFW_FLAG_STATEEXP;

	if (ctx->ipfw_state_cnt == 0) {
		ipfw_state_expire_done(ctx);
		return (0);
	}

	/*
	 * Do not expire more than once per second, it is useless.
	 */
	if ((ctx->ipfw_flags & IPFW_FLAG_STATEREAP) == 0 &&
	    ctx->ipfw_state_lastexp == time_uptime) {
		ipfw_state_expire_done(ctx);
		return (0);
	}
	ctx->ipfw_state_lastexp = time_uptime;

	anchor = &ctx->ipfw_stateexp_anch;
	TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
	return (ipfw_state_expire_loop(ctx, anchor, scan_max, expire_max));
}

/*
 * Netmsg handler kicked by the periodic callout: start a new expiry
 * pass unless one is already running.
 */
static void
ipfw_state_expire_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ASSERT_NETISR_NCPUS(mycpuid);

	/* Reply ASAP */
	crit_enter();
	netisr_replymsg(&nm->base, 0);
	crit_exit();

	if (ctx->ipfw_flags & IPFW_FLAG_STATEEXP) {
		/* Running; done. */
		return;
	}
	ipfw_state_expire_start(ctx,
	    ipfw_state_scan_max, ipfw_state_expire_max);
}

/*
 * Callout handler: (re)send the per-CPU state-expire netmsg if the
 * previous one has completed.
 */
static void
ipfw_state_expire_ipifunc(void *dummy __unused)
{
	struct netmsg_base *msg;

	KKASSERT(mycpuid < netisr_ncpus);
	msg = &ipfw_ctx[mycpuid]->ipfw_stateexp_nm;

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

/*
 * Track the TCP sequence/ack numbers of a stateful TCP flow in the
 * given direction.  Returns FALSE for out-of-window (out-of-sequence)
 * segments, which the caller then ignores for state-lifetime purposes;
 * RST segments are always accepted.  Also records, in the upper/lower
 * byte of st_state, when each side's FIN has been ACKed.
 */
static boolean_t
ipfw_state_update_tcp(struct ipfw_state *s, int dir, const struct tcphdr *tcp)
{
	uint32_t seq = ntohl(tcp->th_seq);
	uint32_t ack = ntohl(tcp->th_ack);

	if (tcp->th_flags & TH_RST)
		return (TRUE);

	if (dir == MATCH_FORWARD) {
		if ((s->st_flags & IPFW_STATE_F_SEQFWD) == 0) {
			s->st_flags |= IPFW_STATE_F_SEQFWD;
			s->st_seq_fwd = seq;
		} else if (SEQ_GEQ(seq, s->st_seq_fwd)) {
			s->st_seq_fwd = seq;
		} else {
			/* Out-of-sequence; done. */
			return (FALSE);
		}
		if (tcp->th_flags & TH_ACK) {
			if ((s->st_flags & IPFW_STATE_F_ACKFWD) == 0) {
				s->st_flags |= IPFW_STATE_F_ACKFWD;
				s->st_ack_fwd = ack;
			} else if (SEQ_GEQ(ack, s->st_ack_fwd)) {
				s->st_ack_fwd = ack;
			} else {
				/* Out-of-sequence; done. */
				return (FALSE);
			}

			/* Reverse side's FIN is ACKed by this segment. */
			if ((s->st_state & ((TH_FIN | TH_ACK) << 8)) ==
			    (TH_FIN << 8) && s->st_ack_fwd == s->st_seq_rev + 1)
				s->st_state |= (TH_ACK << 8);
		}
	} else {
		if ((s->st_flags & IPFW_STATE_F_SEQREV) == 0) {
			s->st_flags |= IPFW_STATE_F_SEQREV;
			s->st_seq_rev = seq;
		} else if (SEQ_GEQ(seq, s->st_seq_rev)) {
			s->st_seq_rev = seq;
		} else {
			/* Out-of-sequence; done. */
			return (FALSE);
		}
		if (tcp->th_flags & TH_ACK) {
			if ((s->st_flags & IPFW_STATE_F_ACKREV) == 0) {
				s->st_flags |= IPFW_STATE_F_ACKREV;
				s->st_ack_rev= ack;
			} else if (SEQ_GEQ(ack, s->st_ack_rev)) {
				s->st_ack_rev = ack;
			} else {
				/* Out-of-sequence; done. */
				return (FALSE);
			}

			/* Forward side's FIN is ACKed by this segment. */
			if ((s->st_state & (TH_FIN | TH_ACK)) == TH_FIN &&
			    s->st_ack_rev == s->st_seq_fwd + 1)
				s->st_state |= TH_ACK;
		}
	}
	return (TRUE);
}

/*
 * Refresh a state's expiry time based on the packet just seen.  For
 * TCP the lifetime depends on the connection phase encoded in st_state
 * (forward-direction flags in the low byte, reverse in the high byte);
 * UDP and other protocols get fixed lifetimes.
 */
static void
ipfw_state_update(const struct ipfw_flow_id *pkt, int dir,
    const struct tcphdr *tcp, struct ipfw_state *s)
{

	if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
		u_char flags = pkt->flags & IPFW_STATE_TCPFLAGS;

		if (tcp != NULL && !ipfw_state_update_tcp(s, dir, tcp))
			return;

		s->st_state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
		switch (s->st_state & IPFW_STATE_TCPSTATES) {
		case TH_SYN:				/* opening */
			s->st_expire = time_uptime + dyn_syn_lifetime;
			break;

		case BOTH_SYN:			/* move to established */
		case BOTH_SYN | TH_FIN:		/* one side tries to close */
		case BOTH_SYN | (TH_FIN << 8):
			s->st_expire = time_uptime + dyn_ack_lifetime;
			break;

		case BOTH_SYN | BOTH_FIN:	/* both sides closed */
			if ((s->st_state & BOTH_FINACK) == BOTH_FINACK) {
				/* And both FINs were ACKed. */
				s->st_expire = time_uptime + dyn_fin_lifetime;
			} else {
				s->st_expire = time_uptime +
				    dyn_finwait_lifetime;
			}
			break;

		default:
#if 0
			/*
			 * reset or some invalid combination, but can also
			 * occur if we use keep-state the wrong way.
			 */
			if ((s->st_state & ((TH_RST << 8) | TH_RST)) == 0)
				kprintf("invalid state: 0x%x\n", s->st_state);
#endif
			s->st_expire = time_uptime + dyn_rst_lifetime;
			break;
		}
	} else if (pkt->proto == IPPROTO_UDP) {
		s->st_expire = time_uptime + dyn_udp_lifetime;
	} else {
		/* other protocols */
		s->st_expire = time_uptime + dyn_short_lifetime;
	}
}

/*
 * Lookup a state.
 *
 * On a hit the state is refreshed (ipfw_state_update) and the match
 * direction is returned through *match_direction.  Expired states and
 * closed TCP states hit by a new SYN (port recycling) are deleted and
 * treated as a miss.
 */
static struct ipfw_state *
ipfw_state_lookup(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
    int *match_direction, const struct tcphdr *tcp)
{
	struct ipfw_state *key, *s;
	int dir = MATCH_NONE;

	key = &ctx->ipfw_state_tmpkey;
	ipfw_key_build(&key->st_key, pkt->src_ip, pkt->src_port,
	    pkt->dst_ip, pkt->dst_port, pkt->proto);
	s = RB_FIND(ipfw_state_tree, &ctx->ipfw_state_tree, key);
	if (s == NULL)
		goto done; /* not found. */
	if (TIME_LEQ(s->st_expire, time_uptime)) {
		/* Expired. */
		ipfw_state_del(ctx, s);
		s = NULL;
		goto done;
	}
	if ((pkt->flags & TH_SYN) && IPFW_STATE_TCPCLOSED(s)) {
		/* TCP ports recycling is too fast. */
		ctx->ipfw_sts_tcprecycled++;
		ipfw_state_del(ctx, s);
		s = NULL;
		goto done;
	}

	/*
	 * Equal swap masks mean the packet travels in the direction the
	 * state was created for; complementary masks mean the reverse
	 * direction (see ipfw_key_build/ipfw_state_cmp).
	 */
	if (s->st_swap == key->st_swap) {
		dir = MATCH_FORWARD;
	} else {
		KASSERT((s->st_swap & key->st_swap) == 0,
		    ("found mismatch state"));
		dir = MATCH_REVERSE;
	}

	/* Update this state. */
	ipfw_state_update(pkt, dir, tcp, s);

	if (s->st_track != NULL) {
		/* This track has been used. */
		s->st_track->t_expire = time_uptime + dyn_short_lifetime;
	}
done:
	if (match_direction)
		*match_direction = dir;
	return (s);
}

/*
 * State lookup returning the owning rule; also accounts the packet
 * (count and byte counters) against the state.
 */
static __inline struct ip_fw *
ipfw_state_lookup_rule(struct ipfw_context *ctx, const struct ipfw_flow_id *pkt,
    int *match_direction, const struct tcphdr *tcp, uint16_t len)
{
	struct ipfw_state *s;

	s = ipfw_state_lookup(ctx, pkt, match_direction, tcp);
	if (s == NULL)
		return (NULL);

	KASSERT(s->st_rule->cpuid == mycpuid,
	    ("rule %p (cpu%d) does not belong to the current cpu%d",
	     s->st_rule, s->st_rule->cpuid, mycpuid));

	s->st_pcnt++;
	s->st_bcnt += len;

	return (s->st_rule);
}

/*
 * Allocate and install a new state for 'id', owned by 'rule', and
 * optionally attached to track 't' (O_LIMIT).  Returns NULL on
 * allocation failure.  Panics on a duplicate key: the caller must have
 * verified the state does not exist yet.
 */
static struct ipfw_state *
ipfw_state_add(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
    uint16_t type, struct ip_fw *rule, struct ipfw_track *t,
    const struct tcphdr *tcp)
{
	struct ipfw_state *s, *dup;

	KASSERT(type == O_KEEP_STATE || type == O_LIMIT,
	    ("invalid state type %u", type));

	s = kmalloc(sizeof(*s), M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO);
	if (s == NULL) {
		ctx->ipfw_sts_nomem++;
		return (NULL);
	}

	ipfw_key_build(&s->st_key, id->src_ip, id->src_port,
	    id->dst_ip, id->dst_port, id->proto);

	s->st_rule = rule;
	s->st_type = type;

	/*
	 * Batch updates of the global loose count: flush the per-CPU
	 * count into the global one once it reaches the threshold.
	 */
	ctx->ipfw_state_cnt++;
	ctx->ipfw_state_loosecnt++;
	if (ctx->ipfw_state_loosecnt >= ipfw_state_loosecnt_updthr) {
		ipfw_gd.ipfw_state_loosecnt += ctx->ipfw_state_loosecnt;
		ctx->ipfw_state_loosecnt = 0;
	}

	dup = RB_INSERT(ipfw_state_tree, &ctx->ipfw_state_tree, s);
	if (dup != NULL)
		panic("ipfw: state exists");
	TAILQ_INSERT_TAIL(&ctx->ipfw_state_list, s, st_link);

	/*
	 * Update this state:
	 * Set st_expire and st_state.
	 */
	ipfw_state_update(id, MATCH_FORWARD, tcp, s);

	if (t != NULL) {
		/* Keep the track referenced. */
		LIST_INSERT_HEAD(&t->t_state_list, s, st_trklink);
		s->st_track = t;
	}
	return (s);
}

/*
 * Free a per-CPU track and drop its reference on the shared global
 * track counter; the last reference removes the counter from the
 * global tree (caching one spare per CPU).  Returns TRUE when the
 * counter itself was released.
 */
static boolean_t
ipfw_track_free(struct ipfw_context *ctx, struct ipfw_track *t)
{
	struct ipfw_trkcnt *trk;
	boolean_t trk_freed = FALSE;

	KASSERT(t->t_count != NULL, ("track anchor"));
	KASSERT(LIST_EMPTY(&t->t_state_list),
	    ("invalid track is still referenced"));

	trk = t->t_trkcnt;
	KASSERT(trk != NULL, ("track has no trkcnt"));

	RB_REMOVE(ipfw_track_tree, &ctx->ipfw_track_tree, t);
	TAILQ_REMOVE(&ctx->ipfw_track_list, t, t_link);
	kfree(t, M_IPFW);

	/*
	 * fdrop() style reference counting.
	 * See kern/kern_descrip.c fdrop().
	 */
	for (;;) {
		int refs = trk->tc_refs;

		cpu_ccfence();
		KASSERT(refs > 0, ("invalid trkcnt refs %d", refs));
		if (refs == 1) {
			/*
			 * Last reference: take the token so the 1->0
			 * transition and the tree removal are atomic
			 * with respect to other CPUs.
			 */
			IPFW_TRKCNT_TOKGET;
			if (atomic_cmpset_int(&trk->tc_refs, refs, 0)) {
				KASSERT(trk->tc_count == 0,
				    ("%d states reference this trkcnt",
				     trk->tc_count));
				RB_REMOVE(ipfw_trkcnt_tree,
				    &ipfw_gd.ipfw_trkcnt_tree, trk);

				KASSERT(ipfw_gd.ipfw_trkcnt_cnt > 0,
				    ("invalid trkcnt cnt %d",
				     ipfw_gd.ipfw_trkcnt_cnt));
				ipfw_gd.ipfw_trkcnt_cnt--;
				IPFW_TRKCNT_TOKREL;

				if (ctx->ipfw_trkcnt_spare == NULL)
					ctx->ipfw_trkcnt_spare = trk;
				else
					kfree(trk, M_IPFW);
				trk_freed = TRUE;
				break; /* done! */
			}
			IPFW_TRKCNT_TOKREL;
			/* retry */
		} else if (atomic_cmpset_int(&trk->tc_refs, refs, refs - 1)) {
			break; /* done! */
		}
		/* retry */
	}
	return (trk_freed);
}

/*
 * Free all tracks, or only those created by 'rule' when it is non-NULL
 * (used when rules are removed).  Anchors are skipped.
 */
static void
ipfw_track_flush(struct ipfw_context *ctx, struct ip_fw *rule)
{
	struct ipfw_track *t, *tn;

	TAILQ_FOREACH_MUTABLE(t, &ctx->ipfw_track_list, t_link, tn) {
		if (t->t_count == NULL) /* anchor */
			continue;
		if (rule != NULL && t->t_rule != rule)
			continue;
		ipfw_track_free(ctx, t);
	}
}

/*
 * Expire the states attached to one track (closed TCP states too when
 * reaping).  Rate-limited to once per second per track.  Returns TRUE
 * if any state was deleted.
 */
static boolean_t
ipfw_track_state_expire(struct ipfw_context *ctx, struct ipfw_track *t,
    boolean_t reap)
{
	struct ipfw_state *s, *sn;
	boolean_t ret = FALSE;

	KASSERT(t->t_count != NULL, ("track anchor"));

	if (LIST_EMPTY(&t->t_state_list))
		return (FALSE);

	/*
	 * Do not expire more than once per second, it is useless.
	 */
	if (t->t_lastexp == time_uptime)
		return (FALSE);
	t->t_lastexp = time_uptime;

	LIST_FOREACH_MUTABLE(s, &t->t_state_list, st_trklink, sn) {
		if (TIME_LEQ(s->st_expire, time_uptime) ||
		    (reap && IPFW_STATE_TCPCLOSED(s))) {
			KASSERT(s->st_track == t,
			    ("state track %p does not match %p",
			     s->st_track, t));
			ipfw_state_del(ctx, s);
			ret = TRUE;
		}
	}
	return (ret);
}

/*
 * Get a track counter, preferring the per-CPU cached spare over a
 * fresh allocation.  May return NULL (M_NULLOK).
 */
static __inline struct ipfw_trkcnt *
ipfw_trkcnt_alloc(struct ipfw_context *ctx)
{
	struct ipfw_trkcnt *trk;

	if (ctx->ipfw_trkcnt_spare != NULL) {
		trk = ctx->ipfw_trkcnt_spare;
		ctx->ipfw_trkcnt_spare = NULL;
	} else {
		trk = kmalloc_cachealign(sizeof(*trk), M_IPFW,
		    M_INTWAIT | M_NULLOK);
	}
	return (trk);
}

/*
 * Finish a track expiry pass: clear the in-progress flag and rearm the
 * callout to start the next pass in one second.
 */
static void
ipfw_track_expire_done(struct ipfw_context *ctx)
{

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
	    ("trackexp is not in progress"));
	ctx->ipfw_flags &= ~IPFW_FLAG_TRACKEXP;
	callout_reset(&ctx->ipfw_trackto_ch, hz,
	    ipfw_track_expire_ipifunc, NULL);
}

/*
 * Post the "expire more" netmsg so the current track expiry pass
 * resumes from its anchor on a later netisr iteration.
 */
static void
ipfw_track_expire_more(struct ipfw_context *ctx)
{
	struct netmsg_base *nm = &ctx->ipfw_trackexp_more;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
	    ("trackexp is not in progress"));
	KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
	    ("trackexp more did not finish"));
	netisr_sendmsg_oncpu(nm);
}

/*
 * Core of a track expiry pass; same anchor-resume structure as
 * ipfw_state_expire_loop().  A track is only freed once it has no
 * states left referencing it.  Returns the number of tracks freed in
 * this slice.
 */
static int
ipfw_track_expire_loop(struct ipfw_context *ctx, struct ipfw_track *anchor,
    int scan_max, int expire_max)
{
	struct ipfw_track *t;
	int scanned = 0, expired = 0;
	boolean_t reap = FALSE;

	KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
	    ("trackexp is not in progress"));

	if (ctx->ipfw_flags & IPFW_FLAG_TRACKREAP)
		reap = TRUE;

	while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
		if (scanned++ >= scan_max) {
			ipfw_track_expire_more(ctx);
			return (expired);
		}

		/* Advance the anchor past 't' to remember our position. */
		TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);

		if (t->t_count == NULL) /* anchor */
			continue;

		ipfw_track_state_expire(ctx, t, reap);
		if (!LIST_EMPTY(&t->t_state_list)) {
			/* There are states referencing this track. */
			continue;
		}

		if (TIME_LEQ(t->t_expire, time_uptime) || reap) {
			/* Expired. */
			if (ipfw_track_free(ctx, t)) {
				if (++expired >= expire_max) {
					ipfw_track_expire_more(ctx);
					return (expired);
				}
			}
		}
	}
	TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
	ipfw_track_expire_done(ctx);
	return (expired);
}

/*
 * Begin a track expiry pass; mirrors ipfw_state_expire_start().
 */
static int
ipfw_track_expire_start(struct ipfw_context *ctx, int scan_max, int expire_max)
{
	struct ipfw_track *anchor;

	KASSERT((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0,
	    ("trackexp is in progress"));
	ctx->ipfw_flags |= IPFW_FLAG_TRACKEXP;

	if (RB_EMPTY(&ctx->ipfw_track_tree)) {
		ipfw_track_expire_done(ctx);
		return (0);
	}

	/*
	 * Do not expire more than once per second, it is useless.
	 */
	if ((ctx->ipfw_flags & IPFW_FLAG_TRACKREAP) == 0 &&
	    ctx->ipfw_track_lastexp == time_uptime) {
		ipfw_track_expire_done(ctx);
		return (0);
	}
	ctx->ipfw_track_lastexp = time_uptime;

	anchor = &ctx->ipfw_trackexp_anch;
	TAILQ_INSERT_HEAD(&ctx->ipfw_track_list, anchor, t_link);
	return (ipfw_track_expire_loop(ctx, anchor, scan_max, expire_max));
}

/*
 * Netmsg handler that continues an in-progress track expiry pass from
 * the per-CPU anchor.
 */
static void
ipfw_track_expire_more_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ipfw_track *anchor;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT(ctx->ipfw_flags & IPFW_FLAG_TRACKEXP,
	    ("trackexp is not in progress"));

	/* Reply ASAP */
	netisr_replymsg(&nm->base, 0);

	anchor = &ctx->ipfw_trackexp_anch;
	if (RB_EMPTY(&ctx->ipfw_track_tree)) {
		TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
		ipfw_track_expire_done(ctx);
		return;
	}
	ipfw_track_expire_loop(ctx, anchor,
	    ipfw_track_scan_max, ipfw_track_expire_max);
}

/*
 * Netmsg handler kicked by the periodic callout: start a new track
 * expiry pass unless one is already running.
 */
static void
ipfw_track_expire_dispatch(netmsg_t nm)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ASSERT_NETISR_NCPUS(mycpuid);

	/* Reply ASAP */
	crit_enter();
	netisr_replymsg(&nm->base, 0);
	crit_exit();

	if (ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) {
		/* Running; done. */
		return;
	}
	ipfw_track_expire_start(ctx,
	    ipfw_track_scan_max, ipfw_track_expire_max);
}

/*
 * Callout handler: (re)send the per-CPU track-expire netmsg if the
 * previous one has completed.
 */
static void
ipfw_track_expire_ipifunc(void *dummy __unused)
{
	struct netmsg_base *msg;

	KKASSERT(mycpuid < netisr_ncpus);
	msg = &ipfw_ctx[mycpuid]->ipfw_trackexp_nm;

	crit_enter();
	if (msg->lmsg.ms_flags & MSGF_DONE)
		netisr_sendmsg_oncpu(msg);
	crit_exit();
}

/*
 * Aggressively reclaim tracks because we are running short of them;
 * mirrors ipfw_state_reap().  Returns the number of tracks freed.
 */
static int
ipfw_track_reap(struct ipfw_context *ctx)
{
	struct ipfw_track *t, *anchor;
	int expired;

	if ((ctx->ipfw_flags & IPFW_FLAG_TRACKEXP) == 0) {
		/*
		 * Kick start track expiring.  Ignore scan limit,
		 * we are short of tracks.
		 */
		ctx->ipfw_flags |= IPFW_FLAG_TRACKREAP;
		expired = ipfw_track_expire_start(ctx, INT_MAX,
		    ipfw_track_reap_max);
		ctx->ipfw_flags &= ~IPFW_FLAG_TRACKREAP;
		return (expired);
	}

	/*
	 * Tracks are being expired.
	 */

	if (RB_EMPTY(&ctx->ipfw_track_tree))
		return (0);

	expired = 0;
	anchor = &ctx->ipfw_trackexp_anch;
	while ((t = TAILQ_NEXT(anchor, t_link)) != NULL) {
		/*
		 * Ignore scan limit; we are short of tracks.
		 */

		/* Advance the anchor past 't' to remember our position. */
		TAILQ_REMOVE(&ctx->ipfw_track_list, anchor, t_link);
		TAILQ_INSERT_AFTER(&ctx->ipfw_track_list, t, anchor, t_link);

		if (t->t_count == NULL) /* anchor */
			continue;

		ipfw_track_state_expire(ctx, t, TRUE);
		if (!LIST_EMPTY(&t->t_state_list)) {
			/* There are states referencing this track. */
			continue;
		}

		if (ipfw_track_free(ctx, t)) {
			if (++expired >= ipfw_track_reap_max) {
				ipfw_track_expire_more(ctx);
				break;
			}
		}
	}
	/*
	 * NOTE:
	 * Leave the anchor on the list, even if the end of the list has
	 * been reached.  ipfw_track_expire_more_dispatch() will handle
	 * the removal.
	 */
	return (expired);
}

/*
 * Find or create the track for (id, limit_mask, rule); only the tuple
 * fields selected by limit_mask participate in the key.
 * (Function continues past the end of this chunk.)
 */
static struct ipfw_track *
ipfw_track_alloc(struct ipfw_context *ctx, const struct ipfw_flow_id *id,
    uint16_t limit_mask, struct ip_fw *rule)
{
	struct ipfw_track *key, *t, *dup;
	struct ipfw_trkcnt *trk, *ret;
	boolean_t do_expire = FALSE;

	KASSERT(rule->track_ruleid != 0,
	    ("rule %u has no track ruleid", rule->rulenum));

	key = &ctx->ipfw_track_tmpkey;
	key->t_proto = id->proto;
	key->t_addrs = 0;
	key->t_ports = 0;
	key->t_rule = rule;
	if (limit_mask & DYN_SRC_ADDR)
		key->t_saddr = id->src_ip;
	if (limit_mask & DYN_DST_ADDR)
		key->t_daddr = id->dst_ip;
	if (limit_mask & DYN_SRC_PORT)
		key->t_sport = id->src_port;
	if (limit_mask & DYN_DST_PORT)
		key->t_dport = id->dst_port;

	t = RB_FIND(ipfw_track_tree, &ctx->ipfw_track_tree, key);
	if (t != NULL)
		goto done;

	t = kmalloc(sizeof(*t), M_IPFW, M_INTWAIT | M_NULLOK);
	if (t == NULL) {
		ctx->ipfw_tks_nomem++;
		return (NULL);
	}

	t->t_key = key->t_key;
	t->t_rule = rule;
	t->t_lastexp = 0;
	LIST_INIT(&t->t_state_list);

	if (ipfw_gd.ipfw_trkcnt_cnt >= ipfw_track_max) {
		time_t globexp, uptime;

		trk = NULL;
		do_expire = TRUE;

		/*
		 * Do not expire globally more than once per second,
		 * it is useless.
		 */
		uptime = time_uptime;
		globexp = ipfw_gd.ipfw_track_globexp;
		if (globexp != uptime &&
		    atomic_cmpset_long(&ipfw_gd.ipfw_track_globexp,
		    globexp, uptime)) {
			int cpu;

			/* Expire tracks on other CPUs.
*/ 2345 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2346 if (cpu == mycpuid) 2347 continue; 2348 lwkt_send_ipiq(globaldata_find(cpu), 2349 ipfw_track_expire_ipifunc, NULL); 2350 } 2351 } 2352 } else { 2353 trk = ipfw_trkcnt_alloc(ctx); 2354 } 2355 if (trk == NULL) { 2356 struct ipfw_trkcnt *tkey; 2357 2358 tkey = &ctx->ipfw_trkcnt_tmpkey; 2359 key = NULL; /* tkey overlaps key */ 2360 2361 tkey->tc_key = t->t_key; 2362 tkey->tc_ruleid = rule->track_ruleid; 2363 2364 IPFW_TRKCNT_TOKGET; 2365 trk = RB_FIND(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2366 tkey); 2367 if (trk == NULL) { 2368 IPFW_TRKCNT_TOKREL; 2369 if (do_expire) { 2370 ctx->ipfw_tks_reap++; 2371 if (ipfw_track_reap(ctx) > 0) { 2372 if (ipfw_gd.ipfw_trkcnt_cnt < 2373 ipfw_track_max) { 2374 trk = ipfw_trkcnt_alloc(ctx); 2375 if (trk != NULL) 2376 goto install; 2377 ctx->ipfw_tks_cntnomem++; 2378 } else { 2379 ctx->ipfw_tks_overflow++; 2380 } 2381 } else { 2382 ctx->ipfw_tks_reapfailed++; 2383 ctx->ipfw_tks_overflow++; 2384 } 2385 } else { 2386 ctx->ipfw_tks_cntnomem++; 2387 } 2388 kfree(t, M_IPFW); 2389 return (NULL); 2390 } 2391 KASSERT(trk->tc_refs > 0 && trk->tc_refs < netisr_ncpus, 2392 ("invalid trkcnt refs %d", trk->tc_refs)); 2393 atomic_add_int(&trk->tc_refs, 1); 2394 IPFW_TRKCNT_TOKREL; 2395 } else { 2396 install: 2397 trk->tc_key = t->t_key; 2398 trk->tc_ruleid = rule->track_ruleid; 2399 trk->tc_refs = 0; 2400 trk->tc_count = 0; 2401 trk->tc_expire = 0; 2402 trk->tc_rulenum = rule->rulenum; 2403 2404 IPFW_TRKCNT_TOKGET; 2405 ret = RB_INSERT(ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree, 2406 trk); 2407 if (ret != NULL) { 2408 KASSERT(ret->tc_refs > 0 && 2409 ret->tc_refs < netisr_ncpus, 2410 ("invalid trkcnt refs %d", ret->tc_refs)); 2411 KASSERT(ctx->ipfw_trkcnt_spare == NULL, 2412 ("trkcnt spare was installed")); 2413 ctx->ipfw_trkcnt_spare = trk; 2414 trk = ret; 2415 } else { 2416 ipfw_gd.ipfw_trkcnt_cnt++; 2417 } 2418 atomic_add_int(&trk->tc_refs, 1); 2419 IPFW_TRKCNT_TOKREL; 2420 } 2421 
t->t_count = &trk->tc_count; 2422 t->t_trkcnt = trk; 2423 2424 dup = RB_INSERT(ipfw_track_tree, &ctx->ipfw_track_tree, t); 2425 if (dup != NULL) 2426 panic("ipfw: track exists"); 2427 TAILQ_INSERT_TAIL(&ctx->ipfw_track_list, t, t_link); 2428 done: 2429 t->t_expire = time_uptime + dyn_short_lifetime; 2430 return (t); 2431 } 2432 2433 /* 2434 * Install state for rule type cmd->o.opcode 2435 * 2436 * Returns 1 (failure) if state is not installed because of errors or because 2437 * states limitations are enforced. 2438 */ 2439 static int 2440 ipfw_state_install(struct ipfw_context *ctx, struct ip_fw *rule, 2441 ipfw_insn_limit *cmd, struct ip_fw_args *args, const struct tcphdr *tcp) 2442 { 2443 struct ipfw_state *s; 2444 struct ipfw_track *t; 2445 int count, diff; 2446 2447 if (ipfw_gd.ipfw_state_loosecnt >= ipfw_state_max && 2448 (diff = (ipfw_state_cntsync() - ipfw_state_max)) >= 0) { 2449 boolean_t overflow = TRUE; 2450 2451 ctx->ipfw_sts_reap++; 2452 if (ipfw_state_reap(ctx, diff) == 0) 2453 ctx->ipfw_sts_reapfailed++; 2454 if (ipfw_state_cntsync() < ipfw_state_max) 2455 overflow = FALSE; 2456 2457 if (overflow) { 2458 time_t globexp, uptime; 2459 int cpu; 2460 2461 /* 2462 * Do not expire globally more than once per second, 2463 * it is useless. 2464 */ 2465 uptime = time_uptime; 2466 globexp = ipfw_gd.ipfw_state_globexp; 2467 if (globexp == uptime || 2468 !atomic_cmpset_long(&ipfw_gd.ipfw_state_globexp, 2469 globexp, uptime)) { 2470 ctx->ipfw_sts_overflow++; 2471 return (1); 2472 } 2473 2474 /* Expire states on other CPUs. 
*/ 2475 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 2476 if (cpu == mycpuid) 2477 continue; 2478 lwkt_send_ipiq(globaldata_find(cpu), 2479 ipfw_state_expire_ipifunc, NULL); 2480 } 2481 ctx->ipfw_sts_overflow++; 2482 return (1); 2483 } 2484 } 2485 2486 switch (cmd->o.opcode) { 2487 case O_KEEP_STATE: /* bidir rule */ 2488 s = ipfw_state_add(ctx, &args->f_id, O_KEEP_STATE, rule, NULL, 2489 tcp); 2490 if (s == NULL) 2491 return (1); 2492 break; 2493 2494 case O_LIMIT: /* limit number of sessions */ 2495 t = ipfw_track_alloc(ctx, &args->f_id, cmd->limit_mask, rule); 2496 if (t == NULL) 2497 return (1); 2498 2499 if (*t->t_count >= cmd->conn_limit) { 2500 if (!ipfw_track_state_expire(ctx, t, TRUE)) 2501 return (1); 2502 } 2503 for (;;) { 2504 count = *t->t_count; 2505 if (count >= cmd->conn_limit) 2506 return (1); 2507 if (atomic_cmpset_int(t->t_count, count, count + 1)) 2508 break; 2509 } 2510 2511 s = ipfw_state_add(ctx, &args->f_id, O_LIMIT, rule, t, tcp); 2512 if (s == NULL) { 2513 /* Undo damage. 
*/ 2514 atomic_subtract_int(t->t_count, 1); 2515 return (1); 2516 } 2517 break; 2518 2519 default: 2520 panic("unknown state type %u\n", cmd->o.opcode); 2521 } 2522 return (0); 2523 } 2524 2525 static int 2526 ipfw_table_lookup(struct ipfw_context *ctx, uint16_t tableid, 2527 const struct in_addr *in) 2528 { 2529 struct radix_node_head *rnh; 2530 struct sockaddr_in sin; 2531 struct ipfw_tblent *te; 2532 2533 KASSERT(tableid < ipfw_table_max, ("invalid tableid %u", tableid)); 2534 rnh = ctx->ipfw_tables[tableid]; 2535 if (rnh == NULL) 2536 return (0); /* no match */ 2537 2538 memset(&sin, 0, sizeof(sin)); 2539 sin.sin_family = AF_INET; 2540 sin.sin_len = sizeof(sin); 2541 sin.sin_addr = *in; 2542 2543 te = (struct ipfw_tblent *)rnh->rnh_matchaddr((char *)&sin, rnh); 2544 if (te == NULL) 2545 return (0); /* no match */ 2546 2547 te->te_use++; 2548 te->te_lastuse = time_second; 2549 return (1); /* match */ 2550 } 2551 2552 /* 2553 * Transmit a TCP packet, containing either a RST or a keepalive. 2554 * When flags & TH_RST, we are sending a RST packet, because of a 2555 * "reset" action matched the packet. 2556 * Otherwise we are sending a keepalive, and flags & TH_ 2557 * 2558 * Only {src,dst}_{ip,port} of "id" are used. 2559 */ 2560 static void 2561 send_pkt(const struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 2562 { 2563 struct mbuf *m; 2564 struct ip *ip; 2565 struct tcphdr *tcp; 2566 struct route sro; /* fake route */ 2567 2568 MGETHDR(m, M_NOWAIT, MT_HEADER); 2569 if (m == NULL) 2570 return; 2571 m->m_pkthdr.rcvif = NULL; 2572 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 2573 m->m_data += max_linkhdr; 2574 2575 ip = mtod(m, struct ip *); 2576 bzero(ip, m->m_len); 2577 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 2578 ip->ip_p = IPPROTO_TCP; 2579 tcp->th_off = 5; 2580 2581 /* 2582 * Assume we are sending a RST (or a keepalive in the reverse 2583 * direction), swap src and destination addresses and ports. 
2584 */ 2585 ip->ip_src.s_addr = htonl(id->dst_ip); 2586 ip->ip_dst.s_addr = htonl(id->src_ip); 2587 tcp->th_sport = htons(id->dst_port); 2588 tcp->th_dport = htons(id->src_port); 2589 if (flags & TH_RST) { /* we are sending a RST */ 2590 if (flags & TH_ACK) { 2591 tcp->th_seq = htonl(ack); 2592 tcp->th_ack = htonl(0); 2593 tcp->th_flags = TH_RST; 2594 } else { 2595 if (flags & TH_SYN) 2596 seq++; 2597 tcp->th_seq = htonl(0); 2598 tcp->th_ack = htonl(seq); 2599 tcp->th_flags = TH_RST | TH_ACK; 2600 } 2601 } else { 2602 /* 2603 * We are sending a keepalive. flags & TH_SYN determines 2604 * the direction, forward if set, reverse if clear. 2605 * NOTE: seq and ack are always assumed to be correct 2606 * as set by the caller. This may be confusing... 2607 */ 2608 if (flags & TH_SYN) { 2609 /* 2610 * we have to rewrite the correct addresses! 2611 */ 2612 ip->ip_dst.s_addr = htonl(id->dst_ip); 2613 ip->ip_src.s_addr = htonl(id->src_ip); 2614 tcp->th_dport = htons(id->dst_port); 2615 tcp->th_sport = htons(id->src_port); 2616 } 2617 tcp->th_seq = htonl(seq); 2618 tcp->th_ack = htonl(ack); 2619 tcp->th_flags = TH_ACK; 2620 } 2621 2622 /* 2623 * set ip_len to the payload size so we can compute 2624 * the tcp checksum on the pseudoheader 2625 * XXX check this, could save a couple of words ? 2626 */ 2627 ip->ip_len = htons(sizeof(struct tcphdr)); 2628 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 2629 2630 /* 2631 * now fill fields left out earlier 2632 */ 2633 ip->ip_ttl = ip_defttl; 2634 ip->ip_len = m->m_pkthdr.len; 2635 2636 bzero(&sro, sizeof(sro)); 2637 ip_rtaddr(ip->ip_dst, &sro); 2638 2639 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 2640 ip_output(m, NULL, &sro, 0, NULL, NULL); 2641 if (sro.ro_rt) 2642 RTFREE(sro.ro_rt); 2643 } 2644 2645 /* 2646 * Send a reject message, consuming the mbuf passed as an argument. 
2647 */ 2648 static void 2649 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 2650 { 2651 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 2652 /* We need the IP header in host order for icmp_error(). */ 2653 if (args->eh != NULL) { 2654 struct ip *ip = mtod(args->m, struct ip *); 2655 2656 ip->ip_len = ntohs(ip->ip_len); 2657 ip->ip_off = ntohs(ip->ip_off); 2658 } 2659 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 2660 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 2661 struct tcphdr *const tcp = 2662 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 2663 2664 if ((tcp->th_flags & TH_RST) == 0) { 2665 send_pkt(&args->f_id, ntohl(tcp->th_seq), 2666 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 2667 } 2668 m_freem(args->m); 2669 } else { 2670 m_freem(args->m); 2671 } 2672 args->m = NULL; 2673 } 2674 2675 /* 2676 * Given an ip_fw *, lookup_next_rule will return a pointer 2677 * to the next rule, which can be either the jump 2678 * target (for skipto instructions) or the next one in the list (in 2679 * all other cases including a missing jump target). 2680 * The result is also written in the "next_rule" field of the rule. 2681 * Backward jumps are not allowed, so start looking from the next 2682 * rule... 2683 * 2684 * This never returns NULL -- in case we do not have an exact match, 2685 * the next rule is returned. When the ruleset is changed, 2686 * pointers are flushed so we are always correct. 
2687 */ 2688 static struct ip_fw * 2689 lookup_next_rule(struct ip_fw *me) 2690 { 2691 struct ip_fw *rule = NULL; 2692 ipfw_insn *cmd; 2693 2694 /* look for action, in case it is a skipto */ 2695 cmd = ACTION_PTR(me); 2696 if (cmd->opcode == O_LOG) 2697 cmd += F_LEN(cmd); 2698 if (cmd->opcode == O_SKIPTO) { 2699 for (rule = me->next; rule; rule = rule->next) { 2700 if (rule->rulenum >= cmd->arg1) 2701 break; 2702 } 2703 } 2704 if (rule == NULL) /* failure or not a skipto */ 2705 rule = me->next; 2706 me->next_rule = rule; 2707 return rule; 2708 } 2709 2710 static int 2711 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 2712 enum ipfw_opcodes opcode, uid_t uid) 2713 { 2714 struct in_addr src_ip, dst_ip; 2715 struct inpcbinfo *pi; 2716 boolean_t wildcard; 2717 struct inpcb *pcb; 2718 2719 if (fid->proto == IPPROTO_TCP) { 2720 wildcard = FALSE; 2721 pi = &tcbinfo[mycpuid]; 2722 } else if (fid->proto == IPPROTO_UDP) { 2723 wildcard = TRUE; 2724 pi = &udbinfo[mycpuid]; 2725 } else { 2726 return 0; 2727 } 2728 2729 /* 2730 * Values in 'fid' are in host byte order 2731 */ 2732 dst_ip.s_addr = htonl(fid->dst_ip); 2733 src_ip.s_addr = htonl(fid->src_ip); 2734 if (oif) { 2735 pcb = in_pcblookup_hash(pi, 2736 dst_ip, htons(fid->dst_port), 2737 src_ip, htons(fid->src_port), 2738 wildcard, oif); 2739 } else { 2740 pcb = in_pcblookup_hash(pi, 2741 src_ip, htons(fid->src_port), 2742 dst_ip, htons(fid->dst_port), 2743 wildcard, NULL); 2744 } 2745 if (pcb == NULL || pcb->inp_socket == NULL) 2746 return 0; 2747 2748 if (opcode == O_UID) { 2749 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 2750 return !socheckuid(pcb->inp_socket, uid); 2751 #undef socheckuid 2752 } else { 2753 return groupmember(uid, pcb->inp_socket->so_cred); 2754 } 2755 } 2756 2757 static __inline int 2758 ipfw_match_ifip(ipfw_insn_ifip *cmd, const struct in_addr *ip) 2759 { 2760 2761 if (__predict_false((cmd->o.arg1 & IPFW_IFIP_VALID) == 0)) { 2762 struct ifaddr_container *ifac; 2763 
struct ifnet *ifp; 2764 2765 ifp = ifunit_netisr(cmd->ifname); 2766 if (ifp == NULL) 2767 return (0); 2768 2769 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2770 struct ifaddr *ia = ifac->ifa; 2771 2772 if (ia->ifa_addr == NULL) 2773 continue; 2774 if (ia->ifa_addr->sa_family != AF_INET) 2775 continue; 2776 2777 cmd->mask.s_addr = INADDR_ANY; 2778 if (cmd->o.arg1 & IPFW_IFIP_NET) { 2779 cmd->mask = ((struct sockaddr_in *) 2780 ia->ifa_netmask)->sin_addr; 2781 } 2782 if (cmd->mask.s_addr == INADDR_ANY) 2783 cmd->mask.s_addr = INADDR_BROADCAST; 2784 2785 cmd->addr = 2786 ((struct sockaddr_in *)ia->ifa_addr)->sin_addr; 2787 cmd->addr.s_addr &= cmd->mask.s_addr; 2788 2789 cmd->o.arg1 |= IPFW_IFIP_VALID; 2790 break; 2791 } 2792 if ((cmd->o.arg1 & IPFW_IFIP_VALID) == 0) 2793 return (0); 2794 } 2795 return ((ip->s_addr & cmd->mask.s_addr) == cmd->addr.s_addr); 2796 } 2797 2798 static __inline struct mbuf * 2799 ipfw_setup_local(struct mbuf *m, const int hlen, struct ip_fw_args *args, 2800 struct ip_fw_local *local, struct ip **ip0) 2801 { 2802 struct ip *ip = mtod(m, struct ip *); 2803 struct tcphdr *tcp; 2804 struct udphdr *udp; 2805 2806 /* 2807 * Collect parameters into local variables for faster matching. 
2808 */ 2809 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 2810 local->proto = args->f_id.proto = 0; /* mark f_id invalid */ 2811 goto done; 2812 } 2813 2814 local->proto = args->f_id.proto = ip->ip_p; 2815 local->src_ip = ip->ip_src; 2816 local->dst_ip = ip->ip_dst; 2817 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 2818 local->offset = ntohs(ip->ip_off) & IP_OFFMASK; 2819 local->ip_len = ntohs(ip->ip_len); 2820 } else { 2821 local->offset = ip->ip_off & IP_OFFMASK; 2822 local->ip_len = ip->ip_len; 2823 } 2824 2825 #define PULLUP_TO(len) \ 2826 do { \ 2827 if (m->m_len < (len)) { \ 2828 args->m = m = m_pullup(m, (len)); \ 2829 if (m == NULL) { \ 2830 ip = NULL; \ 2831 goto done; \ 2832 } \ 2833 ip = mtod(m, struct ip *); \ 2834 } \ 2835 } while (0) 2836 2837 if (local->offset == 0) { 2838 switch (local->proto) { 2839 case IPPROTO_TCP: 2840 PULLUP_TO(hlen + sizeof(struct tcphdr)); 2841 tcp = L3HDR(struct tcphdr, ip); 2842 local->dst_port = tcp->th_dport; 2843 local->src_port = tcp->th_sport; 2844 args->f_id.flags = tcp->th_flags; 2845 break; 2846 2847 case IPPROTO_UDP: 2848 PULLUP_TO(hlen + sizeof(struct udphdr)); 2849 udp = L3HDR(struct udphdr, ip); 2850 local->dst_port = udp->uh_dport; 2851 local->src_port = udp->uh_sport; 2852 break; 2853 2854 case IPPROTO_ICMP: 2855 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 2856 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 2857 break; 2858 2859 default: 2860 break; 2861 } 2862 } 2863 2864 #undef PULLUP_TO 2865 2866 args->f_id.src_ip = ntohl(local->src_ip.s_addr); 2867 args->f_id.dst_ip = ntohl(local->dst_ip.s_addr); 2868 args->f_id.src_port = local->src_port = ntohs(local->src_port); 2869 args->f_id.dst_port = local->dst_port = ntohs(local->dst_port); 2870 done: 2871 *ip0 = ip; 2872 return (m); 2873 } 2874 2875 /* 2876 * The main check routine for the firewall. 2877 * 2878 * All arguments are in args so we can modify them and return them 2879 * back to the caller. 
2880 * 2881 * Parameters: 2882 * 2883 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 2884 * Starts with the IP header. 2885 * args->eh (in) Mac header if present, or NULL for layer3 packet. 2886 * args->oif Outgoing interface, or NULL if packet is incoming. 2887 * The incoming interface is in the mbuf. (in) 2888 * 2889 * args->rule Pointer to the last matching rule (in/out) 2890 * args->f_id Addresses grabbed from the packet (out) 2891 * 2892 * Return value: 2893 * 2894 * If the packet was denied/rejected and has been dropped, *m is equal 2895 * to NULL upon return. 2896 * 2897 * IP_FW_DENY the packet must be dropped. 2898 * IP_FW_PASS The packet is to be accepted and routed normally. 2899 * IP_FW_DIVERT Divert the packet to port (args->cookie) 2900 * IP_FW_TEE Tee the packet to port (args->cookie) 2901 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie) 2902 * IP_FW_CONTINUE Continue processing on another cpu. 2903 */ 2904 static int 2905 ipfw_chk(struct ip_fw_args *args) 2906 { 2907 /* 2908 * Local variables hold state during the processing of a packet. 2909 * 2910 * IMPORTANT NOTE: to speed up the processing of rules, there 2911 * are some assumption on the values of the variables, which 2912 * are documented here. Should you change them, please check 2913 * the implementation of the various instructions to make sure 2914 * that they still work. 2915 * 2916 * args->eh The MAC header. It is non-null for a layer2 2917 * packet, it is NULL for a layer-3 packet. 2918 * 2919 * m | args->m Pointer to the mbuf, as received from the caller. 2920 * It may change if ipfw_chk() does an m_pullup, or if it 2921 * consumes the packet because it calls send_reject(). 2922 * XXX This has to change, so that ipfw_chk() never modifies 2923 * or consumes the buffer. 2924 * ip is simply an alias of the value of m, and it is kept 2925 * in sync with it (the packet is supposed to start with 2926 * the ip header). 
2927 */ 2928 struct mbuf *m = args->m; 2929 struct ip *ip = mtod(m, struct ip *); 2930 2931 /* 2932 * oif | args->oif If NULL, ipfw_chk has been called on the 2933 * inbound path (ether_input, ip_input). 2934 * If non-NULL, ipfw_chk has been called on the outbound path 2935 * (ether_output, ip_output). 2936 */ 2937 struct ifnet *oif = args->oif; 2938 2939 struct ip_fw *f = NULL; /* matching rule */ 2940 int retval = IP_FW_PASS; 2941 struct m_tag *mtag; 2942 struct divert_info *divinfo; 2943 2944 /* 2945 * hlen The length of the IPv4 header. 2946 * hlen >0 means we have an IPv4 packet. 2947 */ 2948 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 2949 2950 struct ip_fw_local lc; 2951 2952 /* 2953 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 2954 * MATCH_NONE when checked and not matched (dyn_f = NULL), 2955 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL) 2956 */ 2957 int dyn_dir = MATCH_UNKNOWN; 2958 struct ip_fw *dyn_f = NULL; 2959 int cpuid = mycpuid; 2960 struct ipfw_context *ctx; 2961 2962 ASSERT_NETISR_NCPUS(cpuid); 2963 ctx = ipfw_ctx[cpuid]; 2964 2965 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) 2966 return IP_FW_PASS; /* accept */ 2967 2968 if (args->eh == NULL || /* layer 3 packet */ 2969 (m->m_pkthdr.len >= sizeof(struct ip) && 2970 ntohs(args->eh->ether_type) == ETHERTYPE_IP)) 2971 hlen = ip->ip_hl << 2; 2972 2973 memset(&lc, 0, sizeof(lc)); 2974 2975 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 2976 if (m == NULL) 2977 goto pullup_failed; 2978 2979 if (args->rule) { 2980 /* 2981 * Packet has already been tagged. Look for the next rule 2982 * to restart processing. 2983 * 2984 * If fw_one_pass != 0 then just accept it. 2985 * XXX should not happen here, but optimized out in 2986 * the caller. 
2987 */ 2988 if (fw_one_pass && !args->cont) 2989 return IP_FW_PASS; 2990 args->cont = 0; 2991 2992 /* This rule is being/has been flushed */ 2993 if (ipfw_flushing) 2994 return IP_FW_DENY; 2995 2996 KASSERT(args->rule->cpuid == cpuid, 2997 ("rule used on cpu%d", cpuid)); 2998 2999 /* This rule was deleted */ 3000 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 3001 return IP_FW_DENY; 3002 3003 f = args->rule->next_rule; 3004 if (f == NULL) 3005 f = lookup_next_rule(args->rule); 3006 } else { 3007 /* 3008 * Find the starting rule. It can be either the first 3009 * one, or the one after divert_rule if asked so. 3010 */ 3011 int skipto; 3012 3013 KKASSERT(!args->cont); 3014 3015 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 3016 if (mtag != NULL) { 3017 divinfo = m_tag_data(mtag); 3018 skipto = divinfo->skipto; 3019 } else { 3020 skipto = 0; 3021 } 3022 3023 f = ctx->ipfw_layer3_chain; 3024 if (args->eh == NULL && skipto != 0) { 3025 /* No skipto during rule flushing */ 3026 if (ipfw_flushing) 3027 return IP_FW_DENY; 3028 3029 if (skipto >= IPFW_DEFAULT_RULE) 3030 return IP_FW_DENY; /* invalid */ 3031 3032 while (f && f->rulenum <= skipto) 3033 f = f->next; 3034 if (f == NULL) /* drop packet */ 3035 return IP_FW_DENY; 3036 } else if (ipfw_flushing) { 3037 /* Rules are being flushed; skip to default rule */ 3038 f = ctx->ipfw_default_rule; 3039 } 3040 } 3041 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 3042 m_tag_delete(m, mtag); 3043 3044 /* 3045 * Now scan the rules, and parse microinstructions for each rule. 
3046 */ 3047 for (; f; f = f->next) { 3048 int l, cmdlen; 3049 ipfw_insn *cmd; 3050 int skip_or; /* skip rest of OR block */ 3051 3052 again: 3053 if (ctx->ipfw_set_disable & (1 << f->set)) 3054 continue; 3055 3056 skip_or = 0; 3057 for (l = f->cmd_len, cmd = f->cmd; l > 0; 3058 l -= cmdlen, cmd += cmdlen) { 3059 int match; 3060 3061 /* 3062 * check_body is a jump target used when we find a 3063 * CHECK_STATE, and need to jump to the body of 3064 * the target rule. 3065 */ 3066 3067 check_body: 3068 cmdlen = F_LEN(cmd); 3069 /* 3070 * An OR block (insn_1 || .. || insn_n) has the 3071 * F_OR bit set in all but the last instruction. 3072 * The first match will set "skip_or", and cause 3073 * the following instructions to be skipped until 3074 * past the one with the F_OR bit clear. 3075 */ 3076 if (skip_or) { /* skip this instruction */ 3077 if ((cmd->len & F_OR) == 0) 3078 skip_or = 0; /* next one is good */ 3079 continue; 3080 } 3081 match = 0; /* set to 1 if we succeed */ 3082 3083 switch (cmd->opcode) { 3084 /* 3085 * The first set of opcodes compares the packet's 3086 * fields with some pattern, setting 'match' if a 3087 * match is found. At the end of the loop there is 3088 * logic to deal with F_NOT and F_OR flags associated 3089 * with the opcode. 3090 */ 3091 case O_NOP: 3092 match = 1; 3093 break; 3094 3095 case O_FORWARD_MAC: 3096 kprintf("ipfw: opcode %d unimplemented\n", 3097 cmd->opcode); 3098 break; 3099 3100 case O_GID: 3101 case O_UID: 3102 /* 3103 * We only check offset == 0 && proto != 0, 3104 * as this ensures that we have an IPv4 3105 * packet with the ports info. 
3106 */ 3107 if (lc.offset!=0) 3108 break; 3109 3110 match = ipfw_match_uid(&args->f_id, oif, 3111 cmd->opcode, 3112 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 3113 break; 3114 3115 case O_RECV: 3116 match = iface_match(m->m_pkthdr.rcvif, 3117 (ipfw_insn_if *)cmd); 3118 break; 3119 3120 case O_XMIT: 3121 match = iface_match(oif, (ipfw_insn_if *)cmd); 3122 break; 3123 3124 case O_VIA: 3125 match = iface_match(oif ? oif : 3126 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 3127 break; 3128 3129 case O_MACADDR2: 3130 if (args->eh != NULL) { /* have MAC header */ 3131 uint32_t *want = (uint32_t *) 3132 ((ipfw_insn_mac *)cmd)->addr; 3133 uint32_t *mask = (uint32_t *) 3134 ((ipfw_insn_mac *)cmd)->mask; 3135 uint32_t *hdr = (uint32_t *)args->eh; 3136 3137 match = 3138 (want[0] == (hdr[0] & mask[0]) && 3139 want[1] == (hdr[1] & mask[1]) && 3140 want[2] == (hdr[2] & mask[2])); 3141 } 3142 break; 3143 3144 case O_MAC_TYPE: 3145 if (args->eh != NULL) { 3146 uint16_t t = 3147 ntohs(args->eh->ether_type); 3148 uint16_t *p = 3149 ((ipfw_insn_u16 *)cmd)->ports; 3150 int i; 3151 3152 /* Special vlan handling */ 3153 if (m->m_flags & M_VLANTAG) 3154 t = ETHERTYPE_VLAN; 3155 3156 for (i = cmdlen - 1; !match && i > 0; 3157 i--, p += 2) { 3158 match = 3159 (t >= p[0] && t <= p[1]); 3160 } 3161 } 3162 break; 3163 3164 case O_FRAG: 3165 match = (hlen > 0 && lc.offset != 0); 3166 break; 3167 3168 case O_IPFRAG: 3169 if (hlen > 0) { 3170 uint16_t off; 3171 3172 if (args->eh != NULL) 3173 off = ntohs(ip->ip_off); 3174 else 3175 off = ip->ip_off; 3176 if (off & (IP_MF | IP_OFFMASK)) 3177 match = 1; 3178 } 3179 break; 3180 3181 case O_IN: /* "out" is "not in" */ 3182 match = (oif == NULL); 3183 break; 3184 3185 case O_LAYER2: 3186 match = (args->eh != NULL); 3187 break; 3188 3189 case O_PROTO: 3190 /* 3191 * We do not allow an arg of 0 so the 3192 * check of "proto" only suffices. 
3193 */ 3194 match = (lc.proto == cmd->arg1); 3195 break; 3196 3197 case O_IP_SRC: 3198 match = (hlen > 0 && 3199 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3200 lc.src_ip.s_addr); 3201 break; 3202 3203 case O_IP_SRC_MASK: 3204 match = (hlen > 0 && 3205 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3206 (lc.src_ip.s_addr & 3207 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3208 break; 3209 3210 case O_IP_SRC_ME: 3211 if (hlen > 0) { 3212 struct ifnet *tif; 3213 3214 tif = INADDR_TO_IFP(&lc.src_ip); 3215 match = (tif != NULL); 3216 } 3217 break; 3218 3219 case O_IP_SRC_TABLE: 3220 match = ipfw_table_lookup(ctx, cmd->arg1, 3221 &lc.src_ip); 3222 break; 3223 3224 case O_IP_SRC_IFIP: 3225 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3226 &lc.src_ip); 3227 break; 3228 3229 case O_IP_DST_SET: 3230 case O_IP_SRC_SET: 3231 if (hlen > 0) { 3232 uint32_t *d = (uint32_t *)(cmd + 1); 3233 uint32_t addr = 3234 cmd->opcode == O_IP_DST_SET ? 3235 args->f_id.dst_ip : 3236 args->f_id.src_ip; 3237 3238 if (addr < d[0]) 3239 break; 3240 addr -= d[0]; /* subtract base */ 3241 match = 3242 (addr < cmd->arg1) && 3243 (d[1 + (addr >> 5)] & 3244 (1 << (addr & 0x1f))); 3245 } 3246 break; 3247 3248 case O_IP_DST: 3249 match = (hlen > 0 && 3250 ((ipfw_insn_ip *)cmd)->addr.s_addr == 3251 lc.dst_ip.s_addr); 3252 break; 3253 3254 case O_IP_DST_MASK: 3255 match = (hlen > 0) && 3256 (((ipfw_insn_ip *)cmd)->addr.s_addr == 3257 (lc.dst_ip.s_addr & 3258 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 3259 break; 3260 3261 case O_IP_DST_ME: 3262 if (hlen > 0) { 3263 struct ifnet *tif; 3264 3265 tif = INADDR_TO_IFP(&lc.dst_ip); 3266 match = (tif != NULL); 3267 } 3268 break; 3269 3270 case O_IP_DST_TABLE: 3271 match = ipfw_table_lookup(ctx, cmd->arg1, 3272 &lc.dst_ip); 3273 break; 3274 3275 case O_IP_DST_IFIP: 3276 match = ipfw_match_ifip((ipfw_insn_ifip *)cmd, 3277 &lc.dst_ip); 3278 break; 3279 3280 case O_IP_SRCPORT: 3281 case O_IP_DSTPORT: 3282 /* 3283 * offset == 0 && proto != 0 is enough 3284 * to guarantee that we have an 
IPv4 3285 * packet with port info. 3286 */ 3287 if ((lc.proto==IPPROTO_UDP || 3288 lc.proto==IPPROTO_TCP) 3289 && lc.offset == 0) { 3290 uint16_t x = 3291 (cmd->opcode == O_IP_SRCPORT) ? 3292 lc.src_port : lc.dst_port; 3293 uint16_t *p = 3294 ((ipfw_insn_u16 *)cmd)->ports; 3295 int i; 3296 3297 for (i = cmdlen - 1; !match && i > 0; 3298 i--, p += 2) { 3299 match = 3300 (x >= p[0] && x <= p[1]); 3301 } 3302 } 3303 break; 3304 3305 case O_ICMPTYPE: 3306 match = (lc.offset == 0 && 3307 lc.proto==IPPROTO_ICMP && 3308 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 3309 break; 3310 3311 case O_IPOPT: 3312 match = (hlen > 0 && ipopts_match(ip, cmd)); 3313 break; 3314 3315 case O_IPVER: 3316 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 3317 break; 3318 3319 case O_IPTTL: 3320 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 3321 break; 3322 3323 case O_IPID: 3324 match = (hlen > 0 && 3325 cmd->arg1 == ntohs(ip->ip_id)); 3326 break; 3327 3328 case O_IPLEN: 3329 match = (hlen > 0 && cmd->arg1 == lc.ip_len); 3330 break; 3331 3332 case O_IPPRECEDENCE: 3333 match = (hlen > 0 && 3334 (cmd->arg1 == (ip->ip_tos & 0xe0))); 3335 break; 3336 3337 case O_IPTOS: 3338 match = (hlen > 0 && 3339 flags_match(cmd, ip->ip_tos)); 3340 break; 3341 3342 case O_TCPFLAGS: 3343 match = (lc.proto == IPPROTO_TCP && 3344 lc.offset == 0 && 3345 flags_match(cmd, 3346 L3HDR(struct tcphdr,ip)->th_flags)); 3347 break; 3348 3349 case O_TCPOPTS: 3350 match = (lc.proto == IPPROTO_TCP && 3351 lc.offset == 0 && tcpopts_match(ip, cmd)); 3352 break; 3353 3354 case O_TCPSEQ: 3355 match = (lc.proto == IPPROTO_TCP && 3356 lc.offset == 0 && 3357 ((ipfw_insn_u32 *)cmd)->d[0] == 3358 L3HDR(struct tcphdr,ip)->th_seq); 3359 break; 3360 3361 case O_TCPACK: 3362 match = (lc.proto == IPPROTO_TCP && 3363 lc.offset == 0 && 3364 ((ipfw_insn_u32 *)cmd)->d[0] == 3365 L3HDR(struct tcphdr,ip)->th_ack); 3366 break; 3367 3368 case O_TCPWIN: 3369 match = (lc.proto == IPPROTO_TCP && 3370 lc.offset == 0 && 3371 cmd->arg1 == 3372 
L3HDR(struct tcphdr,ip)->th_win); 3373 break; 3374 3375 case O_ESTAB: 3376 /* reject packets which have SYN only */ 3377 /* XXX should i also check for TH_ACK ? */ 3378 match = (lc.proto == IPPROTO_TCP && 3379 lc.offset == 0 && 3380 (L3HDR(struct tcphdr,ip)->th_flags & 3381 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 3382 break; 3383 3384 case O_LOG: 3385 if (fw_verbose) { 3386 ipfw_log(ctx, f, hlen, args->eh, m, 3387 oif); 3388 } 3389 match = 1; 3390 break; 3391 3392 case O_PROB: 3393 match = (krandom() < 3394 ((ipfw_insn_u32 *)cmd)->d[0]); 3395 break; 3396 3397 /* 3398 * The second set of opcodes represents 'actions', 3399 * i.e. the terminal part of a rule once the packet 3400 * matches all previous patterns. 3401 * Typically there is only one action for each rule, 3402 * and the opcode is stored at the end of the rule 3403 * (but there are exceptions -- see below). 3404 * 3405 * In general, here we set retval and terminate the 3406 * outer loop (would be a 'break 3' in some language, 3407 * but we need to do a 'goto done'). 3408 * 3409 * Exceptions: 3410 * O_COUNT and O_SKIPTO actions: 3411 * instead of terminating, we jump to the next rule 3412 * ('goto next_rule', equivalent to a 'break 2'), 3413 * or to the SKIPTO target ('goto again' after 3414 * having set f, cmd and l), respectively. 3415 * 3416 * O_LIMIT and O_KEEP_STATE: these opcodes are 3417 * not real 'actions', and are stored right 3418 * before the 'action' part of the rule. 3419 * These opcodes try to install an entry in the 3420 * state tables; if successful, we continue with 3421 * the next opcode (match=1; break;), otherwise 3422 * the packet must be dropped ('goto done' after 3423 * setting retval). If static rules are changed 3424 * during the state installation, the packet will 3425 * be dropped and rule's stats will not beupdated 3426 * ('return IP_FW_DENY'). 
3427 * 3428 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 3429 * cause a lookup of the state table, and a jump 3430 * to the 'action' part of the parent rule 3431 * ('goto check_body') if an entry is found, or 3432 * (CHECK_STATE only) a jump to the next rule if 3433 * the entry is not found ('goto next_rule'). 3434 * The result of the lookup is cached to make 3435 * further instances of these opcodes are 3436 * effectively NOPs. If static rules are changed 3437 * during the state looking up, the packet will 3438 * be dropped and rule's stats will not be updated 3439 * ('return IP_FW_DENY'). 3440 */ 3441 case O_LIMIT: 3442 case O_KEEP_STATE: 3443 if (ipfw_state_install(ctx, f, 3444 (ipfw_insn_limit *)cmd, args, 3445 (lc.offset == 0 && 3446 lc.proto == IPPROTO_TCP) ? 3447 L3HDR(struct tcphdr, ip) : NULL)) { 3448 retval = IP_FW_DENY; 3449 goto done; /* error/limit violation */ 3450 } 3451 match = 1; 3452 break; 3453 3454 case O_PROBE_STATE: 3455 case O_CHECK_STATE: 3456 /* 3457 * States are checked at the first keep-state 3458 * check-state occurrence, with the result 3459 * being stored in dyn_dir. The compiler 3460 * introduces a PROBE_STATE instruction for 3461 * us when we have a KEEP_STATE/LIMIT (because 3462 * PROBE_STATE needs to be run first). 3463 */ 3464 if (dyn_dir == MATCH_UNKNOWN) { 3465 dyn_f = ipfw_state_lookup_rule(ctx, 3466 &args->f_id, &dyn_dir, 3467 (lc.offset == 0 && 3468 lc.proto == IPPROTO_TCP) ? 3469 L3HDR(struct tcphdr, ip) : NULL, 3470 lc.ip_len); 3471 if (dyn_f != NULL) { 3472 /* 3473 * Found a rule from a state; 3474 * jump to the 'action' part 3475 * of the rule. 3476 */ 3477 f = dyn_f; 3478 cmd = ACTION_PTR(f); 3479 l = f->cmd_len - f->act_ofs; 3480 goto check_body; 3481 } 3482 } 3483 /* 3484 * State not found. If CHECK_STATE, skip to 3485 * next rule, if PROBE_STATE just ignore and 3486 * continue with next opcode. 
3487 */ 3488 if (cmd->opcode == O_CHECK_STATE) 3489 goto next_rule; 3490 match = 1; 3491 break; 3492 3493 case O_ACCEPT: 3494 retval = IP_FW_PASS; /* accept */ 3495 goto done; 3496 3497 case O_DEFRAG: 3498 if (f->cross_rules == NULL) { 3499 /* 3500 * This rule was not completely setup; 3501 * move on to the next rule. 3502 */ 3503 goto next_rule; 3504 } 3505 3506 /* 3507 * Don't defrag for l2 packets, output packets 3508 * or non-fragments. 3509 */ 3510 if (oif != NULL || args->eh != NULL || 3511 (ip->ip_off & (IP_MF | IP_OFFMASK)) == 0) 3512 goto next_rule; 3513 3514 ctx->ipfw_frags++; 3515 m = ip_reass(m); 3516 args->m = m; 3517 if (m == NULL) { 3518 retval = IP_FW_PASS; 3519 goto done; 3520 } 3521 ctx->ipfw_defraged++; 3522 KASSERT((m->m_flags & M_HASH) == 0, 3523 ("hash not cleared")); 3524 3525 /* Update statistics */ 3526 f->pcnt++; 3527 f->bcnt += lc.ip_len; 3528 f->timestamp = time_second; 3529 3530 ip = mtod(m, struct ip *); 3531 hlen = ip->ip_hl << 2; 3532 ip->ip_len += hlen; 3533 3534 ip->ip_len = htons(ip->ip_len); 3535 ip->ip_off = htons(ip->ip_off); 3536 3537 ip_hashfn(&m, 0); 3538 args->m = m; 3539 if (m == NULL) 3540 goto pullup_failed; 3541 3542 KASSERT(m->m_flags & M_HASH, ("no hash")); 3543 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 3544 if (cpuid != mycpuid) { 3545 /* 3546 * NOTE: 3547 * ip_len/ip_off are in network byte 3548 * order. 3549 */ 3550 ctx->ipfw_defrag_remote++; 3551 args->rule = f; 3552 return (IP_FW_CONTINUE); 3553 } 3554 3555 /* 'm' might be changed by ip_hashfn(). */ 3556 ip = mtod(m, struct ip *); 3557 ip->ip_len = ntohs(ip->ip_len); 3558 ip->ip_off = ntohs(ip->ip_off); 3559 3560 m = ipfw_setup_local(m, hlen, args, &lc, &ip); 3561 if (m == NULL) 3562 goto pullup_failed; 3563 3564 /* Move on. 
*/ 3565 goto next_rule; 3566 3567 case O_PIPE: 3568 case O_QUEUE: 3569 args->rule = f; /* report matching rule */ 3570 args->cookie = cmd->arg1; 3571 retval = IP_FW_DUMMYNET; 3572 goto done; 3573 3574 case O_DIVERT: 3575 case O_TEE: 3576 if (args->eh) /* not on layer 2 */ 3577 break; 3578 3579 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 3580 sizeof(*divinfo), M_INTWAIT | M_NULLOK); 3581 if (mtag == NULL) { 3582 retval = IP_FW_DENY; 3583 goto done; 3584 } 3585 divinfo = m_tag_data(mtag); 3586 3587 divinfo->skipto = f->rulenum; 3588 divinfo->port = cmd->arg1; 3589 divinfo->tee = (cmd->opcode == O_TEE); 3590 m_tag_prepend(m, mtag); 3591 3592 args->cookie = cmd->arg1; 3593 retval = (cmd->opcode == O_DIVERT) ? 3594 IP_FW_DIVERT : IP_FW_TEE; 3595 goto done; 3596 3597 case O_COUNT: 3598 case O_SKIPTO: 3599 f->pcnt++; /* update stats */ 3600 f->bcnt += lc.ip_len; 3601 f->timestamp = time_second; 3602 if (cmd->opcode == O_COUNT) 3603 goto next_rule; 3604 /* handle skipto */ 3605 if (f->next_rule == NULL) 3606 lookup_next_rule(f); 3607 f = f->next_rule; 3608 goto again; 3609 3610 case O_REJECT: 3611 /* 3612 * Drop the packet and send a reject notice 3613 * if the packet is not ICMP (or is an ICMP 3614 * query), and it is not multicast/broadcast. 
3615 */ 3616 if (hlen > 0 && 3617 (lc.proto != IPPROTO_ICMP || 3618 is_icmp_query(ip)) && 3619 !(m->m_flags & (M_BCAST|M_MCAST)) && 3620 !IN_MULTICAST(ntohl(lc.dst_ip.s_addr))) { 3621 send_reject(args, cmd->arg1, 3622 lc.offset, lc.ip_len); 3623 retval = IP_FW_DENY; 3624 goto done; 3625 } 3626 /* FALLTHROUGH */ 3627 case O_DENY: 3628 retval = IP_FW_DENY; 3629 goto done; 3630 3631 case O_FORWARD_IP: 3632 if (args->eh) /* not valid on layer2 pkts */ 3633 break; 3634 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 3635 struct sockaddr_in *sin; 3636 3637 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 3638 sizeof(*sin), M_INTWAIT | M_NULLOK); 3639 if (mtag == NULL) { 3640 retval = IP_FW_DENY; 3641 goto done; 3642 } 3643 sin = m_tag_data(mtag); 3644 3645 /* Structure copy */ 3646 *sin = ((ipfw_insn_sa *)cmd)->sa; 3647 3648 m_tag_prepend(m, mtag); 3649 m->m_pkthdr.fw_flags |= 3650 IPFORWARD_MBUF_TAGGED; 3651 m->m_pkthdr.fw_flags &= 3652 ~BRIDGE_MBUF_TAGGED; 3653 } 3654 retval = IP_FW_PASS; 3655 goto done; 3656 3657 default: 3658 panic("-- unknown opcode %d", cmd->opcode); 3659 } /* end of switch() on opcodes */ 3660 3661 if (cmd->len & F_NOT) 3662 match = !match; 3663 3664 if (match) { 3665 if (cmd->len & F_OR) 3666 skip_or = 1; 3667 } else { 3668 if (!(cmd->len & F_OR)) /* not an OR block, */ 3669 break; /* try next rule */ 3670 } 3671 3672 } /* end of inner for, scan opcodes */ 3673 3674 next_rule:; /* try next rule */ 3675 3676 } /* end of outer for, scan rules */ 3677 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 3678 return IP_FW_DENY; 3679 3680 done: 3681 /* Update statistics */ 3682 f->pcnt++; 3683 f->bcnt += lc.ip_len; 3684 f->timestamp = time_second; 3685 return retval; 3686 3687 pullup_failed: 3688 if (fw_verbose) 3689 kprintf("pullup failed\n"); 3690 return IP_FW_DENY; 3691 } 3692 3693 static struct mbuf * 3694 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 3695 { 3696 struct m_tag *mtag; 3697 struct dn_pkt *pkt; 3698 
ipfw_insn *cmd; 3699 const struct ipfw_flow_id *id; 3700 struct dn_flow_id *fid; 3701 3702 M_ASSERTPKTHDR(m); 3703 3704 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), 3705 M_INTWAIT | M_NULLOK); 3706 if (mtag == NULL) { 3707 m_freem(m); 3708 return (NULL); 3709 } 3710 m_tag_prepend(m, mtag); 3711 3712 pkt = m_tag_data(mtag); 3713 bzero(pkt, sizeof(*pkt)); 3714 3715 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 3716 if (cmd->opcode == O_LOG) 3717 cmd += F_LEN(cmd); 3718 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 3719 ("Rule is not PIPE or QUEUE, opcode %d", cmd->opcode)); 3720 3721 pkt->dn_m = m; 3722 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 3723 pkt->ifp = fwa->oif; 3724 pkt->pipe_nr = pipe_nr; 3725 3726 pkt->cpuid = mycpuid; 3727 pkt->msgport = netisr_curport(); 3728 3729 id = &fwa->f_id; 3730 fid = &pkt->id; 3731 fid->fid_dst_ip = id->dst_ip; 3732 fid->fid_src_ip = id->src_ip; 3733 fid->fid_dst_port = id->dst_port; 3734 fid->fid_src_port = id->src_port; 3735 fid->fid_proto = id->proto; 3736 fid->fid_flags = id->flags; 3737 3738 ipfw_ref_rule(fwa->rule); 3739 pkt->dn_priv = fwa->rule; 3740 pkt->dn_unref_priv = ipfw_unref_rule; 3741 3742 if (cmd->opcode == O_PIPE) 3743 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 3744 3745 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 3746 return (m); 3747 } 3748 3749 /* 3750 * When a rule is added/deleted, clear the next_rule pointers in all rules. 3751 * These will be reconstructed on the fly as packets are matched. 
3752 */ 3753 static void 3754 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 3755 { 3756 struct ip_fw *rule; 3757 3758 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 3759 rule->next_rule = NULL; 3760 } 3761 3762 static __inline void 3763 ipfw_inc_static_count(struct ip_fw *rule) 3764 { 3765 /* Static rule's counts are updated only on CPU0 */ 3766 KKASSERT(mycpuid == 0); 3767 3768 static_count++; 3769 static_ioc_len += IOC_RULESIZE(rule); 3770 } 3771 3772 static __inline void 3773 ipfw_dec_static_count(struct ip_fw *rule) 3774 { 3775 int l = IOC_RULESIZE(rule); 3776 3777 /* Static rule's counts are updated only on CPU0 */ 3778 KKASSERT(mycpuid == 0); 3779 3780 KASSERT(static_count > 0, ("invalid static count %u", static_count)); 3781 static_count--; 3782 3783 KASSERT(static_ioc_len >= l, 3784 ("invalid static len %u", static_ioc_len)); 3785 static_ioc_len -= l; 3786 } 3787 3788 static void 3789 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 3790 { 3791 if (fwmsg->sibling != NULL) { 3792 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 3793 fwmsg->sibling->sibling = rule; 3794 } 3795 fwmsg->sibling = rule; 3796 } 3797 3798 static struct ip_fw * 3799 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 3800 { 3801 struct ip_fw *rule; 3802 3803 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 3804 3805 rule->act_ofs = ioc_rule->act_ofs; 3806 rule->cmd_len = ioc_rule->cmd_len; 3807 rule->rulenum = ioc_rule->rulenum; 3808 rule->set = ioc_rule->set; 3809 rule->usr_flags = ioc_rule->usr_flags; 3810 3811 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 3812 3813 rule->refcnt = 1; 3814 rule->cpuid = mycpuid; 3815 rule->rule_flags = rule_flags; 3816 3817 return rule; 3818 } 3819 3820 static void 3821 ipfw_add_rule_dispatch(netmsg_t nmsg) 3822 { 3823 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 3824 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3825 struct ip_fw *rule; 
3826 3827 ASSERT_NETISR_NCPUS(mycpuid); 3828 3829 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->rule_flags); 3830 3831 /* 3832 * Insert rule into the pre-determined position 3833 */ 3834 if (fwmsg->prev_rule != NULL) { 3835 struct ip_fw *prev, *next; 3836 3837 prev = fwmsg->prev_rule; 3838 KKASSERT(prev->cpuid == mycpuid); 3839 3840 next = fwmsg->next_rule; 3841 KKASSERT(next->cpuid == mycpuid); 3842 3843 rule->next = next; 3844 prev->next = rule; 3845 3846 /* 3847 * Move to the position on the next CPU 3848 * before the msg is forwarded. 3849 */ 3850 fwmsg->prev_rule = prev->sibling; 3851 fwmsg->next_rule = next->sibling; 3852 } else { 3853 KKASSERT(fwmsg->next_rule == NULL); 3854 rule->next = ctx->ipfw_layer3_chain; 3855 ctx->ipfw_layer3_chain = rule; 3856 } 3857 3858 /* Link rule CPU sibling */ 3859 ipfw_link_sibling(fwmsg, rule); 3860 3861 ipfw_flush_rule_ptrs(ctx); 3862 3863 if (mycpuid == 0) { 3864 /* Statistics only need to be updated once */ 3865 ipfw_inc_static_count(rule); 3866 3867 /* Return the rule on CPU0 */ 3868 nmsg->lmsg.u.ms_resultp = rule; 3869 } 3870 3871 if (rule->rule_flags & IPFW_RULE_F_GENTRACK) 3872 rule->track_ruleid = (uintptr_t)nmsg->lmsg.u.ms_resultp; 3873 3874 if (fwmsg->cross_rules != NULL) { 3875 /* Save rules for later use. 
		 */
		fwmsg->cross_rules[mycpuid] = rule;
	}

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * netisr dispatch: hand each CPU's copy of a crossref rule the full
 * array of per-CPU copies collected by ipfw_add_rule_dispatch(), then
 * forward the message to the next CPU.
 */
static void
ipfw_crossref_rule_dispatch(netmsg_t nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ip_fw *rule = fwmsg->sibling;
	int sz = sizeof(struct ip_fw *) * netisr_ncpus;

	ASSERT_NETISR_NCPUS(mycpuid);
	KASSERT(rule->rule_flags & IPFW_RULE_F_CROSSREF,
	    ("not crossref rule"));

	rule->cross_rules = kmalloc(sz, M_IPFW, M_WAITOK);
	memcpy(rule->cross_rules, fwmsg->cross_rules, sz);

	/* Advance to this rule's copy on the next CPU before forwarding. */
	fwmsg->sibling = rule->sibling;
	netisr_forwardmsg(&fwmsg->base, mycpuid + 1);
}

/*
 * Add a new rule to the list. Copy the rule into a malloc'ed area,
 * then possibly create a rule number and add the rule to the list.
 * Update the rule_number in the input struct so the caller knows
 * it as well.
 */
static void
ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_ipfw fwmsg;
	struct ip_fw *f, *prev, *rule;

	ASSERT_NETISR0;

	/*
	 * If rulenum is 0, find highest numbered rule before the
	 * default rule, and add rule number incremental step.
	 */
	if (ioc_rule->rulenum == 0) {
		int step = autoinc_step;

		KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
		    step <= IPFW_AUTOINC_STEP_MAX);

		/*
		 * Locate the highest numbered rule before default
		 */
		for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
			if (f->rulenum == IPFW_DEFAULT_RULE)
				break;
			ioc_rule->rulenum = f->rulenum;
		}
		if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
			ioc_rule->rulenum += step;
	}
	KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
	    ioc_rule->rulenum != 0,
	    ("invalid rule num %d", ioc_rule->rulenum));

	/*
	 * Now find the right place for the new rule in the sorted list.
	 */
	for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
	     prev = f, f = f->next) {
		if (f->rulenum > ioc_rule->rulenum) {
			/* Found the location */
			break;
		}
	}
	KASSERT(f != NULL, ("no default rule?!"));

	/*
	 * Duplicate the rule onto each CPU.
	 * The rule duplicated on CPU0 will be returned.
	 */
	bzero(&fwmsg, sizeof(fwmsg));
	netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_add_rule_dispatch);
	fwmsg.ioc_rule = ioc_rule;
	fwmsg.prev_rule = prev;
	/* next_rule == NULL signals head insertion to the dispatch. */
	fwmsg.next_rule = prev == NULL ? NULL : f;
	fwmsg.rule_flags = rule_flags;
	if (rule_flags & IPFW_RULE_F_CROSSREF) {
		/* Scratch array to collect the per-CPU copies. */
		fwmsg.cross_rules = kmalloc(
		    sizeof(struct ip_fw *) * netisr_ncpus, M_TEMP,
		    M_WAITOK | M_ZERO);
	}

	netisr_domsg_global(&fwmsg.base);
	KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL);

	rule = fwmsg.base.lmsg.u.ms_resultp;
	KKASSERT(rule != NULL && rule->cpuid == mycpuid);

	if (fwmsg.cross_rules != NULL) {
		/* Second pass: distribute the cross_rules array. */
		netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport,
		    MSGF_PRIORITY, ipfw_crossref_rule_dispatch);
		fwmsg.sibling = rule;
		netisr_domsg_global(&fwmsg.base);
		KKASSERT(fwmsg.sibling == NULL);

		kfree(fwmsg.cross_rules, M_TEMP);

#ifdef KLD_MODULE
		atomic_add_int(&ipfw_gd.ipfw_refcnt, 1);
#endif
	}

	DPRINTF("++ installed rule %d, static count now %d\n",
	    rule->rulenum, static_count);
}

/*
 * Free storage associated with a static rule (including derived
 * states/tracks).
 * The caller is in charge of clearing rule pointers to avoid
 * dangling pointers.
 * @return a pointer to the next entry.
 * Arguments are not checked, so they better be correct.
 */
static struct ip_fw *
ipfw_delete_rule(struct ipfw_context *ctx,
		 struct ip_fw *prev, struct ip_fw *rule)
{
	struct ip_fw *n;

	/* Unlink 'rule' from this CPU's rule list. */
	n = rule->next;
	if (prev == NULL)
		ctx->ipfw_layer3_chain = n;
	else
		prev->next = n;

	/* Mark the rule as invalid */
	rule->rule_flags |= IPFW_RULE_F_INVALID;
	rule->next_rule = NULL;
	rule->sibling = NULL;
#ifdef foo
	/* Don't reset cpuid here; keep various assertion working */
	rule->cpuid = -1;
#endif

	/* Statistics only need to be updated once */
	if (mycpuid == 0)
		ipfw_dec_static_count(rule);

	if ((rule->rule_flags & IPFW_RULE_F_CROSSREF) == 0) {
		/* Try to free this rule */
		ipfw_free_rule(rule);
	} else {
		/*
		 * Crossref rules are referenced from other CPUs; park
		 * the CPU0 copy on the global crossref free list instead
		 * of freeing it immediately.
		 */
		/* TODO: check staging area. */
		if (mycpuid == 0) {
			rule->next = ipfw_gd.ipfw_crossref_free;
			ipfw_gd.ipfw_crossref_free = rule;
		}
	}

	/* Return the next rule */
	return n;
}

/*
 * netisr dispatch: flush this CPU's states, tracks and rules
 * (the default rule is kept unless ms_result asks for it too),
 * then forward the message to the next CPU.
 */
static void
ipfw_flush_dispatch(netmsg_t nmsg)
{
	int kill_default = nmsg->lmsg.u.ms_result;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	ASSERT_NETISR_NCPUS(mycpuid);

	/*
	 * Flush states.
	 */
	ipfw_state_flush(ctx, NULL);
	KASSERT(ctx->ipfw_state_cnt == 0,
	    ("%d pcpu states remain", ctx->ipfw_state_cnt));
	ctx->ipfw_state_loosecnt = 0;
	ctx->ipfw_state_lastexp = 0;

	/*
	 * Flush tracks.
	 */
	ipfw_track_flush(ctx, NULL);
	ctx->ipfw_track_lastexp = 0;
	if (ctx->ipfw_trkcnt_spare != NULL) {
		kfree(ctx->ipfw_trkcnt_spare, M_IPFW);
		ctx->ipfw_trkcnt_spare = NULL;
	}

	ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */

	while ((rule = ctx->ipfw_layer3_chain) != NULL &&
	       (kill_default || rule->rulenum != IPFW_DEFAULT_RULE))
		ipfw_delete_rule(ctx, NULL, rule);

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * Deletes all rules from a chain (including the default rule
 * if the second argument is set).
 */
static void
ipfw_flush(int kill_default)
{
	struct netmsg_base nmsg;
#ifdef INVARIANTS
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	int state_cnt;
#endif

	ASSERT_NETISR0;

	/*
	 * If 'kill_default' then caller has done the necessary
	 * msgport syncing; unnecessary to do it again.
	 */
	if (!kill_default) {
		/*
		 * Let ipfw_chk() know the rules are going to
		 * be flushed, so it could jump directly to
		 * the default rule.
		 */
		ipfw_flushing = 1;
		/* XXX use priority sync */
		netmsg_service_sync();
	}

	/*
	 * Press the 'flush' button
	 */
	bzero(&nmsg, sizeof(nmsg));
	netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_flush_dispatch);
	nmsg.lmsg.u.ms_result = kill_default;
	netisr_domsg_global(&nmsg);
	ipfw_gd.ipfw_state_loosecnt = 0;
	ipfw_gd.ipfw_state_globexp = 0;
	ipfw_gd.ipfw_track_globexp = 0;

#ifdef INVARIANTS
	state_cnt = ipfw_state_cntcoll();
	KASSERT(state_cnt == 0, ("%d states remain", state_cnt));

	KASSERT(ipfw_gd.ipfw_trkcnt_cnt == 0,
	    ("%d trkcnts remain", ipfw_gd.ipfw_trkcnt_cnt));

	if (kill_default) {
		KASSERT(static_count == 0,
		    ("%u static rules remain", static_count));
		KASSERT(static_ioc_len == 0,
		    ("%u bytes of static rules remain", static_ioc_len));
	} else {
		/* Only the default rule should be left. */
		KASSERT(static_count == 1,
		    ("%u static rules remain", static_count));
		KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule),
		    ("%u bytes of static rules remain, should be %lu",
		     static_ioc_len,
		     (u_long)IOC_RULESIZE(ctx->ipfw_default_rule)));
	}
#endif

	/* Flush is done */
	ipfw_flushing = 0;
}

/*
 * netisr dispatch: delete all of this CPU's rules carrying
 * dmsg->rulenum (plus the states/tracks they generated), then
 * forward the message to the next CPU.
 */
static void
ipfw_alt_delete_rule_dispatch(netmsg_t nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule, *prev;

	ASSERT_NETISR_NCPUS(mycpuid);

	rule = dmsg->start_rule;
	KKASSERT(rule->cpuid == mycpuid);
	dmsg->start_rule = rule->sibling;

	prev = dmsg->prev_rule;
	if (prev != NULL) {
		KKASSERT(prev->cpuid == mycpuid);

		/*
		 * Move to the position on the next CPU
		 * before the msg is forwarded.
		 */
		dmsg->prev_rule = prev->sibling;
	}

	/*
	 * flush pointers outside the loop, then delete all matching
	 * rules.
	 * 'prev' remains the same throughout the cycle.
	 */
	ipfw_flush_rule_ptrs(ctx);
	while (rule && rule->rulenum == dmsg->rulenum) {
		if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
			/* Flush states generated by this rule. */
			ipfw_state_flush(ctx, rule);
		}
		if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
			/* Flush tracks generated by this rule. */
			ipfw_track_flush(ctx, rule);
		}
		rule = ipfw_delete_rule(ctx, prev, rule);
	}

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * Delete every rule numbered 'rulenum' on all CPUs.
 * Returns EINVAL when no such rule exists, 0 on success.
 */
static int
ipfw_alt_delete_rule(uint16_t rulenum)
{
	struct ip_fw *prev, *rule;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_del dmsg;

	ASSERT_NETISR0;

	/*
	 * Locate first rule to delete
	 */
	for (prev = NULL, rule = ctx->ipfw_layer3_chain;
	     rule && rule->rulenum < rulenum;
	     prev = rule, rule = rule->next)
		; /* EMPTY */
	if (rule->rulenum != rulenum)
		return EINVAL;

	/*
	 * Get rid of the rule duplications on all CPUs
	 */
	bzero(&dmsg, sizeof(dmsg));
	netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_alt_delete_rule_dispatch);
	dmsg.prev_rule = prev;
	dmsg.start_rule = rule;
	dmsg.rulenum = rulenum;

	netisr_domsg_global(&dmsg.base);
	KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
	return 0;
}

/*
 * netisr dispatch: delete all of this CPU's rules belonging to set
 * dmsg->from_set (plus the states/tracks they generated), then
 * forward the message to the next CPU.
 */
static void
ipfw_alt_delete_ruleset_dispatch(netmsg_t nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *prev, *rule;
#ifdef INVARIANTS
	int del = 0;
#endif

	ASSERT_NETISR_NCPUS(mycpuid);

	ipfw_flush_rule_ptrs(ctx);

	prev = NULL;
	rule = ctx->ipfw_layer3_chain;
	while (rule != NULL) {
		if (rule->set == dmsg->from_set) {
			if (rule->rule_flags & IPFW_RULE_F_GENSTATE) {
				/* Flush states generated by this rule. */
				ipfw_state_flush(ctx, rule);
			}
			if (rule->rule_flags & IPFW_RULE_F_GENTRACK) {
				/* Flush tracks generated by this rule. */
				ipfw_track_flush(ctx, rule);
			}
			rule = ipfw_delete_rule(ctx, prev, rule);
#ifdef INVARIANTS
			del = 1;
#endif
		} else {
			prev = rule;
			rule = rule->next;
		}
	}
	/* Caller verified the set exists, so something must have matched. */
	KASSERT(del, ("no match set?!"));

	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * Delete every rule belonging to 'set' on all CPUs.
 * A non-existent set is silently ignored (returns 0).
 */
static int
ipfw_alt_delete_ruleset(uint8_t set)
{
	struct netmsg_del dmsg;
	int del;
	struct ip_fw *rule;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ASSERT_NETISR0;

	/*
	 * Check whether the 'set' exists. If it exists,
	 * then check whether any rules within the set will
	 * try to create states.
	 */
	del = 0;
	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
		if (rule->set == set)
			del = 1;
	}
	if (!del)
		return 0; /* XXX EINVAL? */

	/*
	 * Delete this set
	 */
	bzero(&dmsg, sizeof(dmsg));
	netmsg_init(&dmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_alt_delete_ruleset_dispatch);
	dmsg.from_set = set;
	netisr_domsg_global(&dmsg.base);

	return 0;
}

/*
 * netisr dispatch: move this CPU's rules numbered dmsg->rulenum into
 * set dmsg->to_set, then forward the message to the next CPU.
 */
static void
ipfw_alt_move_rule_dispatch(netmsg_t nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ip_fw *rule;

	ASSERT_NETISR_NCPUS(mycpuid);

	rule = dmsg->start_rule;
	KKASSERT(rule->cpuid == mycpuid);

	/*
	 * Move to the position on the next CPU
	 * before the msg is forwarded.
4313 */ 4314 dmsg->start_rule = rule->sibling; 4315 4316 while (rule && rule->rulenum <= dmsg->rulenum) { 4317 if (rule->rulenum == dmsg->rulenum) 4318 rule->set = dmsg->to_set; 4319 rule = rule->next; 4320 } 4321 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4322 } 4323 4324 static int 4325 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) 4326 { 4327 struct netmsg_del dmsg; 4328 struct netmsg_base *nmsg; 4329 struct ip_fw *rule; 4330 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4331 4332 ASSERT_NETISR0; 4333 4334 /* 4335 * Locate first rule to move 4336 */ 4337 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; 4338 rule = rule->next) { 4339 if (rule->rulenum == rulenum && rule->set != set) 4340 break; 4341 } 4342 if (rule == NULL || rule->rulenum > rulenum) 4343 return 0; /* XXX error? */ 4344 4345 bzero(&dmsg, sizeof(dmsg)); 4346 nmsg = &dmsg.base; 4347 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4348 ipfw_alt_move_rule_dispatch); 4349 dmsg.start_rule = rule; 4350 dmsg.rulenum = rulenum; 4351 dmsg.to_set = set; 4352 4353 netisr_domsg_global(nmsg); 4354 KKASSERT(dmsg.start_rule == NULL); 4355 return 0; 4356 } 4357 4358 static void 4359 ipfw_alt_move_ruleset_dispatch(netmsg_t nmsg) 4360 { 4361 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4362 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4363 struct ip_fw *rule; 4364 4365 ASSERT_NETISR_NCPUS(mycpuid); 4366 4367 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 4368 if (rule->set == dmsg->from_set) 4369 rule->set = dmsg->to_set; 4370 } 4371 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4372 } 4373 4374 static int 4375 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) 4376 { 4377 struct netmsg_del dmsg; 4378 struct netmsg_base *nmsg; 4379 4380 ASSERT_NETISR0; 4381 4382 bzero(&dmsg, sizeof(dmsg)); 4383 nmsg = &dmsg.base; 4384 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4385 ipfw_alt_move_ruleset_dispatch); 4386 dmsg.from_set = 
from_set; 4387 dmsg.to_set = to_set; 4388 4389 netisr_domsg_global(nmsg); 4390 return 0; 4391 } 4392 4393 static void 4394 ipfw_alt_swap_ruleset_dispatch(netmsg_t nmsg) 4395 { 4396 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 4397 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4398 struct ip_fw *rule; 4399 4400 ASSERT_NETISR_NCPUS(mycpuid); 4401 4402 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 4403 if (rule->set == dmsg->from_set) 4404 rule->set = dmsg->to_set; 4405 else if (rule->set == dmsg->to_set) 4406 rule->set = dmsg->from_set; 4407 } 4408 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 4409 } 4410 4411 static int 4412 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) 4413 { 4414 struct netmsg_del dmsg; 4415 struct netmsg_base *nmsg; 4416 4417 ASSERT_NETISR0; 4418 4419 bzero(&dmsg, sizeof(dmsg)); 4420 nmsg = &dmsg.base; 4421 netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 4422 ipfw_alt_swap_ruleset_dispatch); 4423 dmsg.from_set = set1; 4424 dmsg.to_set = set2; 4425 4426 netisr_domsg_global(nmsg); 4427 return 0; 4428 } 4429 4430 /* 4431 * Remove all rules with given number, and also do set manipulation. 4432 * 4433 * The argument is an uint32_t. 
 * The low 16 bit are the rule or set number,
 * the next 8 bits are the new set, the top 8 bits are the command:
 *
 *	0	delete rules with given number
 *	1	delete rules with given set number
 *	2	move rules with given number to new set
 *	3	move rules with given set number to new set
 *	4	swap sets with given numbers
 */
static int
ipfw_ctl_alter(uint32_t arg)
{
	uint16_t rulenum;
	uint8_t cmd, new_set;
	int error = 0;

	ASSERT_NETISR0;

	/* Unpack the encoded argument (see layout above). */
	rulenum = arg & 0xffff;
	cmd = (arg >> 24) & 0xff;
	new_set = (arg >> 16) & 0xff;

	if (cmd > 4)
		return EINVAL;
	if (new_set >= IPFW_DEFAULT_SET)
		return EINVAL;
	if (cmd == 0 || cmd == 2) {
		/* rulenum is a rule number here */
		if (rulenum == IPFW_DEFAULT_RULE)
			return EINVAL;
	} else {
		/* rulenum is a set number here */
		if (rulenum >= IPFW_DEFAULT_SET)
			return EINVAL;
	}

	switch (cmd) {
	case 0: /* delete rules with given number */
		error = ipfw_alt_delete_rule(rulenum);
		break;

	case 1: /* delete all rules with given set number */
		error = ipfw_alt_delete_ruleset(rulenum);
		break;

	case 2: /* move rules with given number to new set */
		error = ipfw_alt_move_rule(rulenum, new_set);
		break;

	case 3: /* move rules with given set number to new set */
		error = ipfw_alt_move_ruleset(rulenum, new_set);
		break;

	case 4: /* swap two sets */
		error = ipfw_alt_swap_ruleset(rulenum, new_set);
		break;
	}
	return error;
}

/*
 * Clear counters for a specific rule.
 */
static void
clear_counters(struct ip_fw *rule, int log_only)
{
	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);

	if (log_only == 0) {
		rule->bcnt = rule->pcnt = 0;
		rule->timestamp = 0;
	}
	/* Re-arm the logging budget if the action is preceded by O_LOG. */
	if (l->o.opcode == O_LOG)
		l->log_left = l->max_log;
}

/*
 * netisr dispatch: clear counters on this CPU's rules — all of them
 * when zmsg->rulenum is 0, otherwise every rule carrying that number
 * — then forward the message to the next CPU.
 */
static void
ipfw_zero_entry_dispatch(netmsg_t nmsg)
{
	struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	ASSERT_NETISR_NCPUS(mycpuid);

	if (zmsg->rulenum == 0) {
		KKASSERT(zmsg->start_rule == NULL);

		ctx->ipfw_norule_counter = 0;
		for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
			clear_counters(rule, zmsg->log_only);
	} else {
		struct ip_fw *start = zmsg->start_rule;

		KKASSERT(start->cpuid == mycpuid);
		KKASSERT(start->rulenum == zmsg->rulenum);

		/*
		 * We can have multiple rules with the same number, so we
		 * need to clear them all.
		 */
		for (rule = start; rule && rule->rulenum == zmsg->rulenum;
		     rule = rule->next)
			clear_counters(rule, zmsg->log_only);

		/*
		 * Move to the position on the next CPU
		 * before the msg is forwarded.
		 */
		zmsg->start_rule = start->sibling;
	}
	netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * Reset some or all counters on firewall rules.
 * @arg frwl is null to clear all entries, or contains a specific
 * rule number.
 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
 */
static int
ipfw_ctl_zero_entry(int rulenum, int log_only)
{
	struct netmsg_zent zmsg;
	struct netmsg_base *nmsg;
	const char *msg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ASSERT_NETISR0;

	bzero(&zmsg, sizeof(zmsg));
	nmsg = &zmsg.base;
	netmsg_init(nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    ipfw_zero_entry_dispatch);
	zmsg.log_only = log_only;

	if (rulenum == 0) {
		msg = log_only ? "ipfw: All logging counts reset.\n"
			       : "ipfw: Accounting cleared.\n";
	} else {
		struct ip_fw *rule;

		/*
		 * Locate the first rule with 'rulenum'
		 */
		for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
			if (rule->rulenum == rulenum)
				break;
		}
		if (rule == NULL) /* we did not find any matching rules */
			return (EINVAL);
		zmsg.start_rule = rule;
		zmsg.rulenum = rulenum;

		msg = log_only ? "ipfw: Entry %d logging count reset.\n"
			       : "ipfw: Entry %d cleared.\n";
	}
	netisr_domsg_global(nmsg);
	KKASSERT(zmsg.start_rule == NULL);

	/* The rulenum == 0 messages carry no %d; the extra arg is ignored. */
	if (fw_verbose)
		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
	return (0);
}

/*
 * Check validity of the structure before insert.
 * Fortunately rules are simple, so this mostly needs to check rule sizes.
4599 */ 4600 static int 4601 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) 4602 { 4603 int l, cmdlen = 0; 4604 int have_action = 0; 4605 ipfw_insn *cmd; 4606 4607 *rule_flags = 0; 4608 4609 /* Check for valid size */ 4610 if (size < sizeof(*rule)) { 4611 kprintf("ipfw: rule too short\n"); 4612 return EINVAL; 4613 } 4614 l = IOC_RULESIZE(rule); 4615 if (l != size) { 4616 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 4617 return EINVAL; 4618 } 4619 4620 /* Check rule number */ 4621 if (rule->rulenum == IPFW_DEFAULT_RULE) { 4622 kprintf("ipfw: invalid rule number\n"); 4623 return EINVAL; 4624 } 4625 4626 /* 4627 * Now go for the individual checks. Very simple ones, basically only 4628 * instruction sizes. 4629 */ 4630 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 4631 l -= cmdlen, cmd += cmdlen) { 4632 cmdlen = F_LEN(cmd); 4633 if (cmdlen > l) { 4634 kprintf("ipfw: opcode %d size truncated\n", 4635 cmd->opcode); 4636 return EINVAL; 4637 } 4638 4639 DPRINTF("ipfw: opcode %d\n", cmd->opcode); 4640 4641 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT) { 4642 /* This rule will generate states. 
*/ 4643 *rule_flags |= IPFW_RULE_F_GENSTATE; 4644 if (cmd->opcode == O_LIMIT) 4645 *rule_flags |= IPFW_RULE_F_GENTRACK; 4646 } 4647 if (cmd->opcode == O_DEFRAG) 4648 *rule_flags |= IPFW_RULE_F_CROSSREF; 4649 if (cmd->opcode == O_IP_SRC_IFIP || 4650 cmd->opcode == O_IP_DST_IFIP) { 4651 *rule_flags |= IPFW_RULE_F_DYNIFADDR; 4652 cmd->arg1 &= IPFW_IFIP_SETTINGS; 4653 } 4654 4655 switch (cmd->opcode) { 4656 case O_NOP: 4657 case O_PROBE_STATE: 4658 case O_KEEP_STATE: 4659 case O_PROTO: 4660 case O_IP_SRC_ME: 4661 case O_IP_DST_ME: 4662 case O_LAYER2: 4663 case O_IN: 4664 case O_FRAG: 4665 case O_IPFRAG: 4666 case O_IPOPT: 4667 case O_IPLEN: 4668 case O_IPID: 4669 case O_IPTOS: 4670 case O_IPPRECEDENCE: 4671 case O_IPTTL: 4672 case O_IPVER: 4673 case O_TCPWIN: 4674 case O_TCPFLAGS: 4675 case O_TCPOPTS: 4676 case O_ESTAB: 4677 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 4678 goto bad_size; 4679 break; 4680 4681 case O_IP_SRC_TABLE: 4682 case O_IP_DST_TABLE: 4683 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 4684 goto bad_size; 4685 if (cmd->arg1 >= ipfw_table_max) { 4686 kprintf("ipfw: invalid table id %u, max %d\n", 4687 cmd->arg1, ipfw_table_max); 4688 return EINVAL; 4689 } 4690 break; 4691 4692 case O_IP_SRC_IFIP: 4693 case O_IP_DST_IFIP: 4694 if (cmdlen != F_INSN_SIZE(ipfw_insn_ifip)) 4695 goto bad_size; 4696 break; 4697 4698 case O_UID: 4699 case O_GID: 4700 case O_IP_SRC: 4701 case O_IP_DST: 4702 case O_TCPSEQ: 4703 case O_TCPACK: 4704 case O_PROB: 4705 case O_ICMPTYPE: 4706 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 4707 goto bad_size; 4708 break; 4709 4710 case O_LIMIT: 4711 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 4712 goto bad_size; 4713 break; 4714 4715 case O_LOG: 4716 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 4717 goto bad_size; 4718 4719 ((ipfw_insn_log *)cmd)->log_left = 4720 ((ipfw_insn_log *)cmd)->max_log; 4721 4722 break; 4723 4724 case O_IP_SRC_MASK: 4725 case O_IP_DST_MASK: 4726 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 4727 goto bad_size; 4728 if (((ipfw_insn_ip 
*)cmd)->mask.s_addr == 0) { 4729 kprintf("ipfw: opcode %d, useless rule\n", 4730 cmd->opcode); 4731 return EINVAL; 4732 } 4733 break; 4734 4735 case O_IP_SRC_SET: 4736 case O_IP_DST_SET: 4737 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 4738 kprintf("ipfw: invalid set size %d\n", 4739 cmd->arg1); 4740 return EINVAL; 4741 } 4742 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 4743 (cmd->arg1+31)/32 ) 4744 goto bad_size; 4745 break; 4746 4747 case O_MACADDR2: 4748 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 4749 goto bad_size; 4750 break; 4751 4752 case O_MAC_TYPE: 4753 case O_IP_SRCPORT: 4754 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 4755 if (cmdlen < 2 || cmdlen > 31) 4756 goto bad_size; 4757 break; 4758 4759 case O_RECV: 4760 case O_XMIT: 4761 case O_VIA: 4762 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 4763 goto bad_size; 4764 break; 4765 4766 case O_PIPE: 4767 case O_QUEUE: 4768 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 4769 goto bad_size; 4770 goto check_action; 4771 4772 case O_FORWARD_IP: 4773 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 4774 goto bad_size; 4775 } else { 4776 in_addr_t fwd_addr; 4777 4778 fwd_addr = ((ipfw_insn_sa *)cmd)-> 4779 sa.sin_addr.s_addr; 4780 if (IN_MULTICAST(ntohl(fwd_addr))) { 4781 kprintf("ipfw: try forwarding to " 4782 "multicast address\n"); 4783 return EINVAL; 4784 } 4785 } 4786 goto check_action; 4787 4788 case O_FORWARD_MAC: /* XXX not implemented yet */ 4789 case O_CHECK_STATE: 4790 case O_COUNT: 4791 case O_ACCEPT: 4792 case O_DENY: 4793 case O_REJECT: 4794 case O_SKIPTO: 4795 case O_DIVERT: 4796 case O_TEE: 4797 case O_DEFRAG: 4798 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 4799 goto bad_size; 4800 check_action: 4801 if (have_action) { 4802 kprintf("ipfw: opcode %d, multiple actions" 4803 " not allowed\n", 4804 cmd->opcode); 4805 return EINVAL; 4806 } 4807 have_action = 1; 4808 if (l != cmdlen) { 4809 kprintf("ipfw: opcode %d, action must be" 4810 " last opcode\n", 4811 cmd->opcode); 4812 return EINVAL; 4813 } 4814 break; 
4815 default: 4816 kprintf("ipfw: opcode %d, unknown opcode\n", 4817 cmd->opcode); 4818 return EINVAL; 4819 } 4820 } 4821 if (have_action == 0) { 4822 kprintf("ipfw: missing action\n"); 4823 return EINVAL; 4824 } 4825 return 0; 4826 4827 bad_size: 4828 kprintf("ipfw: opcode %d size %d wrong\n", 4829 cmd->opcode, cmdlen); 4830 return EINVAL; 4831 } 4832 4833 static int 4834 ipfw_ctl_add_rule(struct sockopt *sopt) 4835 { 4836 struct ipfw_ioc_rule *ioc_rule; 4837 size_t size; 4838 uint32_t rule_flags; 4839 int error; 4840 4841 ASSERT_NETISR0; 4842 4843 size = sopt->sopt_valsize; 4844 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 4845 size < sizeof(*ioc_rule)) { 4846 return EINVAL; 4847 } 4848 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 4849 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 4850 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 4851 } 4852 ioc_rule = sopt->sopt_val; 4853 4854 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 4855 if (error) 4856 return error; 4857 4858 ipfw_add_rule(ioc_rule, rule_flags); 4859 4860 if (sopt->sopt_dir == SOPT_GET) 4861 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 4862 return 0; 4863 } 4864 4865 static void * 4866 ipfw_copy_rule(const struct ipfw_context *ctx, const struct ip_fw *rule, 4867 struct ipfw_ioc_rule *ioc_rule) 4868 { 4869 const struct ip_fw *sibling; 4870 #ifdef INVARIANTS 4871 int i; 4872 #endif 4873 4874 ASSERT_NETISR0; 4875 KASSERT(rule->cpuid == 0, ("rule does not belong to cpu0")); 4876 4877 ioc_rule->act_ofs = rule->act_ofs; 4878 ioc_rule->cmd_len = rule->cmd_len; 4879 ioc_rule->rulenum = rule->rulenum; 4880 ioc_rule->set = rule->set; 4881 ioc_rule->usr_flags = rule->usr_flags; 4882 4883 ioc_rule->set_disable = ctx->ipfw_set_disable; 4884 ioc_rule->static_count = static_count; 4885 ioc_rule->static_len = static_ioc_len; 4886 4887 /* 4888 * Visit (read-only) all of the rule's duplications to get 4889 * the necessary statistics 4890 */ 4891 #ifdef INVARIANTS 4892 i = 0; 4893 
#endif 4894 ioc_rule->pcnt = 0; 4895 ioc_rule->bcnt = 0; 4896 ioc_rule->timestamp = 0; 4897 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 4898 ioc_rule->pcnt += sibling->pcnt; 4899 ioc_rule->bcnt += sibling->bcnt; 4900 if (sibling->timestamp > ioc_rule->timestamp) 4901 ioc_rule->timestamp = sibling->timestamp; 4902 #ifdef INVARIANTS 4903 ++i; 4904 #endif 4905 } 4906 KASSERT(i == netisr_ncpus, 4907 ("static rule is not duplicated on netisr_ncpus %d", netisr_ncpus)); 4908 4909 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 4910 4911 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 4912 } 4913 4914 static boolean_t 4915 ipfw_track_copy(const struct ipfw_trkcnt *trk, struct ipfw_ioc_state *ioc_state) 4916 { 4917 struct ipfw_ioc_flowid *ioc_id; 4918 4919 if (trk->tc_expire == 0) { 4920 /* Not a scanned one. */ 4921 return (FALSE); 4922 } 4923 4924 ioc_state->expire = TIME_LEQ(trk->tc_expire, time_uptime) ? 4925 0 : trk->tc_expire - time_uptime; 4926 ioc_state->pcnt = 0; 4927 ioc_state->bcnt = 0; 4928 4929 ioc_state->dyn_type = O_LIMIT_PARENT; 4930 ioc_state->count = trk->tc_count; 4931 4932 ioc_state->rulenum = trk->tc_rulenum; 4933 4934 ioc_id = &ioc_state->id; 4935 ioc_id->type = ETHERTYPE_IP; 4936 ioc_id->u.ip.proto = trk->tc_proto; 4937 ioc_id->u.ip.src_ip = trk->tc_saddr; 4938 ioc_id->u.ip.dst_ip = trk->tc_daddr; 4939 ioc_id->u.ip.src_port = trk->tc_sport; 4940 ioc_id->u.ip.dst_port = trk->tc_dport; 4941 4942 return (TRUE); 4943 } 4944 4945 static boolean_t 4946 ipfw_state_copy(const struct ipfw_state *s, struct ipfw_ioc_state *ioc_state) 4947 { 4948 struct ipfw_ioc_flowid *ioc_id; 4949 4950 if (s->st_type == O_ANCHOR) 4951 return (FALSE); 4952 4953 ioc_state->expire = TIME_LEQ(s->st_expire, time_uptime) ? 
4954 0 : s->st_expire - time_uptime; 4955 ioc_state->pcnt = s->st_pcnt; 4956 ioc_state->bcnt = s->st_bcnt; 4957 4958 ioc_state->dyn_type = s->st_type; 4959 ioc_state->count = 0; 4960 4961 ioc_state->rulenum = s->st_rule->rulenum; 4962 4963 ioc_id = &ioc_state->id; 4964 ioc_id->type = ETHERTYPE_IP; 4965 ioc_id->u.ip.proto = s->st_proto; 4966 ipfw_key_4tuple(&s->st_key, 4967 &ioc_id->u.ip.src_ip, &ioc_id->u.ip.src_port, 4968 &ioc_id->u.ip.dst_ip, &ioc_id->u.ip.dst_port); 4969 4970 return (TRUE); 4971 } 4972 4973 static void 4974 ipfw_state_copy_dispatch(netmsg_t nmsg) 4975 { 4976 struct netmsg_cpstate *nm = (struct netmsg_cpstate *)nmsg; 4977 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 4978 const struct ipfw_state *s; 4979 const struct ipfw_track *t; 4980 4981 ASSERT_NETISR_NCPUS(mycpuid); 4982 KASSERT(nm->state_cnt < nm->state_cntmax, 4983 ("invalid state count %d, max %d", 4984 nm->state_cnt, nm->state_cntmax)); 4985 4986 TAILQ_FOREACH(s, &ctx->ipfw_state_list, st_link) { 4987 if (ipfw_state_copy(s, nm->ioc_state)) { 4988 nm->ioc_state++; 4989 nm->state_cnt++; 4990 if (nm->state_cnt == nm->state_cntmax) 4991 goto done; 4992 } 4993 } 4994 4995 /* 4996 * Prepare tracks in the global track tree for userland. 4997 */ 4998 TAILQ_FOREACH(t, &ctx->ipfw_track_list, t_link) { 4999 struct ipfw_trkcnt *trk; 5000 5001 if (t->t_count == NULL) /* anchor */ 5002 continue; 5003 trk = t->t_trkcnt; 5004 5005 /* 5006 * Only one netisr can run this function at 5007 * any time, and only this function accesses 5008 * trkcnt's tc_expire, so this is safe w/o 5009 * ipfw_gd.ipfw_trkcnt_token. 5010 */ 5011 if (trk->tc_expire > t->t_expire) 5012 continue; 5013 trk->tc_expire = t->t_expire; 5014 } 5015 5016 /* 5017 * Copy tracks in the global track tree to userland in 5018 * the last netisr. 
5019 */ 5020 if (mycpuid == netisr_ncpus - 1) { 5021 struct ipfw_trkcnt *trk; 5022 5023 KASSERT(nm->state_cnt < nm->state_cntmax, 5024 ("invalid state count %d, max %d", 5025 nm->state_cnt, nm->state_cntmax)); 5026 5027 IPFW_TRKCNT_TOKGET; 5028 RB_FOREACH(trk, ipfw_trkcnt_tree, &ipfw_gd.ipfw_trkcnt_tree) { 5029 if (ipfw_track_copy(trk, nm->ioc_state)) { 5030 nm->ioc_state++; 5031 nm->state_cnt++; 5032 if (nm->state_cnt == nm->state_cntmax) { 5033 IPFW_TRKCNT_TOKREL; 5034 goto done; 5035 } 5036 } 5037 } 5038 IPFW_TRKCNT_TOKREL; 5039 } 5040 done: 5041 if (nm->state_cnt == nm->state_cntmax) { 5042 /* No more space; done. */ 5043 netisr_replymsg(&nm->base, 0); 5044 } else { 5045 netisr_forwardmsg(&nm->base, mycpuid + 1); 5046 } 5047 } 5048 5049 static int 5050 ipfw_ctl_get_rules(struct sockopt *sopt) 5051 { 5052 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5053 struct ip_fw *rule; 5054 void *bp; 5055 size_t size; 5056 int state_cnt; 5057 5058 ASSERT_NETISR0; 5059 5060 /* 5061 * pass up a copy of the current rules. Static rules 5062 * come first (the last of which has number IPFW_DEFAULT_RULE), 5063 * followed by a possibly empty list of states. 5064 */ 5065 5066 size = static_ioc_len; /* size of static rules */ 5067 5068 /* 5069 * Size of the states. 5070 * XXX take tracks as state for userland compat. 
5071 */ 5072 state_cnt = ipfw_state_cntcoll() + ipfw_gd.ipfw_trkcnt_cnt; 5073 state_cnt = (state_cnt * 5) / 4; /* leave 25% headroom */ 5074 size += state_cnt * sizeof(struct ipfw_ioc_state); 5075 5076 if (sopt->sopt_valsize < size) { 5077 /* short length, no need to return incomplete rules */ 5078 /* XXX: if superuser, no need to zero buffer */ 5079 bzero(sopt->sopt_val, sopt->sopt_valsize); 5080 return 0; 5081 } 5082 bp = sopt->sopt_val; 5083 5084 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 5085 bp = ipfw_copy_rule(ctx, rule, bp); 5086 5087 if (state_cnt) { 5088 struct netmsg_cpstate nm; 5089 #ifdef INVARIANTS 5090 size_t old_size = size; 5091 #endif 5092 5093 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5094 MSGF_PRIORITY, ipfw_state_copy_dispatch); 5095 nm.ioc_state = bp; 5096 nm.state_cntmax = state_cnt; 5097 nm.state_cnt = 0; 5098 netisr_domsg_global(&nm.base); 5099 5100 /* 5101 * The # of states may be shrinked after the snapshot 5102 * of the state count was taken. To give user a correct 5103 * state count, nm->state_cnt is used to recalculate 5104 * the actual size. 
5105 */ 5106 size = static_ioc_len + 5107 (nm.state_cnt * sizeof(struct ipfw_ioc_state)); 5108 KKASSERT(size <= old_size); 5109 } 5110 5111 sopt->sopt_valsize = size; 5112 return 0; 5113 } 5114 5115 static void 5116 ipfw_set_disable_dispatch(netmsg_t nmsg) 5117 { 5118 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5119 5120 ASSERT_NETISR_NCPUS(mycpuid); 5121 5122 ctx->ipfw_set_disable = nmsg->lmsg.u.ms_result32; 5123 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5124 } 5125 5126 static void 5127 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 5128 { 5129 struct netmsg_base nmsg; 5130 uint32_t set_disable; 5131 5132 ASSERT_NETISR0; 5133 5134 /* IPFW_DEFAULT_SET is always enabled */ 5135 enable |= (1 << IPFW_DEFAULT_SET); 5136 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 5137 5138 bzero(&nmsg, sizeof(nmsg)); 5139 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5140 ipfw_set_disable_dispatch); 5141 nmsg.lmsg.u.ms_result32 = set_disable; 5142 5143 netisr_domsg_global(&nmsg); 5144 } 5145 5146 static void 5147 ipfw_table_create_dispatch(netmsg_t nm) 5148 { 5149 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5150 int tblid = nm->lmsg.u.ms_result; 5151 5152 ASSERT_NETISR_NCPUS(mycpuid); 5153 5154 if (!rn_inithead((void **)&ctx->ipfw_tables[tblid], 5155 rn_cpumaskhead(mycpuid), 32)) 5156 panic("ipfw: create table%d failed", tblid); 5157 5158 netisr_forwardmsg(&nm->base, mycpuid + 1); 5159 } 5160 5161 static int 5162 ipfw_table_create(struct sockopt *sopt) 5163 { 5164 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5165 struct ipfw_ioc_table *tbl; 5166 struct netmsg_base nm; 5167 5168 ASSERT_NETISR0; 5169 5170 if (sopt->sopt_valsize != sizeof(*tbl)) 5171 return (EINVAL); 5172 5173 tbl = sopt->sopt_val; 5174 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5175 return (EINVAL); 5176 5177 if (ctx->ipfw_tables[tbl->tableid] != NULL) 5178 return (EEXIST); 5179 5180 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 
5181 ipfw_table_create_dispatch); 5182 nm.lmsg.u.ms_result = tbl->tableid; 5183 netisr_domsg_global(&nm); 5184 5185 return (0); 5186 } 5187 5188 static void 5189 ipfw_table_killrn(struct radix_node_head *rnh, struct radix_node *rn) 5190 { 5191 struct radix_node *ret; 5192 5193 ret = rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); 5194 if (ret != rn) 5195 panic("deleted other table entry"); 5196 kfree(ret, M_IPFW); 5197 } 5198 5199 static int 5200 ipfw_table_killent(struct radix_node *rn, void *xrnh) 5201 { 5202 5203 ipfw_table_killrn(xrnh, rn); 5204 return (0); 5205 } 5206 5207 static void 5208 ipfw_table_flush_oncpu(struct ipfw_context *ctx, int tableid, 5209 int destroy) 5210 { 5211 struct radix_node_head *rnh; 5212 5213 ASSERT_NETISR_NCPUS(mycpuid); 5214 5215 rnh = ctx->ipfw_tables[tableid]; 5216 rnh->rnh_walktree(rnh, ipfw_table_killent, rnh); 5217 if (destroy) { 5218 Free(rnh); 5219 ctx->ipfw_tables[tableid] = NULL; 5220 } 5221 } 5222 5223 static void 5224 ipfw_table_flush_dispatch(netmsg_t nmsg) 5225 { 5226 struct netmsg_tblflush *nm = (struct netmsg_tblflush *)nmsg; 5227 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5228 5229 ASSERT_NETISR_NCPUS(mycpuid); 5230 5231 ipfw_table_flush_oncpu(ctx, nm->tableid, nm->destroy); 5232 netisr_forwardmsg(&nm->base, mycpuid + 1); 5233 } 5234 5235 static void 5236 ipfw_table_flushall_oncpu(struct ipfw_context *ctx, int destroy) 5237 { 5238 int i; 5239 5240 ASSERT_NETISR_NCPUS(mycpuid); 5241 5242 for (i = 0; i < ipfw_table_max; ++i) { 5243 if (ctx->ipfw_tables[i] != NULL) 5244 ipfw_table_flush_oncpu(ctx, i, destroy); 5245 } 5246 } 5247 5248 static void 5249 ipfw_table_flushall_dispatch(netmsg_t nmsg) 5250 { 5251 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5252 5253 ASSERT_NETISR_NCPUS(mycpuid); 5254 5255 ipfw_table_flushall_oncpu(ctx, 0); 5256 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 5257 } 5258 5259 static int 5260 ipfw_table_flush(struct sockopt *sopt) 5261 { 5262 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5263 
struct ipfw_ioc_table *tbl; 5264 struct netmsg_tblflush nm; 5265 5266 ASSERT_NETISR0; 5267 5268 if (sopt->sopt_valsize != sizeof(*tbl)) 5269 return (EINVAL); 5270 5271 tbl = sopt->sopt_val; 5272 if (sopt->sopt_name == IP_FW_TBL_FLUSH && tbl->tableid < 0) { 5273 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 5274 MSGF_PRIORITY, ipfw_table_flushall_dispatch); 5275 netisr_domsg_global(&nm.base); 5276 return (0); 5277 } 5278 5279 if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max) 5280 return (EINVAL); 5281 5282 if (ctx->ipfw_tables[tbl->tableid] == NULL) 5283 return (ENOENT); 5284 5285 netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 5286 ipfw_table_flush_dispatch); 5287 nm.tableid = tbl->tableid; 5288 nm.destroy = 0; 5289 if (sopt->sopt_name == IP_FW_TBL_DESTROY) 5290 nm.destroy = 1; 5291 netisr_domsg_global(&nm.base); 5292 5293 return (0); 5294 } 5295 5296 static int 5297 ipfw_table_cntent(struct radix_node *rn __unused, void *xcnt) 5298 { 5299 int *cnt = xcnt; 5300 5301 (*cnt)++; 5302 return (0); 5303 } 5304 5305 static int 5306 ipfw_table_cpent(struct radix_node *rn, void *xcp) 5307 { 5308 struct ipfw_table_cp *cp = xcp; 5309 struct ipfw_tblent *te = (struct ipfw_tblent *)rn; 5310 struct ipfw_ioc_tblent *ioc_te; 5311 #ifdef INVARIANTS 5312 int cnt; 5313 #endif 5314 5315 KASSERT(cp->te_idx < cp->te_cnt, ("invalid table cp idx %d, cnt %d", 5316 cp->te_idx, cp->te_cnt)); 5317 ioc_te = &cp->te[cp->te_idx]; 5318 5319 if (te->te_nodes->rn_mask != NULL) { 5320 memcpy(&ioc_te->netmask, te->te_nodes->rn_mask, 5321 *te->te_nodes->rn_mask); 5322 } else { 5323 ioc_te->netmask.sin_len = 0; 5324 } 5325 memcpy(&ioc_te->key, &te->te_key, sizeof(ioc_te->key)); 5326 5327 ioc_te->use = te->te_use; 5328 ioc_te->last_used = te->te_lastuse; 5329 #ifdef INVARIANTS 5330 cnt = 1; 5331 #endif 5332 5333 while ((te = te->te_sibling) != NULL) { 5334 #ifdef INVARIANTS 5335 ++cnt; 5336 #endif 5337 ioc_te->use += te->te_use; 5338 if (te->te_lastuse > ioc_te->last_used) 
5339 ioc_te->last_used = te->te_lastuse; 5340 } 5341 KASSERT(cnt == netisr_ncpus, 5342 ("invalid # of tblent %d, should be %d", cnt, netisr_ncpus)); 5343 5344 cp->te_idx++; 5345 5346 return (0); 5347 } 5348 5349 static int 5350 ipfw_table_get(struct sockopt *sopt) 5351 { 5352 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 5353 struct radix_node_head *rnh; 5354 struct ipfw_ioc_table *tbl; 5355 struct ipfw_ioc_tblcont *cont; 5356 struct ipfw_table_cp cp; 5357 int cnt = 0, sz; 5358 5359 ASSERT_NETISR0; 5360 5361 if (sopt->sopt_valsize < sizeof(*tbl)) 5362 return (EINVAL); 5363 5364 tbl = sopt->sopt_val; 5365 if (tbl->tableid < 0) { 5366 struct ipfw_ioc_tbllist *list; 5367 int i; 5368 5369 /* 5370 * List available table ids. 5371 */ 5372 for (i = 0; i < ipfw_table_max; ++i) { 5373 if (ctx->ipfw_tables[i] != NULL) 5374 ++cnt; 5375 } 5376 5377 sz = __offsetof(struct ipfw_ioc_tbllist, tables[cnt]); 5378 if (sopt->sopt_valsize < sz) { 5379 bzero(sopt->sopt_val, sopt->sopt_valsize); 5380 return (E2BIG); 5381 } 5382 list = sopt->sopt_val; 5383 list->tablecnt = cnt; 5384 5385 cnt = 0; 5386 for (i = 0; i < ipfw_table_max; ++i) { 5387 if (ctx->ipfw_tables[i] != NULL) { 5388 KASSERT(cnt < list->tablecnt, 5389 ("invalid idx %d, cnt %d", 5390 cnt, list->tablecnt)); 5391 list->tables[cnt++] = i; 5392 } 5393 } 5394 sopt->sopt_valsize = sz; 5395 return (0); 5396 } else if (tbl->tableid >= ipfw_table_max) { 5397 return (EINVAL); 5398 } 5399 5400 rnh = ctx->ipfw_tables[tbl->tableid]; 5401 if (rnh == NULL) 5402 return (ENOENT); 5403 rnh->rnh_walktree(rnh, ipfw_table_cntent, &cnt); 5404 5405 sz = __offsetof(struct ipfw_ioc_tblcont, ent[cnt]); 5406 if (sopt->sopt_valsize < sz) { 5407 bzero(sopt->sopt_val, sopt->sopt_valsize); 5408 return (E2BIG); 5409 } 5410 cont = sopt->sopt_val; 5411 cont->entcnt = cnt; 5412 5413 cp.te = cont->ent; 5414 cp.te_idx = 0; 5415 cp.te_cnt = cnt; 5416 rnh->rnh_walktree(rnh, ipfw_table_cpent, &cp); 5417 5418 sopt->sopt_valsize = sz; 5419 return (0); 5420 } 5421 
/*
 * Per-CPU leg of table-entry addition: allocate this CPU's duplicate
 * of the entry, insert it into the local radix tree and link it onto
 * the sibling chain carried in the message.  Only CPU0 may see a
 * duplicate key (EEXIST); later CPUs must succeed or the trees are
 * inconsistent, hence the panic.
 */
static void
ipfw_table_add_dispatch(netmsg_t nmsg)
{
    struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct radix_node_head *rnh;
    struct ipfw_tblent *te;

    ASSERT_NETISR_NCPUS(mycpuid);

    rnh = ctx->ipfw_tables[nm->tableid];

    te = kmalloc(sizeof(*te), M_IPFW, M_WAITOK | M_ZERO);
    te->te_nodes->rn_key = (char *)&te->te_key;
    memcpy(&te->te_key, nm->key, sizeof(te->te_key));

    if (rnh->rnh_addaddr((char *)&te->te_key, (char *)nm->netmask, rnh,
        te->te_nodes) == NULL) {
        if (mycpuid == 0) {
            /* Entry already exists; abort before any CPU commits. */
            kfree(te, M_IPFW);
            netisr_replymsg(&nm->base, EEXIST);
            return;
        }
        panic("rnh_addaddr failed");
    }

    /* Link siblings. */
    if (nm->sibling != NULL)
        nm->sibling->te_sibling = te;
    nm->sibling = te;

    netisr_forwardmsg(&nm->base, mycpuid + 1);
}

/*
 * Per-CPU leg of table-entry deletion.  Only CPU0 may find the key
 * missing (ESRCH); later CPUs must succeed or the trees are
 * inconsistent, hence the panic.
 */
static void
ipfw_table_del_dispatch(netmsg_t nmsg)
{
    struct netmsg_tblent *nm = (struct netmsg_tblent *)nmsg;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct radix_node_head *rnh;
    struct radix_node *rn;

    ASSERT_NETISR_NCPUS(mycpuid);

    rnh = ctx->ipfw_tables[nm->tableid];
    rn = rnh->rnh_deladdr((char *)nm->key, (char *)nm->netmask, rnh);
    if (rn == NULL) {
        if (mycpuid == 0) {
            netisr_replymsg(&nm->base, ESRCH);
            return;
        }
        panic("rnh_deladdr failed");
    }
    kfree(rn, M_IPFW);

    netisr_forwardmsg(&nm->base, mycpuid + 1);
}

/*
 * IP_FW_TBL_ADD / IP_FW_TBL_DEL handler: validate the single entry
 * supplied by userland, normalize the key against the netmask, and
 * run the add/del dispatch chain across all netisr CPUs.
 */
static int
ipfw_table_alt(struct sockopt *sopt)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct ipfw_ioc_tblcont *tbl;
    struct ipfw_ioc_tblent *te;
    struct sockaddr_in key0;
    struct sockaddr *netmask = NULL, *key;
    struct netmsg_tblent nm;

    ASSERT_NETISR0;

    if (sopt->sopt_valsize != sizeof(*tbl))
        return (EINVAL);
    tbl = sopt->sopt_val;

    if (tbl->tableid < 0 || tbl->tableid >= ipfw_table_max)
        return (EINVAL);
    if (tbl->entcnt != 1)
        return (EINVAL);

    if (ctx->ipfw_tables[tbl->tableid] == NULL)
        return (ENOENT);
    te = &tbl->ent[0];

    if (te->key.sin_family != AF_INET ||
        te->key.sin_port != 0 ||
        te->key.sin_len != sizeof(struct sockaddr_in))
        return (EINVAL);
    key = (struct sockaddr *)&te->key;

    if (te->netmask.sin_len != 0) {
        if (te->netmask.sin_port != 0 ||
            te->netmask.sin_len > sizeof(struct sockaddr_in))
            return (EINVAL);
        netmask = (struct sockaddr *)&te->netmask;
        /* Mask the key so it matches the radix tree's expectations. */
        sa_maskedcopy(key, (struct sockaddr *)&key0, netmask);
        key = (struct sockaddr *)&key0;
    }

    if (sopt->sopt_name == IP_FW_TBL_ADD) {
        netmsg_init(&nm.base, NULL, &curthread->td_msgport,
            MSGF_PRIORITY, ipfw_table_add_dispatch);
    } else {
        netmsg_init(&nm.base, NULL, &curthread->td_msgport,
            MSGF_PRIORITY, ipfw_table_del_dispatch);
    }
    nm.key = key;
    nm.netmask = netmask;
    nm.tableid = tbl->tableid;
    nm.sibling = NULL;
    return (netisr_domsg_global(&nm.base));
}

/*
 * rnh_walktree callback: reset one entry's usage statistics.
 */
static int
ipfw_table_zeroent(struct radix_node *rn, void *arg __unused)
{
    struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

    te->te_use = 0;
    te->te_lastuse = 0;
    return (0);
}

/*
 * Per-CPU leg of single-table statistics zeroing.
 */
static void
ipfw_table_zero_dispatch(netmsg_t nmsg)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct radix_node_head *rnh;

    ASSERT_NETISR_NCPUS(mycpuid);

    rnh = ctx->ipfw_tables[nmsg->lmsg.u.ms_result];
    rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);

    netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * Per-CPU leg of all-tables statistics zeroing.
 */
static void
ipfw_table_zeroall_dispatch(netmsg_t nmsg)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    int i;

    ASSERT_NETISR_NCPUS(mycpuid);

    for (i = 0; i < ipfw_table_max; ++i) {
        struct radix_node_head *rnh = ctx->ipfw_tables[i];

        if (rnh != NULL)
            rnh->rnh_walktree(rnh, ipfw_table_zeroent, NULL);
    }
    netisr_forwardmsg(&nmsg->base, mycpuid + 1);
}

/*
 * IP_FW_TBL_ZERO handler.  A negative table id zeroes all tables;
 * otherwise only the named table's statistics are reset.
 */
static int
ipfw_table_zero(struct sockopt *sopt)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct netmsg_base nm;
    struct ipfw_ioc_table *tbl;

    ASSERT_NETISR0;

    if (sopt->sopt_valsize != sizeof(*tbl))
        return (EINVAL);
    tbl = sopt->sopt_val;

    if (tbl->tableid < 0) {
        netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
            ipfw_table_zeroall_dispatch);
        netisr_domsg_global(&nm);
        return (0);
    } else if (tbl->tableid >= ipfw_table_max) {
        return (EINVAL);
    } else if (ctx->ipfw_tables[tbl->tableid] == NULL) {
        return (ENOENT);
    }

    netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY,
        ipfw_table_zero_dispatch);
    nm.lmsg.u.ms_result = tbl->tableid;
    netisr_domsg_global(&nm);

    return (0);
}

/*
 * rnh_walktree callback: delete entries previously marked expired by
 * ipfw_table_markexp, counting each deletion in nm->expcnt.
 */
static int
ipfw_table_killexp(struct radix_node *rn, void *xnm)
{
    struct netmsg_tblexp *nm = xnm;
    struct ipfw_tblent *te = (struct ipfw_tblent *)rn;

    if (te->te_expired) {
        ipfw_table_killrn(nm->rnh, rn);
        nm->expcnt++;
    }
    return (0);
}

/*
 * Per-CPU leg of single-table expiration: delete this CPU's copies of
 * the marked entries.  The KASSERT verifies that after CPU k runs,
 * exactly (k+1) * cnt deletions have accumulated.
 */
static void
ipfw_table_expire_dispatch(netmsg_t nmsg)
{
    struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct radix_node_head *rnh;

    ASSERT_NETISR_NCPUS(mycpuid);

    rnh = ctx->ipfw_tables[nm->tableid];
    nm->rnh = rnh;
    rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);

    KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
        ("not all expired addresses (%d) were deleted (%d)",
         nm->cnt * (mycpuid + 1), nm->expcnt));

    netisr_forwardmsg(&nm->base, mycpuid + 1);
}

/*
 * Per-CPU leg of all-tables expiration; same accounting invariant as
 * the single-table variant above.
 */
static void
ipfw_table_expireall_dispatch(netmsg_t nmsg)
{
    struct netmsg_tblexp *nm = (struct netmsg_tblexp *)nmsg;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    int i;

    ASSERT_NETISR_NCPUS(mycpuid);

    for (i = 0; i < ipfw_table_max; ++i) {
        struct radix_node_head *rnh = ctx->ipfw_tables[i];

        if (rnh == NULL)
            continue;
        nm->rnh = rnh;
        rnh->rnh_walktree(rnh, ipfw_table_killexp, nm);
    }

    KASSERT(nm->expcnt == nm->cnt * (mycpuid + 1),
        ("not all expired addresses (%d) were deleted (%d)",
         nm->cnt * (mycpuid + 1), nm->expcnt));

    netisr_forwardmsg(&nm->base, mycpuid + 1);
}

/*
 * rnh_walktree callback (runs on CPU0's tree): mark an entry and all
 * of its per-CPU siblings expired when the newest last-use time across
 * all CPUs is at least nm->expire seconds old.  Each marked entry
 * increments nm->cnt once.
 */
static int
ipfw_table_markexp(struct radix_node *rn, void *xnm)
{
    struct netmsg_tblexp *nm = xnm;
    struct ipfw_tblent *te;
    time_t lastuse;

    te = (struct ipfw_tblent *)rn;
    lastuse = te->te_lastuse;

    /* Find the most recent use on any CPU. */
    while ((te = te->te_sibling) != NULL) {
        if (te->te_lastuse > lastuse)
            lastuse = te->te_lastuse;
    }
    if (!TIME_LEQ(lastuse + nm->expire, time_second)) {
        /* Not expired */
        return (0);
    }

    te = (struct ipfw_tblent *)rn;
    te->te_expired = 1;
    while ((te = te->te_sibling) != NULL)
        te->te_expired = 1;
    nm->cnt++;

    return (0);
}

/*
 * IP_FW_TBL_EXPIRE handler: mark entries unused for at least
 * tbl->expire seconds (in one table, or all tables for a negative
 * table id), then delete the marked entries on every CPU.  The number
 * of expired entries is reported back in tbl->expcnt.
 */
static int
ipfw_table_expire(struct sockopt *sopt)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct netmsg_tblexp nm;
    struct ipfw_ioc_tblexp *tbl;
    struct radix_node_head *rnh;

    ASSERT_NETISR0;

    if (sopt->sopt_valsize != sizeof(*tbl))
        return (EINVAL);
    tbl = sopt->sopt_val;
    tbl->expcnt = 0;

    nm.expcnt = 0;
    nm.cnt = 0;
    nm.expire = tbl->expire;

    if (tbl->tableid < 0) {
        int i;

        /* Mark pass over every table (CPU0's trees). */
        for (i = 0; i < ipfw_table_max; ++i) {
            rnh = ctx->ipfw_tables[i];
            if (rnh == NULL)
                continue;
            rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
        }
        if (nm.cnt == 0) {
            /* No addresses can be expired. */
            return (0);
        }
        tbl->expcnt = nm.cnt;

        netmsg_init(&nm.base, NULL, &curthread->td_msgport,
            MSGF_PRIORITY, ipfw_table_expireall_dispatch);
        nm.tableid = -1;
        netisr_domsg_global(&nm.base);
        KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
            ("not all expired addresses (%d) were deleted (%d)",
             nm.cnt * netisr_ncpus, nm.expcnt));

        return (0);
    } else if (tbl->tableid >= ipfw_table_max) {
        return (EINVAL);
    }

    rnh = ctx->ipfw_tables[tbl->tableid];
    if (rnh == NULL)
        return (ENOENT);
    rnh->rnh_walktree(rnh, ipfw_table_markexp, &nm);
    if (nm.cnt == 0) {
        /* No addresses can be expired. */
        return (0);
    }
    tbl->expcnt = nm.cnt;

    netmsg_init(&nm.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
        ipfw_table_expire_dispatch);
    nm.tableid = tbl->tableid;
    netisr_domsg_global(&nm.base);
    KASSERT(nm.expcnt == nm.cnt * netisr_ncpus,
        ("not all expired addresses (%d) were deleted (%d)",
         nm.cnt * netisr_ncpus, nm.expcnt));
    return (0);
}

/*
 * Free one cross-referenced rule duplicate on its owning CPU.
 */
static void
ipfw_crossref_free_dispatch(netmsg_t nmsg)
{
    struct ip_fw *rule = nmsg->lmsg.u.ms_resultp;

    KKASSERT((rule->rule_flags &
        (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
        (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
    ipfw_free_rule(rule);

    netisr_replymsg(&nmsg->base, 0);
}

/*
 * Reap cross-referenced rules whose per-CPU duplicates are no longer
 * referenced by in-flight packets.  Rules still in flight stay on the
 * free list and the reap is retried from a callout.
 */
static void
ipfw_crossref_reap(void)
{
    struct ip_fw *rule, *prev = NULL;

    ASSERT_NETISR0;

    rule = ipfw_gd.ipfw_crossref_free;
    while (rule != NULL) {
        uint64_t inflight = 0;
        int i;

        for (i = 0; i < netisr_ncpus; ++i)
            inflight += rule->cross_rules[i]->cross_refs;
        if (inflight == 0) {
            struct ip_fw *f = rule;

            /*
             * Unlink.
             */
            rule = rule->next;
            if (prev != NULL)
                prev->next = rule;
            else
                ipfw_gd.ipfw_crossref_free = rule;

            /*
             * Free.
             */
            for (i = 1; i < netisr_ncpus; ++i) {
                struct netmsg_base nm;

                /* Free each duplicate on its own CPU. */
                netmsg_init(&nm, NULL, &curthread->td_msgport,
                    MSGF_PRIORITY, ipfw_crossref_free_dispatch);
                nm.lmsg.u.ms_resultp = f->cross_rules[i];
                netisr_domsg(&nm, i);
            }
            KKASSERT((f->rule_flags &
                (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID)) ==
                (IPFW_RULE_F_CROSSREF | IPFW_RULE_F_INVALID));
            ipfw_unref_rule(f);
        } else {
            prev = rule;
            rule = rule->next;
        }
    }

    if (ipfw_gd.ipfw_crossref_free != NULL) {
        /* Some rules are still in flight; retry in one second. */
        callout_reset(&ipfw_gd.ipfw_crossref_ch, hz,
            ipfw_crossref_timeo, NULL);
    }
}

/*
 * {set|get}sockopt parser.
 */
static int
ipfw_ctl(struct sockopt *sopt)
{
    int error, rulenum;
    uint32_t *masks;
    size_t size;

    ASSERT_NETISR0;

    error = 0;

    switch (sopt->sopt_name) {
    case IP_FW_GET:
        error = ipfw_ctl_get_rules(sopt);
        break;

    case IP_FW_FLUSH:
        ipfw_flush(0 /* keep default rule */);
        break;

    case IP_FW_ADD:
        error = ipfw_ctl_add_rule(sopt);
        break;

    case IP_FW_DEL:
        /*
         * IP_FW_DEL is used for deleting single rules or sets,
         * and (ab)used to atomically manipulate sets.
         * Argument size is used to distinguish between the two:
         *    sizeof(uint32_t)
         *      delete single rule or set of rules,
         *      or reassign rules (or sets) to a different set.
         *    2 * sizeof(uint32_t)
         *      atomic disable/enable sets.
         *      first uint32_t contains sets to be disabled,
         *      second uint32_t contains sets to be enabled.
         */
        masks = sopt->sopt_val;
        size = sopt->sopt_valsize;
        if (size == sizeof(*masks)) {
            /*
             * Delete or reassign static rule
             */
            error = ipfw_ctl_alter(masks[0]);
        } else if (size == (2 * sizeof(*masks))) {
            /*
             * Set enable/disable
             */
            ipfw_ctl_set_disable(masks[0], masks[1]);
        } else {
            error = EINVAL;
        }
        break;

    case IP_FW_ZERO:
    case IP_FW_RESETLOG: /* argument is an int, the rule number */
        rulenum = 0;

        if (sopt->sopt_val != 0) {
            error = soopt_to_kbuf(sopt, &rulenum,
                sizeof(int), sizeof(int));
            if (error)
                break;
        }
        error = ipfw_ctl_zero_entry(rulenum,
            sopt->sopt_name == IP_FW_RESETLOG);
        break;

    case IP_FW_TBL_CREATE:
        error = ipfw_table_create(sopt);
        break;

    case IP_FW_TBL_ADD:
    case IP_FW_TBL_DEL:
        error = ipfw_table_alt(sopt);
        break;

    case IP_FW_TBL_FLUSH:
    case IP_FW_TBL_DESTROY:
        error = ipfw_table_flush(sopt);
        break;

    case IP_FW_TBL_GET:
        error = ipfw_table_get(sopt);
        break;

    case IP_FW_TBL_ZERO:
        error = ipfw_table_zero(sopt);
        break;

    case IP_FW_TBL_EXPIRE:
        error = ipfw_table_expire(sopt);
        break;

    default:
        kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
        error = EINVAL;
    }

    /* Opportunistically reap dead cross-referenced rules. */
    ipfw_crossref_reap();
    return error;
}

/*
 * Finish a keepalive sweep on this CPU and rearm the periodic
 * keepalive callout.
 */
static void
ipfw_keepalive_done(struct ipfw_context *ctx)
{

    KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
        ("keepalive is not in progress"));
    ctx->ipfw_flags &= ~IPFW_FLAG_KEEPALIVE;
    callout_reset(&ctx->ipfw_keepalive_ch, dyn_keepalive_period * hz,
        ipfw_keepalive, NULL);
}

/*
 * Schedule continuation of an in-progress keepalive sweep on the same
 * CPU (used when a per-pass work limit was hit).
 */
static void
ipfw_keepalive_more(struct ipfw_context *ctx)
{
    struct netmsg_base *nm = &ctx->ipfw_keepalive_more;

    KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
        ("keepalive is not in progress"));
    KASSERT(nm->lmsg.ms_flags & MSGF_DONE,
        ("keepalive more did not finish"));
    netisr_sendmsg_oncpu(nm);
}

/*
 * One bounded pass of the keepalive sweep: walk the state list from
 * 'anchor', expiring dead states and sending TCP keepalive probes in
 * both directions for established connections nearing expiry.  The
 * anchor is re-inserted after each visited state so a later pass can
 * resume where this one stopped; scan/expire/send limits bound the
 * work done per pass.
 */
static void
ipfw_keepalive_loop(struct ipfw_context *ctx, struct ipfw_state *anchor)
{
    struct ipfw_state *s;
    int scanned = 0, expired = 0, kept = 0;

    KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
        ("keepalive is not in progress"));

    while ((s = TAILQ_NEXT(anchor, st_link)) != NULL) {
        uint32_t ack_rev, ack_fwd;
        struct ipfw_flow_id id;

        if (scanned++ >= ipfw_state_scan_max) {
            ipfw_keepalive_more(ctx);
            return;
        }

        /* Advance the anchor past the state we are about to visit. */
        TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
        TAILQ_INSERT_AFTER(&ctx->ipfw_state_list, s, anchor, st_link);

        if (s->st_type == O_ANCHOR)
            continue;

        if (TIME_LEQ(s->st_expire, time_uptime)) {
            /* State expired. */
            ipfw_state_del(ctx, s);
            if (++expired >= ipfw_state_expire_max) {
                ipfw_keepalive_more(ctx);
                return;
            }
            continue;
        }

        /*
         * Keep alive processing
         */

        if (s->st_proto != IPPROTO_TCP)
            continue;
        if ((s->st_state & IPFW_STATE_TCPSTATES) != BOTH_SYN)
            continue;
        if (TIME_LEQ(time_uptime + dyn_keepalive_interval,
            s->st_expire))
            continue;   /* too early */

        ipfw_key_4tuple(&s->st_key, &id.src_ip, &id.src_port,
            &id.dst_ip, &id.dst_port);
        ack_rev = s->st_ack_rev;
        ack_fwd = s->st_ack_fwd;

        /* Probe both endpoints of the tracked connection. */
        send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN);
        send_pkt(&id, ack_fwd - 1, ack_rev, 0);

        if (++kept >= ipfw_keepalive_max) {
            ipfw_keepalive_more(ctx);
            return;
        }
    }
    TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
    ipfw_keepalive_done(ctx);
}

/*
 * Continuation handler scheduled by ipfw_keepalive_more(): resume the
 * sweep at the previously parked anchor.
 */
static void
ipfw_keepalive_more_dispatch(netmsg_t nm)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct ipfw_state *anchor;

    ASSERT_NETISR_NCPUS(mycpuid);
    KASSERT(ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE,
        ("keepalive is not in progress"));

    /* Reply ASAP */
    netisr_replymsg(&nm->base, 0);

    anchor = &ctx->ipfw_keepalive_anch;
    if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
        TAILQ_REMOVE(&ctx->ipfw_state_list, anchor, st_link);
        ipfw_keepalive_done(ctx);
        return;
    }
    ipfw_keepalive_loop(ctx, anchor);
}

/*
 * This procedure is only used to handle keepalives. It is invoked
 * every dyn_keepalive_period
 */
static void
ipfw_keepalive_dispatch(netmsg_t nm)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    struct ipfw_state *anchor;

    ASSERT_NETISR_NCPUS(mycpuid);
    KASSERT((ctx->ipfw_flags & IPFW_FLAG_KEEPALIVE) == 0,
        ("keepalive is in progress"));
    ctx->ipfw_flags |= IPFW_FLAG_KEEPALIVE;

    /* Reply ASAP */
    crit_enter();
    netisr_replymsg(&nm->base, 0);
    crit_exit();

    if (!dyn_keepalive || ctx->ipfw_state_cnt == 0) {
        ipfw_keepalive_done(ctx);
        return;
    }

    /* Park the sweep anchor at the head and start the first pass. */
    anchor = &ctx->ipfw_keepalive_anch;
    TAILQ_INSERT_HEAD(&ctx->ipfw_state_list, anchor, st_link);
    ipfw_keepalive_loop(ctx, anchor);
}

/*
 * This procedure is only used to handle keepalives.
It is invoked 6072 * every dyn_keepalive_period 6073 */ 6074 static void 6075 ipfw_keepalive(void *dummy __unused) 6076 { 6077 struct netmsg_base *msg; 6078 6079 KKASSERT(mycpuid < netisr_ncpus); 6080 msg = &ipfw_ctx[mycpuid]->ipfw_keepalive_nm; 6081 6082 crit_enter(); 6083 if (msg->lmsg.ms_flags & MSGF_DONE) 6084 netisr_sendmsg_oncpu(msg); 6085 crit_exit(); 6086 } 6087 6088 static void 6089 ipfw_ip_input_dispatch(netmsg_t nmsg) 6090 { 6091 struct netmsg_genpkt *nm = (struct netmsg_genpkt *)nmsg; 6092 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6093 struct mbuf *m = nm->m; 6094 struct ip_fw *rule = nm->arg1; 6095 6096 ASSERT_NETISR_NCPUS(mycpuid); 6097 KASSERT(rule->cpuid == mycpuid, 6098 ("rule does not belong to cpu%d", mycpuid)); 6099 KASSERT(m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE, 6100 ("mbuf does not have ipfw continue rule")); 6101 6102 KASSERT(ctx->ipfw_cont_rule == NULL, 6103 ("pending ipfw continue rule")); 6104 ctx->ipfw_cont_rule = rule; 6105 ip_input(m); 6106 6107 /* 6108 * This rule is no longer used; decrement its cross_refs, 6109 * so this rule can be deleted. 6110 */ 6111 rule->cross_refs--; 6112 6113 /* May not be cleared, if ipfw was unload/disabled. 
*/ 6114 ctx->ipfw_cont_rule = NULL; 6115 } 6116 6117 static int 6118 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 6119 { 6120 struct ip_fw_args args; 6121 struct mbuf *m = *m0; 6122 struct m_tag *mtag; 6123 int tee = 0, error = 0, ret, cpuid; 6124 struct netmsg_genpkt *nm; 6125 6126 args.cont = 0; 6127 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 6128 /* Extract info from dummynet tag */ 6129 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 6130 KKASSERT(mtag != NULL); 6131 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 6132 KKASSERT(args.rule != NULL); 6133 6134 m_tag_delete(m, mtag); 6135 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 6136 } else if (m->m_pkthdr.fw_flags & IPFW_MBUF_CONTINUE) { 6137 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6138 6139 KKASSERT(ctx->ipfw_cont_rule != NULL); 6140 args.rule = ctx->ipfw_cont_rule; 6141 ctx->ipfw_cont_rule = NULL; 6142 6143 args.cont = 1; 6144 m->m_pkthdr.fw_flags &= ~IPFW_MBUF_CONTINUE; 6145 } else { 6146 args.rule = NULL; 6147 } 6148 6149 args.eh = NULL; 6150 args.oif = NULL; 6151 args.m = m; 6152 ret = ipfw_chk(&args); 6153 m = args.m; 6154 6155 if (m == NULL) { 6156 error = EACCES; 6157 goto back; 6158 } 6159 6160 switch (ret) { 6161 case IP_FW_PASS: 6162 break; 6163 6164 case IP_FW_DENY: 6165 m_freem(m); 6166 m = NULL; 6167 error = EACCES; 6168 break; 6169 6170 case IP_FW_DUMMYNET: 6171 /* Send packet to the appropriate pipe */ 6172 m = ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args); 6173 break; 6174 6175 case IP_FW_TEE: 6176 tee = 1; 6177 /* FALL THROUGH */ 6178 6179 case IP_FW_DIVERT: 6180 /* 6181 * Must clear bridge tag when changing 6182 */ 6183 m->m_pkthdr.fw_flags &= ~BRIDGE_MBUF_TAGGED; 6184 if (ip_divert_p != NULL) { 6185 m = ip_divert_p(m, tee, 1); 6186 } else { 6187 m_freem(m); 6188 m = NULL; 6189 /* not sure this is the right error msg */ 6190 error = EACCES; 6191 } 6192 break; 6193 6194 case IP_FW_CONTINUE: 6195 KASSERT(m->m_flags & M_HASH, ("no 
hash")); 6196 cpuid = netisr_hashcpu(m->m_pkthdr.hash); 6197 KASSERT(cpuid != mycpuid, 6198 ("continue on the same cpu%d", cpuid)); 6199 6200 /* 6201 * NOTE: 6202 * Bump cross_refs to prevent this rule and its siblings 6203 * from being deleted, while this mbuf is inflight. The 6204 * cross_refs of the sibling rule on the target cpu will 6205 * be decremented, once this mbuf is going to be filtered 6206 * on the target cpu. 6207 */ 6208 args.rule->cross_refs++; 6209 m->m_pkthdr.fw_flags |= IPFW_MBUF_CONTINUE; 6210 6211 nm = &m->m_hdr.mh_genmsg; 6212 netmsg_init(&nm->base, NULL, &netisr_apanic_rport, 0, 6213 ipfw_ip_input_dispatch); 6214 nm->m = m; 6215 nm->arg1 = args.rule->cross_rules[cpuid]; 6216 netisr_sendmsg(&nm->base, cpuid); 6217 6218 /* This mbuf is dispatched; no longer valid. */ 6219 m = NULL; 6220 break; 6221 6222 default: 6223 panic("unknown ipfw return value: %d", ret); 6224 } 6225 back: 6226 *m0 = m; 6227 return error; 6228 } 6229 6230 static int 6231 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 6232 { 6233 struct ip_fw_args args; 6234 struct mbuf *m = *m0; 6235 struct m_tag *mtag; 6236 int tee = 0, error = 0, ret; 6237 6238 args.cont = 0; 6239 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 6240 /* Extract info from dummynet tag */ 6241 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 6242 KKASSERT(mtag != NULL); 6243 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 6244 KKASSERT(args.rule != NULL); 6245 6246 m_tag_delete(m, mtag); 6247 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 6248 } else { 6249 args.rule = NULL; 6250 } 6251 6252 args.eh = NULL; 6253 args.m = m; 6254 args.oif = ifp; 6255 ret = ipfw_chk(&args); 6256 m = args.m; 6257 6258 if (m == NULL) { 6259 error = EACCES; 6260 goto back; 6261 } 6262 6263 switch (ret) { 6264 case IP_FW_PASS: 6265 break; 6266 6267 case IP_FW_DENY: 6268 m_freem(m); 6269 m = NULL; 6270 error = EACCES; 6271 break; 6272 6273 case IP_FW_DUMMYNET: 6274 m = ipfw_dummynet_io(m, 
args.cookie, DN_TO_IP_OUT, &args); 6275 break; 6276 6277 case IP_FW_TEE: 6278 tee = 1; 6279 /* FALL THROUGH */ 6280 6281 case IP_FW_DIVERT: 6282 if (ip_divert_p != NULL) { 6283 m = ip_divert_p(m, tee, 0); 6284 } else { 6285 m_freem(m); 6286 m = NULL; 6287 /* not sure this is the right error msg */ 6288 error = EACCES; 6289 } 6290 break; 6291 6292 default: 6293 panic("unknown ipfw return value: %d", ret); 6294 } 6295 back: 6296 *m0 = m; 6297 return error; 6298 } 6299 6300 static void 6301 ipfw_hook(void) 6302 { 6303 struct pfil_head *pfh; 6304 6305 ASSERT_NETISR0; 6306 6307 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 6308 if (pfh == NULL) 6309 return; 6310 6311 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 6312 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 6313 } 6314 6315 static void 6316 ipfw_dehook(void) 6317 { 6318 struct pfil_head *pfh; 6319 6320 ASSERT_NETISR0; 6321 6322 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 6323 if (pfh == NULL) 6324 return; 6325 6326 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 6327 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 6328 } 6329 6330 static int 6331 ipfw_sysctl_dyncnt(SYSCTL_HANDLER_ARGS) 6332 { 6333 int dyn_cnt; 6334 6335 dyn_cnt = ipfw_state_cntcoll(); 6336 dyn_cnt += ipfw_gd.ipfw_trkcnt_cnt; 6337 6338 return (sysctl_handle_int(oidp, &dyn_cnt, 0, req)); 6339 } 6340 6341 static int 6342 ipfw_sysctl_statecnt(SYSCTL_HANDLER_ARGS) 6343 { 6344 int state_cnt; 6345 6346 state_cnt = ipfw_state_cntcoll(); 6347 return (sysctl_handle_int(oidp, &state_cnt, 0, req)); 6348 } 6349 6350 static int 6351 ipfw_sysctl_statemax(SYSCTL_HANDLER_ARGS) 6352 { 6353 int state_max, error; 6354 6355 state_max = ipfw_state_max; 6356 error = sysctl_handle_int(oidp, &state_max, 0, req); 6357 if (error || req->newptr == NULL) 6358 return (error); 6359 6360 if (state_max < 1) 6361 return (EINVAL); 6362 6363 ipfw_state_max_set(state_max); 6364 return (0); 6365 } 6366 6367 static int 6368 ipfw_sysctl_dynmax(SYSCTL_HANDLER_ARGS) 
6369 { 6370 int dyn_max, error; 6371 6372 dyn_max = ipfw_state_max + ipfw_track_max; 6373 6374 error = sysctl_handle_int(oidp, &dyn_max, 0, req); 6375 if (error || req->newptr == NULL) 6376 return (error); 6377 6378 if (dyn_max < 2) 6379 return (EINVAL); 6380 6381 ipfw_state_max_set(dyn_max / 2); 6382 ipfw_track_max = dyn_max / 2; 6383 return (0); 6384 } 6385 6386 static void 6387 ipfw_sysctl_enable_dispatch(netmsg_t nmsg) 6388 { 6389 int enable = nmsg->lmsg.u.ms_result; 6390 6391 ASSERT_NETISR0; 6392 6393 if (fw_enable == enable) 6394 goto reply; 6395 6396 fw_enable = enable; 6397 if (fw_enable) 6398 ipfw_hook(); 6399 else 6400 ipfw_dehook(); 6401 reply: 6402 netisr_replymsg(&nmsg->base, 0); 6403 } 6404 6405 static int 6406 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS) 6407 { 6408 struct netmsg_base nmsg; 6409 int enable, error; 6410 6411 enable = fw_enable; 6412 error = sysctl_handle_int(oidp, &enable, 0, req); 6413 if (error || req->newptr == NULL) 6414 return error; 6415 6416 netmsg_init(&nmsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6417 ipfw_sysctl_enable_dispatch); 6418 nmsg.lmsg.u.ms_result = enable; 6419 6420 return netisr_domsg(&nmsg, 0); 6421 } 6422 6423 static int 6424 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS) 6425 { 6426 return sysctl_int_range(oidp, arg1, arg2, req, 6427 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX); 6428 } 6429 6430 static int 6431 ipfw_sysctl_scancnt(SYSCTL_HANDLER_ARGS) 6432 { 6433 6434 return sysctl_int_range(oidp, arg1, arg2, req, 1, INT_MAX); 6435 } 6436 6437 static int 6438 ipfw_sysctl_stat(SYSCTL_HANDLER_ARGS) 6439 { 6440 u_long stat = 0; 6441 int cpu, error; 6442 6443 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 6444 stat += *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)); 6445 6446 error = sysctl_handle_long(oidp, &stat, 0, req); 6447 if (error || req->newptr == NULL) 6448 return (error); 6449 6450 /* Zero out this stat. 
*/ 6451 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 6452 *((u_long *)((uint8_t *)ipfw_ctx[cpu] + arg2)) = 0; 6453 return (0); 6454 } 6455 6456 static void 6457 ipfw_ctx_init_dispatch(netmsg_t nmsg) 6458 { 6459 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 6460 struct ipfw_context *ctx; 6461 struct ip_fw *def_rule; 6462 6463 ASSERT_NETISR_NCPUS(mycpuid); 6464 6465 ctx = kmalloc(__offsetof(struct ipfw_context, 6466 ipfw_tables[ipfw_table_max]), M_IPFW, M_WAITOK | M_ZERO); 6467 6468 RB_INIT(&ctx->ipfw_state_tree); 6469 TAILQ_INIT(&ctx->ipfw_state_list); 6470 6471 RB_INIT(&ctx->ipfw_track_tree); 6472 TAILQ_INIT(&ctx->ipfw_track_list); 6473 6474 callout_init_mp(&ctx->ipfw_stateto_ch); 6475 netmsg_init(&ctx->ipfw_stateexp_nm, NULL, &netisr_adone_rport, 6476 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_state_expire_dispatch); 6477 ctx->ipfw_stateexp_anch.st_type = O_ANCHOR; 6478 netmsg_init(&ctx->ipfw_stateexp_more, NULL, &netisr_adone_rport, 6479 MSGF_DROPABLE, ipfw_state_expire_more_dispatch); 6480 6481 callout_init_mp(&ctx->ipfw_trackto_ch); 6482 netmsg_init(&ctx->ipfw_trackexp_nm, NULL, &netisr_adone_rport, 6483 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_track_expire_dispatch); 6484 netmsg_init(&ctx->ipfw_trackexp_more, NULL, &netisr_adone_rport, 6485 MSGF_DROPABLE, ipfw_track_expire_more_dispatch); 6486 6487 callout_init_mp(&ctx->ipfw_keepalive_ch); 6488 netmsg_init(&ctx->ipfw_keepalive_nm, NULL, &netisr_adone_rport, 6489 MSGF_DROPABLE | MSGF_PRIORITY, ipfw_keepalive_dispatch); 6490 ctx->ipfw_keepalive_anch.st_type = O_ANCHOR; 6491 netmsg_init(&ctx->ipfw_keepalive_more, NULL, &netisr_adone_rport, 6492 MSGF_DROPABLE, ipfw_keepalive_more_dispatch); 6493 6494 ipfw_ctx[mycpuid] = ctx; 6495 6496 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 6497 6498 def_rule->act_ofs = 0; 6499 def_rule->rulenum = IPFW_DEFAULT_RULE; 6500 def_rule->cmd_len = 1; 6501 def_rule->set = IPFW_DEFAULT_SET; 6502 6503 def_rule->cmd[0].len = 1; 6504 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 
6505 def_rule->cmd[0].opcode = O_ACCEPT; 6506 #else 6507 if (filters_default_to_accept) 6508 def_rule->cmd[0].opcode = O_ACCEPT; 6509 else 6510 def_rule->cmd[0].opcode = O_DENY; 6511 #endif 6512 6513 def_rule->refcnt = 1; 6514 def_rule->cpuid = mycpuid; 6515 6516 /* Install the default rule */ 6517 ctx->ipfw_default_rule = def_rule; 6518 ctx->ipfw_layer3_chain = def_rule; 6519 6520 /* Link rule CPU sibling */ 6521 ipfw_link_sibling(fwmsg, def_rule); 6522 6523 /* Statistics only need to be updated once */ 6524 if (mycpuid == 0) 6525 ipfw_inc_static_count(def_rule); 6526 6527 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6528 } 6529 6530 static void 6531 ipfw_crossref_reap_dispatch(netmsg_t nmsg) 6532 { 6533 6534 crit_enter(); 6535 /* Reply ASAP */ 6536 netisr_replymsg(&nmsg->base, 0); 6537 crit_exit(); 6538 ipfw_crossref_reap(); 6539 } 6540 6541 static void 6542 ipfw_crossref_timeo(void *dummy __unused) 6543 { 6544 struct netmsg_base *msg = &ipfw_gd.ipfw_crossref_nm; 6545 6546 KKASSERT(mycpuid == 0); 6547 6548 crit_enter(); 6549 if (msg->lmsg.ms_flags & MSGF_DONE) 6550 netisr_sendmsg_oncpu(msg); 6551 crit_exit(); 6552 } 6553 6554 static void 6555 ipfw_ifaddr_dispatch(netmsg_t nmsg) 6556 { 6557 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6558 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 6559 struct ip_fw *f; 6560 6561 ASSERT_NETISR_NCPUS(mycpuid); 6562 6563 for (f = ctx->ipfw_layer3_chain; f != NULL; f = f->next) { 6564 int l, cmdlen; 6565 ipfw_insn *cmd; 6566 6567 if ((f->rule_flags & IPFW_RULE_F_DYNIFADDR) == 0) 6568 continue; 6569 6570 for (l = f->cmd_len, cmd = f->cmd; l > 0; 6571 l -= cmdlen, cmd += cmdlen) { 6572 cmdlen = F_LEN(cmd); 6573 if (cmd->opcode == O_IP_SRC_IFIP || 6574 cmd->opcode == O_IP_DST_IFIP) { 6575 if (strncmp(ifp->if_xname, 6576 ((ipfw_insn_ifip *)cmd)->ifname, 6577 IFNAMSIZ) == 0) 6578 cmd->arg1 &= ~IPFW_IFIP_VALID; 6579 } 6580 } 6581 } 6582 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6583 } 6584 6585 static void 6586 ipfw_ifaddr(void *arg 
__unused, struct ifnet *ifp, 6587 enum ifaddr_event event __unused, struct ifaddr *ifa __unused) 6588 { 6589 struct netmsg_base nm; 6590 6591 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6592 ipfw_ifaddr_dispatch); 6593 nm.lmsg.u.ms_resultp = ifp; 6594 netisr_domsg_global(&nm); 6595 } 6596 6597 static void 6598 ipfw_init_dispatch(netmsg_t nmsg) 6599 { 6600 struct netmsg_ipfw fwmsg; 6601 int error = 0, cpu; 6602 6603 ASSERT_NETISR0; 6604 6605 if (IPFW_LOADED) { 6606 kprintf("IP firewall already loaded\n"); 6607 error = EEXIST; 6608 goto reply; 6609 } 6610 6611 if (ipfw_table_max > UINT16_MAX || ipfw_table_max <= 0) 6612 ipfw_table_max = UINT16_MAX; 6613 6614 /* Initialize global track tree. */ 6615 RB_INIT(&ipfw_gd.ipfw_trkcnt_tree); 6616 IPFW_TRKCNT_TOKINIT; 6617 6618 /* GC for freed crossref rules. */ 6619 callout_init_mp(&ipfw_gd.ipfw_crossref_ch); 6620 netmsg_init(&ipfw_gd.ipfw_crossref_nm, NULL, &netisr_adone_rport, 6621 MSGF_PRIORITY | MSGF_DROPABLE, ipfw_crossref_reap_dispatch); 6622 6623 ipfw_state_max_set(ipfw_state_max); 6624 ipfw_state_headroom = 8 * netisr_ncpus; 6625 6626 bzero(&fwmsg, sizeof(fwmsg)); 6627 netmsg_init(&fwmsg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6628 ipfw_ctx_init_dispatch); 6629 netisr_domsg_global(&fwmsg.base); 6630 6631 ip_fw_chk_ptr = ipfw_chk; 6632 ip_fw_ctl_ptr = ipfw_ctl; 6633 ip_fw_dn_io_ptr = ipfw_dummynet_io; 6634 6635 kprintf("ipfw2 initialized, default to %s, logging ", 6636 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode == 6637 O_ACCEPT ? 
"accept" : "deny"); 6638 6639 #ifdef IPFIREWALL_VERBOSE 6640 fw_verbose = 1; 6641 #endif 6642 #ifdef IPFIREWALL_VERBOSE_LIMIT 6643 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 6644 #endif 6645 if (fw_verbose == 0) { 6646 kprintf("disabled\n"); 6647 } else if (verbose_limit == 0) { 6648 kprintf("unlimited\n"); 6649 } else { 6650 kprintf("limited to %d packets/entry by default\n", 6651 verbose_limit); 6652 } 6653 6654 ip_fw_loaded = 1; 6655 for (cpu = 0; cpu < netisr_ncpus; ++cpu) { 6656 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_stateto_ch, hz, 6657 ipfw_state_expire_ipifunc, NULL, cpu); 6658 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_trackto_ch, hz, 6659 ipfw_track_expire_ipifunc, NULL, cpu); 6660 callout_reset_bycpu(&ipfw_ctx[cpu]->ipfw_keepalive_ch, hz, 6661 ipfw_keepalive, NULL, cpu); 6662 } 6663 6664 if (fw_enable) 6665 ipfw_hook(); 6666 6667 ipfw_ifaddr_event = EVENTHANDLER_REGISTER(ifaddr_event, ipfw_ifaddr, 6668 NULL, EVENTHANDLER_PRI_ANY); 6669 if (ipfw_ifaddr_event == NULL) 6670 kprintf("ipfw: ifaddr_event register failed\n"); 6671 6672 reply: 6673 netisr_replymsg(&nmsg->base, error); 6674 } 6675 6676 static int 6677 ipfw_init(void) 6678 { 6679 struct netmsg_base smsg; 6680 6681 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6682 ipfw_init_dispatch); 6683 return netisr_domsg(&smsg, 0); 6684 } 6685 6686 #ifdef KLD_MODULE 6687 6688 static void 6689 ipfw_ctx_fini_dispatch(netmsg_t nmsg) 6690 { 6691 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 6692 6693 ASSERT_NETISR_NCPUS(mycpuid); 6694 6695 callout_stop_sync(&ctx->ipfw_stateto_ch); 6696 callout_stop_sync(&ctx->ipfw_trackto_ch); 6697 callout_stop_sync(&ctx->ipfw_keepalive_ch); 6698 6699 crit_enter(); 6700 netisr_dropmsg(&ctx->ipfw_stateexp_more); 6701 netisr_dropmsg(&ctx->ipfw_stateexp_nm); 6702 netisr_dropmsg(&ctx->ipfw_trackexp_more); 6703 netisr_dropmsg(&ctx->ipfw_trackexp_nm); 6704 netisr_dropmsg(&ctx->ipfw_keepalive_more); 6705 netisr_dropmsg(&ctx->ipfw_keepalive_nm); 6706 crit_exit(); 6707 
6708 ipfw_table_flushall_oncpu(ctx, 1); 6709 6710 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 6711 } 6712 6713 static void 6714 ipfw_fini_dispatch(netmsg_t nmsg) 6715 { 6716 struct netmsg_base nm; 6717 int error = 0, cpu; 6718 6719 ASSERT_NETISR0; 6720 6721 ipfw_crossref_reap(); 6722 6723 if (ipfw_gd.ipfw_refcnt != 0) { 6724 error = EBUSY; 6725 goto reply; 6726 } 6727 6728 ip_fw_loaded = 0; 6729 ipfw_dehook(); 6730 6731 /* Synchronize any inflight state/track expire IPIs. */ 6732 lwkt_synchronize_ipiqs("ipfwfini"); 6733 6734 netmsg_init(&nm, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6735 ipfw_ctx_fini_dispatch); 6736 netisr_domsg_global(&nm); 6737 6738 callout_stop_sync(&ipfw_gd.ipfw_crossref_ch); 6739 crit_enter(); 6740 netisr_dropmsg(&ipfw_gd.ipfw_crossref_nm); 6741 crit_exit(); 6742 6743 if (ipfw_ifaddr_event != NULL) 6744 EVENTHANDLER_DEREGISTER(ifaddr_event, ipfw_ifaddr_event); 6745 6746 ip_fw_chk_ptr = NULL; 6747 ip_fw_ctl_ptr = NULL; 6748 ip_fw_dn_io_ptr = NULL; 6749 ipfw_flush(1 /* kill default rule */); 6750 6751 /* Free pre-cpu context */ 6752 for (cpu = 0; cpu < netisr_ncpus; ++cpu) 6753 kfree(ipfw_ctx[cpu], M_IPFW); 6754 6755 kprintf("IP firewall unloaded\n"); 6756 reply: 6757 netisr_replymsg(&nmsg->base, error); 6758 } 6759 6760 static int 6761 ipfw_fini(void) 6762 { 6763 struct netmsg_base smsg; 6764 6765 netmsg_init(&smsg, NULL, &curthread->td_msgport, MSGF_PRIORITY, 6766 ipfw_fini_dispatch); 6767 return netisr_domsg(&smsg, 0); 6768 } 6769 6770 #endif /* KLD_MODULE */ 6771 6772 static int 6773 ipfw_modevent(module_t mod, int type, void *unused) 6774 { 6775 int err = 0; 6776 6777 switch (type) { 6778 case MOD_LOAD: 6779 err = ipfw_init(); 6780 break; 6781 6782 case MOD_UNLOAD: 6783 #ifndef KLD_MODULE 6784 kprintf("ipfw statically compiled, cannot unload\n"); 6785 err = EBUSY; 6786 #else 6787 err = ipfw_fini(); 6788 #endif 6789 break; 6790 default: 6791 break; 6792 } 6793 return err; 6794 } 6795 6796 static moduledata_t ipfwmod = { 6797 "ipfw", 
6798 ipfw_modevent, 6799 0 6800 }; 6801 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 6802 MODULE_VERSION(ipfw, 1); 6803