/*
 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $
 * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.100 2008/11/22 11:03:35 sephe Exp $
 */

/*
 * Implement IP packet firewall (new version)
 */

#include "opt_ipfw.h"
#include "opt_inet.h"
#ifndef INET
#error IPFIREWALL requires INET.
#endif /* INET */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/thread2.h>
#include <sys/ucred.h>
#include <sys/in_cksum.h>
#include <sys/lock.h>

#include <net/if.h>
#include <net/route.h>
#include <net/netmsg2.h>
#include <net/pfil.h>
#include <net/dummynet/ip_dummynet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>
#include <netinet/if_ether.h>   /* XXX for ETHERTYPE_IP */

#include <net/ipfw/ip_fw2.h>

#ifdef IPFIREWALL_DEBUG
#define DPRINTF(fmt, ...)                       \
do {                                            \
    if (fw_debug > 0)                           \
        kprintf(fmt, __VA_ARGS__);              \
} while (0)
#else
#define DPRINTF(fmt, ...)       ((void)0)
#endif

/*
 * Description of the per-CPU rule duplication:
 *
 * Module loading/unloading and all ioctl operations are serialized
 * by netisr0, so we don't have any ordering or locking problems.
 *
 * The following graph shows how an operation on the per-CPU rule
 * list is performed [2 CPU case]:
 *
 *    CPU0                 CPU1
 *
 * netisr0 <------------------------------------+
 *  domsg                                       |
 *    |                                         |
 *    | netmsg                                  |
 *    |                                         |
 *    V                                         |
 *  ifnet0                                      |
 *    :                                         | netmsg
 *    :(delete/add...)                          |
 *    :                                         |
 *    :           netmsg                        |
 *  forwardmsg---------->ifnet1                 |
 *                          :                   |
 *                          :(delete/add...)    |
 *                          :                   |
 *                          :                   |
 *                        replymsg--------------+
 *
 *
 * Rules which will not create states (dyn rules) [2 CPU case]
 *
 *    CPU0               CPU1
 * layer3_chain       layer3_chain
 *     |                  |
 *     V                  V
 * +-------+ sibling  +-------+ sibling
 * | rule1 |--------->| rule1 |--------->NULL
 * +-------+          +-------+
 *     |                  |
 *     |next              |next
 *     V                  V
 * +-------+ sibling  +-------+ sibling
 * | rule2 |--------->| rule2 |--------->NULL
 * +-------+          +-------+
 *
 * ip_fw.sibling:
 * 1) Ease statistics calculation during IP_FW_GET.  We only need to
 *    iterate layer3_chain on CPU0; the current rule's duplications on
 *    the other CPUs can safely be accessed read-only through
 *    ip_fw.sibling.
 * 2) Accelerate rule insertion and deletion, e.g. rule insertion:
 *    a) In netisr0 (on CPU0) rule3 is determined to be inserted
 *       between rule1 and rule2.  To make this decision we need to
 *       iterate the layer3_chain on CPU0.  The netmsg, which is used
 *       to insert the rule, will contain rule1 on CPU0 as prev_rule
 *       and rule2 on CPU0 as next_rule.
 *    b) After the insertion on CPU0 is done, we will move on to CPU1.
 *       But instead of relocating rule3's position on CPU1 by
 *       iterating the layer3_chain on CPU1, we set the netmsg's
 *       prev_rule to rule1->sibling and next_rule to rule2->sibling
 *       before the netmsg is forwarded to CPU1 from CPU0.
 *
 *
 * Rules which will create states (dyn rules) [2 CPU case]
 * (unnecessary parts are omitted; they are the same as in the
 *  previous figure)
 *
 *    CPU0                             CPU1
 *
 * +-------+                         +-------+
 * | rule1 |                         | rule1 |
 * +-------+                         +-------+
 *   ^   |                             |   ^
 *   |   |stub                     stub|   |
 *   |   |                             |   |
 *   |   +----+                   +----+   |
 *   |        |                   |        |
 *   |        V                   V        |
 *   |        +-------------------+        |
 *   |        |     rule_stub     |        |
 *   |        | (read-only shared)|        |
 *   |        |                   |        |
 *   |        | back pointer array|        |
 *   |        | (indexed by cpuid)|        |
 *   |        |                   |        |
 *   +--------|---------[0]       |        |
 *            |         [1]-------|--------+
 *            |                   |
 *            +-------------------+
 *                ^           ^
 *                |           |
 *  ..............|...........|..............
 *  :             |           |             :
 *  :             |stub       |stub         :
 *  :             |           |             :
 *  :        +---------+  +---------+       :
 *  :        | state1a |  | state1b | .... :
 *  :        +---------+  +---------+       :
 *  :                                       :
 *  :            states table               :
 *  :              (shared)                 :
 *  :        (protected by dyn_lock)        :
 *  .........................................
 *
 * [state1a and state1b are states created by rule1]
 *
 * ip_fw_stub:
 * This structure is introduced so that the shared (locked) states
 * table can work with the per-CPU (duplicated) static rules.  It
 * mainly bridges states and static rules, and serves as the static
 * rule's placeholder (a read-only shared part of the duplicated
 * rules) from the states' point of view.
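 *
 * For example, when a state is found by lookup_dyn_rule(), it is
 * mapped back to this CPU's duplicate of its rule through the stub's
 * back pointer array: rule = q->stub->rule[mycpuid] (see
 * lookup_rule() below).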
 *
 * IPFW_RULE_F_STATE (only for rules which create states):
 * o  During rule installation, this flag is turned on after the
 *    rule's duplications have reached all CPUs, to avoid at least
 *    the following race:
 *    1) rule1 is duplicated on CPU0 but not yet on CPU1
 *    2) rule1 creates state1
 *    3) state1 is located on CPU1 by check-state
 *    But rule1 is not duplicated on CPU1 yet.
 * o  During rule deletion, this flag is turned off before deleting
 *    the states created by the rule and before deleting the rule
 *    itself, so no more states will be created by the to-be-deleted
 *    rule even when its duplications on certain CPUs have not been
 *    eliminated yet.
 */

#define IPFW_AUTOINC_STEP_MIN   1
#define IPFW_AUTOINC_STEP_MAX   1000
#define IPFW_AUTOINC_STEP_DEF   100

#define IPFW_DEFAULT_RULE       65535   /* rulenum for the default rule */
#define IPFW_DEFAULT_SET        31      /* set number for the default rule */

struct netmsg_ipfw {
    struct netmsg               nmsg;
    const struct ipfw_ioc_rule  *ioc_rule;
    struct ip_fw                *next_rule;
    struct ip_fw                *prev_rule;
    struct ip_fw                *sibling;
    struct ip_fw_stub           *stub;
};

struct netmsg_del {
    struct netmsg       nmsg;
    struct ip_fw        *start_rule;
    struct ip_fw        *prev_rule;
    uint16_t            rulenum;
    uint8_t             from_set;
    uint8_t             to_set;
};

struct netmsg_zent {
    struct netmsg       nmsg;
    struct ip_fw        *start_rule;
    uint16_t            rulenum;
    uint16_t            log_only;
};

struct ipfw_context {
    struct ip_fw        *ipfw_layer3_chain; /* list of rules for layer3 */
    struct ip_fw        *ipfw_default_rule; /* default rule */
    uint64_t            ipfw_norule_counter; /* counter for ipfw_log(NULL) */

    /*
     * ipfw_set_disable contains one bit per set value (0..31).
     * If the bit is set, all rules with the corresponding set
     * are disabled.  Set IPFW_DEFAULT_SET is reserved for the
     * default rule and CANNOT be disabled.
     */
    uint32_t            ipfw_set_disable;
    uint32_t            ipfw_gen;           /* generation of rule list */
};

static struct ipfw_context *ipfw_ctx[MAXCPU];

#ifdef KLD_MODULE
/*
 * The module cannot be unloaded if there are references to certain
 * rules of ipfw(4), e.g. from dummynet(4).
 */
static int ipfw_refcnt;
#endif

MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");

/*
 * The following two global variables are accessed and updated only
 * on CPU0.
 */
static uint32_t static_count;   /* # of static rules */
static uint32_t static_ioc_len; /* bytes of static rules */

/*
 * If 1, then ipfw static rules are being flushed and
 * ipfw_chk() will skip to the default rule.
 */
static int ipfw_flushing;

static int fw_verbose;
static int verbose_limit;

static int fw_debug;
static int autoinc_step = IPFW_AUTOINC_STEP_DEF;

static int      ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS);
static int      ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS);
static int      ipfw_sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS);
static int      ipfw_sysctl_dyn_fin(SYSCTL_HANDLER_ARGS);
static int      ipfw_sysctl_dyn_rst(SYSCTL_HANDLER_ARGS);

SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
    &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW,
    &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I",
    "Rule number autoincrement step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, CTLFLAG_RW,
    &fw_one_pass, 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
    &fw_debug, 0, "Enable printing of debug ip_fw statements");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
    &fw_verbose, 0, "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
    &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");

/*
 * Description of dynamic rules.
 *
 * Dynamic rules are stored in lists accessed through a hash table
 * (ipfw_dyn_v) whose size is curr_dyn_buckets.  This value can
 * be modified through the sysctl variable dyn_buckets, which is
 * updated when the table becomes empty.
 *
 * XXX currently there is only one list, ipfw_dyn.
 *
 * When a packet is received, its address fields are first masked
 * with the mask defined for the rule, then hashed, then matched
 * against the entries in the corresponding list.
 * Dynamic rules can be used for different purposes:
 *  + stateful rules;
 *  + enforcing limits on the number of sessions;
 *  + in-kernel NAT (not implemented yet)
 *
 * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
 * measured in seconds and depending on the flags.
 *
 * The total number of dynamic rules is stored in dyn_count.
 * The max number of dynamic rules is dyn_max.  When we reach
 * the maximum number of rules we do not create any more.  This is
 * done to avoid consuming too much memory, but also too much
 * time when searching on each packet (ideally, we should try instead
 * to put a limit on the length of the list on each bucket...).
 *
 * Each dynamic rule holds a pointer to the parent ipfw rule so
 * we know what action to perform.  Dynamic rules are removed when
 * the parent rule is deleted.  XXX we should make them survive.
 *
 * There are some limitations with dynamic rules -- we do not
 * obey the 'randomized match', and we do not do multiple
 * passes through the firewall.  XXX check the latter!!!
 *
 * NOTE about the SHARED LOCKMGR LOCK during dynamic rule lookup:
 * Only TCP state transitions change a dynamic rule's state and ack
 * sequences, and all packets of one TCP connection go through one
 * TCP thread, so it is safe to hold the shared lockmgr lock during
 * dynamic rule lookup.
 * The keepalive callout takes the exclusive lockmgr lock when it
 * tries to find suitable dynamic rules to send keepalives, so it
 * will not see half-updated state and ack sequences.  Though
 * updating the expire field looks racy for other protocols, the
 * one-second resolution of the expire field makes this kind of race
 * harmless.
 * XXX statistics' updating is _not_ MPsafe!!!
 * XXX once the UDP output path is fixed, we could use a lockless
 * dynamic rule hash table.
 */
static ipfw_dyn_rule **ipfw_dyn_v = NULL;
static uint32_t dyn_buckets = 256;      /* must be power of 2 */
static uint32_t curr_dyn_buckets = 256; /* must be power of 2 */
static uint32_t dyn_buckets_gen;        /* generation of dyn buckets array */
static struct lock dyn_lock;            /* dynamic rules' hash table lock */

static struct netmsg ipfw_timeout_netmsg; /* schedule ipfw timeout */
static struct callout ipfw_timeout_h;

/*
 * Timeouts for various events in handling dynamic rules.
 */
static uint32_t dyn_ack_lifetime = 300;
static uint32_t dyn_syn_lifetime = 20;
static uint32_t dyn_fin_lifetime = 1;
static uint32_t dyn_rst_lifetime = 1;
static uint32_t dyn_udp_lifetime = 10;
static uint32_t dyn_short_lifetime = 5;

/*
 * Keepalives are sent if dyn_keepalive is set.  They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of lifetime of a rule.
 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
 * than dyn_keepalive_period.
 */
static uint32_t dyn_keepalive_interval = 20;
static uint32_t dyn_keepalive_period = 5;
static uint32_t dyn_keepalive = 1;      /* do send keepalives */

static uint32_t dyn_count;              /* # of dynamic rules */
static uint32_t dyn_max = 4096;         /* max # of dynamic rules */

SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLTYPE_INT | CTLFLAG_RW,
    &dyn_buckets, 0, ipfw_sysctl_dyn_buckets, "I", "Number of dyn. buckets");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD,
    &curr_dyn_buckets, 0, "Current number of dyn. buckets");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD,
    &dyn_count, 0, "Number of dyn. rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW,
    &dyn_max, 0, "Max number of dyn. rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD,
    &static_count, 0, "Number of static rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW,
    &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW,
    &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime,
    CTLTYPE_INT | CTLFLAG_RW, &dyn_fin_lifetime, 0, ipfw_sysctl_dyn_fin, "I",
    "Lifetime of dyn. rules for fin");
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime,
    CTLTYPE_INT | CTLFLAG_RW, &dyn_rst_lifetime, 0, ipfw_sysctl_dyn_rst, "I",
    "Lifetime of dyn. rules for rst");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW,
    &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW,
    &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
    &dyn_keepalive, 0, "Enable keepalives for dyn. rules");
rules"); 425 426 static ip_fw_chk_t ipfw_chk; 427 static void ipfw_tick(void *); 428 429 static __inline int 430 ipfw_free_rule(struct ip_fw *rule) 431 { 432 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d\n", mycpuid)); 433 KASSERT(rule->refcnt > 0, ("invalid refcnt %u\n", rule->refcnt)); 434 rule->refcnt--; 435 if (rule->refcnt == 0) { 436 kfree(rule, M_IPFW); 437 return 1; 438 } 439 return 0; 440 } 441 442 static void 443 ipfw_unref_rule(void *priv) 444 { 445 ipfw_free_rule(priv); 446 #ifdef KLD_MODULE 447 atomic_subtract_int(&ipfw_refcnt, 1); 448 #endif 449 } 450 451 static __inline void 452 ipfw_ref_rule(struct ip_fw *rule) 453 { 454 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d\n", mycpuid)); 455 #ifdef KLD_MODULE 456 atomic_add_int(&ipfw_refcnt, 1); 457 #endif 458 rule->refcnt++; 459 } 460 461 /* 462 * This macro maps an ip pointer into a layer3 header pointer of type T 463 */ 464 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 465 466 static __inline int 467 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 468 { 469 int type = L3HDR(struct icmp,ip)->icmp_type; 470 471 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type))); 472 } 473 474 #define TT ((1 << ICMP_ECHO) | \ 475 (1 << ICMP_ROUTERSOLICIT) | \ 476 (1 << ICMP_TSTAMP) | \ 477 (1 << ICMP_IREQ) | \ 478 (1 << ICMP_MASKREQ)) 479 480 static int 481 is_icmp_query(struct ip *ip) 482 { 483 int type = L3HDR(struct icmp, ip)->icmp_type; 484 485 return (type <= ICMP_MAXTYPE && (TT & (1 << type))); 486 } 487 488 #undef TT 489 490 /* 491 * The following checks use two arrays of 8 or 16 bits to store the 492 * bits that we want set or clear, respectively. They are in the 493 * low and high half of cmd->arg1 or cmd->d[0]. 494 * 495 * We scan options and store the bits we find set. We succeed if 496 * 497 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 498 * 499 * The code is sometimes optimized not to store additional variables. 
 */
static int
flags_match(ipfw_insn *cmd, uint8_t bits)
{
    u_char want_clear;

    bits = ~bits;

    if (((cmd->arg1 & 0xff) & bits) != 0)
        return 0;       /* some bits we want set were clear */

    want_clear = (cmd->arg1 >> 8) & 0xff;
    if ((want_clear & bits) != want_clear)
        return 0;       /* some bits we want clear were set */
    return 1;
}

static int
ipopts_match(struct ip *ip, ipfw_insn *cmd)
{
    int optlen, bits = 0;
    u_char *cp = (u_char *)(ip + 1);
    int x = (ip->ip_hl << 2) - sizeof(struct ip);

    for (; x > 0; x -= optlen, cp += optlen) {
        int opt = cp[IPOPT_OPTVAL];

        if (opt == IPOPT_EOL)
            break;

        if (opt == IPOPT_NOP) {
            optlen = 1;
        } else {
            optlen = cp[IPOPT_OLEN];
            if (optlen <= 0 || optlen > x)
                return 0;       /* invalid or truncated */
        }

        switch (opt) {
        case IPOPT_LSRR:
            bits |= IP_FW_IPOPT_LSRR;
            break;

        case IPOPT_SSRR:
            bits |= IP_FW_IPOPT_SSRR;
            break;

        case IPOPT_RR:
            bits |= IP_FW_IPOPT_RR;
            break;

        case IPOPT_TS:
            bits |= IP_FW_IPOPT_TS;
            break;

        default:
            break;
        }
    }
    return (flags_match(cmd, bits));
}

static int
tcpopts_match(struct ip *ip, ipfw_insn *cmd)
{
    int optlen, bits = 0;
    struct tcphdr *tcp = L3HDR(struct tcphdr, ip);
    u_char *cp = (u_char *)(tcp + 1);
    int x = (tcp->th_off << 2) - sizeof(struct tcphdr);

    for (; x > 0; x -= optlen, cp += optlen) {
        int opt = cp[0];

        if (opt == TCPOPT_EOL)
            break;

        if (opt == TCPOPT_NOP) {
            optlen = 1;
        } else {
            optlen = cp[1];
            if (optlen <= 0)
                break;
        }

        switch (opt) {
        case TCPOPT_MAXSEG:
            bits |= IP_FW_TCPOPT_MSS;
            break;

        case TCPOPT_WINDOW:
            bits |= IP_FW_TCPOPT_WINDOW;
            break;

        case TCPOPT_SACK_PERMITTED:
        case TCPOPT_SACK:
            bits |= IP_FW_TCPOPT_SACK;
            break;

        case TCPOPT_TIMESTAMP:
            bits |= IP_FW_TCPOPT_TS;
            break;

        case TCPOPT_CC:
        case TCPOPT_CCNEW:
        case TCPOPT_CCECHO:
            bits |= IP_FW_TCPOPT_CC;
            break;

        default:
            break;
        }
    }
    return (flags_match(cmd, bits));
}

static int
iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
{
    if (ifp == NULL)    /* no iface with this packet, match fails */
        return 0;

    /* Check by name or by IP address */
    if (cmd->name[0] != '\0') {         /* match by name */
        /* Check name */
        if (cmd->p.glob) {
            if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0)
                return(1);
        } else {
            if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
                return(1);
        }
    } else {
        struct ifaddr_container *ifac;

        TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
            struct ifaddr *ia = ifac->ifa;

            if (ia->ifa_addr == NULL)
                continue;
            if (ia->ifa_addr->sa_family != AF_INET)
                continue;
            if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
                (ia->ifa_addr))->sin_addr.s_addr)
                return(1);      /* match */
        }
    }
    return(0);  /* no match, fail ... */
}

#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0

/*
 * We enter here when we have a rule with O_LOG.
 * XXX this function alone takes about 2Kbytes of code!
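 *
 * With the format strings used below, a logged match looks roughly
 * like (the rule number, action, addresses and interface shown here
 * are of course situational):
 *
 *   ipfw: 100 Deny TCP 10.0.0.1:1234 10.0.0.2:80 in via em0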
 */
static void
ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh,
    struct mbuf *m, struct ifnet *oif)
{
    char *action;
    int limit_reached = 0;
    char action2[40], proto[48], fragment[28];

    fragment[0] = '\0';
    proto[0] = '\0';

    if (f == NULL) {    /* bogus pkt */
        struct ipfw_context *ctx = ipfw_ctx[mycpuid];

        if (verbose_limit != 0 &&
            ctx->ipfw_norule_counter >= verbose_limit)
            return;
        ctx->ipfw_norule_counter++;
        if (ctx->ipfw_norule_counter == verbose_limit)
            limit_reached = verbose_limit;
        action = "Refuse";
    } else {    /* O_LOG is the first action, find the real one */
        ipfw_insn *cmd = ACTION_PTR(f);
        ipfw_insn_log *l = (ipfw_insn_log *)cmd;

        if (l->max_log != 0 && l->log_left == 0)
            return;
        l->log_left--;
        if (l->log_left == 0)
            limit_reached = l->max_log;
        cmd += F_LEN(cmd);      /* point to first action */
        if (cmd->opcode == O_PROB)
            cmd += F_LEN(cmd);

        action = action2;
        switch (cmd->opcode) {
        case O_DENY:
            action = "Deny";
            break;

        case O_REJECT:
            if (cmd->arg1 == ICMP_REJECT_RST) {
                action = "Reset";
            } else if (cmd->arg1 == ICMP_UNREACH_HOST) {
                action = "Reject";
            } else {
                ksnprintf(SNPARGS(action2, 0), "Unreach %d",
                          cmd->arg1);
            }
            break;

        case O_ACCEPT:
            action = "Accept";
            break;

        case O_COUNT:
            action = "Count";
            break;

        case O_DIVERT:
            ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1);
            break;

        case O_TEE:
            ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1);
            break;

        case O_SKIPTO:
            ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1);
            break;

        case O_PIPE:
            ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1);
            break;

        case O_QUEUE:
            ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1);
            break;

        case O_FORWARD_IP:
            {
                ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
                int len;

                len = ksnprintf(SNPARGS(action2, 0),
                                "Forward to %s",
                                inet_ntoa(sa->sa.sin_addr));
                if (sa->sa.sin_port) {
                    ksnprintf(SNPARGS(action2, len), ":%d",
                              sa->sa.sin_port);
                }
            }
            break;

        default:
            action = "UNKNOWN";
            break;
        }
    }

    if (hlen == 0) {    /* non-ip */
        ksnprintf(SNPARGS(proto, 0), "MAC");
    } else {
        struct ip *ip = mtod(m, struct ip *);
        /* these three are all aliases to the same thing */
        struct icmp *const icmp = L3HDR(struct icmp, ip);
        struct tcphdr *const tcp = (struct tcphdr *)icmp;
        struct udphdr *const udp = (struct udphdr *)icmp;

        int ip_off, offset, ip_len;
        int len;

        if (eh != NULL) { /* layer 2 packets are as on the wire */
            ip_off = ntohs(ip->ip_off);
            ip_len = ntohs(ip->ip_len);
        } else {
            ip_off = ip->ip_off;
            ip_len = ip->ip_len;
        }
        offset = ip_off & IP_OFFMASK;
        switch (ip->ip_p) {
        case IPPROTO_TCP:
            len = ksnprintf(SNPARGS(proto, 0), "TCP %s",
                            inet_ntoa(ip->ip_src));
            if (offset == 0) {
                ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
                          ntohs(tcp->th_sport),
                          inet_ntoa(ip->ip_dst),
                          ntohs(tcp->th_dport));
            } else {
                ksnprintf(SNPARGS(proto, len), " %s",
                          inet_ntoa(ip->ip_dst));
            }
            break;

        case IPPROTO_UDP:
            len = ksnprintf(SNPARGS(proto, 0), "UDP %s",
                            inet_ntoa(ip->ip_src));
            if (offset == 0) {
                ksnprintf(SNPARGS(proto, len), ":%d %s:%d",
                          ntohs(udp->uh_sport),
                          inet_ntoa(ip->ip_dst),
                          ntohs(udp->uh_dport));
            } else {
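                /*
                 * Non-first fragment: the UDP header is not in this
                 * fragment, so only the addresses can be logged.
                 */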
                ksnprintf(SNPARGS(proto, len), " %s",
                          inet_ntoa(ip->ip_dst));
            }
            break;

        case IPPROTO_ICMP:
            if (offset == 0) {
                len = ksnprintf(SNPARGS(proto, 0),
                                "ICMP:%u.%u ",
                                icmp->icmp_type,
                                icmp->icmp_code);
            } else {
                len = ksnprintf(SNPARGS(proto, 0), "ICMP ");
            }
            len += ksnprintf(SNPARGS(proto, len), "%s",
                             inet_ntoa(ip->ip_src));
            ksnprintf(SNPARGS(proto, len), " %s",
                      inet_ntoa(ip->ip_dst));
            break;

        default:
            len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p,
                            inet_ntoa(ip->ip_src));
            ksnprintf(SNPARGS(proto, len), " %s",
                      inet_ntoa(ip->ip_dst));
            break;
        }

        if (ip_off & (IP_MF | IP_OFFMASK)) {
            ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)",
                      ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
                      offset << 3, (ip_off & IP_MF) ? "+" : "");
        }
    }

    if (oif || m->m_pkthdr.rcvif) {
        log(LOG_SECURITY | LOG_INFO,
            "ipfw: %d %s %s %s via %s%s\n",
            f ? f->rulenum : -1,
            action, proto, oif ? "out" : "in",
            oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
            fragment);
    } else {
        log(LOG_SECURITY | LOG_INFO,
            "ipfw: %d %s %s [no if info]%s\n",
            f ? f->rulenum : -1,
            action, proto, fragment);
    }

    if (limit_reached) {
        log(LOG_SECURITY | LOG_NOTICE,
            "ipfw: limit %d reached on entry %d\n",
            limit_reached, f ? f->rulenum : -1);
    }
}

#undef SNPARGS

/*
 * IMPORTANT: the hash function for dynamic rules must be commutative
 * in source and destination (ip, port), because rules are bidirectional
 * and we want to find both in the same bucket.
 */
static __inline int
hash_packet(struct ipfw_flow_id *id)
{
    uint32_t i;

    i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
    i &= (curr_dyn_buckets - 1);
    return i;
}

/**
 * Unlink a dynamic rule from a chain.  prev is a pointer to
 * the previous one, q is a pointer to the rule to delete,
 * head is a pointer to the head of the queue.
 * Modifies q and potentially also head.
 */
#define UNLINK_DYN_RULE(prev, head, q)                                  \
do {                                                                    \
    ipfw_dyn_rule *old_q = q;                                           \
                                                                        \
    /* remove a refcount to the parent */                               \
    if (q->dyn_type == O_LIMIT)                                         \
        q->parent->count--;                                             \
    DPRINTF("-- unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",        \
            q->id.src_ip, q->id.src_port,                               \
            q->id.dst_ip, q->id.dst_port, dyn_count - 1);               \
    if (prev != NULL)                                                   \
        prev->next = q = q->next;                                       \
    else                                                                \
        head = q = q->next;                                             \
    KASSERT(dyn_count > 0, ("invalid dyn count %u\n", dyn_count));      \
    dyn_count--;                                                        \
    kfree(old_q, M_IPFW);                                               \
} while (0)

#define TIME_LEQ(a, b)  ((int)((a) - (b)) <= 0)

/**
 * Remove dynamic rules pointing to "rule", or all of them if
 * rule == NULL.
 *
 * If keep_me == NULL, rules are deleted even if not expired,
 * otherwise only expired rules are removed.
 *
 * The value of the second parameter is also used to identify
 * a rule we absolutely do not want to remove (e.g. because we are
 * holding a reference to it -- this is the case with O_LIMIT_PARENT
 * rules).  The pointer is only used for comparison, so any non-null
 * value will do.
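 *
 * For example, install_state_locked() passes (ipfw_dyn_rule *)1 as
 * keep_me when it merely wants expired states reclaimed without
 * protecting any particular entry.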
 */
static void
remove_dyn_rule_locked(struct ip_fw *rule, ipfw_dyn_rule *keep_me)
{
    static uint32_t last_remove = 0; /* XXX */

#define FORCE   (keep_me == NULL)

    ipfw_dyn_rule *prev, *q;
    int i, pass = 0, max_pass = 0, unlinked = 0;

    if (ipfw_dyn_v == NULL || dyn_count == 0)
        return;
    /* do not expire more than once per second, it is useless */
    if (!FORCE && last_remove == time_second)
        return;
    last_remove = time_second;

    /*
     * Because O_LIMIT rules refer to their parent rules, during the
     * first pass we only remove child rules and mark any pending
     * O_LIMIT_PARENT rules; those are removed in a second pass.
     */
next_pass:
    for (i = 0; i < curr_dyn_buckets; i++) {
        for (prev = NULL, q = ipfw_dyn_v[i]; q;) {
            /*
             * Logic can become complex here, so we split tests.
             */
            if (q == keep_me)
                goto next;
            if (rule != NULL && rule->stub != q->stub)
                goto next;      /* not the one we are looking for */
            if (q->dyn_type == O_LIMIT_PARENT) {
                /*
                 * Handle parents in the second pass;
                 * record that we need one.
                 */
                max_pass = 1;
                if (pass == 0)
                    goto next;
                if (FORCE && q->count != 0) {
                    /* XXX should not happen! */
                    kprintf("OUCH! cannot remove rule, "
                            "count %d\n", q->count);
                }
            } else {
                if (!FORCE && !TIME_LEQ(q->expire, time_second))
                    goto next;
            }
            unlinked = 1;
            UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
            continue;
next:
            prev = q;
            q = q->next;
        }
    }
    if (pass++ < max_pass)
        goto next_pass;

    if (unlinked)
        ++dyn_buckets_gen;

#undef FORCE
}

/**
 * Lookup a dynamic rule.
 */
static ipfw_dyn_rule *
lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
    struct tcphdr *tcp)
{
    /*
     * Stateful ipfw extensions.
     * Lookup into the dynamic session queue.
     */
#define MATCH_REVERSE   0
#define MATCH_FORWARD   1
#define MATCH_NONE      2
#define MATCH_UNKNOWN   3
    int i, dir = MATCH_NONE;
    ipfw_dyn_rule *prev, *q = NULL;

    if (ipfw_dyn_v == NULL)
        goto done;      /* not found */

    i = hash_packet(pkt);
    for (prev = NULL, q = ipfw_dyn_v[i]; q != NULL;) {
        if (q->dyn_type == O_LIMIT_PARENT)
            goto next;

        if (TIME_LEQ(q->expire, time_second)) {
            /*
             * Entry expired; skip.
             * Let ipfw_tick() take care of it.
             */
            goto next;
        }

        if (pkt->proto == q->id.proto) {
            if (pkt->src_ip == q->id.src_ip &&
                pkt->dst_ip == q->id.dst_ip &&
                pkt->src_port == q->id.src_port &&
                pkt->dst_port == q->id.dst_port) {
                dir = MATCH_FORWARD;
                break;
            }
            if (pkt->src_ip == q->id.dst_ip &&
                pkt->dst_ip == q->id.src_ip &&
                pkt->src_port == q->id.dst_port &&
                pkt->dst_port == q->id.src_port) {
                dir = MATCH_REVERSE;
                break;
            }
        }
next:
        prev = q;
        q = q->next;
    }
    if (q == NULL)
        goto done;      /* q = NULL, not found */

    if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
        u_char flags = pkt->flags & (TH_FIN | TH_SYN | TH_RST);

#define BOTH_SYN        (TH_SYN | (TH_SYN << 8))
#define BOTH_FIN        (TH_FIN | (TH_FIN << 8))

        q->state |= (dir == MATCH_FORWARD) ? flags : (flags << 8);
        switch (q->state) {
        case TH_SYN:                            /* opening */
            q->expire = time_second + dyn_syn_lifetime;
            break;

        case BOTH_SYN:                          /* move to established */
        case BOTH_SYN | TH_FIN:                 /* one side tries to close */
        case BOTH_SYN | (TH_FIN << 8):
            if (tcp) {
                uint32_t ack = ntohl(tcp->th_ack);

#define _SEQ_GE(a, b)   ((int)(a) - (int)(b) >= 0)

                if (dir == MATCH_FORWARD) {
                    if (q->ack_fwd == 0 ||
                        _SEQ_GE(ack, q->ack_fwd))
                        q->ack_fwd = ack;
                    else        /* ignore out-of-sequence */
                        break;
                } else {
                    if (q->ack_rev == 0 ||
                        _SEQ_GE(ack, q->ack_rev))
                        q->ack_rev = ack;
                    else        /* ignore out-of-sequence */
                        break;
                }
#undef _SEQ_GE
            }
            q->expire = time_second + dyn_ack_lifetime;
            break;

        case BOTH_SYN | BOTH_FIN:               /* both sides closed */
            KKASSERT(dyn_fin_lifetime < dyn_keepalive_period);
            q->expire = time_second + dyn_fin_lifetime;
            break;

        default:
#if 0
            /*
             * reset or some invalid combination, but can also
             * occur if we use keep-state the wrong way.
             */
            if ((q->state & ((TH_RST << 8) | TH_RST)) == 0)
                kprintf("invalid state: 0x%x\n", q->state);
#endif
            KKASSERT(dyn_rst_lifetime < dyn_keepalive_period);
            q->expire = time_second + dyn_rst_lifetime;
            break;
        }
    } else if (pkt->proto == IPPROTO_UDP) {
        q->expire = time_second + dyn_udp_lifetime;
    } else {
        /* other protocols */
        q->expire = time_second + dyn_short_lifetime;
    }
done:
    if (match_direction)
        *match_direction = dir;
    return q;
}

static struct ip_fw *
lookup_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp,
    uint16_t len, int *deny)
{
    struct ip_fw *rule = NULL;
    ipfw_dyn_rule *q;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    uint32_t gen;

    *deny = 0;
    gen = ctx->ipfw_gen;

    lockmgr(&dyn_lock, LK_SHARED);

    if (ctx->ipfw_gen != gen) {
        /*
         * The static rules were changed while we were waiting
         * for the dynamic hash table lock; deny this packet,
         * since it is _not_ known whether it is safe to keep
         * iterating the static rules.
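         *
         * install_state() and ipfw_match_uid() perform the same
         * ipfw_gen recheck after acquiring their respective locks.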
         */
        *deny = 1;
        goto back;
    }

    q = lookup_dyn_rule(pkt, match_direction, tcp);
    if (q == NULL) {
        rule = NULL;
    } else {
        rule = q->stub->rule[mycpuid];
        KKASSERT(rule->stub == q->stub && rule->cpuid == mycpuid);

        /* XXX */
        q->pcnt++;
        q->bcnt += len;
    }
back:
    lockmgr(&dyn_lock, LK_RELEASE);
    return rule;
}

static void
realloc_dynamic_table(void)
{
    ipfw_dyn_rule **old_dyn_v;
    uint32_t old_curr_dyn_buckets;

    KASSERT(dyn_buckets <= 65536 && (dyn_buckets & (dyn_buckets - 1)) == 0,
            ("invalid dyn_buckets %d\n", dyn_buckets));

    /* Save the current buckets array for later error recovery */
    old_dyn_v = ipfw_dyn_v;
    old_curr_dyn_buckets = curr_dyn_buckets;

    curr_dyn_buckets = dyn_buckets;
    for (;;) {
        ipfw_dyn_v = kmalloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *),
                             M_IPFW, M_NOWAIT | M_ZERO);
        if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2)
            break;

        curr_dyn_buckets /= 2;
        if (curr_dyn_buckets <= old_curr_dyn_buckets &&
            old_dyn_v != NULL) {
            /*
             * Don't try allocating a smaller buckets array; reuse
             * the old one, which already contains enough buckets.
             */
            break;
        }
    }

    if (ipfw_dyn_v != NULL) {
        if (old_dyn_v != NULL)
            kfree(old_dyn_v, M_IPFW);
    } else {
        /* Allocation failed, restore the old buckets array */
        ipfw_dyn_v = old_dyn_v;
        curr_dyn_buckets = old_curr_dyn_buckets;
    }

    if (ipfw_dyn_v != NULL)
        ++dyn_buckets_gen;
}

/**
 * Install state of type 'type' for a dynamic session.
 * The hash table contains three types of rules:
 * - regular rules (O_KEEP_STATE)
 * - rules for sessions with a limited number of sessions per user
 *   (O_LIMIT).  When they are created, the parent's count is
 *   increased by 1, and decreased on delete.  In this case,
 *   the third parameter is the parent rule and not the chain.
 * - "parent" rules for the above (O_LIMIT_PARENT).
 */
static ipfw_dyn_rule *
add_dyn_rule(struct ipfw_flow_id *id, uint8_t dyn_type, struct ip_fw *rule)
{
    ipfw_dyn_rule *r;
    int i;

    if (ipfw_dyn_v == NULL ||
        (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
        realloc_dynamic_table();
        if (ipfw_dyn_v == NULL)
            return NULL;        /* failed! */
    }
    i = hash_packet(id);

    r = kmalloc(sizeof(*r), M_IPFW, M_NOWAIT | M_ZERO);
    if (r == NULL) {
        kprintf("sorry cannot allocate state\n");
        return NULL;
    }

    /* increase refcount on parent, and set pointer */
    if (dyn_type == O_LIMIT) {
        ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;

        if (parent->dyn_type != O_LIMIT_PARENT)
            panic("invalid parent");
        parent->count++;
        r->parent = parent;
        rule = parent->stub->rule[mycpuid];
        KKASSERT(rule->stub == parent->stub);
    }
    KKASSERT(rule->cpuid == mycpuid && rule->stub != NULL);

    r->id = *id;
    r->expire = time_second + dyn_syn_lifetime;
    r->stub = rule->stub;
    r->dyn_type = dyn_type;
    r->pcnt = r->bcnt = 0;
    r->count = 0;

    r->bucket = i;
    r->next = ipfw_dyn_v[i];
    ipfw_dyn_v[i] = r;
    dyn_count++;
    dyn_buckets_gen++;
    DPRINTF("-- add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
            dyn_type,
            r->id.src_ip, r->id.src_port,
            r->id.dst_ip, r->id.dst_port, dyn_count);
    return r;
}

/**
 * Lookup a dynamic parent rule using pkt and rule as search keys.
 * If the lookup fails, then install one.
 */
static ipfw_dyn_rule *
lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule)
{
    ipfw_dyn_rule *q;
    int i;

    if (ipfw_dyn_v) {
        i = hash_packet(pkt);
        for (q = ipfw_dyn_v[i]; q != NULL; q = q->next) {
            if (q->dyn_type == O_LIMIT_PARENT &&
                rule->stub == q->stub &&
                pkt->proto == q->id.proto &&
                pkt->src_ip == q->id.src_ip &&
                pkt->dst_ip == q->id.dst_ip &&
                pkt->src_port == q->id.src_port &&
                pkt->dst_port == q->id.dst_port) {
                q->expire = time_second + dyn_short_lifetime;
                DPRINTF("lookup_dyn_parent found 0x%p\n", q);
                return q;
            }
        }
    }
    return add_dyn_rule(pkt, O_LIMIT_PARENT, rule);
}

/**
 * Install dynamic state for rule type cmd->o.opcode
 *
 * Returns 1 (failure) if state is not installed because of errors or
 * because session limitations are enforced.
 */
static int
install_state_locked(struct ip_fw *rule, ipfw_insn_limit *cmd,
    struct ip_fw_args *args)
{
    static int last_log; /* XXX */

    ipfw_dyn_rule *q;

    DPRINTF("-- install state type %d 0x%08x %u -> 0x%08x %u\n",
            cmd->o.opcode,
            args->f_id.src_ip, args->f_id.src_port,
            args->f_id.dst_ip, args->f_id.dst_port);

    q = lookup_dyn_rule(&args->f_id, NULL, NULL);
    if (q != NULL) {    /* should never occur */
        if (last_log != time_second) {
            last_log = time_second;
            kprintf(" install_state: entry already present, done\n");
        }
        return 0;
    }

    if (dyn_count >= dyn_max) {
        /*
         * Run out of slots; try to remove any expired rule.
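         * (This reclaim pass is rate-limited to at most once per
         * second by remove_dyn_rule_locked(); the non-NULL keep_me
         * argument keeps it from touching unexpired states.)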
         */
        remove_dyn_rule_locked(NULL, (ipfw_dyn_rule *)1);
        if (dyn_count >= dyn_max) {
            if (last_log != time_second) {
                last_log = time_second;
                kprintf("install_state: "
                        "Too many dynamic rules\n");
            }
            return 1;   /* cannot install, notify caller */
        }
    }

    switch (cmd->o.opcode) {
    case O_KEEP_STATE:  /* bidir rule */
        if (add_dyn_rule(&args->f_id, O_KEEP_STATE, rule) == NULL)
            return 1;
        break;

    case O_LIMIT:       /* limit number of sessions */
        {
            uint16_t limit_mask = cmd->limit_mask;
            struct ipfw_flow_id id;
            ipfw_dyn_rule *parent;

            DPRINTF("installing dyn-limit rule %d\n",
                    cmd->conn_limit);

            id.dst_ip = id.src_ip = 0;
            id.dst_port = id.src_port = 0;
            id.proto = args->f_id.proto;

            if (limit_mask & DYN_SRC_ADDR)
                id.src_ip = args->f_id.src_ip;
            if (limit_mask & DYN_DST_ADDR)
                id.dst_ip = args->f_id.dst_ip;
            if (limit_mask & DYN_SRC_PORT)
                id.src_port = args->f_id.src_port;
            if (limit_mask & DYN_DST_PORT)
                id.dst_port = args->f_id.dst_port;

            parent = lookup_dyn_parent(&id, rule);
            if (parent == NULL) {
                kprintf("add parent failed\n");
                return 1;
            }

            if (parent->count >= cmd->conn_limit) {
                /*
                 * See if we can remove some expired rule.
                 */
                remove_dyn_rule_locked(rule, parent);
                if (parent->count >= cmd->conn_limit) {
                    if (fw_verbose &&
                        last_log != time_second) {
                        last_log = time_second;
                        log(LOG_SECURITY | LOG_DEBUG,
                            "drop session, "
                            "too many entries\n");
                    }
                    return 1;
                }
            }
            if (add_dyn_rule(&args->f_id, O_LIMIT,
                             (struct ip_fw *)parent) == NULL)
                return 1;
        }
        break;

    default:
        kprintf("unknown dynamic rule type %u\n", cmd->o.opcode);
        return 1;
    }
    lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */
    return 0;
}

static int
install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
    struct ip_fw_args *args, int *deny)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    uint32_t gen;
    int ret = 0;

    *deny = 0;
    gen = ctx->ipfw_gen;

    lockmgr(&dyn_lock, LK_EXCLUSIVE);
    if (ctx->ipfw_gen != gen) {
        /* See the comment in lookup_rule() */
        *deny = 1;
    } else {
        ret = install_state_locked(rule, cmd, args);
    }
    lockmgr(&dyn_lock, LK_RELEASE);

    return ret;
}

/*
 * Transmit a TCP packet, containing either a RST or a keepalive.
 * When flags & TH_RST, we are sending a RST packet, because a
 * "reset" action matched the packet.
 * Otherwise we are sending a keepalive, and flags & TH_SYN determines
 * the direction: forward if set, reverse if clear.
 */
static void
send_pkt(struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags)
{
    struct mbuf *m;
    struct ip *ip;
    struct tcphdr *tcp;
    struct route sro;   /* fake route */

    MGETHDR(m, MB_DONTWAIT, MT_HEADER);
    if (m == NULL)
        return;
    m->m_pkthdr.rcvif = NULL;
    m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
    m->m_data += max_linkhdr;

    ip = mtod(m, struct ip *);
    bzero(ip, m->m_len);
    tcp = (struct tcphdr *)(ip + 1); /* no IP options */
    ip->ip_p = IPPROTO_TCP;
    tcp->th_off = 5;

    /*
     * Assume we are sending a RST (or a keepalive in the reverse
     * direction); swap the source and destination addresses and ports.
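     * For a keepalive in the forward direction, the addresses are
     * rewritten below under the TH_SYN check.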
     */
    ip->ip_src.s_addr = htonl(id->dst_ip);
    ip->ip_dst.s_addr = htonl(id->src_ip);
    tcp->th_sport = htons(id->dst_port);
    tcp->th_dport = htons(id->src_port);
    if (flags & TH_RST) {       /* we are sending a RST */
        if (flags & TH_ACK) {
            tcp->th_seq = htonl(ack);
            tcp->th_ack = htonl(0);
            tcp->th_flags = TH_RST;
        } else {
            if (flags & TH_SYN)
                seq++;
            tcp->th_seq = htonl(0);
            tcp->th_ack = htonl(seq);
            tcp->th_flags = TH_RST | TH_ACK;
        }
    } else {
        /*
         * We are sending a keepalive.  flags & TH_SYN determines
         * the direction, forward if set, reverse if clear.
         * NOTE: seq and ack are always assumed to be correct
         * as set by the caller.  This may be confusing...
         */
        if (flags & TH_SYN) {
            /*
             * we have to rewrite the correct addresses!
             */
            ip->ip_dst.s_addr = htonl(id->dst_ip);
            ip->ip_src.s_addr = htonl(id->src_ip);
            tcp->th_dport = htons(id->dst_port);
            tcp->th_sport = htons(id->src_port);
        }
        tcp->th_seq = htonl(seq);
        tcp->th_ack = htonl(ack);
        tcp->th_flags = TH_ACK;
    }

    /*
     * Set ip_len to the payload size so we can compute
     * the tcp checksum on the pseudoheader.
     * XXX check this, could save a couple of words?
     */
    ip->ip_len = htons(sizeof(struct tcphdr));
    tcp->th_sum = in_cksum(m, m->m_pkthdr.len);

    /*
     * Now fill in the fields left out earlier.
     */
    ip->ip_ttl = ip_defttl;
    ip->ip_len = m->m_pkthdr.len;

    bzero(&sro, sizeof(sro));
    ip_rtaddr(ip->ip_dst, &sro);

    m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED;
    ip_output(m, NULL, &sro, 0, NULL, NULL);
    if (sro.ro_rt)
        RTFREE(sro.ro_rt);
}

/*
 * Send a reject message, consuming the mbuf passed as an argument.
 */
static void
send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
{
    if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
        /* We need the IP header in host order for icmp_error(). */
        if (args->eh != NULL) {
            struct ip *ip = mtod(args->m, struct ip *);

            ip->ip_len = ntohs(ip->ip_len);
            ip->ip_off = ntohs(ip->ip_off);
        }
        icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
    } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
        struct tcphdr *const tcp =
            L3HDR(struct tcphdr, mtod(args->m, struct ip *));

        if ((tcp->th_flags & TH_RST) == 0) {
            send_pkt(&args->f_id, ntohl(tcp->th_seq),
                     ntohl(tcp->th_ack), tcp->th_flags | TH_RST);
        }
        m_freem(args->m);
    } else {
        m_freem(args->m);
    }
    args->m = NULL;
}

/**
 * Given an ip_fw *, lookup_next_rule will return a pointer
 * to the next rule, which can be either the jump
 * target (for skipto instructions) or the next one in the list (in
 * all other cases including a missing jump target).
 * The result is also written to the "next_rule" field of the rule.
 * Backward jumps are not allowed, so we start looking from the next
 * rule...
 *
 * This never returns NULL -- in case we do not have an exact match,
 * the next rule is returned.  When the ruleset is changed,
 * the pointers are flushed so we are always correct.
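 *
 * For example (hypothetical rule numbers), if rule 100 is
 * "skipto 300" and the chain holds rules 100, 200 and 300, the scan
 * below starts at rule 200, stops at rule 300 and caches it in
 * me->next_rule.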
 */
static struct ip_fw *
lookup_next_rule(struct ip_fw *me)
{
    struct ip_fw *rule = NULL;
    ipfw_insn *cmd;

    /* look for action, in case it is a skipto */
    cmd = ACTION_PTR(me);
    if (cmd->opcode == O_LOG)
        cmd += F_LEN(cmd);
    if (cmd->opcode == O_SKIPTO) {
        for (rule = me->next; rule; rule = rule->next) {
            if (rule->rulenum >= cmd->arg1)
                break;
        }
    }
    if (rule == NULL)   /* failure or not a skipto */
        rule = me->next;
    me->next_rule = rule;
    return rule;
}

static int
_ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif,
    enum ipfw_opcodes opcode, uid_t uid)
{
    struct in_addr src_ip, dst_ip;
    struct inpcbinfo *pi;
    int wildcard;
    struct inpcb *pcb;

    if (fid->proto == IPPROTO_TCP) {
        wildcard = 0;
        pi = &tcbinfo[mycpuid];
    } else if (fid->proto == IPPROTO_UDP) {
        wildcard = 1;
        pi = &udbinfo;
    } else {
        return 0;
    }

    /*
     * Values in 'fid' are in host byte order
     */
    dst_ip.s_addr = htonl(fid->dst_ip);
    src_ip.s_addr = htonl(fid->src_ip);
    if (oif) {
        pcb = in_pcblookup_hash(pi,
                dst_ip, htons(fid->dst_port),
                src_ip, htons(fid->src_port),
                wildcard, oif);
    } else {
        pcb = in_pcblookup_hash(pi,
                src_ip, htons(fid->src_port),
                dst_ip, htons(fid->dst_port),
                wildcard, NULL);
    }
    if (pcb == NULL || pcb->inp_socket == NULL)
        return 0;

    if (opcode == O_UID) {
#define socheckuid(a, b)        ((a)->so_cred->cr_uid != (b))
        return !socheckuid(pcb->inp_socket, uid);
#undef socheckuid
    } else {
        return groupmember(uid, pcb->inp_socket->so_cred);
    }
}

static int
ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif,
    enum ipfw_opcodes opcode, uid_t uid, int *deny)
{
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];
    uint32_t gen;
    int match = 0;

    *deny = 0;
    gen = ctx->ipfw_gen;

    get_mplock();
    if (gen != ctx->ipfw_gen) {
        /* See the comment in lookup_rule() */
        *deny = 1;
    } else {
        match = _ipfw_match_uid(fid, oif, opcode, uid);
    }
    rel_mplock();
    return match;
}

/*
 * The main check routine for the firewall.
 *
 * All arguments are in args so we can modify them and return them
 * back to the caller.
 *
 * Parameters:
 *
 *  args->m     (in/out) The packet; we set it to NULL when/if we
 *              nuke it.  Starts with the IP header.
 *  args->eh    (in) MAC header if present, or NULL for a layer-3
 *              packet.
 *  args->oif   (in) Outgoing interface, or NULL if the packet is
 *              incoming.  The incoming interface is in the mbuf.
 *  args->rule  (in/out) Pointer to the last matching rule.
 *  args->f_id  (out) Addresses grabbed from the packet.
 *
 * Return value:
 *
 *  If the packet was denied/rejected and has been dropped, *m is
 *  equal to NULL upon return.
 *
 *  IP_FW_DENY      the packet must be dropped.
 *  IP_FW_PASS      the packet is to be accepted and routed normally.
 *  IP_FW_DIVERT    divert the packet to port (args->cookie)
 *  IP_FW_TEE       tee the packet to port (args->cookie)
 *  IP_FW_DUMMYNET  send the packet to pipe/queue (args->cookie)
 */
static int
ipfw_chk(struct ip_fw_args *args)
{
    /*
     * Local variables hold state during the processing of a packet.
     *
     * IMPORTANT NOTE: to speed up the processing of rules, there
     * are some assumptions on the values of the variables, which
     * are documented here.  Should you change them, please check
     * the implementation of the various instructions to make sure
     * that they still work.
     *
     * args->eh   The MAC header.  It is non-NULL for a layer-2
     *    packet, it is NULL for a layer-3 packet.
     *
     * m | args->m   Pointer to the mbuf, as received from the caller.
     *    It may change if ipfw_chk() does an m_pullup, or if it
     *    consumes the packet because it calls send_reject().
     *    XXX This has to change, so that ipfw_chk() never modifies
     *    or consumes the buffer.
     *    ip is simply an alias of the value of m, and it is kept
     *    in sync with it (the packet is supposed to start with
     *    the ip header).
     */
    struct mbuf *m = args->m;
    struct ip *ip = mtod(m, struct ip *);

    /*
     * oif | args->oif   If NULL, ipfw_chk has been called on the
     *    inbound path (ether_input, ip_input).
     *    If non-NULL, ipfw_chk has been called on the outbound path
     *    (ether_output, ip_output).
     */
    struct ifnet *oif = args->oif;

    struct ip_fw *f = NULL;     /* matching rule */
    int retval = IP_FW_PASS;
    struct m_tag *mtag;
    struct divert_info *divinfo;

    /*
     * hlen   The length of the IPv4 header.
     *    hlen > 0 means we have an IPv4 packet.
     */
    u_int hlen = 0;     /* hlen > 0 means we have an IP pkt */

    /*
     * offset   The offset of a fragment.  offset != 0 means that
     *    we have a fragment at this offset of an IPv4 packet.
     *    offset == 0 means that (if this is an IPv4 packet)
     *    this is the first or only fragment.
     */
    u_short offset = 0;

    /*
     * Local copies of addresses.  They are only valid if we have
     * an IP packet.
     *
     * proto   The protocol.  Set to 0 for non-ip packets,
     *    or to the protocol read from the packet otherwise.
     *    proto != 0 means that we have an IPv4 packet.
     *
     * src_port, dst_port   port numbers, in HOST format.  Only
     *    valid for TCP and UDP packets.
     *
     * src_ip, dst_ip   ip addresses, in NETWORK format.
     *    Only valid for IPv4 packets.
     */
    uint8_t proto;
    uint16_t src_port = 0, dst_port = 0;        /* NOTE: host format */
    struct in_addr src_ip, dst_ip;              /* NOTE: network format */
    uint16_t ip_len = 0;

    /*
     * dyn_dir = MATCH_UNKNOWN when rules are unchecked,
     *    MATCH_NONE when checked and not matched (dyn_f = NULL),
     *    MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL)
     */
    int dyn_dir = MATCH_UNKNOWN;
    struct ip_fw *dyn_f = NULL;
    struct ipfw_context *ctx = ipfw_ctx[mycpuid];

    if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED)
        return IP_FW_PASS;      /* accept */

    if (args->eh == NULL ||     /* layer 3 packet */
        (m->m_pkthdr.len >= sizeof(struct ip) &&
         ntohs(args->eh->ether_type) == ETHERTYPE_IP))
        hlen = ip->ip_hl << 2;

    /*
     * Collect parameters into local variables for faster matching.
     */
    if (hlen == 0) {    /* do not grab addresses for non-ip pkts */
        proto = args->f_id.proto = 0;   /* mark f_id invalid */
        goto after_ip_checks;
    }

    proto = args->f_id.proto = ip->ip_p;
    src_ip = ip->ip_src;
    dst_ip = ip->ip_dst;
    if (args->eh != NULL) {     /* layer 2 packets are as on the wire */
        offset = ntohs(ip->ip_off) & IP_OFFMASK;
        ip_len = ntohs(ip->ip_len);
    } else {
        offset = ip->ip_off & IP_OFFMASK;
        ip_len = ip->ip_len;
    }

#define PULLUP_TO(len)                          \
do {                                            \
    if (m->m_len < (len)) {                     \
        args->m = m = m_pullup(m, (len));       \
        if (m == NULL)                          \
            goto pullup_failed;                 \
        ip = mtod(m, struct ip *);              \
    }                                           \
} while (0)

    if (offset == 0) {
        switch (proto) {
        case IPPROTO_TCP:
            {
                struct tcphdr *tcp;

                PULLUP_TO(hlen + sizeof(struct tcphdr));
                tcp = L3HDR(struct tcphdr, ip);
                dst_port = tcp->th_dport;
                src_port = tcp->th_sport;
                args->f_id.flags = tcp->th_flags;
            }
            break;

        case IPPROTO_UDP:
            {
                struct udphdr *udp;

                PULLUP_TO(hlen + sizeof(struct udphdr));
                udp = L3HDR(struct udphdr, ip);
                dst_port = udp->uh_dport;
                src_port = udp->uh_sport;
            }
            break;

        case IPPROTO_ICMP:
            PULLUP_TO(hlen + 4);        /* type, code and checksum. */
            args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type;
            break;

        default:
            break;
        }
    }

#undef PULLUP_TO

    args->f_id.src_ip = ntohl(src_ip.s_addr);
    args->f_id.dst_ip = ntohl(dst_ip.s_addr);
    args->f_id.src_port = src_port = ntohs(src_port);
    args->f_id.dst_port = dst_port = ntohs(dst_port);

after_ip_checks:
    if (args->rule) {
        /*
         * Packet has already been tagged.  Look for the next rule
         * to restart processing.
         *
         * If fw_one_pass != 0 then just accept it.
         * XXX should not happen here, but optimized out in
         * the caller.
         */
        if (fw_one_pass)
            return IP_FW_PASS;

        /* This rule is being/has been flushed */
        if (ipfw_flushing)
            return IP_FW_DENY;

        KASSERT(args->rule->cpuid == mycpuid,
                ("rule used on cpu%d\n", mycpuid));

        /* This rule was deleted */
        if (args->rule->rule_flags & IPFW_RULE_F_INVALID)
            return IP_FW_DENY;

        f = args->rule->next_rule;
        if (f == NULL)
            f = lookup_next_rule(args->rule);
    } else {
        /*
         * Find the starting rule.  It can be either the first
         * one, or the one after divert_rule if so requested.
         */
        int skipto;

        mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL);
        if (mtag != NULL) {
            divinfo = m_tag_data(mtag);
            skipto = divinfo->skipto;
        } else {
            skipto = 0;
        }

        f = ctx->ipfw_layer3_chain;
        if (args->eh == NULL && skipto != 0) {
            /* No skipto during rule flushing */
            if (ipfw_flushing)
                return IP_FW_DENY;

            if (skipto >= IPFW_DEFAULT_RULE)
                return IP_FW_DENY;      /* invalid */

            while (f && f->rulenum <= skipto)
                f = f->next;
            if (f == NULL)      /* drop packet */
                return IP_FW_DENY;
        } else if (ipfw_flushing) {
            /* Rules are being flushed; skip to default rule */
            f = ctx->ipfw_default_rule;
        }
    }
    if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL)
        m_tag_delete(m, mtag);

    /*
     * Now scan the rules, and parse microinstructions for each rule.
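     *
     * The outer loop below walks the rule chain; the inner loop
     * steps through each rule's microinstructions, accumulating
     * "match" per instruction, with the F_NOT/F_OR logic applied at
     * the bottom of the loop.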
     */
    for (; f; f = f->next) {
        int l, cmdlen;
        ipfw_insn *cmd;
        int skip_or;    /* skip rest of OR block */

again:
        if (ctx->ipfw_set_disable & (1 << f->set))
            continue;

        skip_or = 0;
        for (l = f->cmd_len, cmd = f->cmd; l > 0;
             l -= cmdlen, cmd += cmdlen) {
            int match, deny;

            /*
             * check_body is a jump target used when we find a
             * CHECK_STATE, and need to jump to the body of
             * the target rule.
             */
check_body:
            cmdlen = F_LEN(cmd);
            /*
             * An OR block (insn_1 || .. || insn_n) has the
             * F_OR bit set in all but the last instruction.
             * The first match will set "skip_or", and cause
             * the following instructions to be skipped until
             * past the one with the F_OR bit clear.
             */
            if (skip_or) {      /* skip this instruction */
                if ((cmd->len & F_OR) == 0)
                    skip_or = 0;        /* next one is good */
                continue;
            }
            match = 0;  /* set to 1 if we succeed */

            switch (cmd->opcode) {
            /*
             * The first set of opcodes compares the packet's
             * fields with some pattern, setting 'match' if a
             * match is found.  At the end of the loop there is
             * logic to deal with F_NOT and F_OR flags associated
             * with the opcode.
             */
            case O_NOP:
                match = 1;
                break;

            case O_FORWARD_MAC:
                kprintf("ipfw: opcode %d unimplemented\n",
                        cmd->opcode);
                break;

            case O_GID:
            case O_UID:
                /*
                 * We only check offset == 0 && proto != 0,
                 * as this ensures that we have an IPv4
                 * packet with the ports info.
                 */
                if (offset != 0)
                    break;

                match = ipfw_match_uid(&args->f_id, oif,
                        cmd->opcode,
                        (uid_t)((ipfw_insn_u32 *)cmd)->d[0],
                        &deny);
                if (deny)
                    return IP_FW_DENY;
                break;

            case O_RECV:
                match = iface_match(m->m_pkthdr.rcvif,
                                    (ipfw_insn_if *)cmd);
                break;

            case O_XMIT:
                match = iface_match(oif, (ipfw_insn_if *)cmd);
                break;

            case O_VIA:
                match = iface_match(oif ? oif :
                        m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd);
                break;

            case O_MACADDR2:
                if (args->eh != NULL) { /* have MAC header */
                    uint32_t *want = (uint32_t *)
                        ((ipfw_insn_mac *)cmd)->addr;
                    uint32_t *mask = (uint32_t *)
                        ((ipfw_insn_mac *)cmd)->mask;
                    uint32_t *hdr = (uint32_t *)args->eh;

                    match =
                        (want[0] == (hdr[0] & mask[0]) &&
                         want[1] == (hdr[1] & mask[1]) &&
                         want[2] == (hdr[2] & mask[2]));
                }
                break;

            case O_MAC_TYPE:
                if (args->eh != NULL) {
                    uint16_t t =
                        ntohs(args->eh->ether_type);
                    uint16_t *p =
                        ((ipfw_insn_u16 *)cmd)->ports;
                    int i;

                    /* Special vlan handling */
                    if (m->m_flags & M_VLANTAG)
                        t = ETHERTYPE_VLAN;

                    for (i = cmdlen - 1; !match && i > 0;
                         i--, p += 2) {
                        match =
                            (t >= p[0] && t <= p[1]);
                    }
                }
                break;

            case O_FRAG:
                match = (hlen > 0 && offset != 0);
                break;

            case O_IN:  /* "out" is "not in" */
                match = (oif == NULL);
                break;

            case O_LAYER2:
                match = (args->eh != NULL);
                break;

            case O_PROTO:
                /*
                 * We do not allow an arg of 0 so the
                 * check of "proto" alone suffices.
				 */
				match = (proto == cmd->arg1);
				break;

			case O_IP_SRC:
				match = (hlen > 0 &&
				    ((ipfw_insn_ip *)cmd)->addr.s_addr ==
				    src_ip.s_addr);
				break;

			case O_IP_SRC_MASK:
				match = (hlen > 0 &&
				    ((ipfw_insn_ip *)cmd)->addr.s_addr ==
				    (src_ip.s_addr &
				     ((ipfw_insn_ip *)cmd)->mask.s_addr));
				break;

			case O_IP_SRC_ME:
				if (hlen > 0) {
					struct ifnet *tif;

					tif = INADDR_TO_IFP(&src_ip);
					match = (tif != NULL);
				}
				break;

			case O_IP_DST_SET:
			case O_IP_SRC_SET:
				if (hlen > 0) {
					uint32_t *d = (uint32_t *)(cmd + 1);
					uint32_t addr =
					    cmd->opcode == O_IP_DST_SET ?
						args->f_id.dst_ip :
						args->f_id.src_ip;

					if (addr < d[0])
						break;
					addr -= d[0];	/* subtract base */
					match =
					    (addr < cmd->arg1) &&
					    (d[1 + (addr >> 5)] &
					     (1 << (addr & 0x1f)));
				}
				break;

			case O_IP_DST:
				match = (hlen > 0 &&
				    ((ipfw_insn_ip *)cmd)->addr.s_addr ==
				    dst_ip.s_addr);
				break;

			case O_IP_DST_MASK:
				match = (hlen > 0) &&
				    (((ipfw_insn_ip *)cmd)->addr.s_addr ==
				     (dst_ip.s_addr &
				      ((ipfw_insn_ip *)cmd)->mask.s_addr));
				break;

			case O_IP_DST_ME:
				if (hlen > 0) {
					struct ifnet *tif;

					tif = INADDR_TO_IFP(&dst_ip);
					match = (tif != NULL);
				}
				break;

			case O_IP_SRCPORT:
			case O_IP_DSTPORT:
				/*
				 * offset == 0 && proto != 0 is enough
				 * to guarantee that we have an IPv4
				 * packet with port info.
				 */
				if ((proto == IPPROTO_UDP ||
				     proto == IPPROTO_TCP) && offset == 0) {
					uint16_t x =
					    (cmd->opcode == O_IP_SRCPORT) ?
						src_port : dst_port;
					uint16_t *p =
					    ((ipfw_insn_u16 *)cmd)->ports;
					int i;

					for (i = cmdlen - 1; !match && i > 0;
					     i--, p += 2) {
						match =
						    (x >= p[0] && x <= p[1]);
					}
				}
				break;

			case O_ICMPTYPE:
				match = (offset == 0 &&
				    proto == IPPROTO_ICMP &&
				    icmptype_match(ip, (ipfw_insn_u32 *)cmd));
				break;

			case O_IPOPT:
				match = (hlen > 0 && ipopts_match(ip, cmd));
				break;

			case O_IPVER:
				match = (hlen > 0 && cmd->arg1 == ip->ip_v);
				break;

			case O_IPTTL:
				match = (hlen > 0 && cmd->arg1 == ip->ip_ttl);
				break;

			case O_IPID:
				match = (hlen > 0 &&
				    cmd->arg1 == ntohs(ip->ip_id));
				break;

			case O_IPLEN:
				match = (hlen > 0 && cmd->arg1 == ip_len);
				break;

			case O_IPPRECEDENCE:
				match = (hlen > 0 &&
				    (cmd->arg1 == (ip->ip_tos & 0xe0)));
				break;

			case O_IPTOS:
				match = (hlen > 0 &&
				    flags_match(cmd, ip->ip_tos));
				break;

			case O_TCPFLAGS:
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    flags_match(cmd,
					L3HDR(struct tcphdr, ip)->th_flags));
				break;

			case O_TCPOPTS:
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    tcpopts_match(ip, cmd));
				break;

			case O_TCPSEQ:
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    ((ipfw_insn_u32 *)cmd)->d[0] ==
					L3HDR(struct tcphdr, ip)->th_seq);
				break;

			case O_TCPACK:
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    ((ipfw_insn_u32 *)cmd)->d[0] ==
					L3HDR(struct tcphdr, ip)->th_ack);
				break;

			case O_TCPWIN:
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    cmd->arg1 ==
					L3HDR(struct tcphdr, ip)->th_win);
				break;

			case O_ESTAB:
				/* reject packets which have SYN only */
				/* XXX should i also check for TH_ACK ? */
				match = (proto == IPPROTO_TCP && offset == 0 &&
				    (L3HDR(struct tcphdr, ip)->th_flags &
				     (TH_RST | TH_ACK | TH_SYN)) != TH_SYN);
				break;

			case O_LOG:
				if (fw_verbose)
					ipfw_log(f, hlen, args->eh, m, oif);
				match = 1;
				break;

			case O_PROB:
				match = (krandom() <
					 ((ipfw_insn_u32 *)cmd)->d[0]);
				break;

			/*
			 * The second set of opcodes represents 'actions',
			 * i.e. the terminal part of a rule once the packet
			 * matches all previous patterns.
			 * Typically there is only one action for each rule,
			 * and the opcode is stored at the end of the rule
			 * (but there are exceptions -- see below).
			 *
			 * In general, here we set retval and terminate the
			 * outer loop (would be a 'break 3' in some language,
			 * but we need to do a 'goto done').
			 *
			 * Exceptions:
			 * O_COUNT and O_SKIPTO actions:
			 *   instead of terminating, we jump to the next rule
			 *   ('goto next_rule', equivalent to a 'break 2'),
			 *   or to the SKIPTO target ('goto again' after
			 *   having set f, cmd and l), respectively.
			 *
			 * O_LIMIT and O_KEEP_STATE: these opcodes are
			 *   not real 'actions', and are stored right
			 *   before the 'action' part of the rule.
			 *   These opcodes try to install an entry in the
			 *   state tables; if successful, we continue with
			 *   the next opcode (match=1; break;), otherwise
			 *   the packet must be dropped ('goto done' after
			 *   setting retval). If the static rules are changed
			 *   during the state installation, the packet will
			 *   be dropped and the rule's stats will not be
			 *   updated ('return IP_FW_DENY').
			 *
			 * O_PROBE_STATE and O_CHECK_STATE: these opcodes
			 *   cause a lookup of the state table, and a jump
			 *   to the 'action' part of the parent rule
			 *   ('goto check_body') if an entry is found, or
			 *   (CHECK_STATE only) a jump to the next rule if
			 *   the entry is not found ('goto next_rule').
			 *   The result of the lookup is cached, so that
			 *   further instances of these opcodes become
			 *   effectively NOPs. If the static rules are
			 *   changed during the state lookup, the packet
			 *   will be dropped and the rule's stats will not
			 *   be updated ('return IP_FW_DENY').
			 */
			case O_LIMIT:
			case O_KEEP_STATE:
				if (!(f->rule_flags & IPFW_RULE_F_STATE)) {
					kprintf("%s rule (%d) is not ready "
						"on cpu%d\n",
						cmd->opcode == O_LIMIT ?
						"limit" : "keep state",
						f->rulenum, f->cpuid);
					goto next_rule;
				}
				if (install_state(f,
				    (ipfw_insn_limit *)cmd, args, &deny)) {
					if (deny)
						return IP_FW_DENY;

					retval = IP_FW_DENY;
					goto done; /* error/limit violation */
				}
				if (deny)
					return IP_FW_DENY;
				match = 1;
				break;

			case O_PROBE_STATE:
			case O_CHECK_STATE:
				/*
				 * dynamic rules are checked at the first
				 * keep-state or check-state occurrence,
				 * with the result being stored in dyn_dir.
				 * The compiler introduces a PROBE_STATE
				 * instruction for us when we have a
				 * KEEP_STATE (because PROBE_STATE needs
				 * to be run first).
				 */
				if (dyn_dir == MATCH_UNKNOWN) {
					dyn_f = lookup_rule(&args->f_id,
						&dyn_dir,
						proto == IPPROTO_TCP ?
						L3HDR(struct tcphdr, ip) : NULL,
						ip_len, &deny);
					if (deny)
						return IP_FW_DENY;
					if (dyn_f != NULL) {
						/*
						 * Found a rule from a dynamic
						 * entry; jump to the 'action'
						 * part of the rule.
						 */
						f = dyn_f;
						cmd = ACTION_PTR(f);
						l = f->cmd_len - f->act_ofs;
						goto check_body;
					}
				}
				/*
				 * Dynamic entry not found. If CHECK_STATE,
				 * skip to next rule, if PROBE_STATE just
				 * ignore and continue with next opcode.
				 */
				if (cmd->opcode == O_CHECK_STATE)
					goto next_rule;
				else if (!(f->rule_flags & IPFW_RULE_F_STATE))
					goto next_rule; /* not ready yet */
				match = 1;
				break;

			case O_ACCEPT:
				retval = IP_FW_PASS;	/* accept */
				goto done;

			case O_PIPE:
			case O_QUEUE:
				args->rule = f; /* report matching rule */
				args->cookie = cmd->arg1;
				retval = IP_FW_DUMMYNET;
				goto done;

			case O_DIVERT:
			case O_TEE:
				if (args->eh) /* not on layer 2 */
					break;

				mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT,
						 sizeof(*divinfo), MB_DONTWAIT);
				if (mtag == NULL) {
					retval = IP_FW_DENY;
					goto done;
				}
				divinfo = m_tag_data(mtag);

				divinfo->skipto = f->rulenum;
				divinfo->port = cmd->arg1;
				divinfo->tee = (cmd->opcode == O_TEE);
				m_tag_prepend(m, mtag);

				args->cookie = cmd->arg1;
				retval = (cmd->opcode == O_DIVERT) ?
					 IP_FW_DIVERT : IP_FW_TEE;
				goto done;

			case O_COUNT:
			case O_SKIPTO:
				f->pcnt++;	/* update stats */
				f->bcnt += ip_len;
				f->timestamp = time_second;
				if (cmd->opcode == O_COUNT)
					goto next_rule;

				/* handle skipto */
				if (f->next_rule == NULL)
					lookup_next_rule(f);
				f = f->next_rule;
				goto again;

			case O_REJECT:
				/*
				 * Drop the packet and send a reject notice
				 * if the packet is not ICMP (or is an ICMP
				 * query), and it is not multicast/broadcast.
				 */
				if (hlen > 0 &&
				    (proto != IPPROTO_ICMP ||
				     is_icmp_query(ip)) &&
				    !(m->m_flags & (M_BCAST | M_MCAST)) &&
				    !IN_MULTICAST(ntohl(dst_ip.s_addr))) {
					/*
					 * Update statistics before the
					 * possible blocking 'send_reject'
					 */
					f->pcnt++;
					f->bcnt += ip_len;
					f->timestamp = time_second;

					send_reject(args, cmd->arg1,
						    offset, ip_len);
					m = args->m;

					/*
					 * Return directly here, rule stats
					 * have been updated above.
					 */
					return IP_FW_DENY;
				}
				/* FALLTHROUGH */
			case O_DENY:
				retval = IP_FW_DENY;
				goto done;

			case O_FORWARD_IP:
				if (args->eh) /* not valid on layer2 pkts */
					break;
				if (!dyn_f || dyn_dir == MATCH_FORWARD) {
					struct sockaddr_in *sin;

					mtag = m_tag_get(PACKET_TAG_IPFORWARD,
						sizeof(*sin), MB_DONTWAIT);
					if (mtag == NULL) {
						retval = IP_FW_DENY;
						goto done;
					}
					sin = m_tag_data(mtag);

					/* Structure copy */
					*sin = ((ipfw_insn_sa *)cmd)->sa;

					m_tag_prepend(m, mtag);
					m->m_pkthdr.fw_flags |=
						IPFORWARD_MBUF_TAGGED;
				}
				retval = IP_FW_PASS;
				goto done;

			default:
				panic("-- unknown opcode %d\n", cmd->opcode);
			} /* end of switch() on opcodes */

			if (cmd->len & F_NOT)
				match = !match;

			if (match) {
				if (cmd->len & F_OR)
					skip_or = 1;
			} else {
				if (!(cmd->len & F_OR)) /* not an OR block, */
					break;		/* try next rule    */
			}
		} /* end of inner for, scan opcodes */

next_rule:;		/* try next rule */

	} /* end of outer for, scan rules */
	kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n");
	return IP_FW_DENY;

done:
	/* Update statistics */
	f->pcnt++;
	f->bcnt += ip_len;
	f->timestamp = time_second;
	return retval;

pullup_failed:
	if (fw_verbose)
		kprintf("pullup failed\n");
	return IP_FW_DENY;
}

static void
ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
{
	struct m_tag *mtag;
	struct dn_pkt *pkt;
	ipfw_insn *cmd;
	const struct ipfw_flow_id *id;
	struct dn_flow_id *fid;

	M_ASSERTPKTHDR(m);

	mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT);
	if (mtag == NULL) {
		m_freem(m);
		return;
	}
	m_tag_prepend(m, mtag);

	pkt = m_tag_data(mtag);
	bzero(pkt, sizeof(*pkt));

	cmd = fwa->rule->cmd + fwa->rule->act_ofs;
	if (cmd->opcode == O_LOG)
		cmd += F_LEN(cmd);
	KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE,
		("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode));

	pkt->dn_m = m;
	pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK);
	pkt->ifp = fwa->oif;
	pkt->pipe_nr = pipe_nr;

	pkt->cpuid = mycpuid;
	pkt->msgport = curnetport;

	id = &fwa->f_id;
	fid = &pkt->id;
	fid->fid_dst_ip = id->dst_ip;
	fid->fid_src_ip = id->src_ip;
	fid->fid_dst_port = id->dst_port;
	fid->fid_src_port = id->src_port;
	fid->fid_proto = id->proto;
	fid->fid_flags = id->flags;

	ipfw_ref_rule(fwa->rule);
	pkt->dn_priv = fwa->rule;
	pkt->dn_unref_priv = ipfw_unref_rule;

	if (cmd->opcode == O_PIPE)
		pkt->dn_flags |= DN_FLAGS_IS_PIPE;

	m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED;
}
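/*
 * Illustrative note (not part of the original flow comments): a packet
 * classified by ipfw_chk() as IP_FW_DUMMYNET carries the pipe/queue
 * number in args->cookie, and the pfil hooks further below hand the
 * mbuf over as in
 *
 *	ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args);
 */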
/*
 * When a rule is added/deleted, clear the next_rule pointers in all rules.
 * These will be reconstructed on the fly as packets are matched.
 * Must be called at splimp().
 */
static void
ipfw_flush_rule_ptrs(struct ipfw_context *ctx)
{
	struct ip_fw *rule;

	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
		rule->next_rule = NULL;
}

static __inline void
ipfw_inc_static_count(struct ip_fw *rule)
{
	/* Static rule's counts are updated only on CPU0 */
	KKASSERT(mycpuid == 0);

	static_count++;
	static_ioc_len += IOC_RULESIZE(rule);
}

static __inline void
ipfw_dec_static_count(struct ip_fw *rule)
{
	int l = IOC_RULESIZE(rule);

	/* Static rule's counts are updated only on CPU0 */
	KKASSERT(mycpuid == 0);

	KASSERT(static_count > 0, ("invalid static count %u\n", static_count));
	static_count--;

	KASSERT(static_ioc_len >= l,
		("invalid static len %u\n", static_ioc_len));
	static_ioc_len -= l;
}

static void
ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule)
{
	if (fwmsg->sibling != NULL) {
		KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1);
		fwmsg->sibling->sibling = rule;
	}
	fwmsg->sibling = rule;
}

static struct ip_fw *
ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, struct ip_fw_stub *stub)
{
	struct ip_fw *rule;

	rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO);

	rule->act_ofs = ioc_rule->act_ofs;
	rule->cmd_len = ioc_rule->cmd_len;
	rule->rulenum = ioc_rule->rulenum;
	rule->set = ioc_rule->set;
	rule->usr_flags = ioc_rule->usr_flags;

	bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */);

	rule->refcnt = 1;
	rule->cpuid = mycpuid;

	rule->stub = stub;
	if (stub != NULL)
		stub->rule[mycpuid] = rule;

	return rule;
}
static void
ipfw_add_rule_dispatch(struct netmsg *nmsg)
{
	struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->stub);

	/*
	 * Bump generation after ipfw_create_rule(),
	 * since that function may block.
	 */
	ctx->ipfw_gen++;

	/*
	 * Insert rule into the pre-determined position
	 */
	if (fwmsg->prev_rule != NULL) {
		struct ip_fw *prev, *next;

		prev = fwmsg->prev_rule;
		KKASSERT(prev->cpuid == mycpuid);

		next = fwmsg->next_rule;
		KKASSERT(next->cpuid == mycpuid);

		rule->next = next;
		prev->next = rule;

		/*
		 * Move to the position on the next CPU
		 * before the msg is forwarded.
		 */
		fwmsg->prev_rule = prev->sibling;
		fwmsg->next_rule = next->sibling;
	} else {
		KKASSERT(fwmsg->next_rule == NULL);
		rule->next = ctx->ipfw_layer3_chain;
		ctx->ipfw_layer3_chain = rule;
	}

	/* Link rule CPU sibling */
	ipfw_link_sibling(fwmsg, rule);

	ipfw_flush_rule_ptrs(ctx);

	if (mycpuid == 0) {
		/* Statistics only need to be updated once */
		ipfw_inc_static_count(rule);

		/* Return the rule on CPU0 */
		nmsg->nm_lmsg.u.ms_resultp = rule;
	}

	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static void
ipfw_enable_state_dispatch(struct netmsg *nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
	struct ip_fw *rule = lmsg->u.ms_resultp;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ctx->ipfw_gen++;

	KKASSERT(rule->cpuid == mycpuid);
	KKASSERT(rule->stub != NULL && rule->stub->rule[mycpuid] == rule);
	KKASSERT(!(rule->rule_flags & IPFW_RULE_F_STATE));
	rule->rule_flags |= IPFW_RULE_F_STATE;
	lmsg->u.ms_resultp = rule->sibling;

	ifnet_forwardmsg(lmsg, mycpuid + 1);
}

/*
 * Add a new rule to the list. Copy the rule into a malloc'ed area,
 * then possibly create a rule number and add the rule to the list.
 * Update the rule_number in the input struct so the caller knows
 * it as well.
 */
static void
ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_ipfw fwmsg;
	struct netmsg *nmsg;
	struct ip_fw *f, *prev, *rule;
	struct ip_fw_stub *stub;

	IPFW_ASSERT_CFGPORT(&curthread->td_msgport);

	/*
	 * If rulenum is 0, find the highest numbered rule before the
	 * default rule, and add the auto-increment step to it.
	 */
	if (ioc_rule->rulenum == 0) {
		int step = autoinc_step;

		KKASSERT(step >= IPFW_AUTOINC_STEP_MIN &&
			 step <= IPFW_AUTOINC_STEP_MAX);

		/*
		 * Locate the highest numbered rule before default
		 */
		for (f = ctx->ipfw_layer3_chain; f; f = f->next) {
			if (f->rulenum == IPFW_DEFAULT_RULE)
				break;
			ioc_rule->rulenum = f->rulenum;
		}
		if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step)
			ioc_rule->rulenum += step;
	}
	KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE &&
		ioc_rule->rulenum != 0,
		("invalid rule num %d\n", ioc_rule->rulenum));
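	/*
	 * Worked example of the auto-numbering above (illustrative,
	 * assuming autoinc_step is 100): with rules 100 and 200 already
	 * installed, the scan leaves ioc_rule->rulenum at 200 and the
	 * new rule becomes 300.
	 */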
	/*
	 * Now find the right place for the new rule in the sorted list.
	 */
	for (prev = NULL, f = ctx->ipfw_layer3_chain; f;
	     prev = f, f = f->next) {
		if (f->rulenum > ioc_rule->rulenum) {
			/* Found the location */
			break;
		}
	}
	KASSERT(f != NULL, ("no default rule?!\n"));

	if (rule_flags & IPFW_RULE_F_STATE) {
		int size;

		/*
		 * If the new rule will create states, then allocate
		 * a rule stub, which will be referenced by states
		 * (dyn rules)
		 */
		size = sizeof(*stub) + ((ncpus - 1) * sizeof(struct ip_fw *));
		stub = kmalloc(size, M_IPFW, M_WAITOK | M_ZERO);
	} else {
		stub = NULL;
	}

	/*
	 * Duplicate the rule onto each CPU.
	 * The rule duplicated on CPU0 will be returned.
	 */
	bzero(&fwmsg, sizeof(fwmsg));
	nmsg = &fwmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_add_rule_dispatch);
	fwmsg.ioc_rule = ioc_rule;
	fwmsg.prev_rule = prev;
	fwmsg.next_rule = prev == NULL ? NULL : f;
	fwmsg.stub = stub;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL);

	rule = nmsg->nm_lmsg.u.ms_resultp;
	KKASSERT(rule != NULL && rule->cpuid == mycpuid);

	if (rule_flags & IPFW_RULE_F_STATE) {
		/*
		 * Turn on the state flag, _after_ everything on all
		 * CPUs has been set up.
		 */
		bzero(nmsg, sizeof(*nmsg));
		netmsg_init(nmsg, &curthread->td_msgport, 0,
			    ipfw_enable_state_dispatch);
		nmsg->nm_lmsg.u.ms_resultp = rule;

		ifnet_domsg(&nmsg->nm_lmsg, 0);
		KKASSERT(nmsg->nm_lmsg.u.ms_resultp == NULL);
	}

	DPRINTF("++ installed rule %d, static count now %d\n",
		rule->rulenum, static_count);
}

/**
 * Free storage associated with a static rule (including derived
 * dynamic rules).
 * The caller is in charge of clearing rule pointers to avoid
 * dangling pointers.
 * @return a pointer to the next entry.
 * Arguments are not checked, so they better be correct.
 * Must be called at splimp().
 */
static struct ip_fw *
ipfw_delete_rule(struct ipfw_context *ctx,
		 struct ip_fw *prev, struct ip_fw *rule)
{
	struct ip_fw *n;
	struct ip_fw_stub *stub;

	ctx->ipfw_gen++;

	/* STATE flag should have been cleared before we reach here */
	KKASSERT((rule->rule_flags & IPFW_RULE_F_STATE) == 0);

	stub = rule->stub;
	n = rule->next;
	if (prev == NULL)
		ctx->ipfw_layer3_chain = n;
	else
		prev->next = n;

	/* Mark the rule as invalid */
	rule->rule_flags |= IPFW_RULE_F_INVALID;
	rule->next_rule = NULL;
	rule->sibling = NULL;
	rule->stub = NULL;
#ifdef foo
	/* Don't reset cpuid here; keep various assertions working */
	rule->cpuid = -1;
#endif

	/* Statistics only need to be updated once */
	if (mycpuid == 0)
		ipfw_dec_static_count(rule);

	/* Free 'stub' on the last CPU */
	if (stub != NULL && mycpuid == ncpus - 1)
		kfree(stub, M_IPFW);

	/* Try to free this rule */
	ipfw_free_rule(rule);

	/* Return the next rule */
	return n;
}

static void
ipfw_flush_dispatch(struct netmsg *nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
	int kill_default = lmsg->u.ms_result;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */

	while ((rule = ctx->ipfw_layer3_chain) != NULL &&
	       (kill_default || rule->rulenum != IPFW_DEFAULT_RULE))
		ipfw_delete_rule(ctx, NULL, rule);

	ifnet_forwardmsg(lmsg, mycpuid + 1);
}

static void
ipfw_disable_rule_state_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	ctx->ipfw_gen++;

	rule = dmsg->start_rule;
	if (rule != NULL) {
		KKASSERT(rule->cpuid == mycpuid);

		/*
		 * Move to the position on the next CPU
		 * before the msg is forwarded.
		 */
2847 */ 2848 dmsg->start_rule = rule->sibling; 2849 } else { 2850 KKASSERT(dmsg->rulenum == 0); 2851 rule = ctx->ipfw_layer3_chain; 2852 } 2853 2854 while (rule != NULL) { 2855 if (dmsg->rulenum && rule->rulenum != dmsg->rulenum) 2856 break; 2857 rule->rule_flags &= ~IPFW_RULE_F_STATE; 2858 rule = rule->next; 2859 } 2860 2861 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 2862 } 2863 2864 /* 2865 * Deletes all rules from a chain (including the default rule 2866 * if the second argument is set). 2867 * Must be called at splimp(). 2868 */ 2869 static void 2870 ipfw_flush(int kill_default) 2871 { 2872 struct netmsg_del dmsg; 2873 struct netmsg nmsg; 2874 struct lwkt_msg *lmsg; 2875 struct ip_fw *rule; 2876 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2877 2878 IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 2879 2880 /* 2881 * If 'kill_default' then caller has done the necessary 2882 * msgport syncing; unnecessary to do it again. 2883 */ 2884 if (!kill_default) { 2885 /* 2886 * Let ipfw_chk() know the rules are going to 2887 * be flushed, so it could jump directly to 2888 * the default rule. 2889 */ 2890 ipfw_flushing = 1; 2891 netmsg_service_sync(); 2892 } 2893 2894 /* 2895 * Clear STATE flag on rules, so no more states (dyn rules) 2896 * will be created. 2897 */ 2898 bzero(&dmsg, sizeof(dmsg)); 2899 netmsg_init(&dmsg.nmsg, &curthread->td_msgport, 0, 2900 ipfw_disable_rule_state_dispatch); 2901 ifnet_domsg(&dmsg.nmsg.nm_lmsg, 0); 2902 2903 /* 2904 * This actually nukes all states (dyn rules) 2905 */ 2906 lockmgr(&dyn_lock, LK_EXCLUSIVE); 2907 for (rule = ctx->ipfw_layer3_chain; rule != NULL; rule = rule->next) { 2908 /* 2909 * Can't check IPFW_RULE_F_STATE here, 2910 * since it has been cleared previously. 2911 * Check 'stub' instead. 2912 */ 2913 if (rule->stub != NULL) { 2914 /* Force removal */ 2915 remove_dyn_rule_locked(rule, NULL); 2916 } 2917 } 2918 lockmgr(&dyn_lock, LK_RELEASE); 2919 2920 /* 2921 * Press the 'flush' button 2922 */ 2923 bzero(&nmsg, sizeof(nmsg)); 2924 netmsg_init(&nmsg, &curthread->td_msgport, 0, ipfw_flush_dispatch); 2925 lmsg = &nmsg.nm_lmsg; 2926 lmsg->u.ms_result = kill_default; 2927 ifnet_domsg(lmsg, 0); 2928 2929 KASSERT(dyn_count == 0, ("%u dyn rule remains\n", dyn_count)); 2930 2931 if (kill_default) { 2932 if (ipfw_dyn_v != NULL) { 2933 /* 2934 * Free dynamic rules(state) hash table 2935 */ 2936 kfree(ipfw_dyn_v, M_IPFW); 2937 ipfw_dyn_v = NULL; 2938 } 2939 2940 KASSERT(static_count == 0, 2941 ("%u static rules remains\n", static_count)); 2942 KASSERT(static_ioc_len == 0, 2943 ("%u bytes of static rules remains\n", static_ioc_len)); 2944 } else { 2945 KASSERT(static_count == 1, 2946 ("%u static rules remains\n", static_count)); 2947 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 2948 ("%u bytes of static rules remains, should be %u\n", 2949 static_ioc_len, IOC_RULESIZE(ctx->ipfw_default_rule))); 2950 } 2951 2952 /* Flush is done */ 2953 ipfw_flushing = 0; 2954 } 2955 2956 static void 2957 ipfw_alt_delete_rule_dispatch(struct netmsg *nmsg) 2958 { 2959 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 2960 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2961 struct ip_fw *rule, *prev; 2962 2963 rule = dmsg->start_rule; 2964 KKASSERT(rule->cpuid == mycpuid); 2965 dmsg->start_rule = rule->sibling; 2966 2967 prev = dmsg->prev_rule; 2968 if (prev != NULL) { 2969 KKASSERT(prev->cpuid == mycpuid); 2970 2971 /* 2972 * Move to the position on the next CPU 2973 * before the msg is forwarded. 
		dmsg->prev_rule = prev->sibling;
	}

	/*
	 * Flush the next_rule pointers outside the loop, then delete
	 * all matching rules. 'prev' remains the same throughout the
	 * cycle.
	 */
	ipfw_flush_rule_ptrs(ctx);
	while (rule && rule->rulenum == dmsg->rulenum)
		rule = ipfw_delete_rule(ctx, prev, rule);

	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static int
ipfw_alt_delete_rule(uint16_t rulenum)
{
	struct ip_fw *prev, *rule, *f;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct netmsg_del dmsg;
	struct netmsg *nmsg;
	int state;

	/*
	 * Locate the first rule to delete
	 */
	for (prev = NULL, rule = ctx->ipfw_layer3_chain;
	     rule && rule->rulenum < rulenum;
	     prev = rule, rule = rule->next)
		; /* EMPTY */
	if (rule->rulenum != rulenum)
		return EINVAL;

	/*
	 * Check whether any rules with the given number will
	 * create states.
	 */
	state = 0;
	for (f = rule; f && f->rulenum == rulenum; f = f->next) {
		if (f->rule_flags & IPFW_RULE_F_STATE) {
			state = 1;
			break;
		}
	}

	if (state) {
		/*
		 * Clear the STATE flag, so no more states will be
		 * created based on the rules numbered 'rulenum'.
		 */
		bzero(&dmsg, sizeof(dmsg));
		nmsg = &dmsg.nmsg;
		netmsg_init(nmsg, &curthread->td_msgport, 0,
			    ipfw_disable_rule_state_dispatch);
		dmsg.start_rule = rule;
		dmsg.rulenum = rulenum;

		ifnet_domsg(&nmsg->nm_lmsg, 0);
		KKASSERT(dmsg.start_rule == NULL);

		/*
		 * Nuke all related states
		 */
		lockmgr(&dyn_lock, LK_EXCLUSIVE);
		for (f = rule; f && f->rulenum == rulenum; f = f->next) {
			/*
			 * Can't check IPFW_RULE_F_STATE here,
			 * since it has been cleared previously.
			 * Check 'stub' instead.
			 */
			if (f->stub != NULL) {
				/* Force removal */
				remove_dyn_rule_locked(f, NULL);
			}
		}
		lockmgr(&dyn_lock, LK_RELEASE);
	}

	/*
	 * Get rid of the rule duplications on all CPUs
	 */
	bzero(&dmsg, sizeof(dmsg));
	nmsg = &dmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0,
		    ipfw_alt_delete_rule_dispatch);
	dmsg.prev_rule = prev;
	dmsg.start_rule = rule;
	dmsg.rulenum = rulenum;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL);
	return 0;
}

static void
ipfw_alt_delete_ruleset_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *prev, *rule;
#ifdef INVARIANTS
	int del = 0;
#endif

	ipfw_flush_rule_ptrs(ctx);

	prev = NULL;
	rule = ctx->ipfw_layer3_chain;
	while (rule != NULL) {
		if (rule->set == dmsg->from_set) {
			rule = ipfw_delete_rule(ctx, prev, rule);
#ifdef INVARIANTS
			del = 1;
#endif
		} else {
			prev = rule;
			rule = rule->next;
		}
	}
	KASSERT(del, ("no match set?!\n"));

	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static void
ipfw_disable_ruleset_state_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;
#ifdef INVARIANTS
	int cleared = 0;
#endif

	ctx->ipfw_gen++;

	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
		if (rule->set == dmsg->from_set) {
#ifdef INVARIANTS
			cleared = 1;
#endif
			rule->rule_flags &= ~IPFW_RULE_F_STATE;
		}
	}
	KASSERT(cleared, ("no match set?!\n"));

	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static int
ipfw_alt_delete_ruleset(uint8_t set)
{
	struct netmsg_del dmsg;
	struct netmsg *nmsg;
	int state, del;
	struct ip_fw *rule;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	/*
	 * Check whether the 'set' exists. If it exists,
	 * then check whether any rules within the set will
	 * try to create states.
	 */
	state = 0;
	del = 0;
	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
		if (rule->set == set) {
			del = 1;
			if (rule->rule_flags & IPFW_RULE_F_STATE) {
				state = 1;
				break;
			}
		}
	}
	if (!del)
		return 0; /* XXX EINVAL? */

	if (state) {
		/*
		 * Clear the STATE flag, so no more states will be
		 * created based on the rules in this set.
		 */
		bzero(&dmsg, sizeof(dmsg));
		nmsg = &dmsg.nmsg;
		netmsg_init(nmsg, &curthread->td_msgport, 0,
			    ipfw_disable_ruleset_state_dispatch);
		dmsg.from_set = set;

		ifnet_domsg(&nmsg->nm_lmsg, 0);

		/*
		 * Nuke all related states
		 */
		lockmgr(&dyn_lock, LK_EXCLUSIVE);
		for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
			if (rule->set != set)
				continue;

			/*
			 * Can't check IPFW_RULE_F_STATE here,
			 * since it has been cleared previously.
			 * Check 'stub' instead.
			 */
			if (rule->stub != NULL) {
				/* Force removal */
				remove_dyn_rule_locked(rule, NULL);
			}
		}
		lockmgr(&dyn_lock, LK_RELEASE);
	}

	/*
	 * Delete this set
	 */
	bzero(&dmsg, sizeof(dmsg));
	nmsg = &dmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0,
		    ipfw_alt_delete_ruleset_dispatch);
	dmsg.from_set = set;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	return 0;
}

static void
ipfw_alt_move_rule_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ip_fw *rule;

	rule = dmsg->start_rule;
	KKASSERT(rule->cpuid == mycpuid);

	/*
	 * Move to the position on the next CPU
	 * before the msg is forwarded.
	 */
	dmsg->start_rule = rule->sibling;

	while (rule && rule->rulenum <= dmsg->rulenum) {
		if (rule->rulenum == dmsg->rulenum)
			rule->set = dmsg->to_set;
		rule = rule->next;
	}
	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static int
ipfw_alt_move_rule(uint16_t rulenum, uint8_t set)
{
	struct netmsg_del dmsg;
	struct netmsg *nmsg;
	struct ip_fw *rule;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	/*
	 * Locate the first rule to move
	 */
	for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum;
	     rule = rule->next) {
		if (rule->rulenum == rulenum && rule->set != set)
			break;
	}
	if (rule == NULL || rule->rulenum > rulenum)
		return 0; /* XXX error? */

	bzero(&dmsg, sizeof(dmsg));
	nmsg = &dmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0,
		    ipfw_alt_move_rule_dispatch);
	dmsg.start_rule = rule;
	dmsg.rulenum = rulenum;
	dmsg.to_set = set;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	KKASSERT(dmsg.start_rule == NULL);
	return 0;
}

static void
ipfw_alt_move_ruleset_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
		if (rule->set == dmsg->from_set)
			rule->set = dmsg->to_set;
	}
	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static int
ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set)
{
	struct netmsg_del dmsg;
	struct netmsg *nmsg;

	bzero(&dmsg, sizeof(dmsg));
	nmsg = &dmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0,
		    ipfw_alt_move_ruleset_dispatch);
	dmsg.from_set = from_set;
	dmsg.to_set = to_set;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	return 0;
}

static void
ipfw_alt_swap_ruleset_dispatch(struct netmsg *nmsg)
{
	struct netmsg_del *dmsg = (struct netmsg_del *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
		if (rule->set == dmsg->from_set)
			rule->set = dmsg->to_set;
		else if (rule->set == dmsg->to_set)
			rule->set = dmsg->from_set;
	}
	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

static int
ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2)
{
	struct netmsg_del dmsg;
	struct netmsg *nmsg;

	bzero(&dmsg, sizeof(dmsg));
	nmsg = &dmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0,
		    ipfw_alt_swap_ruleset_dispatch);
	dmsg.from_set = set1;
	dmsg.to_set = set2;

	ifnet_domsg(&nmsg->nm_lmsg, 0);
	return 0;
}

/**
 * Remove all rules with given number, and also do set manipulation.
 *
 * The argument is a uint32_t. The low 16 bits are the rule or set
 * number, the next 8 bits are the new set, and the top 8 bits are
 * the command:
 *
 *	0	delete rules with given number
 *	1	delete rules with given set number
 *	2	move rules with given number to new set
 *	3	move rules with given set number to new set
 *	4	swap sets with given numbers
 */
static int
ipfw_ctl_alter(uint32_t arg)
{
	uint16_t rulenum;
	uint8_t cmd, new_set;
	int error = 0;

	rulenum = arg & 0xffff;
	cmd = (arg >> 24) & 0xff;
	new_set = (arg >> 16) & 0xff;

	if (cmd > 4)
		return EINVAL;
	if (new_set >= IPFW_DEFAULT_SET)
		return EINVAL;
	if (cmd == 0 || cmd == 2) {
		if (rulenum == IPFW_DEFAULT_RULE)
			return EINVAL;
	} else {
		if (rulenum >= IPFW_DEFAULT_SET)
			return EINVAL;
	}

	switch (cmd) {
	case 0: /* delete rules with given number */
		error = ipfw_alt_delete_rule(rulenum);
		break;

	case 1: /* delete all rules with given set number */
		error = ipfw_alt_delete_ruleset(rulenum);
		break;

	case 2: /* move rules with given number to new set */
		error = ipfw_alt_move_rule(rulenum, new_set);
		break;

	case 3: /* move rules with given set number to new set */
		error = ipfw_alt_move_ruleset(rulenum, new_set);
		break;

	case 4: /* swap two sets */
		error = ipfw_alt_swap_ruleset(rulenum, new_set);
		break;
	}
	return error;
}
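/*
 * Worked example of the 'arg' encoding accepted above (illustrative
 * values only):
 *
 *	ipfw_ctl_alter((2 << 24) | (7 << 16) | 1000);
 *		-> move rules numbered 1000 into set 7
 *
 *	ipfw_ctl_alter((4 << 24) | (3 << 16) | 1);
 *		-> swap set 1 and set 3
 */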
/*
 * Clear counters for a specific rule.
 */
static void
clear_counters(struct ip_fw *rule, int log_only)
{
	ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule);

	if (log_only == 0) {
		rule->bcnt = rule->pcnt = 0;
		rule->timestamp = 0;
	}
	if (l->o.opcode == O_LOG)
		l->log_left = l->max_log;
}

static void
ipfw_zero_entry_dispatch(struct netmsg *nmsg)
{
	struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;

	if (zmsg->rulenum == 0) {
		KKASSERT(zmsg->start_rule == NULL);

		ctx->ipfw_norule_counter = 0;
		for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
			clear_counters(rule, zmsg->log_only);
	} else {
		struct ip_fw *start = zmsg->start_rule;

		KKASSERT(start->cpuid == mycpuid);
		KKASSERT(start->rulenum == zmsg->rulenum);

		/*
		 * We can have multiple rules with the same number, so we
		 * need to clear them all.
		 */
		for (rule = start; rule && rule->rulenum == zmsg->rulenum;
		     rule = rule->next)
			clear_counters(rule, zmsg->log_only);

		/*
		 * Move to the position on the next CPU
		 * before the msg is forwarded.
		 */
		zmsg->start_rule = start->sibling;
	}
	ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1);
}

/**
 * Reset some or all counters on firewall rules.
 * @arg rulenum is 0 to clear all entries, or contains a specific
 *	rule number.
 * @arg log_only is 1 if we only want to reset logs, zero otherwise.
 */
static int
ipfw_ctl_zero_entry(int rulenum, int log_only)
{
	struct netmsg_zent zmsg;
	struct netmsg *nmsg;
	const char *msg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	bzero(&zmsg, sizeof(zmsg));
	nmsg = &zmsg.nmsg;
	netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_zero_entry_dispatch);
	zmsg.log_only = log_only;

	if (rulenum == 0) {
		msg = log_only ? "ipfw: All logging counts reset.\n"
			       : "ipfw: Accounting cleared.\n";
	} else {
		struct ip_fw *rule;

		/*
		 * Locate the first rule with 'rulenum'
		 */
		for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) {
			if (rule->rulenum == rulenum)
				break;
		}
		if (rule == NULL) /* we did not find any matching rules */
			return (EINVAL);
		zmsg.start_rule = rule;
		zmsg.rulenum = rulenum;

		msg = log_only ? "ipfw: Entry %d logging count reset.\n"
			       : "ipfw: Entry %d cleared.\n";
	}
	ifnet_domsg(&nmsg->nm_lmsg, 0);
	KKASSERT(zmsg.start_rule == NULL);

	if (fw_verbose)
		log(LOG_SECURITY | LOG_NOTICE, msg, rulenum);
	return (0);
}

/*
 * Check the validity of the structure before insert.
 * Fortunately rules are simple, so this mostly needs to check rule sizes.
 */
static int
ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags)
{
	int l, cmdlen = 0;
	int have_action = 0;
	ipfw_insn *cmd;

	*rule_flags = 0;

	/* Check for valid size */
	if (size < sizeof(*rule)) {
		kprintf("ipfw: rule too short\n");
		return EINVAL;
	}
	l = IOC_RULESIZE(rule);
	if (l != size) {
		kprintf("ipfw: size mismatch (have %d want %d)\n", size, l);
		return EINVAL;
	}

	/* Check rule number */
	if (rule->rulenum == IPFW_DEFAULT_RULE) {
		kprintf("ipfw: invalid rule number\n");
		return EINVAL;
	}

	/*
	 * Now go for the individual checks. Very simple ones, basically only
	 * instruction sizes.
	 */
	for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
	     l -= cmdlen, cmd += cmdlen) {
		cmdlen = F_LEN(cmd);
		if (cmdlen > l) {
			kprintf("ipfw: opcode %d size truncated\n",
				cmd->opcode);
			return EINVAL;
		}

		DPRINTF("ipfw: opcode %d\n", cmd->opcode);

		if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT) {
			/* This rule will create states */
			*rule_flags |= IPFW_RULE_F_STATE;
		}

		switch (cmd->opcode) {
		case O_NOP:
		case O_PROBE_STATE:
		case O_KEEP_STATE:
		case O_PROTO:
		case O_IP_SRC_ME:
		case O_IP_DST_ME:
		case O_LAYER2:
		case O_IN:
		case O_FRAG:
		case O_IPOPT:
		case O_IPLEN:
		case O_IPID:
		case O_IPTOS:
		case O_IPPRECEDENCE:
		case O_IPTTL:
		case O_IPVER:
		case O_TCPWIN:
		case O_TCPFLAGS:
		case O_TCPOPTS:
		case O_ESTAB:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
			break;

		case O_UID:
		case O_GID:
		case O_IP_SRC:
		case O_IP_DST:
		case O_TCPSEQ:
		case O_TCPACK:
		case O_PROB:
		case O_ICMPTYPE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
				goto bad_size;
			break;

		case O_LIMIT:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
				goto bad_size;
			break;

		case O_LOG:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
				goto bad_size;

			((ipfw_insn_log *)cmd)->log_left =
			    ((ipfw_insn_log *)cmd)->max_log;

			break;

		case O_IP_SRC_MASK:
		case O_IP_DST_MASK:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_ip))
				goto bad_size;
			if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) {
				kprintf("ipfw: opcode %d, useless rule\n",
					cmd->opcode);
				return EINVAL;
			}
			break;

		case O_IP_SRC_SET:
		case O_IP_DST_SET:
			if (cmd->arg1 == 0 || cmd->arg1 > 256) {
				kprintf("ipfw: invalid set size %d\n",
					cmd->arg1);
				return EINVAL;
			}
			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
			    (cmd->arg1 + 31) / 32)
				goto bad_size;
			break;

		case O_MACADDR2:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
				goto bad_size;
			break;

		case O_MAC_TYPE:
		case O_IP_SRCPORT:
		case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
			if (cmdlen < 2 || cmdlen > 31)
				goto bad_size;
			break;

		case O_RECV:
		case O_XMIT:
		case O_VIA:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
				goto bad_size;
			break;

		case O_PIPE:
		case O_QUEUE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
				goto bad_size;
			goto check_action;

		case O_FORWARD_IP:
			if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) {
				goto bad_size;
			} else {
				in_addr_t fwd_addr;

				fwd_addr = ((ipfw_insn_sa *)cmd)->
					   sa.sin_addr.s_addr;
				if (IN_MULTICAST(ntohl(fwd_addr))) {
					kprintf("ipfw: try forwarding to "
						"multicast address\n");
					return EINVAL;
				}
			}
			goto check_action;

		case O_FORWARD_MAC: /* XXX not implemented yet */
		case O_CHECK_STATE:
		case O_COUNT:
		case O_ACCEPT:
		case O_DENY:
		case O_REJECT:
		case O_SKIPTO:
		case O_DIVERT:
		case O_TEE:
			if (cmdlen != F_INSN_SIZE(ipfw_insn))
				goto bad_size;
check_action:
			if (have_action) {
				kprintf("ipfw: opcode %d, multiple actions"
					" not allowed\n",
					cmd->opcode);
				return EINVAL;
			}
			have_action = 1;
			if (l != cmdlen) {
				kprintf("ipfw: opcode %d, action must be"
					" last opcode\n",
					cmd->opcode);
				return EINVAL;
			}
			break;

		default:
			kprintf("ipfw: opcode %d, unknown opcode\n",
				cmd->opcode);
			return EINVAL;
		}
	}
	if (have_action == 0) {
		kprintf("ipfw: missing action\n");
		return EINVAL;
	}
	return 0;

bad_size:
	kprintf("ipfw: opcode %d size %d wrong\n",
		cmd->opcode, cmdlen);
	return EINVAL;
}
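/*
 * For reference, the smallest layout that passes the checks above is a
 * single-instruction action, matching the default rule built in
 * ipfw_ctx_init_dispatch() below:
 *
 *	rule->cmd_len = 1;		(one 32-bit instruction)
 *	rule->act_ofs = 0;		(the action is the first insn)
 *	rule->cmd[0].len = 1;		(so F_LEN(cmd) == 1)
 *	rule->cmd[0].opcode = O_ACCEPT;	(or O_DENY)
 */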
static int
ipfw_ctl_add_rule(struct sockopt *sopt)
{
	struct ipfw_ioc_rule *ioc_rule;
	size_t size;
	uint32_t rule_flags;
	int error;

	size = sopt->sopt_valsize;
	if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) ||
	    size < sizeof(*ioc_rule)) {
		return EINVAL;
	}
	if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) {
		sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) *
					  IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK);
	}
	ioc_rule = sopt->sopt_val;

	error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags);
	if (error)
		return error;

	ipfw_add_rule(ioc_rule, rule_flags);

	if (sopt->sopt_dir == SOPT_GET)
		sopt->sopt_valsize = IOC_RULESIZE(ioc_rule);
	return 0;
}

static void *
ipfw_copy_rule(const struct ip_fw *rule, struct ipfw_ioc_rule *ioc_rule)
{
	const struct ip_fw *sibling;
#ifdef INVARIANTS
	int i;
#endif

	KKASSERT(rule->cpuid == IPFW_CFGCPUID);

	ioc_rule->act_ofs = rule->act_ofs;
	ioc_rule->cmd_len = rule->cmd_len;
	ioc_rule->rulenum = rule->rulenum;
	ioc_rule->set = rule->set;
	ioc_rule->usr_flags = rule->usr_flags;

	ioc_rule->set_disable = ipfw_ctx[mycpuid]->ipfw_set_disable;
	ioc_rule->static_count = static_count;
	ioc_rule->static_len = static_ioc_len;

	/*
	 * Visit (read-only) all of the rule's duplications to get
	 * the necessary statistics
	 */
#ifdef INVARIANTS
	i = 0;
#endif
	ioc_rule->pcnt = 0;
	ioc_rule->bcnt = 0;
	ioc_rule->timestamp = 0;
	for (sibling = rule; sibling != NULL; sibling = sibling->sibling) {
		ioc_rule->pcnt += sibling->pcnt;
		ioc_rule->bcnt += sibling->bcnt;
		if (sibling->timestamp > ioc_rule->timestamp)
			ioc_rule->timestamp = sibling->timestamp;
#ifdef INVARIANTS
		++i;
#endif
	}
	KASSERT(i == ncpus, ("static rule is not duplicated on every cpu\n"));

	bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */);

	return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule));
}

static void
ipfw_copy_state(const ipfw_dyn_rule *dyn_rule,
		struct ipfw_ioc_state *ioc_state)
{
	const struct ipfw_flow_id *id;
	struct ipfw_ioc_flowid *ioc_id;

	ioc_state->expire = TIME_LEQ(dyn_rule->expire, time_second) ?
			    0 : dyn_rule->expire - time_second;
	ioc_state->pcnt = dyn_rule->pcnt;
	ioc_state->bcnt = dyn_rule->bcnt;

	ioc_state->dyn_type = dyn_rule->dyn_type;
	ioc_state->count = dyn_rule->count;

	ioc_state->rulenum = dyn_rule->stub->rule[mycpuid]->rulenum;

	id = &dyn_rule->id;
	ioc_id = &ioc_state->id;

	ioc_id->type = ETHERTYPE_IP;
	ioc_id->u.ip.dst_ip = id->dst_ip;
	ioc_id->u.ip.src_ip = id->src_ip;
	ioc_id->u.ip.dst_port = id->dst_port;
	ioc_id->u.ip.src_port = id->src_port;
	ioc_id->u.ip.proto = id->proto;
}

static int
ipfw_ctl_get_rules(struct sockopt *sopt)
{
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];
	struct ip_fw *rule;
	void *bp;
	size_t size;
	uint32_t dcount = 0;

	/*
	 * Pass up a copy of the current rules. Static rules
	 * come first (the last of which has number IPFW_DEFAULT_RULE),
	 * followed by a possibly empty list of dynamic rules.
	 */

	size = static_ioc_len;	/* size of static rules */
	if (ipfw_dyn_v) {	/* add size of dyn.rules */
		dcount = dyn_count;
		size += dcount * sizeof(struct ipfw_ioc_state);
	}

	if (sopt->sopt_valsize < size) {
		/* short length, no need to return incomplete rules */
		/* XXX: if superuser, no need to zero buffer */
		bzero(sopt->sopt_val, sopt->sopt_valsize);
		return 0;
	}
	bp = sopt->sopt_val;

	for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next)
		bp = ipfw_copy_rule(rule, bp);

	if (ipfw_dyn_v && dcount != 0) {
		struct ipfw_ioc_state *ioc_state = bp;
		uint32_t dcount2 = 0;
#ifdef INVARIANTS
		size_t old_size = size;
#endif
		int i;

		lockmgr(&dyn_lock, LK_SHARED);

		/* Check 'ipfw_dyn_v' again with lock held */
		if (ipfw_dyn_v == NULL)
			goto skip;

		for (i = 0; i < curr_dyn_buckets; i++) {
			ipfw_dyn_rule *p;

			/*
			 * The # of dynamic rules may have grown after the
			 * snapshot of 'dyn_count' was taken, so we will have
			 * to check 'dcount' (snapshot of dyn_count) here to
			 * make sure that we don't overflow the pre-allocated
			 * buffer.
			 */
			for (p = ipfw_dyn_v[i]; p != NULL && dcount != 0;
			     p = p->next, ioc_state++, dcount--, dcount2++)
				ipfw_copy_state(p, ioc_state);
		}
skip:
		lockmgr(&dyn_lock, LK_RELEASE);

		/*
		 * The # of dynamic rules may have shrunk after the
		 * snapshot of 'dyn_count' was taken. To give the user a
		 * correct dynamic rule count, we use the 'dcount2'
		 * calculated above (with the shared lockmgr lock held).
		 */
		size = static_ioc_len +
		       (dcount2 * sizeof(struct ipfw_ioc_state));
		KKASSERT(size <= old_size);
	}

	sopt->sopt_valsize = size;
	return 0;
}

static void
ipfw_set_disable_dispatch(struct netmsg *nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
	struct ipfw_context *ctx = ipfw_ctx[mycpuid];

	ctx->ipfw_gen++;
	ctx->ipfw_set_disable = lmsg->u.ms_result32;

	ifnet_forwardmsg(lmsg, mycpuid + 1);
}

static void
ipfw_ctl_set_disable(uint32_t disable, uint32_t enable)
{
	struct netmsg nmsg;
	struct lwkt_msg *lmsg;
	uint32_t set_disable;

	/* IPFW_DEFAULT_SET is always enabled */
	enable |= (1 << IPFW_DEFAULT_SET);
	set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable;

	bzero(&nmsg, sizeof(nmsg));
	netmsg_init(&nmsg, &curthread->td_msgport, 0,
		    ipfw_set_disable_dispatch);
	lmsg = &nmsg.nm_lmsg;
	lmsg->u.ms_result32 = set_disable;

	ifnet_domsg(lmsg, 0);
}

/**
 * {set|get}sockopt parser.
 */
static int
ipfw_ctl(struct sockopt *sopt)
{
	int error, rulenum;
	uint32_t *masks;
	size_t size;

	error = 0;

	switch (sopt->sopt_name) {
	case IP_FW_GET:
		error = ipfw_ctl_get_rules(sopt);
		break;

	case IP_FW_FLUSH:
		ipfw_flush(0 /* keep default rule */);
		break;

	case IP_FW_ADD:
		error = ipfw_ctl_add_rule(sopt);
		break;

	case IP_FW_DEL:
		/*
		 * IP_FW_DEL is used for deleting single rules or sets,
		 * and (ab)used to atomically manipulate sets.
		 * The argument size is used to distinguish between the two:
		 *	sizeof(uint32_t)
		 *		delete a single rule or set of rules,
		 *		or reassign rules (or sets) to a different set.
		 *	2 * sizeof(uint32_t)
		 *		atomic disable/enable sets.
		 *		The first uint32_t contains the sets to be
		 *		disabled, the second the sets to be enabled.
		 */
		masks = sopt->sopt_val;
		size = sopt->sopt_valsize;
		if (size == sizeof(*masks)) {
			/*
			 * Delete or reassign static rule
			 */
			error = ipfw_ctl_alter(masks[0]);
		} else if (size == (2 * sizeof(*masks))) {
			/*
			 * Set enable/disable
			 */
			ipfw_ctl_set_disable(masks[0], masks[1]);
		} else {
			error = EINVAL;
		}
		break;

	case IP_FW_ZERO:
	case IP_FW_RESETLOG: /* argument is an int, the rule number */
		rulenum = 0;

		if (sopt->sopt_val != 0) {
			error = soopt_to_kbuf(sopt, &rulenum,
					      sizeof(int), sizeof(int));
			if (error)
				break;
		}
		error = ipfw_ctl_zero_entry(rulenum,
			sopt->sopt_name == IP_FW_RESETLOG);
		break;

	default:
		kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name);
		error = EINVAL;
	}
	return error;
}
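/*
 * Userland view of the IP_FW_DEL size dichotomy above (a sketch only;
 * socket setup and error handling are omitted and the values are
 * illustrative):
 *
 *	uint32_t one = 1000;
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, &one, sizeof(one));
 *		-> delete rules numbered 1000
 *
 *	uint32_t two[2] = { 1 << 5, 1 << 6 };
 *	setsockopt(s, IPPROTO_IP, IP_FW_DEL, two, sizeof(two));
 *		-> atomically disable set 5 and enable set 6
 */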
/*
 * This procedure is only used to handle keepalives. It is invoked
 * every dyn_keepalive_period seconds.
 */
static void
ipfw_tick_dispatch(struct netmsg *nmsg)
{
	time_t keep_alive;
	uint32_t gen;
	int i;

	IPFW_ASSERT_CFGPORT(&curthread->td_msgport);
	KKASSERT(IPFW_LOADED);

	/* Reply ASAP */
	crit_enter();
	lwkt_replymsg(&nmsg->nm_lmsg, 0);
	crit_exit();

	if (ipfw_dyn_v == NULL || dyn_count == 0)
		goto done;

	keep_alive = time_second;

	lockmgr(&dyn_lock, LK_EXCLUSIVE);
again:
	if (ipfw_dyn_v == NULL || dyn_count == 0) {
		lockmgr(&dyn_lock, LK_RELEASE);
		goto done;
	}
	gen = dyn_buckets_gen;

	for (i = 0; i < curr_dyn_buckets; i++) {
		ipfw_dyn_rule *q, *prev;

		for (prev = NULL, q = ipfw_dyn_v[i]; q != NULL;) {
			uint32_t ack_rev, ack_fwd;
			struct ipfw_flow_id id;

			if (q->dyn_type == O_LIMIT_PARENT)
				goto next;

			if (TIME_LEQ(q->expire, time_second)) {
				/* State expired */
				UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
				continue;
			}

			/*
			 * Keep alive processing
			 */

			if (!dyn_keepalive)
				goto next;
			if (q->id.proto != IPPROTO_TCP)
				goto next;
			if ((q->state & BOTH_SYN) != BOTH_SYN)
				goto next;
			if (TIME_LEQ(time_second + dyn_keepalive_interval,
				     q->expire))
				goto next;	/* too early */
			if (q->keep_alive == keep_alive)
				goto next;	/* already done */

			/*
			 * Save the necessary information, so that it
			 * survives the possible blocking in send_pkt()
			 */
			id = q->id;
			ack_rev = q->ack_rev;
			ack_fwd = q->ack_fwd;

			/* Sending has been started */
			q->keep_alive = keep_alive;

			/* Release the lock to avoid a possible deadlock */
			lockmgr(&dyn_lock, LK_RELEASE);
			send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN);
			send_pkt(&id, ack_fwd - 1, ack_rev, 0);
			lockmgr(&dyn_lock, LK_EXCLUSIVE);

			if (gen != dyn_buckets_gen) {
				/*
				 * The dyn bucket array has been changed
				 * during the two sends above; reiterate.
				 */
				goto again;
			}
next:
			prev = q;
			q = q->next;
		}
	}
	lockmgr(&dyn_lock, LK_RELEASE);
done:
	callout_reset(&ipfw_timeout_h, dyn_keepalive_period * hz,
		      ipfw_tick, NULL);
}
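/*
 * Note on the two send_pkt() calls above: they emit one keepalive
 * segment in each direction of the tracked TCP flow, built from the
 * saved sequence/ack numbers, so that a reply from either endpoint
 * can refresh the state before it expires.
 */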
It is invoked 4076 * every dyn_keepalive_period 4077 */ 4078 static void 4079 ipfw_tick(void *dummy __unused) 4080 { 4081 struct lwkt_msg *lmsg = &ipfw_timeout_netmsg.nm_lmsg; 4082 4083 KKASSERT(mycpuid == IPFW_CFGCPUID); 4084 4085 crit_enter(); 4086 4087 KKASSERT(lmsg->ms_flags & MSGF_DONE); 4088 if (IPFW_LOADED) { 4089 lwkt_sendmsg(IPFW_CFGPORT, lmsg); 4090 /* ipfw_timeout_netmsg's handler reset this callout */ 4091 } 4092 4093 crit_exit(); 4094 } 4095 4096 static int 4097 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 4098 { 4099 struct ip_fw_args args; 4100 struct mbuf *m = *m0; 4101 struct m_tag *mtag; 4102 int tee = 0, error = 0, ret; 4103 4104 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 4105 /* Extract info from dummynet tag */ 4106 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 4107 KKASSERT(mtag != NULL); 4108 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 4109 KKASSERT(args.rule != NULL); 4110 4111 m_tag_delete(m, mtag); 4112 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 4113 } else { 4114 args.rule = NULL; 4115 } 4116 4117 args.eh = NULL; 4118 args.oif = NULL; 4119 args.m = m; 4120 ret = ipfw_chk(&args); 4121 m = args.m; 4122 4123 if (m == NULL) { 4124 error = EACCES; 4125 goto back; 4126 } 4127 4128 switch (ret) { 4129 case IP_FW_PASS: 4130 break; 4131 4132 case IP_FW_DENY: 4133 m_freem(m); 4134 m = NULL; 4135 error = EACCES; 4136 break; 4137 4138 case IP_FW_DUMMYNET: 4139 /* Send packet to the appropriate pipe */ 4140 ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args); 4141 break; 4142 4143 case IP_FW_TEE: 4144 tee = 1; 4145 /* FALL THROUGH */ 4146 4147 case IP_FW_DIVERT: 4148 if (ip_divert_p != NULL) { 4149 m = ip_divert_p(m, tee, 1); 4150 } else { 4151 m_freem(m); 4152 m = NULL; 4153 /* not sure this is the right error msg */ 4154 error = EACCES; 4155 } 4156 break; 4157 4158 default: 4159 panic("unknown ipfw return value: %d\n", ret); 4160 } 4161 back: 4162 *m0 = m; 4163 return error; 4164 } 4165 4166 static int 4167 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 4168 { 4169 struct ip_fw_args args; 4170 struct mbuf *m = *m0; 4171 struct m_tag *mtag; 4172 int tee = 0, error = 0, ret; 4173 4174 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 4175 /* Extract info from dummynet tag */ 4176 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 4177 KKASSERT(mtag != NULL); 4178 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 4179 KKASSERT(args.rule != NULL); 4180 4181 m_tag_delete(m, mtag); 4182 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 4183 } else { 4184 args.rule = NULL; 4185 } 4186 4187 args.eh = NULL; 4188 args.m = m; 4189 args.oif = ifp; 4190 ret = ipfw_chk(&args); 4191 m = args.m; 4192 4193 if (m == NULL) { 4194 error = EACCES; 4195 goto back; 4196 } 4197 4198 switch (ret) { 4199 case IP_FW_PASS: 4200 break; 4201 4202 case IP_FW_DENY: 4203 m_freem(m); 4204 m = NULL; 4205 error = EACCES; 4206 break; 4207 4208 case IP_FW_DUMMYNET: 4209 ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args); 4210 break; 4211 4212 case IP_FW_TEE: 4213 tee = 1; 4214 /* FALL THROUGH */ 4215 4216 case IP_FW_DIVERT: 4217 if (ip_divert_p != NULL) { 4218 m = ip_divert_p(m, tee, 0); 4219 } else { 4220 m_freem(m); 4221 m = NULL; 4222 /* not sure this is the right error msg */ 4223 error = EACCES; 4224 } 4225 break; 4226 4227 default: 4228 panic("unknown ipfw return value: %d\n", ret); 4229 } 4230 back: 4231 *m0 = m; 4232 return error; 4233 } 4234 4235 static void 4236 ipfw_hook(void) 4237 { 4238 struct pfil_head *pfh; 

static void
ipfw_hook(void)
{
	struct pfil_head *pfh;

	IPFW_ASSERT_CFGPORT(&curthread->td_msgport);

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_MPSAFE, pfh);
	pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_MPSAFE, pfh);
}

static void
ipfw_dehook(void)
{
	struct pfil_head *pfh;

	IPFW_ASSERT_CFGPORT(&curthread->td_msgport);

	pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET);
	if (pfh == NULL)
		return;

	pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh);
	pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh);
}

static void
ipfw_sysctl_enable_dispatch(struct netmsg *nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->nm_lmsg;
	int enable = lmsg->u.ms_result;

	if (fw_enable == enable)
		goto reply;

	fw_enable = enable;
	if (fw_enable)
		ipfw_hook();
	else
		ipfw_dehook();
reply:
	lwkt_replymsg(lmsg, 0);
}

static int
ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS)
{
	struct netmsg nmsg;
	struct lwkt_msg *lmsg;
	int enable, error;

	enable = fw_enable;
	error = sysctl_handle_int(oidp, &enable, 0, req);
	if (error || req->newptr == NULL)
		return error;

	netmsg_init(&nmsg, &curthread->td_msgport, 0,
		    ipfw_sysctl_enable_dispatch);
	lmsg = &nmsg.nm_lmsg;
	lmsg->u.ms_result = enable;

	return lwkt_domsg(IPFW_CFGPORT, lmsg, 0);
}

static int
ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS)
{
	return sysctl_int_range(oidp, arg1, arg2, req,
	    IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX);
}

static int
ipfw_sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS)
{
	int error, value;

	lockmgr(&dyn_lock, LK_EXCLUSIVE);

	value = dyn_buckets;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || !req->newptr)
		goto back;

	/*
	 * Make sure we have a power of 2 and
	 * do not allow more than 64k entries.
	 */
	error = EINVAL;
	if (value <= 1 || value > 65536)
		goto back;
	if ((value & (value - 1)) != 0)
		goto back;

	error = 0;
	dyn_buckets = value;
back:
	lockmgr(&dyn_lock, LK_RELEASE);
	return error;
}
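
/*
 * The power-of-2 test above works because, for value > 0, the
 * expression (value & (value - 1)) clears the lowest set bit, so the
 * result is zero iff exactly one bit is set.  For example:
 *
 *	4096 & 4095 == 0x1000 & 0x0fff == 0       -> accepted
 *	4097 & 4096 == 0x1001 & 0x1000 == 0x1000  -> EINVAL
 */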
4325 */ 4326 error = EINVAL; 4327 if (value <= 1 || value > 65536) 4328 goto back; 4329 if ((value & (value - 1)) != 0) 4330 goto back; 4331 4332 error = 0; 4333 dyn_buckets = value; 4334 back: 4335 lockmgr(&dyn_lock, LK_RELEASE); 4336 return error; 4337 } 4338 4339 static int 4340 ipfw_sysctl_dyn_fin(SYSCTL_HANDLER_ARGS) 4341 { 4342 return sysctl_int_range(oidp, arg1, arg2, req, 4343 1, dyn_keepalive_period - 1); 4344 } 4345 4346 static int 4347 ipfw_sysctl_dyn_rst(SYSCTL_HANDLER_ARGS) 4348 { 4349 return sysctl_int_range(oidp, arg1, arg2, req, 4350 1, dyn_keepalive_period - 1); 4351 } 4352 4353 static void 4354 ipfw_ctx_init_dispatch(struct netmsg *nmsg) 4355 { 4356 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4357 struct ipfw_context *ctx; 4358 struct ip_fw *def_rule; 4359 4360 ctx = kmalloc(sizeof(*ctx), M_IPFW, M_WAITOK | M_ZERO); 4361 ipfw_ctx[mycpuid] = ctx; 4362 4363 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 4364 4365 def_rule->act_ofs = 0; 4366 def_rule->rulenum = IPFW_DEFAULT_RULE; 4367 def_rule->cmd_len = 1; 4368 def_rule->set = IPFW_DEFAULT_SET; 4369 4370 def_rule->cmd[0].len = 1; 4371 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 4372 def_rule->cmd[0].opcode = O_ACCEPT; 4373 #else 4374 def_rule->cmd[0].opcode = O_DENY; 4375 #endif 4376 4377 def_rule->refcnt = 1; 4378 def_rule->cpuid = mycpuid; 4379 4380 /* Install the default rule */ 4381 ctx->ipfw_default_rule = def_rule; 4382 ctx->ipfw_layer3_chain = def_rule; 4383 4384 /* Link rule CPU sibling */ 4385 ipfw_link_sibling(fwmsg, def_rule); 4386 4387 /* Statistics only need to be updated once */ 4388 if (mycpuid == 0) 4389 ipfw_inc_static_count(def_rule); 4390 4391 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 4392 } 4393 4394 static void 4395 ipfw_init_dispatch(struct netmsg *nmsg) 4396 { 4397 struct netmsg_ipfw fwmsg; 4398 int error = 0; 4399 4400 if (IPFW_LOADED) { 4401 kprintf("IP firewall already loaded\n"); 4402 error = EEXIST; 4403 goto reply; 4404 } 4405 4406 bzero(&fwmsg, sizeof(fwmsg)); 4407 netmsg_init(&fwmsg.nmsg, &curthread->td_msgport, 0, 4408 ipfw_ctx_init_dispatch); 4409 ifnet_domsg(&fwmsg.nmsg.nm_lmsg, 0); 4410 4411 ip_fw_chk_ptr = ipfw_chk; 4412 ip_fw_ctl_ptr = ipfw_ctl; 4413 ip_fw_dn_io_ptr = ipfw_dummynet_io; 4414 4415 kprintf("ipfw2 initialized, default to %s, logging ", 4416 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode == 4417 O_ACCEPT ? 
"accept" : "deny"); 4418 4419 #ifdef IPFIREWALL_VERBOSE 4420 fw_verbose = 1; 4421 #endif 4422 #ifdef IPFIREWALL_VERBOSE_LIMIT 4423 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 4424 #endif 4425 if (fw_verbose == 0) { 4426 kprintf("disabled\n"); 4427 } else if (verbose_limit == 0) { 4428 kprintf("unlimited\n"); 4429 } else { 4430 kprintf("limited to %d packets/entry by default\n", 4431 verbose_limit); 4432 } 4433 4434 callout_init_mp(&ipfw_timeout_h); 4435 netmsg_init(&ipfw_timeout_netmsg, &netisr_adone_rport, 4436 MSGF_MPSAFE | MSGF_DROPABLE | MSGF_PRIORITY, 4437 ipfw_tick_dispatch); 4438 lockinit(&dyn_lock, "ipfw_dyn", 0, 0); 4439 4440 ip_fw_loaded = 1; 4441 callout_reset(&ipfw_timeout_h, hz, ipfw_tick, NULL); 4442 4443 if (fw_enable) 4444 ipfw_hook(); 4445 reply: 4446 lwkt_replymsg(&nmsg->nm_lmsg, error); 4447 } 4448 4449 static int 4450 ipfw_init(void) 4451 { 4452 struct netmsg smsg; 4453 4454 netmsg_init(&smsg, &curthread->td_msgport, 0, ipfw_init_dispatch); 4455 return lwkt_domsg(IPFW_CFGPORT, &smsg.nm_lmsg, 0); 4456 } 4457 4458 #ifdef KLD_MODULE 4459 4460 static void 4461 ipfw_fini_dispatch(struct netmsg *nmsg) 4462 { 4463 int error = 0, cpu; 4464 4465 if (ipfw_refcnt != 0) { 4466 error = EBUSY; 4467 goto reply; 4468 } 4469 4470 ip_fw_loaded = 0; 4471 4472 ipfw_dehook(); 4473 callout_stop(&ipfw_timeout_h); 4474 4475 netmsg_service_sync(); 4476 4477 crit_enter(); 4478 if ((ipfw_timeout_netmsg.nm_lmsg.ms_flags & MSGF_DONE) == 0) { 4479 /* 4480 * Callout message is pending; drop it 4481 */ 4482 lwkt_dropmsg(&ipfw_timeout_netmsg.nm_lmsg); 4483 } 4484 crit_exit(); 4485 4486 ip_fw_chk_ptr = NULL; 4487 ip_fw_ctl_ptr = NULL; 4488 ip_fw_dn_io_ptr = NULL; 4489 ipfw_flush(1 /* kill default rule */); 4490 4491 /* Free pre-cpu context */ 4492 for (cpu = 0; cpu < ncpus; ++cpu) 4493 kfree(ipfw_ctx[cpu], M_IPFW); 4494 4495 kprintf("IP firewall unloaded\n"); 4496 reply: 4497 lwkt_replymsg(&nmsg->nm_lmsg, error); 4498 } 4499 4500 static int 4501 ipfw_fini(void) 4502 { 4503 struct netmsg smsg; 4504 4505 netmsg_init(&smsg, &curthread->td_msgport, 0, ipfw_fini_dispatch); 4506 return lwkt_domsg(IPFW_CFGPORT, &smsg.nm_lmsg, 0); 4507 } 4508 4509 #endif /* KLD_MODULE */ 4510 4511 static int 4512 ipfw_modevent(module_t mod, int type, void *unused) 4513 { 4514 int err = 0; 4515 4516 switch (type) { 4517 case MOD_LOAD: 4518 err = ipfw_init(); 4519 break; 4520 4521 case MOD_UNLOAD: 4522 #ifndef KLD_MODULE 4523 kprintf("ipfw statically compiled, cannot unload\n"); 4524 err = EBUSY; 4525 #else 4526 err = ipfw_fini(); 4527 #endif 4528 break; 4529 default: 4530 break; 4531 } 4532 return err; 4533 } 4534 4535 static moduledata_t ipfwmod = { 4536 "ipfw", 4537 ipfw_modevent, 4538 0 4539 }; 4540 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 4541 MODULE_VERSION(ipfw, 1); 4542