1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.100 2008/11/22 11:03:35 sephe Exp $ 27 */ 28 29 /* 30 * Implement IP packet firewall (new version) 31 */ 32 33 #include "opt_ipfw.h" 34 #include "opt_inet.h" 35 #ifndef INET 36 #error IPFIREWALL requires INET. 
37 #endif /* INET */ 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/kernel.h> 44 #include <sys/proc.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/thread2.h> 50 #include <sys/ucred.h> 51 #include <sys/in_cksum.h> 52 #include <sys/lock.h> 53 54 #include <net/if.h> 55 #include <net/route.h> 56 #include <net/netmsg2.h> 57 #include <net/pfil.h> 58 #include <net/dummynet/ip_dummynet.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/in_var.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/ip_icmp.h> 67 #include <netinet/tcp.h> 68 #include <netinet/tcp_timer.h> 69 #include <netinet/tcp_var.h> 70 #include <netinet/tcpip.h> 71 #include <netinet/udp.h> 72 #include <netinet/udp_var.h> 73 #include <netinet/ip_divert.h> 74 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 75 76 #include <net/ipfw/ip_fw2.h> 77 78 #ifdef IPFIREWALL_DEBUG 79 #define DPRINTF(fmt, ...) \ 80 do { \ 81 if (fw_debug > 0) \ 82 kprintf(fmt, __VA_ARGS__); \ 83 } while (0) 84 #else 85 #define DPRINTF(fmt, ...) ((void)0) 86 #endif 87 88 /* 89 * Description about per-CPU rule duplication: 90 * 91 * Module loading/unloading and all ioctl operations are serialized 92 * by netisr0, so we don't have any ordering or locking problems. 93 * 94 * Following graph shows how operation on per-CPU rule list is 95 * performed [2 CPU case]: 96 * 97 * CPU0 CPU1 98 * 99 * netisr0 <------------------------------------+ 100 * domsg | 101 * | | 102 * | netmsg | 103 * | | 104 * V | 105 * ifnet0 | 106 * : | netmsg 107 * :(delete/add...) | 108 * : | 109 * : netmsg | 110 * forwardmsg---------->ifnet1 | 111 * : | 112 * :(delete/add...) 
| 113 * : | 114 * : | 115 * replymsg--------------+ 116 * 117 * 118 * 119 * 120 * Rules which will not create states (dyn rules) [2 CPU case] 121 * 122 * CPU0 CPU1 123 * layer3_chain layer3_chain 124 * | | 125 * V V 126 * +-------+ sibling +-------+ sibling 127 * | rule1 |--------->| rule1 |--------->NULL 128 * +-------+ +-------+ 129 * | | 130 * |next |next 131 * V V 132 * +-------+ sibling +-------+ sibling 133 * | rule2 |--------->| rule2 |--------->NULL 134 * +-------+ +-------+ 135 * 136 * ip_fw.sibling: 137 * 1) Ease statistics calculation during IP_FW_GET. We only need to 138 * iterate layer3_chain on CPU0; the current rule's duplication on 139 * the other CPUs could safely be read-only accessed by using 140 * ip_fw.sibling 141 * 2) Accelerate rule insertion and deletion, e.g. rule insertion: 142 * a) In netisr0 (on CPU0) rule3 is determined to be inserted between 143 * rule1 and rule2. To make this decision we need to iterate the 144 * layer3_chain on CPU0. The netmsg, which is used to insert the 145 * rule, will contain rule1 on CPU0 as prev_rule and rule2 on CPU0 146 * as next_rule 147 * b) After the insertion on CPU0 is done, we will move on to CPU1. 
148 * But instead of relocating the rule3's position on CPU1 by 149 * iterating the layer3_chain on CPU1, we set the netmsg's prev_rule 150 * to rule1->sibling and next_rule to rule2->sibling before the 151 * netmsg is forwarded to CPU1 from CPU0 152 * 153 * 154 * 155 * Rules which will create states (dyn rules) [2 CPU case] 156 * (unnecessary parts are omitted; they are same as in the previous figure) 157 * 158 * CPU0 CPU1 159 * 160 * +-------+ +-------+ 161 * | rule1 | | rule1 | 162 * +-------+ +-------+ 163 * ^ | | ^ 164 * | |stub stub| | 165 * | | | | 166 * | +----+ +----+ | 167 * | | | | 168 * | V V | 169 * | +--------------------+ | 170 * | | rule_stub | | 171 * | | (read-only shared) | | 172 * | | | | 173 * | | back pointer array | | 174 * | | (indexed by cpuid) | | 175 * | | | | 176 * +----|---------[0] | | 177 * | [1]--------|----+ 178 * | | 179 * +--------------------+ 180 * ^ ^ 181 * | | 182 * ........|............|............ 183 * : | | : 184 * : |stub |stub : 185 * : | | : 186 * : +---------+ +---------+ : 187 * : | state1a | | state1b | .... : 188 * : +---------+ +---------+ : 189 * : : 190 * : states table : 191 * : (shared) : 192 * : (protected by dyn_lock) : 193 * .................................. 194 * 195 * [state1a and state1b are states created by rule1] 196 * 197 * ip_fw_stub: 198 * This structure is introduced so that shared (locked) state table could 199 * work with per-CPU (duplicated) static rules. It mainly bridges states 200 * and static rules and serves as static rule's place holder (a read-only 201 * shared part of duplicated rules) from states point of view. 
202 * 203 * IPFW_RULE_F_STATE (only for rules which create states): 204 * o During rule installation, this flag is turned on after rule's 205 * duplications reach all CPUs, to avoid at least following race: 206 * 1) rule1 is duplicated on CPU0 and is not duplicated on CPU1 yet 207 * 2) rule1 creates state1 208 * 3) state1 is located on CPU1 by check-state 209 * But rule1 is not duplicated on CPU1 yet 210 * o During rule deletion, this flag is turned off before deleting states 211 * created by the rule and before deleting the rule itself, so no 212 * more states will be created by the to-be-deleted rule even when its 213 * duplication on certain CPUs are not eliminated yet. 214 */ 215 216 #define IPFW_AUTOINC_STEP_MIN 1 217 #define IPFW_AUTOINC_STEP_MAX 1000 218 #define IPFW_AUTOINC_STEP_DEF 100 219 220 #define IPFW_DEFAULT_RULE 65535 /* rulenum for the default rule */ 221 #define IPFW_DEFAULT_SET 31 /* set number for the default rule */ 222 223 struct netmsg_ipfw { 224 struct netmsg nmsg; 225 const struct ipfw_ioc_rule *ioc_rule; 226 struct ip_fw *next_rule; 227 struct ip_fw *prev_rule; 228 struct ip_fw *sibling; 229 struct ip_fw_stub *stub; 230 }; 231 232 struct netmsg_del { 233 struct netmsg nmsg; 234 struct ip_fw *start_rule; 235 struct ip_fw *prev_rule; 236 uint16_t rulenum; 237 uint8_t from_set; 238 uint8_t to_set; 239 }; 240 241 struct netmsg_zent { 242 struct netmsg nmsg; 243 struct ip_fw *start_rule; 244 uint16_t rulenum; 245 uint16_t log_only; 246 }; 247 248 struct ipfw_context { 249 struct ip_fw *ipfw_layer3_chain; /* list of rules for layer3 */ 250 struct ip_fw *ipfw_default_rule; /* default rule */ 251 uint64_t ipfw_norule_counter; /* counter for ipfw_log(NULL) */ 252 253 /* 254 * ipfw_set_disable contains one bit per set value (0..31). 255 * If the bit is set, all rules with the corresponding set 256 * are disabled. Set IPDW_DEFAULT_SET is reserved for the 257 * default rule and CANNOT be disabled. 
258 */ 259 uint32_t ipfw_set_disable; 260 uint32_t ipfw_gen; /* generation of rule list */ 261 }; 262 263 static struct ipfw_context *ipfw_ctx[MAXCPU]; 264 265 #ifdef KLD_MODULE 266 /* 267 * Module can not be unloaded, if there are references to 268 * certains rules of ipfw(4), e.g. dummynet(4) 269 */ 270 static int ipfw_refcnt; 271 #endif 272 273 MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); 274 275 /* 276 * Following two global variables are accessed and 277 * updated only on CPU0 278 */ 279 static uint32_t static_count; /* # of static rules */ 280 static uint32_t static_ioc_len; /* bytes of static rules */ 281 282 /* 283 * If 1, then ipfw static rules are being flushed, 284 * ipfw_chk() will skip to the default rule. 285 */ 286 static int ipfw_flushing; 287 288 static int fw_verbose; 289 static int verbose_limit; 290 291 static int fw_debug; 292 static int autoinc_step = IPFW_AUTOINC_STEP_DEF; 293 294 static int ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS); 295 static int ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS); 296 static int ipfw_sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS); 297 static int ipfw_sysctl_dyn_fin(SYSCTL_HANDLER_ARGS); 298 static int ipfw_sysctl_dyn_rst(SYSCTL_HANDLER_ARGS); 299 300 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); 301 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW, 302 &fw_enable, 0, ipfw_sysctl_enable, "I", "Enable ipfw"); 303 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLTYPE_INT | CTLFLAG_RW, 304 &autoinc_step, 0, ipfw_sysctl_autoinc_step, "I", 305 "Rule number autincrement step"); 306 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW, 307 &fw_one_pass, 0, 308 "Only do a single pass through ipfw when using dummynet(4)"); 309 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW, 310 &fw_debug, 0, "Enable printing of debug ip_fw statements"); 311 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW, 312 &fw_verbose, 0, "Log matches to ipfw rules"); 313 
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, 314 &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); 315 316 /* 317 * Description of dynamic rules. 318 * 319 * Dynamic rules are stored in lists accessed through a hash table 320 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can 321 * be modified through the sysctl variable dyn_buckets which is 322 * updated when the table becomes empty. 323 * 324 * XXX currently there is only one list, ipfw_dyn. 325 * 326 * When a packet is received, its address fields are first masked 327 * with the mask defined for the rule, then hashed, then matched 328 * against the entries in the corresponding list. 329 * Dynamic rules can be used for different purposes: 330 * + stateful rules; 331 * + enforcing limits on the number of sessions; 332 * + in-kernel NAT (not implemented yet) 333 * 334 * The lifetime of dynamic rules is regulated by dyn_*_lifetime, 335 * measured in seconds and depending on the flags. 336 * 337 * The total number of dynamic rules is stored in dyn_count. 338 * The max number of dynamic rules is dyn_max. When we reach 339 * the maximum number of rules we do not create anymore. This is 340 * done to avoid consuming too much memory, but also too much 341 * time when searching on each packet (ideally, we should try instead 342 * to put a limit on the length of the list on each bucket...). 343 * 344 * Each dynamic rule holds a pointer to the parent ipfw rule so 345 * we know what action to perform. Dynamic rules are removed when 346 * the parent rule is deleted. XXX we should make them survive. 347 * 348 * There are some limitations with dynamic rules -- we do not 349 * obey the 'randomized match', and we do not do multiple 350 * passes through the firewall. XXX check the latter!!! 
351 * 352 * NOTE about the SHARED LOCKMGR LOCK during dynamic rule looking up: 353 * Only TCP state transition will change dynamic rule's state and ack 354 * sequences, while all packets of one TCP connection only goes through 355 * one TCP thread, so it is safe to use shared lockmgr lock during dynamic 356 * rule looking up. The keep alive callout uses exclusive lockmgr lock 357 * when it tries to find suitable dynamic rules to send keep alive, so 358 * it will not see half updated state and ack sequences. Though the expire 359 * field updating looks racy for other protocols, the resolution (second) 360 * of expire field makes this kind of race harmless. 361 * XXX statistics' updating is _not_ MPsafe!!! 362 * XXX once UDP output path is fixed, we could use lockless dynamic rule 363 * hash table 364 */ 365 static ipfw_dyn_rule **ipfw_dyn_v = NULL; 366 static uint32_t dyn_buckets = 256; /* must be power of 2 */ 367 static uint32_t curr_dyn_buckets = 256; /* must be power of 2 */ 368 static uint32_t dyn_buckets_gen; /* generation of dyn buckets array */ 369 static struct lock dyn_lock; /* dynamic rules' hash table lock */ 370 371 static struct netmsg ipfw_timeout_netmsg; /* schedule ipfw timeout */ 372 static struct callout ipfw_timeout_h; 373 374 /* 375 * Timeouts for various events in handing dynamic rules. 376 */ 377 static uint32_t dyn_ack_lifetime = 300; 378 static uint32_t dyn_syn_lifetime = 20; 379 static uint32_t dyn_fin_lifetime = 1; 380 static uint32_t dyn_rst_lifetime = 1; 381 static uint32_t dyn_udp_lifetime = 10; 382 static uint32_t dyn_short_lifetime = 5; 383 384 /* 385 * Keepalives are sent if dyn_keepalive is set. They are sent every 386 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval 387 * seconds of lifetime of a rule. 388 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower 389 * than dyn_keepalive_period. 
390 */ 391 392 static uint32_t dyn_keepalive_interval = 20; 393 static uint32_t dyn_keepalive_period = 5; 394 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 395 396 static uint32_t dyn_count; /* # of dynamic rules */ 397 static uint32_t dyn_max = 4096; /* max # of dynamic rules */ 398 399 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLTYPE_INT | CTLFLAG_RW, 400 &dyn_buckets, 0, ipfw_sysctl_dyn_buckets, "I", "Number of dyn. buckets"); 401 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, 402 &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); 403 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, 404 &dyn_count, 0, "Number of dyn. rules"); 405 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, 406 &dyn_max, 0, "Max number of dyn. rules"); 407 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 408 &static_count, 0, "Number of static rules"); 409 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 410 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 411 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 412 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 413 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, 414 CTLTYPE_INT | CTLFLAG_RW, &dyn_fin_lifetime, 0, ipfw_sysctl_dyn_fin, "I", 415 "Lifetime of dyn. rules for fin"); 416 SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, 417 CTLTYPE_INT | CTLFLAG_RW, &dyn_rst_lifetime, 0, ipfw_sysctl_dyn_rst, "I", 418 "Lifetime of dyn. rules for rst"); 419 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 420 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 421 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 422 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 423 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 424 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 425 426 static ip_fw_chk_t ipfw_chk; 427 static void ipfw_tick(void *); 428 429 static __inline int 430 ipfw_free_rule(struct ip_fw *rule) 431 { 432 KASSERT(rule->cpuid == mycpuid, ("rule freed on cpu%d\n", mycpuid)); 433 KASSERT(rule->refcnt > 0, ("invalid refcnt %u\n", rule->refcnt)); 434 rule->refcnt--; 435 if (rule->refcnt == 0) { 436 kfree(rule, M_IPFW); 437 return 1; 438 } 439 return 0; 440 } 441 442 static void 443 ipfw_unref_rule(void *priv) 444 { 445 ipfw_free_rule(priv); 446 #ifdef KLD_MODULE 447 atomic_subtract_int(&ipfw_refcnt, 1); 448 #endif 449 } 450 451 static __inline void 452 ipfw_ref_rule(struct ip_fw *rule) 453 { 454 KASSERT(rule->cpuid == mycpuid, ("rule used on cpu%d\n", mycpuid)); 455 #ifdef KLD_MODULE 456 atomic_add_int(&ipfw_refcnt, 1); 457 #endif 458 rule->refcnt++; 459 } 460 461 /* 462 * This macro maps an ip pointer into a layer3 header pointer of type T 463 */ 464 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 465 466 static __inline int 467 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 468 { 469 int type = L3HDR(struct icmp,ip)->icmp_type; 470 471 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1 << type))); 472 } 473 474 #define TT ((1 << ICMP_ECHO) | \ 475 (1 << ICMP_ROUTERSOLICIT) | \ 476 (1 << ICMP_TSTAMP) | \ 477 (1 << ICMP_IREQ) | \ 478 (1 << ICMP_MASKREQ)) 479 480 static int 481 is_icmp_query(struct ip *ip) 482 { 483 int type = L3HDR(struct icmp, ip)->icmp_type; 484 485 return (type <= ICMP_MAXTYPE && (TT & (1 << type))); 486 } 487 488 #undef TT 489 490 /* 491 * The following checks use two arrays of 8 or 16 bits to store the 492 * bits that we want set or clear, respectively. They are in the 493 * low and high half of cmd->arg1 or cmd->d[0]. 494 * 495 * We scan options and store the bits we find set. We succeed if 496 * 497 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 498 * 499 * The code is sometimes optimized not to store additional variables. 
500 */ 501 502 static int 503 flags_match(ipfw_insn *cmd, uint8_t bits) 504 { 505 u_char want_clear; 506 bits = ~bits; 507 508 if (((cmd->arg1 & 0xff) & bits) != 0) 509 return 0; /* some bits we want set were clear */ 510 511 want_clear = (cmd->arg1 >> 8) & 0xff; 512 if ((want_clear & bits) != want_clear) 513 return 0; /* some bits we want clear were set */ 514 return 1; 515 } 516 517 static int 518 ipopts_match(struct ip *ip, ipfw_insn *cmd) 519 { 520 int optlen, bits = 0; 521 u_char *cp = (u_char *)(ip + 1); 522 int x = (ip->ip_hl << 2) - sizeof(struct ip); 523 524 for (; x > 0; x -= optlen, cp += optlen) { 525 int opt = cp[IPOPT_OPTVAL]; 526 527 if (opt == IPOPT_EOL) 528 break; 529 530 if (opt == IPOPT_NOP) { 531 optlen = 1; 532 } else { 533 optlen = cp[IPOPT_OLEN]; 534 if (optlen <= 0 || optlen > x) 535 return 0; /* invalid or truncated */ 536 } 537 538 switch (opt) { 539 case IPOPT_LSRR: 540 bits |= IP_FW_IPOPT_LSRR; 541 break; 542 543 case IPOPT_SSRR: 544 bits |= IP_FW_IPOPT_SSRR; 545 break; 546 547 case IPOPT_RR: 548 bits |= IP_FW_IPOPT_RR; 549 break; 550 551 case IPOPT_TS: 552 bits |= IP_FW_IPOPT_TS; 553 break; 554 555 default: 556 break; 557 } 558 } 559 return (flags_match(cmd, bits)); 560 } 561 562 static int 563 tcpopts_match(struct ip *ip, ipfw_insn *cmd) 564 { 565 int optlen, bits = 0; 566 struct tcphdr *tcp = L3HDR(struct tcphdr,ip); 567 u_char *cp = (u_char *)(tcp + 1); 568 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 569 570 for (; x > 0; x -= optlen, cp += optlen) { 571 int opt = cp[0]; 572 573 if (opt == TCPOPT_EOL) 574 break; 575 576 if (opt == TCPOPT_NOP) { 577 optlen = 1; 578 } else { 579 optlen = cp[1]; 580 if (optlen <= 0) 581 break; 582 } 583 584 switch (opt) { 585 case TCPOPT_MAXSEG: 586 bits |= IP_FW_TCPOPT_MSS; 587 break; 588 589 case TCPOPT_WINDOW: 590 bits |= IP_FW_TCPOPT_WINDOW; 591 break; 592 593 case TCPOPT_SACK_PERMITTED: 594 case TCPOPT_SACK: 595 bits |= IP_FW_TCPOPT_SACK; 596 break; 597 598 case TCPOPT_TIMESTAMP: 599 bits 
|= IP_FW_TCPOPT_TS; 600 break; 601 602 case TCPOPT_CC: 603 case TCPOPT_CCNEW: 604 case TCPOPT_CCECHO: 605 bits |= IP_FW_TCPOPT_CC; 606 break; 607 608 default: 609 break; 610 } 611 } 612 return (flags_match(cmd, bits)); 613 } 614 615 static int 616 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 617 { 618 if (ifp == NULL) /* no iface with this packet, match fails */ 619 return 0; 620 621 /* Check by name or by IP address */ 622 if (cmd->name[0] != '\0') { /* match by name */ 623 /* Check name */ 624 if (cmd->p.glob) { 625 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) 626 return(1); 627 } else { 628 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 629 return(1); 630 } 631 } else { 632 struct ifaddr_container *ifac; 633 634 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 635 struct ifaddr *ia = ifac->ifa; 636 637 if (ia->ifa_addr == NULL) 638 continue; 639 if (ia->ifa_addr->sa_family != AF_INET) 640 continue; 641 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 642 (ia->ifa_addr))->sin_addr.s_addr) 643 return(1); /* match */ 644 } 645 } 646 return(0); /* no match, fail ... */ 647 } 648 649 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 650 651 /* 652 * We enter here when we have a rule with O_LOG. 653 * XXX this function alone takes about 2Kbytes of code! 
654 */ 655 static void 656 ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, 657 struct mbuf *m, struct ifnet *oif) 658 { 659 char *action; 660 int limit_reached = 0; 661 char action2[40], proto[48], fragment[28]; 662 663 fragment[0] = '\0'; 664 proto[0] = '\0'; 665 666 if (f == NULL) { /* bogus pkt */ 667 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 668 669 if (verbose_limit != 0 && 670 ctx->ipfw_norule_counter >= verbose_limit) 671 return; 672 ctx->ipfw_norule_counter++; 673 if (ctx->ipfw_norule_counter == verbose_limit) 674 limit_reached = verbose_limit; 675 action = "Refuse"; 676 } else { /* O_LOG is the first action, find the real one */ 677 ipfw_insn *cmd = ACTION_PTR(f); 678 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 679 680 if (l->max_log != 0 && l->log_left == 0) 681 return; 682 l->log_left--; 683 if (l->log_left == 0) 684 limit_reached = l->max_log; 685 cmd += F_LEN(cmd); /* point to first action */ 686 if (cmd->opcode == O_PROB) 687 cmd += F_LEN(cmd); 688 689 action = action2; 690 switch (cmd->opcode) { 691 case O_DENY: 692 action = "Deny"; 693 break; 694 695 case O_REJECT: 696 if (cmd->arg1==ICMP_REJECT_RST) { 697 action = "Reset"; 698 } else if (cmd->arg1==ICMP_UNREACH_HOST) { 699 action = "Reject"; 700 } else { 701 ksnprintf(SNPARGS(action2, 0), "Unreach %d", 702 cmd->arg1); 703 } 704 break; 705 706 case O_ACCEPT: 707 action = "Accept"; 708 break; 709 710 case O_COUNT: 711 action = "Count"; 712 break; 713 714 case O_DIVERT: 715 ksnprintf(SNPARGS(action2, 0), "Divert %d", cmd->arg1); 716 break; 717 718 case O_TEE: 719 ksnprintf(SNPARGS(action2, 0), "Tee %d", cmd->arg1); 720 break; 721 722 case O_SKIPTO: 723 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", cmd->arg1); 724 break; 725 726 case O_PIPE: 727 ksnprintf(SNPARGS(action2, 0), "Pipe %d", cmd->arg1); 728 break; 729 730 case O_QUEUE: 731 ksnprintf(SNPARGS(action2, 0), "Queue %d", cmd->arg1); 732 break; 733 734 case O_FORWARD_IP: 735 { 736 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 737 int len; 
738 739 len = ksnprintf(SNPARGS(action2, 0), 740 "Forward to %s", 741 inet_ntoa(sa->sa.sin_addr)); 742 if (sa->sa.sin_port) { 743 ksnprintf(SNPARGS(action2, len), ":%d", 744 sa->sa.sin_port); 745 } 746 } 747 break; 748 749 default: 750 action = "UNKNOWN"; 751 break; 752 } 753 } 754 755 if (hlen == 0) { /* non-ip */ 756 ksnprintf(SNPARGS(proto, 0), "MAC"); 757 } else { 758 struct ip *ip = mtod(m, struct ip *); 759 /* these three are all aliases to the same thing */ 760 struct icmp *const icmp = L3HDR(struct icmp, ip); 761 struct tcphdr *const tcp = (struct tcphdr *)icmp; 762 struct udphdr *const udp = (struct udphdr *)icmp; 763 764 int ip_off, offset, ip_len; 765 int len; 766 767 if (eh != NULL) { /* layer 2 packets are as on the wire */ 768 ip_off = ntohs(ip->ip_off); 769 ip_len = ntohs(ip->ip_len); 770 } else { 771 ip_off = ip->ip_off; 772 ip_len = ip->ip_len; 773 } 774 offset = ip_off & IP_OFFMASK; 775 switch (ip->ip_p) { 776 case IPPROTO_TCP: 777 len = ksnprintf(SNPARGS(proto, 0), "TCP %s", 778 inet_ntoa(ip->ip_src)); 779 if (offset == 0) { 780 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 781 ntohs(tcp->th_sport), 782 inet_ntoa(ip->ip_dst), 783 ntohs(tcp->th_dport)); 784 } else { 785 ksnprintf(SNPARGS(proto, len), " %s", 786 inet_ntoa(ip->ip_dst)); 787 } 788 break; 789 790 case IPPROTO_UDP: 791 len = ksnprintf(SNPARGS(proto, 0), "UDP %s", 792 inet_ntoa(ip->ip_src)); 793 if (offset == 0) { 794 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 795 ntohs(udp->uh_sport), 796 inet_ntoa(ip->ip_dst), 797 ntohs(udp->uh_dport)); 798 } else { 799 ksnprintf(SNPARGS(proto, len), " %s", 800 inet_ntoa(ip->ip_dst)); 801 } 802 break; 803 804 case IPPROTO_ICMP: 805 if (offset == 0) { 806 len = ksnprintf(SNPARGS(proto, 0), 807 "ICMP:%u.%u ", 808 icmp->icmp_type, 809 icmp->icmp_code); 810 } else { 811 len = ksnprintf(SNPARGS(proto, 0), "ICMP "); 812 } 813 len += ksnprintf(SNPARGS(proto, len), "%s", 814 inet_ntoa(ip->ip_src)); 815 ksnprintf(SNPARGS(proto, len), " %s", 816 
inet_ntoa(ip->ip_dst)); 817 break; 818 819 default: 820 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 821 inet_ntoa(ip->ip_src)); 822 ksnprintf(SNPARGS(proto, len), " %s", 823 inet_ntoa(ip->ip_dst)); 824 break; 825 } 826 827 if (ip_off & (IP_MF | IP_OFFMASK)) { 828 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 829 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 830 offset << 3, (ip_off & IP_MF) ? "+" : ""); 831 } 832 } 833 834 if (oif || m->m_pkthdr.rcvif) { 835 log(LOG_SECURITY | LOG_INFO, 836 "ipfw: %d %s %s %s via %s%s\n", 837 f ? f->rulenum : -1, 838 action, proto, oif ? "out" : "in", 839 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 840 fragment); 841 } else { 842 log(LOG_SECURITY | LOG_INFO, 843 "ipfw: %d %s %s [no if info]%s\n", 844 f ? f->rulenum : -1, 845 action, proto, fragment); 846 } 847 848 if (limit_reached) { 849 log(LOG_SECURITY | LOG_NOTICE, 850 "ipfw: limit %d reached on entry %d\n", 851 limit_reached, f ? f->rulenum : -1); 852 } 853 } 854 855 #undef SNPARGS 856 857 /* 858 * IMPORTANT: the hash function for dynamic rules must be commutative 859 * in source and destination (ip,port), because rules are bidirectional 860 * and we want to find both in the same bucket. 861 */ 862 static __inline int 863 hash_packet(struct ipfw_flow_id *id) 864 { 865 uint32_t i; 866 867 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 868 i &= (curr_dyn_buckets - 1); 869 return i; 870 } 871 872 /** 873 * unlink a dynamic rule from a chain. prev is a pointer to 874 * the previous one, q is a pointer to the rule to delete, 875 * head is a pointer to the head of the queue. 876 * Modifies q and potentially also head. 
877 */ 878 #define UNLINK_DYN_RULE(prev, head, q) \ 879 do { \ 880 ipfw_dyn_rule *old_q = q; \ 881 \ 882 /* remove a refcount to the parent */ \ 883 if (q->dyn_type == O_LIMIT) \ 884 q->parent->count--; \ 885 DPRINTF("-- unlink entry 0x%08x %d -> 0x%08x %d, %d left\n", \ 886 q->id.src_ip, q->id.src_port, \ 887 q->id.dst_ip, q->id.dst_port, dyn_count - 1); \ 888 if (prev != NULL) \ 889 prev->next = q = q->next; \ 890 else \ 891 head = q = q->next; \ 892 KASSERT(dyn_count > 0, ("invalid dyn count %u\n", dyn_count)); \ 893 dyn_count--; \ 894 kfree(old_q, M_IPFW); \ 895 } while (0) 896 897 #define TIME_LEQ(a, b) ((int)((a) - (b)) <= 0) 898 899 /** 900 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 901 * 902 * If keep_me == NULL, rules are deleted even if not expired, 903 * otherwise only expired rules are removed. 904 * 905 * The value of the second parameter is also used to point to identify 906 * a rule we absolutely do not want to remove (e.g. because we are 907 * holding a reference to it -- this is the case with O_LIMIT_PARENT 908 * rules). The pointer is only used for comparison, so any non-null 909 * value will do. 910 */ 911 static void 912 remove_dyn_rule_locked(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 913 { 914 static uint32_t last_remove = 0; /* XXX */ 915 916 #define FORCE (keep_me == NULL) 917 918 ipfw_dyn_rule *prev, *q; 919 int i, pass = 0, max_pass = 0, unlinked = 0; 920 921 if (ipfw_dyn_v == NULL || dyn_count == 0) 922 return; 923 /* do not expire more than once per second, it is useless */ 924 if (!FORCE && last_remove == time_second) 925 return; 926 last_remove = time_second; 927 928 /* 929 * because O_LIMIT refer to parent rules, during the first pass only 930 * remove child and mark any pending LIMIT_PARENT, and remove 931 * them in a second pass. 
932 */ 933 next_pass: 934 for (i = 0; i < curr_dyn_buckets; i++) { 935 for (prev = NULL, q = ipfw_dyn_v[i]; q;) { 936 /* 937 * Logic can become complex here, so we split tests. 938 */ 939 if (q == keep_me) 940 goto next; 941 if (rule != NULL && rule->stub != q->stub) 942 goto next; /* not the one we are looking for */ 943 if (q->dyn_type == O_LIMIT_PARENT) { 944 /* 945 * handle parent in the second pass, 946 * record we need one. 947 */ 948 max_pass = 1; 949 if (pass == 0) 950 goto next; 951 if (FORCE && q->count != 0) { 952 /* XXX should not happen! */ 953 kprintf("OUCH! cannot remove rule, " 954 "count %d\n", q->count); 955 } 956 } else { 957 if (!FORCE && !TIME_LEQ(q->expire, time_second)) 958 goto next; 959 } 960 unlinked = 1; 961 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 962 continue; 963 next: 964 prev = q; 965 q = q->next; 966 } 967 } 968 if (pass++ < max_pass) 969 goto next_pass; 970 971 if (unlinked) 972 ++dyn_buckets_gen; 973 974 #undef FORCE 975 } 976 977 /** 978 * lookup a dynamic rule. 979 */ 980 static ipfw_dyn_rule * 981 lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 982 struct tcphdr *tcp) 983 { 984 /* 985 * stateful ipfw extensions. 986 * Lookup into dynamic session queue 987 */ 988 #define MATCH_REVERSE 0 989 #define MATCH_FORWARD 1 990 #define MATCH_NONE 2 991 #define MATCH_UNKNOWN 3 992 int i, dir = MATCH_NONE; 993 ipfw_dyn_rule *prev, *q=NULL; 994 995 if (ipfw_dyn_v == NULL) 996 goto done; /* not found */ 997 998 i = hash_packet(pkt); 999 for (prev = NULL, q = ipfw_dyn_v[i]; q != NULL;) { 1000 if (q->dyn_type == O_LIMIT_PARENT) 1001 goto next; 1002 1003 if (TIME_LEQ(q->expire, time_second)) { 1004 /* 1005 * Entry expired; skip. 
1006 * Let ipfw_tick() take care of it 1007 */ 1008 goto next; 1009 } 1010 1011 if (pkt->proto == q->id.proto) { 1012 if (pkt->src_ip == q->id.src_ip && 1013 pkt->dst_ip == q->id.dst_ip && 1014 pkt->src_port == q->id.src_port && 1015 pkt->dst_port == q->id.dst_port) { 1016 dir = MATCH_FORWARD; 1017 break; 1018 } 1019 if (pkt->src_ip == q->id.dst_ip && 1020 pkt->dst_ip == q->id.src_ip && 1021 pkt->src_port == q->id.dst_port && 1022 pkt->dst_port == q->id.src_port) { 1023 dir = MATCH_REVERSE; 1024 break; 1025 } 1026 } 1027 next: 1028 prev = q; 1029 q = q->next; 1030 } 1031 if (q == NULL) 1032 goto done; /* q = NULL, not found */ 1033 1034 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 1035 u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); 1036 1037 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 1038 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 1039 1040 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 1041 switch (q->state) { 1042 case TH_SYN: /* opening */ 1043 q->expire = time_second + dyn_syn_lifetime; 1044 break; 1045 1046 case BOTH_SYN: /* move to established */ 1047 case BOTH_SYN | TH_FIN : /* one side tries to close */ 1048 case BOTH_SYN | (TH_FIN << 8) : 1049 if (tcp) { 1050 uint32_t ack = ntohl(tcp->th_ack); 1051 1052 #define _SEQ_GE(a, b) ((int)(a) - (int)(b) >= 0) 1053 1054 if (dir == MATCH_FORWARD) { 1055 if (q->ack_fwd == 0 || 1056 _SEQ_GE(ack, q->ack_fwd)) 1057 q->ack_fwd = ack; 1058 else /* ignore out-of-sequence */ 1059 break; 1060 } else { 1061 if (q->ack_rev == 0 || 1062 _SEQ_GE(ack, q->ack_rev)) 1063 q->ack_rev = ack; 1064 else /* ignore out-of-sequence */ 1065 break; 1066 } 1067 #undef _SEQ_GE 1068 } 1069 q->expire = time_second + dyn_ack_lifetime; 1070 break; 1071 1072 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 1073 KKASSERT(dyn_fin_lifetime < dyn_keepalive_period); 1074 q->expire = time_second + dyn_fin_lifetime; 1075 break; 1076 1077 default: 1078 #if 0 1079 /* 1080 * reset or some invalid combination, but 
can also 1081 * occur if we use keep-state the wrong way. 1082 */ 1083 if ((q->state & ((TH_RST << 8) | TH_RST)) == 0) 1084 kprintf("invalid state: 0x%x\n", q->state); 1085 #endif 1086 KKASSERT(dyn_rst_lifetime < dyn_keepalive_period); 1087 q->expire = time_second + dyn_rst_lifetime; 1088 break; 1089 } 1090 } else if (pkt->proto == IPPROTO_UDP) { 1091 q->expire = time_second + dyn_udp_lifetime; 1092 } else { 1093 /* other protocols */ 1094 q->expire = time_second + dyn_short_lifetime; 1095 } 1096 done: 1097 if (match_direction) 1098 *match_direction = dir; 1099 return q; 1100 } 1101 1102 static struct ip_fw * 1103 lookup_rule(struct ipfw_flow_id *pkt, int *match_direction, struct tcphdr *tcp, 1104 uint16_t len, int *deny) 1105 { 1106 struct ip_fw *rule = NULL; 1107 ipfw_dyn_rule *q; 1108 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1109 uint32_t gen; 1110 1111 *deny = 0; 1112 gen = ctx->ipfw_gen; 1113 1114 lockmgr(&dyn_lock, LK_SHARED); 1115 1116 if (ctx->ipfw_gen != gen) { 1117 /* 1118 * Static rules had been change when we were waiting 1119 * for the dynamic hash table lock; deny this packet, 1120 * since it is _not_ known whether it is safe to keep 1121 * iterating the static rules. 
1122 */ 1123 *deny = 1; 1124 goto back; 1125 } 1126 1127 q = lookup_dyn_rule(pkt, match_direction, tcp); 1128 if (q == NULL) { 1129 rule = NULL; 1130 } else { 1131 rule = q->stub->rule[mycpuid]; 1132 KKASSERT(rule->stub == q->stub && rule->cpuid == mycpuid); 1133 1134 /* XXX */ 1135 q->pcnt++; 1136 q->bcnt += len; 1137 } 1138 back: 1139 lockmgr(&dyn_lock, LK_RELEASE); 1140 return rule; 1141 } 1142 1143 static void 1144 realloc_dynamic_table(void) 1145 { 1146 ipfw_dyn_rule **old_dyn_v; 1147 uint32_t old_curr_dyn_buckets; 1148 1149 KASSERT(dyn_buckets <= 65536 && (dyn_buckets & (dyn_buckets - 1)) == 0, 1150 ("invalid dyn_buckets %d\n", dyn_buckets)); 1151 1152 /* Save the current buckets array for later error recovery */ 1153 old_dyn_v = ipfw_dyn_v; 1154 old_curr_dyn_buckets = curr_dyn_buckets; 1155 1156 curr_dyn_buckets = dyn_buckets; 1157 for (;;) { 1158 ipfw_dyn_v = kmalloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 1159 M_IPFW, M_NOWAIT | M_ZERO); 1160 if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) 1161 break; 1162 1163 curr_dyn_buckets /= 2; 1164 if (curr_dyn_buckets <= old_curr_dyn_buckets && 1165 old_dyn_v != NULL) { 1166 /* 1167 * Don't try allocating smaller buckets array, reuse 1168 * the old one, which alreay contains enough buckets 1169 */ 1170 break; 1171 } 1172 } 1173 1174 if (ipfw_dyn_v != NULL) { 1175 if (old_dyn_v != NULL) 1176 kfree(old_dyn_v, M_IPFW); 1177 } else { 1178 /* Allocation failed, restore old buckets array */ 1179 ipfw_dyn_v = old_dyn_v; 1180 curr_dyn_buckets = old_curr_dyn_buckets; 1181 } 1182 1183 if (ipfw_dyn_v != NULL) 1184 ++dyn_buckets_gen; 1185 } 1186 1187 /** 1188 * Install state of type 'type' for a dynamic session. 1189 * The hash table contains two type of rules: 1190 * - regular rules (O_KEEP_STATE) 1191 * - rules for sessions with limited number of sess per user 1192 * (O_LIMIT). When they are created, the parent is 1193 * increased by 1, and decreased on delete. 
In this case,
 *   the third parameter is the parent rule and not the chain.
 * - "parent" rules for the above (O_LIMIT_PARENT).
 *
 * The new entry is pushed on the head of its hash bucket, dyn_count and
 * dyn_buckets_gen are incremented, and the entry is returned.  NULL is
 * returned if the hash table or the entry itself cannot be allocated.
 */
static ipfw_dyn_rule *
add_dyn_rule(struct ipfw_flow_id *id, uint8_t dyn_type, struct ip_fw *rule)
{
	ipfw_dyn_rule *dyn;
	int bucket;

	/*
	 * Make sure we have a hash table; an empty table is also rebuilt
	 * here when the requested size differs from the current one.
	 */
	if (ipfw_dyn_v == NULL ||
	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
		realloc_dynamic_table();
		if (ipfw_dyn_v == NULL)
			return NULL; /* failed ! */
	}

	/* Hash only after the table has its final size */
	bucket = hash_packet(id);

	dyn = kmalloc(sizeof(*dyn), M_IPFW, M_NOWAIT | M_ZERO);
	if (dyn == NULL) {
		kprintf ("sorry cannot allocate state\n");
		return NULL;
	}

	if (dyn_type == O_LIMIT) {
		/*
		 * 'rule' is really the parent entry: take a reference on
		 * it, remember it, and switch to its static rule.
		 */
		ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule;

		if (parent->dyn_type != O_LIMIT_PARENT)
			panic("invalid parent");
		parent->count++;
		dyn->parent = parent;
		rule = parent->stub->rule[mycpuid];
		KKASSERT(rule->stub == parent->stub);
	}
	KKASSERT(rule->cpuid == mycpuid && rule->stub != NULL);

	/* Fill in the entry */
	dyn->dyn_type = dyn_type;
	dyn->stub = rule->stub;
	dyn->id = *id;
	dyn->expire = time_second + dyn_syn_lifetime;
	dyn->pcnt = 0;
	dyn->bcnt = 0;
	dyn->count = 0;

	/* Link it at the head of its bucket */
	dyn->bucket = bucket;
	dyn->next = ipfw_dyn_v[bucket];
	ipfw_dyn_v[bucket] = dyn;

	dyn_count++;
	dyn_buckets_gen++;
	DPRINTF("-- add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
		dyn_type,
		dyn->id.src_ip, dyn->id.src_port,
		dyn->id.dst_ip, dyn->id.dst_port, dyn_count);
	return dyn;
}

/**
 * lookup dynamic parent rule using pkt and rule as search keys.
 * If the lookup fails, then install one.
1252 */ 1253 static ipfw_dyn_rule * 1254 lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 1255 { 1256 ipfw_dyn_rule *q; 1257 int i; 1258 1259 if (ipfw_dyn_v) { 1260 i = hash_packet(pkt); 1261 for (q = ipfw_dyn_v[i]; q != NULL; q = q->next) { 1262 if (q->dyn_type == O_LIMIT_PARENT && 1263 rule->stub == q->stub && 1264 pkt->proto == q->id.proto && 1265 pkt->src_ip == q->id.src_ip && 1266 pkt->dst_ip == q->id.dst_ip && 1267 pkt->src_port == q->id.src_port && 1268 pkt->dst_port == q->id.dst_port) { 1269 q->expire = time_second + dyn_short_lifetime; 1270 DPRINTF("lookup_dyn_parent found 0x%p\n", q); 1271 return q; 1272 } 1273 } 1274 } 1275 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 1276 } 1277 1278 /** 1279 * Install dynamic state for rule type cmd->o.opcode 1280 * 1281 * Returns 1 (failure) if state is not installed because of errors or because 1282 * session limitations are enforced. 1283 */ 1284 static int 1285 install_state_locked(struct ip_fw *rule, ipfw_insn_limit *cmd, 1286 struct ip_fw_args *args) 1287 { 1288 static int last_log; /* XXX */ 1289 1290 ipfw_dyn_rule *q; 1291 1292 DPRINTF("-- install state type %d 0x%08x %u -> 0x%08x %u\n", 1293 cmd->o.opcode, 1294 args->f_id.src_ip, args->f_id.src_port, 1295 args->f_id.dst_ip, args->f_id.dst_port); 1296 1297 q = lookup_dyn_rule(&args->f_id, NULL, NULL); 1298 if (q != NULL) { /* should never occur */ 1299 if (last_log != time_second) { 1300 last_log = time_second; 1301 kprintf(" install_state: entry already present, done\n"); 1302 } 1303 return 0; 1304 } 1305 1306 if (dyn_count >= dyn_max) { 1307 /* 1308 * Run out of slots, try to remove any expired rule. 
1309 */ 1310 remove_dyn_rule_locked(NULL, (ipfw_dyn_rule *)1); 1311 if (dyn_count >= dyn_max) { 1312 if (last_log != time_second) { 1313 last_log = time_second; 1314 kprintf("install_state: " 1315 "Too many dynamic rules\n"); 1316 } 1317 return 1; /* cannot install, notify caller */ 1318 } 1319 } 1320 1321 switch (cmd->o.opcode) { 1322 case O_KEEP_STATE: /* bidir rule */ 1323 if (add_dyn_rule(&args->f_id, O_KEEP_STATE, rule) == NULL) 1324 return 1; 1325 break; 1326 1327 case O_LIMIT: /* limit number of sessions */ 1328 { 1329 uint16_t limit_mask = cmd->limit_mask; 1330 struct ipfw_flow_id id; 1331 ipfw_dyn_rule *parent; 1332 1333 DPRINTF("installing dyn-limit rule %d\n", 1334 cmd->conn_limit); 1335 1336 id.dst_ip = id.src_ip = 0; 1337 id.dst_port = id.src_port = 0; 1338 id.proto = args->f_id.proto; 1339 1340 if (limit_mask & DYN_SRC_ADDR) 1341 id.src_ip = args->f_id.src_ip; 1342 if (limit_mask & DYN_DST_ADDR) 1343 id.dst_ip = args->f_id.dst_ip; 1344 if (limit_mask & DYN_SRC_PORT) 1345 id.src_port = args->f_id.src_port; 1346 if (limit_mask & DYN_DST_PORT) 1347 id.dst_port = args->f_id.dst_port; 1348 1349 parent = lookup_dyn_parent(&id, rule); 1350 if (parent == NULL) { 1351 kprintf("add parent failed\n"); 1352 return 1; 1353 } 1354 1355 if (parent->count >= cmd->conn_limit) { 1356 /* 1357 * See if we can remove some expired rule. 
1358 */ 1359 remove_dyn_rule_locked(rule, parent); 1360 if (parent->count >= cmd->conn_limit) { 1361 if (fw_verbose && 1362 last_log != time_second) { 1363 last_log = time_second; 1364 log(LOG_SECURITY | LOG_DEBUG, 1365 "drop session, " 1366 "too many entries\n"); 1367 } 1368 return 1; 1369 } 1370 } 1371 if (add_dyn_rule(&args->f_id, O_LIMIT, 1372 (struct ip_fw *)parent) == NULL) 1373 return 1; 1374 } 1375 break; 1376 default: 1377 kprintf("unknown dynamic rule type %u\n", cmd->o.opcode); 1378 return 1; 1379 } 1380 lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */ 1381 return 0; 1382 } 1383 1384 static int 1385 install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 1386 struct ip_fw_args *args, int *deny) 1387 { 1388 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1389 uint32_t gen; 1390 int ret = 0; 1391 1392 *deny = 0; 1393 gen = ctx->ipfw_gen; 1394 1395 lockmgr(&dyn_lock, LK_EXCLUSIVE); 1396 if (ctx->ipfw_gen != gen) { 1397 /* See the comment in lookup_rule() */ 1398 *deny = 1; 1399 } else { 1400 ret = install_state_locked(rule, cmd, args); 1401 } 1402 lockmgr(&dyn_lock, LK_RELEASE); 1403 1404 return ret; 1405 } 1406 1407 /* 1408 * Transmit a TCP packet, containing either a RST or a keepalive. 1409 * When flags & TH_RST, we are sending a RST packet, because of a 1410 * "reset" action matched the packet. 
1411 * Otherwise we are sending a keepalive, and flags & TH_ 1412 */ 1413 static void 1414 send_pkt(struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 1415 { 1416 struct mbuf *m; 1417 struct ip *ip; 1418 struct tcphdr *tcp; 1419 struct route sro; /* fake route */ 1420 1421 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1422 if (m == NULL) 1423 return; 1424 m->m_pkthdr.rcvif = NULL; 1425 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 1426 m->m_data += max_linkhdr; 1427 1428 ip = mtod(m, struct ip *); 1429 bzero(ip, m->m_len); 1430 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 1431 ip->ip_p = IPPROTO_TCP; 1432 tcp->th_off = 5; 1433 1434 /* 1435 * Assume we are sending a RST (or a keepalive in the reverse 1436 * direction), swap src and destination addresses and ports. 1437 */ 1438 ip->ip_src.s_addr = htonl(id->dst_ip); 1439 ip->ip_dst.s_addr = htonl(id->src_ip); 1440 tcp->th_sport = htons(id->dst_port); 1441 tcp->th_dport = htons(id->src_port); 1442 if (flags & TH_RST) { /* we are sending a RST */ 1443 if (flags & TH_ACK) { 1444 tcp->th_seq = htonl(ack); 1445 tcp->th_ack = htonl(0); 1446 tcp->th_flags = TH_RST; 1447 } else { 1448 if (flags & TH_SYN) 1449 seq++; 1450 tcp->th_seq = htonl(0); 1451 tcp->th_ack = htonl(seq); 1452 tcp->th_flags = TH_RST | TH_ACK; 1453 } 1454 } else { 1455 /* 1456 * We are sending a keepalive. flags & TH_SYN determines 1457 * the direction, forward if set, reverse if clear. 1458 * NOTE: seq and ack are always assumed to be correct 1459 * as set by the caller. This may be confusing... 1460 */ 1461 if (flags & TH_SYN) { 1462 /* 1463 * we have to rewrite the correct addresses! 
1464 */ 1465 ip->ip_dst.s_addr = htonl(id->dst_ip); 1466 ip->ip_src.s_addr = htonl(id->src_ip); 1467 tcp->th_dport = htons(id->dst_port); 1468 tcp->th_sport = htons(id->src_port); 1469 } 1470 tcp->th_seq = htonl(seq); 1471 tcp->th_ack = htonl(ack); 1472 tcp->th_flags = TH_ACK; 1473 } 1474 1475 /* 1476 * set ip_len to the payload size so we can compute 1477 * the tcp checksum on the pseudoheader 1478 * XXX check this, could save a couple of words ? 1479 */ 1480 ip->ip_len = htons(sizeof(struct tcphdr)); 1481 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 1482 1483 /* 1484 * now fill fields left out earlier 1485 */ 1486 ip->ip_ttl = ip_defttl; 1487 ip->ip_len = m->m_pkthdr.len; 1488 1489 bzero(&sro, sizeof(sro)); 1490 ip_rtaddr(ip->ip_dst, &sro); 1491 1492 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 1493 ip_output(m, NULL, &sro, 0, NULL, NULL); 1494 if (sro.ro_rt) 1495 RTFREE(sro.ro_rt); 1496 } 1497 1498 /* 1499 * sends a reject message, consuming the mbuf passed as an argument. 1500 */ 1501 static void 1502 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 1503 { 1504 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 1505 /* We need the IP header in host order for icmp_error(). 
*/ 1506 if (args->eh != NULL) { 1507 struct ip *ip = mtod(args->m, struct ip *); 1508 1509 ip->ip_len = ntohs(ip->ip_len); 1510 ip->ip_off = ntohs(ip->ip_off); 1511 } 1512 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 1513 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 1514 struct tcphdr *const tcp = 1515 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 1516 1517 if ((tcp->th_flags & TH_RST) == 0) { 1518 send_pkt(&args->f_id, ntohl(tcp->th_seq), 1519 ntohl(tcp->th_ack), tcp->th_flags | TH_RST); 1520 } 1521 m_freem(args->m); 1522 } else { 1523 m_freem(args->m); 1524 } 1525 args->m = NULL; 1526 } 1527 1528 /** 1529 * 1530 * Given an ip_fw *, lookup_next_rule will return a pointer 1531 * to the next rule, which can be either the jump 1532 * target (for skipto instructions) or the next one in the list (in 1533 * all other cases including a missing jump target). 1534 * The result is also written in the "next_rule" field of the rule. 1535 * Backward jumps are not allowed, so start looking from the next 1536 * rule... 1537 * 1538 * This never returns NULL -- in case we do not have an exact match, 1539 * the next rule is returned. When the ruleset is changed, 1540 * pointers are flushed so we are always correct. 
1541 */ 1542 1543 static struct ip_fw * 1544 lookup_next_rule(struct ip_fw *me) 1545 { 1546 struct ip_fw *rule = NULL; 1547 ipfw_insn *cmd; 1548 1549 /* look for action, in case it is a skipto */ 1550 cmd = ACTION_PTR(me); 1551 if (cmd->opcode == O_LOG) 1552 cmd += F_LEN(cmd); 1553 if (cmd->opcode == O_SKIPTO) { 1554 for (rule = me->next; rule; rule = rule->next) { 1555 if (rule->rulenum >= cmd->arg1) 1556 break; 1557 } 1558 } 1559 if (rule == NULL) /* failure or not a skipto */ 1560 rule = me->next; 1561 me->next_rule = rule; 1562 return rule; 1563 } 1564 1565 static int 1566 _ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 1567 enum ipfw_opcodes opcode, uid_t uid) 1568 { 1569 struct in_addr src_ip, dst_ip; 1570 struct inpcbinfo *pi; 1571 int wildcard; 1572 struct inpcb *pcb; 1573 1574 if (fid->proto == IPPROTO_TCP) { 1575 wildcard = 0; 1576 pi = &tcbinfo[mycpuid]; 1577 } else if (fid->proto == IPPROTO_UDP) { 1578 wildcard = 1; 1579 pi = &udbinfo; 1580 } else { 1581 return 0; 1582 } 1583 1584 /* 1585 * Values in 'fid' are in host byte order 1586 */ 1587 dst_ip.s_addr = htonl(fid->dst_ip); 1588 src_ip.s_addr = htonl(fid->src_ip); 1589 if (oif) { 1590 pcb = in_pcblookup_hash(pi, 1591 dst_ip, htons(fid->dst_port), 1592 src_ip, htons(fid->src_port), 1593 wildcard, oif); 1594 } else { 1595 pcb = in_pcblookup_hash(pi, 1596 src_ip, htons(fid->src_port), 1597 dst_ip, htons(fid->dst_port), 1598 wildcard, NULL); 1599 } 1600 if (pcb == NULL || pcb->inp_socket == NULL) 1601 return 0; 1602 1603 if (opcode == O_UID) { 1604 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 1605 return !socheckuid(pcb->inp_socket, uid); 1606 #undef socheckuid 1607 } else { 1608 return groupmember(uid, pcb->inp_socket->so_cred); 1609 } 1610 } 1611 1612 static int 1613 ipfw_match_uid(const struct ipfw_flow_id *fid, struct ifnet *oif, 1614 enum ipfw_opcodes opcode, uid_t uid, int *deny) 1615 { 1616 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1617 uint32_t gen; 1618 int match 
= 0; 1619 1620 *deny = 0; 1621 gen = ctx->ipfw_gen; 1622 1623 get_mplock(); 1624 if (gen != ctx->ipfw_gen) { 1625 /* See the comment in lookup_rule() */ 1626 *deny = 1; 1627 } else { 1628 match = _ipfw_match_uid(fid, oif, opcode, uid); 1629 } 1630 rel_mplock(); 1631 return match; 1632 } 1633 1634 /* 1635 * The main check routine for the firewall. 1636 * 1637 * All arguments are in args so we can modify them and return them 1638 * back to the caller. 1639 * 1640 * Parameters: 1641 * 1642 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 1643 * Starts with the IP header. 1644 * args->eh (in) Mac header if present, or NULL for layer3 packet. 1645 * args->oif Outgoing interface, or NULL if packet is incoming. 1646 * The incoming interface is in the mbuf. (in) 1647 * 1648 * args->rule Pointer to the last matching rule (in/out) 1649 * args->f_id Addresses grabbed from the packet (out) 1650 * 1651 * Return value: 1652 * 1653 * If the packet was denied/rejected and has been dropped, *m is equal 1654 * to NULL upon return. 1655 * 1656 * IP_FW_DENY the packet must be dropped. 1657 * IP_FW_PASS The packet is to be accepted and routed normally. 1658 * IP_FW_DIVERT Divert the packet to port (args->cookie) 1659 * IP_FW_TEE Tee the packet to port (args->cookie) 1660 * IP_FW_DUMMYNET Send the packet to pipe/queue (args->cookie) 1661 */ 1662 1663 static int 1664 ipfw_chk(struct ip_fw_args *args) 1665 { 1666 /* 1667 * Local variables hold state during the processing of a packet. 1668 * 1669 * IMPORTANT NOTE: to speed up the processing of rules, there 1670 * are some assumption on the values of the variables, which 1671 * are documented here. Should you change them, please check 1672 * the implementation of the various instructions to make sure 1673 * that they still work. 1674 * 1675 * args->eh The MAC header. It is non-null for a layer2 1676 * packet, it is NULL for a layer-3 packet. 1677 * 1678 * m | args->m Pointer to the mbuf, as received from the caller. 
1679 * It may change if ipfw_chk() does an m_pullup, or if it 1680 * consumes the packet because it calls send_reject(). 1681 * XXX This has to change, so that ipfw_chk() never modifies 1682 * or consumes the buffer. 1683 * ip is simply an alias of the value of m, and it is kept 1684 * in sync with it (the packet is supposed to start with 1685 * the ip header). 1686 */ 1687 struct mbuf *m = args->m; 1688 struct ip *ip = mtod(m, struct ip *); 1689 1690 /* 1691 * oif | args->oif If NULL, ipfw_chk has been called on the 1692 * inbound path (ether_input, ip_input). 1693 * If non-NULL, ipfw_chk has been called on the outbound path 1694 * (ether_output, ip_output). 1695 */ 1696 struct ifnet *oif = args->oif; 1697 1698 struct ip_fw *f = NULL; /* matching rule */ 1699 int retval = IP_FW_PASS; 1700 struct m_tag *mtag; 1701 struct divert_info *divinfo; 1702 1703 /* 1704 * hlen The length of the IPv4 header. 1705 * hlen >0 means we have an IPv4 packet. 1706 */ 1707 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 1708 1709 /* 1710 * offset The offset of a fragment. offset != 0 means that 1711 * we have a fragment at this offset of an IPv4 packet. 1712 * offset == 0 means that (if this is an IPv4 packet) 1713 * this is the first or only fragment. 1714 */ 1715 u_short offset = 0; 1716 1717 /* 1718 * Local copies of addresses. They are only valid if we have 1719 * an IP packet. 1720 * 1721 * proto The protocol. Set to 0 for non-ip packets, 1722 * or to the protocol read from the packet otherwise. 1723 * proto != 0 means that we have an IPv4 packet. 1724 * 1725 * src_port, dst_port port numbers, in HOST format. Only 1726 * valid for TCP and UDP packets. 1727 * 1728 * src_ip, dst_ip ip addresses, in NETWORK format. 1729 * Only valid for IPv4 packets. 
1730 */ 1731 uint8_t proto; 1732 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 1733 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1734 uint16_t ip_len = 0; 1735 1736 /* 1737 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1738 * MATCH_NONE when checked and not matched (dyn_f = NULL), 1739 * MATCH_FORWARD or MATCH_REVERSE otherwise (dyn_f != NULL) 1740 */ 1741 int dyn_dir = MATCH_UNKNOWN; 1742 struct ip_fw *dyn_f = NULL; 1743 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 1744 1745 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) 1746 return IP_FW_PASS; /* accept */ 1747 1748 if (args->eh == NULL || /* layer 3 packet */ 1749 (m->m_pkthdr.len >= sizeof(struct ip) && 1750 ntohs(args->eh->ether_type) == ETHERTYPE_IP)) 1751 hlen = ip->ip_hl << 2; 1752 1753 /* 1754 * Collect parameters into local variables for faster matching. 1755 */ 1756 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 1757 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1758 goto after_ip_checks; 1759 } 1760 1761 proto = args->f_id.proto = ip->ip_p; 1762 src_ip = ip->ip_src; 1763 dst_ip = ip->ip_dst; 1764 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 1765 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1766 ip_len = ntohs(ip->ip_len); 1767 } else { 1768 offset = ip->ip_off & IP_OFFMASK; 1769 ip_len = ip->ip_len; 1770 } 1771 1772 #define PULLUP_TO(len) \ 1773 do { \ 1774 if (m->m_len < (len)) { \ 1775 args->m = m = m_pullup(m, (len));\ 1776 if (m == NULL) \ 1777 goto pullup_failed; \ 1778 ip = mtod(m, struct ip *); \ 1779 } \ 1780 } while (0) 1781 1782 if (offset == 0) { 1783 switch (proto) { 1784 case IPPROTO_TCP: 1785 { 1786 struct tcphdr *tcp; 1787 1788 PULLUP_TO(hlen + sizeof(struct tcphdr)); 1789 tcp = L3HDR(struct tcphdr, ip); 1790 dst_port = tcp->th_dport; 1791 src_port = tcp->th_sport; 1792 args->f_id.flags = tcp->th_flags; 1793 } 1794 break; 1795 1796 case IPPROTO_UDP: 1797 { 1798 struct udphdr *udp; 1799 1800 PULLUP_TO(hlen + 
sizeof(struct udphdr)); 1801 udp = L3HDR(struct udphdr, ip); 1802 dst_port = udp->uh_dport; 1803 src_port = udp->uh_sport; 1804 } 1805 break; 1806 1807 case IPPROTO_ICMP: 1808 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 1809 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 1810 break; 1811 1812 default: 1813 break; 1814 } 1815 } 1816 1817 #undef PULLUP_TO 1818 1819 args->f_id.src_ip = ntohl(src_ip.s_addr); 1820 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1821 args->f_id.src_port = src_port = ntohs(src_port); 1822 args->f_id.dst_port = dst_port = ntohs(dst_port); 1823 1824 after_ip_checks: 1825 if (args->rule) { 1826 /* 1827 * Packet has already been tagged. Look for the next rule 1828 * to restart processing. 1829 * 1830 * If fw_one_pass != 0 then just accept it. 1831 * XXX should not happen here, but optimized out in 1832 * the caller. 1833 */ 1834 if (fw_one_pass) 1835 return IP_FW_PASS; 1836 1837 /* This rule is being/has been flushed */ 1838 if (ipfw_flushing) 1839 return IP_FW_DENY; 1840 1841 KASSERT(args->rule->cpuid == mycpuid, 1842 ("rule used on cpu%d\n", mycpuid)); 1843 1844 /* This rule was deleted */ 1845 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 1846 return IP_FW_DENY; 1847 1848 f = args->rule->next_rule; 1849 if (f == NULL) 1850 f = lookup_next_rule(args->rule); 1851 } else { 1852 /* 1853 * Find the starting rule. It can be either the first 1854 * one, or the one after divert_rule if asked so. 
1855 */ 1856 int skipto; 1857 1858 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 1859 if (mtag != NULL) { 1860 divinfo = m_tag_data(mtag); 1861 skipto = divinfo->skipto; 1862 } else { 1863 skipto = 0; 1864 } 1865 1866 f = ctx->ipfw_layer3_chain; 1867 if (args->eh == NULL && skipto != 0) { 1868 /* No skipto during rule flushing */ 1869 if (ipfw_flushing) 1870 return IP_FW_DENY; 1871 1872 if (skipto >= IPFW_DEFAULT_RULE) 1873 return IP_FW_DENY; /* invalid */ 1874 1875 while (f && f->rulenum <= skipto) 1876 f = f->next; 1877 if (f == NULL) /* drop packet */ 1878 return IP_FW_DENY; 1879 } else if (ipfw_flushing) { 1880 /* Rules are being flushed; skip to default rule */ 1881 f = ctx->ipfw_default_rule; 1882 } 1883 } 1884 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 1885 m_tag_delete(m, mtag); 1886 1887 /* 1888 * Now scan the rules, and parse microinstructions for each rule. 1889 */ 1890 for (; f; f = f->next) { 1891 int l, cmdlen; 1892 ipfw_insn *cmd; 1893 int skip_or; /* skip rest of OR block */ 1894 1895 again: 1896 if (ctx->ipfw_set_disable & (1 << f->set)) 1897 continue; 1898 1899 skip_or = 0; 1900 for (l = f->cmd_len, cmd = f->cmd; l > 0; 1901 l -= cmdlen, cmd += cmdlen) { 1902 int match, deny; 1903 1904 /* 1905 * check_body is a jump target used when we find a 1906 * CHECK_STATE, and need to jump to the body of 1907 * the target rule. 1908 */ 1909 1910 check_body: 1911 cmdlen = F_LEN(cmd); 1912 /* 1913 * An OR block (insn_1 || .. || insn_n) has the 1914 * F_OR bit set in all but the last instruction. 1915 * The first match will set "skip_or", and cause 1916 * the following instructions to be skipped until 1917 * past the one with the F_OR bit clear. 
1918 */ 1919 if (skip_or) { /* skip this instruction */ 1920 if ((cmd->len & F_OR) == 0) 1921 skip_or = 0; /* next one is good */ 1922 continue; 1923 } 1924 match = 0; /* set to 1 if we succeed */ 1925 1926 switch (cmd->opcode) { 1927 /* 1928 * The first set of opcodes compares the packet's 1929 * fields with some pattern, setting 'match' if a 1930 * match is found. At the end of the loop there is 1931 * logic to deal with F_NOT and F_OR flags associated 1932 * with the opcode. 1933 */ 1934 case O_NOP: 1935 match = 1; 1936 break; 1937 1938 case O_FORWARD_MAC: 1939 kprintf("ipfw: opcode %d unimplemented\n", 1940 cmd->opcode); 1941 break; 1942 1943 case O_GID: 1944 case O_UID: 1945 /* 1946 * We only check offset == 0 && proto != 0, 1947 * as this ensures that we have an IPv4 1948 * packet with the ports info. 1949 */ 1950 if (offset!=0) 1951 break; 1952 1953 match = ipfw_match_uid(&args->f_id, oif, 1954 cmd->opcode, 1955 (uid_t)((ipfw_insn_u32 *)cmd)->d[0], 1956 &deny); 1957 if (deny) 1958 return IP_FW_DENY; 1959 break; 1960 1961 case O_RECV: 1962 match = iface_match(m->m_pkthdr.rcvif, 1963 (ipfw_insn_if *)cmd); 1964 break; 1965 1966 case O_XMIT: 1967 match = iface_match(oif, (ipfw_insn_if *)cmd); 1968 break; 1969 1970 case O_VIA: 1971 match = iface_match(oif ? 
oif : 1972 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 1973 break; 1974 1975 case O_MACADDR2: 1976 if (args->eh != NULL) { /* have MAC header */ 1977 uint32_t *want = (uint32_t *) 1978 ((ipfw_insn_mac *)cmd)->addr; 1979 uint32_t *mask = (uint32_t *) 1980 ((ipfw_insn_mac *)cmd)->mask; 1981 uint32_t *hdr = (uint32_t *)args->eh; 1982 1983 match = 1984 (want[0] == (hdr[0] & mask[0]) && 1985 want[1] == (hdr[1] & mask[1]) && 1986 want[2] == (hdr[2] & mask[2])); 1987 } 1988 break; 1989 1990 case O_MAC_TYPE: 1991 if (args->eh != NULL) { 1992 uint16_t t = 1993 ntohs(args->eh->ether_type); 1994 uint16_t *p = 1995 ((ipfw_insn_u16 *)cmd)->ports; 1996 int i; 1997 1998 /* Special vlan handling */ 1999 if (m->m_flags & M_VLANTAG) 2000 t = ETHERTYPE_VLAN; 2001 2002 for (i = cmdlen - 1; !match && i > 0; 2003 i--, p += 2) { 2004 match = 2005 (t >= p[0] && t <= p[1]); 2006 } 2007 } 2008 break; 2009 2010 case O_FRAG: 2011 match = (hlen > 0 && offset != 0); 2012 break; 2013 2014 case O_IN: /* "out" is "not in" */ 2015 match = (oif == NULL); 2016 break; 2017 2018 case O_LAYER2: 2019 match = (args->eh != NULL); 2020 break; 2021 2022 case O_PROTO: 2023 /* 2024 * We do not allow an arg of 0 so the 2025 * check of "proto" only suffices. 2026 */ 2027 match = (proto == cmd->arg1); 2028 break; 2029 2030 case O_IP_SRC: 2031 match = (hlen > 0 && 2032 ((ipfw_insn_ip *)cmd)->addr.s_addr == 2033 src_ip.s_addr); 2034 break; 2035 2036 case O_IP_SRC_MASK: 2037 match = (hlen > 0 && 2038 ((ipfw_insn_ip *)cmd)->addr.s_addr == 2039 (src_ip.s_addr & 2040 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 2041 break; 2042 2043 case O_IP_SRC_ME: 2044 if (hlen > 0) { 2045 struct ifnet *tif; 2046 2047 tif = INADDR_TO_IFP(&src_ip); 2048 match = (tif != NULL); 2049 } 2050 break; 2051 2052 case O_IP_DST_SET: 2053 case O_IP_SRC_SET: 2054 if (hlen > 0) { 2055 uint32_t *d = (uint32_t *)(cmd + 1); 2056 uint32_t addr = 2057 cmd->opcode == O_IP_DST_SET ? 
2058 args->f_id.dst_ip : 2059 args->f_id.src_ip; 2060 2061 if (addr < d[0]) 2062 break; 2063 addr -= d[0]; /* subtract base */ 2064 match = 2065 (addr < cmd->arg1) && 2066 (d[1 + (addr >> 5)] & 2067 (1 << (addr & 0x1f))); 2068 } 2069 break; 2070 2071 case O_IP_DST: 2072 match = (hlen > 0 && 2073 ((ipfw_insn_ip *)cmd)->addr.s_addr == 2074 dst_ip.s_addr); 2075 break; 2076 2077 case O_IP_DST_MASK: 2078 match = (hlen > 0) && 2079 (((ipfw_insn_ip *)cmd)->addr.s_addr == 2080 (dst_ip.s_addr & 2081 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 2082 break; 2083 2084 case O_IP_DST_ME: 2085 if (hlen > 0) { 2086 struct ifnet *tif; 2087 2088 tif = INADDR_TO_IFP(&dst_ip); 2089 match = (tif != NULL); 2090 } 2091 break; 2092 2093 case O_IP_SRCPORT: 2094 case O_IP_DSTPORT: 2095 /* 2096 * offset == 0 && proto != 0 is enough 2097 * to guarantee that we have an IPv4 2098 * packet with port info. 2099 */ 2100 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 2101 && offset == 0) { 2102 uint16_t x = 2103 (cmd->opcode == O_IP_SRCPORT) ? 
2104 src_port : dst_port ; 2105 uint16_t *p = 2106 ((ipfw_insn_u16 *)cmd)->ports; 2107 int i; 2108 2109 for (i = cmdlen - 1; !match && i > 0; 2110 i--, p += 2) { 2111 match = 2112 (x >= p[0] && x <= p[1]); 2113 } 2114 } 2115 break; 2116 2117 case O_ICMPTYPE: 2118 match = (offset == 0 && proto==IPPROTO_ICMP && 2119 icmptype_match(ip, (ipfw_insn_u32 *)cmd)); 2120 break; 2121 2122 case O_IPOPT: 2123 match = (hlen > 0 && ipopts_match(ip, cmd)); 2124 break; 2125 2126 case O_IPVER: 2127 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 2128 break; 2129 2130 case O_IPTTL: 2131 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 2132 break; 2133 2134 case O_IPID: 2135 match = (hlen > 0 && 2136 cmd->arg1 == ntohs(ip->ip_id)); 2137 break; 2138 2139 case O_IPLEN: 2140 match = (hlen > 0 && cmd->arg1 == ip_len); 2141 break; 2142 2143 case O_IPPRECEDENCE: 2144 match = (hlen > 0 && 2145 (cmd->arg1 == (ip->ip_tos & 0xe0))); 2146 break; 2147 2148 case O_IPTOS: 2149 match = (hlen > 0 && 2150 flags_match(cmd, ip->ip_tos)); 2151 break; 2152 2153 case O_TCPFLAGS: 2154 match = (proto == IPPROTO_TCP && offset == 0 && 2155 flags_match(cmd, 2156 L3HDR(struct tcphdr,ip)->th_flags)); 2157 break; 2158 2159 case O_TCPOPTS: 2160 match = (proto == IPPROTO_TCP && offset == 0 && 2161 tcpopts_match(ip, cmd)); 2162 break; 2163 2164 case O_TCPSEQ: 2165 match = (proto == IPPROTO_TCP && offset == 0 && 2166 ((ipfw_insn_u32 *)cmd)->d[0] == 2167 L3HDR(struct tcphdr,ip)->th_seq); 2168 break; 2169 2170 case O_TCPACK: 2171 match = (proto == IPPROTO_TCP && offset == 0 && 2172 ((ipfw_insn_u32 *)cmd)->d[0] == 2173 L3HDR(struct tcphdr,ip)->th_ack); 2174 break; 2175 2176 case O_TCPWIN: 2177 match = (proto == IPPROTO_TCP && offset == 0 && 2178 cmd->arg1 == 2179 L3HDR(struct tcphdr,ip)->th_win); 2180 break; 2181 2182 case O_ESTAB: 2183 /* reject packets which have SYN only */ 2184 /* XXX should i also check for TH_ACK ? 
*/ 2185 match = (proto == IPPROTO_TCP && offset == 0 && 2186 (L3HDR(struct tcphdr,ip)->th_flags & 2187 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 2188 break; 2189 2190 case O_LOG: 2191 if (fw_verbose) 2192 ipfw_log(f, hlen, args->eh, m, oif); 2193 match = 1; 2194 break; 2195 2196 case O_PROB: 2197 match = (krandom() < 2198 ((ipfw_insn_u32 *)cmd)->d[0]); 2199 break; 2200 2201 /* 2202 * The second set of opcodes represents 'actions', 2203 * i.e. the terminal part of a rule once the packet 2204 * matches all previous patterns. 2205 * Typically there is only one action for each rule, 2206 * and the opcode is stored at the end of the rule 2207 * (but there are exceptions -- see below). 2208 * 2209 * In general, here we set retval and terminate the 2210 * outer loop (would be a 'break 3' in some language, 2211 * but we need to do a 'goto done'). 2212 * 2213 * Exceptions: 2214 * O_COUNT and O_SKIPTO actions: 2215 * instead of terminating, we jump to the next rule 2216 * ('goto next_rule', equivalent to a 'break 2'), 2217 * or to the SKIPTO target ('goto again' after 2218 * having set f, cmd and l), respectively. 2219 * 2220 * O_LIMIT and O_KEEP_STATE: these opcodes are 2221 * not real 'actions', and are stored right 2222 * before the 'action' part of the rule. 2223 * These opcodes try to install an entry in the 2224 * state tables; if successful, we continue with 2225 * the next opcode (match=1; break;), otherwise 2226 * the packet must be dropped ('goto done' after 2227 * setting retval). If static rules are changed 2228 * during the state installation, the packet will 2229 * be dropped and rule's stats will not beupdated 2230 * ('return IP_FW_DENY'). 2231 * 2232 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 2233 * cause a lookup of the state table, and a jump 2234 * to the 'action' part of the parent rule 2235 * ('goto check_body') if an entry is found, or 2236 * (CHECK_STATE only) a jump to the next rule if 2237 * the entry is not found ('goto next_rule'). 
2238 * The result of the lookup is cached to make 2239 * further instances of these opcodes are 2240 * effectively NOPs. If static rules are changed 2241 * during the state looking up, the packet will 2242 * be dropped and rule's stats will not be updated 2243 * ('return IP_FW_DENY'). 2244 */ 2245 case O_LIMIT: 2246 case O_KEEP_STATE: 2247 if (!(f->rule_flags & IPFW_RULE_F_STATE)) { 2248 kprintf("%s rule (%d) is not ready " 2249 "on cpu%d\n", 2250 cmd->opcode == O_LIMIT ? 2251 "limit" : "keep state", 2252 f->rulenum, f->cpuid); 2253 goto next_rule; 2254 } 2255 if (install_state(f, 2256 (ipfw_insn_limit *)cmd, args, &deny)) { 2257 if (deny) 2258 return IP_FW_DENY; 2259 2260 retval = IP_FW_DENY; 2261 goto done; /* error/limit violation */ 2262 } 2263 if (deny) 2264 return IP_FW_DENY; 2265 match = 1; 2266 break; 2267 2268 case O_PROBE_STATE: 2269 case O_CHECK_STATE: 2270 /* 2271 * dynamic rules are checked at the first 2272 * keep-state or check-state occurrence, 2273 * with the result being stored in dyn_dir. 2274 * The compiler introduces a PROBE_STATE 2275 * instruction for us when we have a 2276 * KEEP_STATE (because PROBE_STATE needs 2277 * to be run first). 2278 */ 2279 if (dyn_dir == MATCH_UNKNOWN) { 2280 dyn_f = lookup_rule(&args->f_id, 2281 &dyn_dir, 2282 proto == IPPROTO_TCP ? 2283 L3HDR(struct tcphdr, ip) : NULL, 2284 ip_len, &deny); 2285 if (deny) 2286 return IP_FW_DENY; 2287 if (dyn_f != NULL) { 2288 /* 2289 * Found a rule from a dynamic 2290 * entry; jump to the 'action' 2291 * part of the rule. 2292 */ 2293 f = dyn_f; 2294 cmd = ACTION_PTR(f); 2295 l = f->cmd_len - f->act_ofs; 2296 goto check_body; 2297 } 2298 } 2299 /* 2300 * Dynamic entry not found. If CHECK_STATE, 2301 * skip to next rule, if PROBE_STATE just 2302 * ignore and continue with next opcode. 
2303 */ 2304 if (cmd->opcode == O_CHECK_STATE) 2305 goto next_rule; 2306 else if (!(f->rule_flags & IPFW_RULE_F_STATE)) 2307 goto next_rule; /* not ready yet */ 2308 match = 1; 2309 break; 2310 2311 case O_ACCEPT: 2312 retval = IP_FW_PASS; /* accept */ 2313 goto done; 2314 2315 case O_PIPE: 2316 case O_QUEUE: 2317 args->rule = f; /* report matching rule */ 2318 args->cookie = cmd->arg1; 2319 retval = IP_FW_DUMMYNET; 2320 goto done; 2321 2322 case O_DIVERT: 2323 case O_TEE: 2324 if (args->eh) /* not on layer 2 */ 2325 break; 2326 2327 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 2328 sizeof(*divinfo), MB_DONTWAIT); 2329 if (mtag == NULL) { 2330 retval = IP_FW_DENY; 2331 goto done; 2332 } 2333 divinfo = m_tag_data(mtag); 2334 2335 divinfo->skipto = f->rulenum; 2336 divinfo->port = cmd->arg1; 2337 divinfo->tee = (cmd->opcode == O_TEE); 2338 m_tag_prepend(m, mtag); 2339 2340 args->cookie = cmd->arg1; 2341 retval = (cmd->opcode == O_DIVERT) ? 2342 IP_FW_DIVERT : IP_FW_TEE; 2343 goto done; 2344 2345 case O_COUNT: 2346 case O_SKIPTO: 2347 f->pcnt++; /* update stats */ 2348 f->bcnt += ip_len; 2349 f->timestamp = time_second; 2350 if (cmd->opcode == O_COUNT) 2351 goto next_rule; 2352 /* handle skipto */ 2353 if (f->next_rule == NULL) 2354 lookup_next_rule(f); 2355 f = f->next_rule; 2356 goto again; 2357 2358 case O_REJECT: 2359 /* 2360 * Drop the packet and send a reject notice 2361 * if the packet is not ICMP (or is an ICMP 2362 * query), and it is not multicast/broadcast. 2363 */ 2364 if (hlen > 0 && 2365 (proto != IPPROTO_ICMP || 2366 is_icmp_query(ip)) && 2367 !(m->m_flags & (M_BCAST|M_MCAST)) && 2368 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 2369 /* 2370 * Update statistics before the possible 2371 * blocking 'send_reject' 2372 */ 2373 f->pcnt++; 2374 f->bcnt += ip_len; 2375 f->timestamp = time_second; 2376 2377 send_reject(args, cmd->arg1, 2378 offset,ip_len); 2379 m = args->m; 2380 2381 /* 2382 * Return directly here, rule stats 2383 * have been updated above. 
2384 */ 2385 return IP_FW_DENY; 2386 } 2387 /* FALLTHROUGH */ 2388 case O_DENY: 2389 retval = IP_FW_DENY; 2390 goto done; 2391 2392 case O_FORWARD_IP: 2393 if (args->eh) /* not valid on layer2 pkts */ 2394 break; 2395 if (!dyn_f || dyn_dir == MATCH_FORWARD) { 2396 struct sockaddr_in *sin; 2397 2398 mtag = m_tag_get(PACKET_TAG_IPFORWARD, 2399 sizeof(*sin), MB_DONTWAIT); 2400 if (mtag == NULL) { 2401 retval = IP_FW_DENY; 2402 goto done; 2403 } 2404 sin = m_tag_data(mtag); 2405 2406 /* Structure copy */ 2407 *sin = ((ipfw_insn_sa *)cmd)->sa; 2408 2409 m_tag_prepend(m, mtag); 2410 m->m_pkthdr.fw_flags |= 2411 IPFORWARD_MBUF_TAGGED; 2412 } 2413 retval = IP_FW_PASS; 2414 goto done; 2415 2416 default: 2417 panic("-- unknown opcode %d\n", cmd->opcode); 2418 } /* end of switch() on opcodes */ 2419 2420 if (cmd->len & F_NOT) 2421 match = !match; 2422 2423 if (match) { 2424 if (cmd->len & F_OR) 2425 skip_or = 1; 2426 } else { 2427 if (!(cmd->len & F_OR)) /* not an OR block, */ 2428 break; /* try next rule */ 2429 } 2430 2431 } /* end of inner for, scan opcodes */ 2432 2433 next_rule:; /* try next rule */ 2434 2435 } /* end of outer for, scan rules */ 2436 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 2437 return IP_FW_DENY; 2438 2439 done: 2440 /* Update statistics */ 2441 f->pcnt++; 2442 f->bcnt += ip_len; 2443 f->timestamp = time_second; 2444 return retval; 2445 2446 pullup_failed: 2447 if (fw_verbose) 2448 kprintf("pullup failed\n"); 2449 return IP_FW_DENY; 2450 } 2451 2452 static void 2453 ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 2454 { 2455 struct m_tag *mtag; 2456 struct dn_pkt *pkt; 2457 ipfw_insn *cmd; 2458 const struct ipfw_flow_id *id; 2459 struct dn_flow_id *fid; 2460 2461 M_ASSERTPKTHDR(m); 2462 2463 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT); 2464 if (mtag == NULL) { 2465 m_freem(m); 2466 return; 2467 } 2468 m_tag_prepend(m, mtag); 2469 2470 pkt = m_tag_data(mtag); 2471 bzero(pkt, 
sizeof(*pkt)); 2472 2473 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 2474 if (cmd->opcode == O_LOG) 2475 cmd += F_LEN(cmd); 2476 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 2477 ("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode)); 2478 2479 pkt->dn_m = m; 2480 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 2481 pkt->ifp = fwa->oif; 2482 pkt->pipe_nr = pipe_nr; 2483 2484 pkt->cpuid = mycpuid; 2485 if (curthread->td_flags & TDF_NETWORK) { 2486 pkt->msgport = &curthread->td_msgport; 2487 } else { 2488 /* 2489 * This could happen: 2490 * - If a gratuitous arp request sent by us is going to 2491 * be added to dummynet(4) pipe/queue. 2492 * - Other conditions ... 2493 * 2494 * We can't use current thread's msgport (since its 2495 * behaviour is unknown), so netisrX's msgport is used. 2496 */ 2497 pkt->msgport = cpu_portfn(pkt->cpuid); 2498 } 2499 2500 id = &fwa->f_id; 2501 fid = &pkt->id; 2502 fid->fid_dst_ip = id->dst_ip; 2503 fid->fid_src_ip = id->src_ip; 2504 fid->fid_dst_port = id->dst_port; 2505 fid->fid_src_port = id->src_port; 2506 fid->fid_proto = id->proto; 2507 fid->fid_flags = id->flags; 2508 2509 ipfw_ref_rule(fwa->rule); 2510 pkt->dn_priv = fwa->rule; 2511 pkt->dn_unref_priv = ipfw_unref_rule; 2512 2513 if (cmd->opcode == O_PIPE) 2514 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 2515 2516 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 2517 } 2518 2519 /* 2520 * When a rule is added/deleted, clear the next_rule pointers in all rules. 2521 * These will be reconstructed on the fly as packets are matched. 2522 * Must be called at splimp(). 
2523 */ 2524 static void 2525 ipfw_flush_rule_ptrs(struct ipfw_context *ctx) 2526 { 2527 struct ip_fw *rule; 2528 2529 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 2530 rule->next_rule = NULL; 2531 } 2532 2533 static __inline void 2534 ipfw_inc_static_count(struct ip_fw *rule) 2535 { 2536 /* Static rule's counts are updated only on CPU0 */ 2537 KKASSERT(mycpuid == 0); 2538 2539 static_count++; 2540 static_ioc_len += IOC_RULESIZE(rule); 2541 } 2542 2543 static __inline void 2544 ipfw_dec_static_count(struct ip_fw *rule) 2545 { 2546 int l = IOC_RULESIZE(rule); 2547 2548 /* Static rule's counts are updated only on CPU0 */ 2549 KKASSERT(mycpuid == 0); 2550 2551 KASSERT(static_count > 0, ("invalid static count %u\n", static_count)); 2552 static_count--; 2553 2554 KASSERT(static_ioc_len >= l, 2555 ("invalid static len %u\n", static_ioc_len)); 2556 static_ioc_len -= l; 2557 } 2558 2559 static void 2560 ipfw_link_sibling(struct netmsg_ipfw *fwmsg, struct ip_fw *rule) 2561 { 2562 if (fwmsg->sibling != NULL) { 2563 KKASSERT(mycpuid > 0 && fwmsg->sibling->cpuid == mycpuid - 1); 2564 fwmsg->sibling->sibling = rule; 2565 } 2566 fwmsg->sibling = rule; 2567 } 2568 2569 static struct ip_fw * 2570 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule, struct ip_fw_stub *stub) 2571 { 2572 struct ip_fw *rule; 2573 2574 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 2575 2576 rule->act_ofs = ioc_rule->act_ofs; 2577 rule->cmd_len = ioc_rule->cmd_len; 2578 rule->rulenum = ioc_rule->rulenum; 2579 rule->set = ioc_rule->set; 2580 rule->usr_flags = ioc_rule->usr_flags; 2581 2582 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 2583 2584 rule->refcnt = 1; 2585 rule->cpuid = mycpuid; 2586 2587 rule->stub = stub; 2588 if (stub != NULL) 2589 stub->rule[mycpuid] = rule; 2590 2591 return rule; 2592 } 2593 2594 static void 2595 ipfw_add_rule_dispatch(struct netmsg *nmsg) 2596 { 2597 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 2598 struct 
ipfw_context *ctx = ipfw_ctx[mycpuid]; 2599 struct ip_fw *rule; 2600 2601 rule = ipfw_create_rule(fwmsg->ioc_rule, fwmsg->stub); 2602 2603 /* 2604 * Bump generation after ipfw_create_rule(), 2605 * since this function is blocking 2606 */ 2607 ctx->ipfw_gen++; 2608 2609 /* 2610 * Insert rule into the pre-determined position 2611 */ 2612 if (fwmsg->prev_rule != NULL) { 2613 struct ip_fw *prev, *next; 2614 2615 prev = fwmsg->prev_rule; 2616 KKASSERT(prev->cpuid == mycpuid); 2617 2618 next = fwmsg->next_rule; 2619 KKASSERT(next->cpuid == mycpuid); 2620 2621 rule->next = next; 2622 prev->next = rule; 2623 2624 /* 2625 * Move to the position on the next CPU 2626 * before the msg is forwarded. 2627 */ 2628 fwmsg->prev_rule = prev->sibling; 2629 fwmsg->next_rule = next->sibling; 2630 } else { 2631 KKASSERT(fwmsg->next_rule == NULL); 2632 rule->next = ctx->ipfw_layer3_chain; 2633 ctx->ipfw_layer3_chain = rule; 2634 } 2635 2636 /* Link rule CPU sibling */ 2637 ipfw_link_sibling(fwmsg, rule); 2638 2639 ipfw_flush_rule_ptrs(ctx); 2640 2641 if (mycpuid == 0) { 2642 /* Statistics only need to be updated once */ 2643 ipfw_inc_static_count(rule); 2644 2645 /* Return the rule on CPU0 */ 2646 nmsg->nm_lmsg.u.ms_resultp = rule; 2647 } 2648 2649 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 2650 } 2651 2652 static void 2653 ipfw_enable_state_dispatch(struct netmsg *nmsg) 2654 { 2655 struct lwkt_msg *lmsg = &nmsg->nm_lmsg; 2656 struct ip_fw *rule = lmsg->u.ms_resultp; 2657 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2658 2659 ctx->ipfw_gen++; 2660 2661 KKASSERT(rule->cpuid == mycpuid); 2662 KKASSERT(rule->stub != NULL && rule->stub->rule[mycpuid] == rule); 2663 KKASSERT(!(rule->rule_flags & IPFW_RULE_F_STATE)); 2664 rule->rule_flags |= IPFW_RULE_F_STATE; 2665 lmsg->u.ms_resultp = rule->sibling; 2666 2667 ifnet_forwardmsg(lmsg, mycpuid + 1); 2668 } 2669 2670 /* 2671 * Add a new rule to the list. 
Copy the rule into a malloc'ed area, 2672 * then possibly create a rule number and add the rule to the list. 2673 * Update the rule_number in the input struct so the caller knows 2674 * it as well. 2675 */ 2676 static void 2677 ipfw_add_rule(struct ipfw_ioc_rule *ioc_rule, uint32_t rule_flags) 2678 { 2679 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2680 struct netmsg_ipfw fwmsg; 2681 struct netmsg *nmsg; 2682 struct ip_fw *f, *prev, *rule; 2683 struct ip_fw_stub *stub; 2684 2685 IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 2686 2687 /* 2688 * If rulenum is 0, find highest numbered rule before the 2689 * default rule, and add rule number incremental step. 2690 */ 2691 if (ioc_rule->rulenum == 0) { 2692 int step = autoinc_step; 2693 2694 KKASSERT(step >= IPFW_AUTOINC_STEP_MIN && 2695 step <= IPFW_AUTOINC_STEP_MAX); 2696 2697 /* 2698 * Locate the highest numbered rule before default 2699 */ 2700 for (f = ctx->ipfw_layer3_chain; f; f = f->next) { 2701 if (f->rulenum == IPFW_DEFAULT_RULE) 2702 break; 2703 ioc_rule->rulenum = f->rulenum; 2704 } 2705 if (ioc_rule->rulenum < IPFW_DEFAULT_RULE - step) 2706 ioc_rule->rulenum += step; 2707 } 2708 KASSERT(ioc_rule->rulenum != IPFW_DEFAULT_RULE && 2709 ioc_rule->rulenum != 0, 2710 ("invalid rule num %d\n", ioc_rule->rulenum)); 2711 2712 /* 2713 * Now find the right place for the new rule in the sorted list. 
2714 */ 2715 for (prev = NULL, f = ctx->ipfw_layer3_chain; f; 2716 prev = f, f = f->next) { 2717 if (f->rulenum > ioc_rule->rulenum) { 2718 /* Found the location */ 2719 break; 2720 } 2721 } 2722 KASSERT(f != NULL, ("no default rule?!\n")); 2723 2724 if (rule_flags & IPFW_RULE_F_STATE) { 2725 int size; 2726 2727 /* 2728 * If the new rule will create states, then allocate 2729 * a rule stub, which will be referenced by states 2730 * (dyn rules) 2731 */ 2732 size = sizeof(*stub) + ((ncpus - 1) * sizeof(struct ip_fw *)); 2733 stub = kmalloc(size, M_IPFW, M_WAITOK | M_ZERO); 2734 } else { 2735 stub = NULL; 2736 } 2737 2738 /* 2739 * Duplicate the rule onto each CPU. 2740 * The rule duplicated on CPU0 will be returned. 2741 */ 2742 bzero(&fwmsg, sizeof(fwmsg)); 2743 nmsg = &fwmsg.nmsg; 2744 netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_add_rule_dispatch); 2745 fwmsg.ioc_rule = ioc_rule; 2746 fwmsg.prev_rule = prev; 2747 fwmsg.next_rule = prev == NULL ? NULL : f; 2748 fwmsg.stub = stub; 2749 2750 ifnet_domsg(&nmsg->nm_lmsg, 0); 2751 KKASSERT(fwmsg.prev_rule == NULL && fwmsg.next_rule == NULL); 2752 2753 rule = nmsg->nm_lmsg.u.ms_resultp; 2754 KKASSERT(rule != NULL && rule->cpuid == mycpuid); 2755 2756 if (rule_flags & IPFW_RULE_F_STATE) { 2757 /* 2758 * Turn on state flag, _after_ everything on all 2759 * CPUs have been setup. 2760 */ 2761 bzero(nmsg, sizeof(*nmsg)); 2762 netmsg_init(nmsg, &curthread->td_msgport, 0, 2763 ipfw_enable_state_dispatch); 2764 nmsg->nm_lmsg.u.ms_resultp = rule; 2765 2766 ifnet_domsg(&nmsg->nm_lmsg, 0); 2767 KKASSERT(nmsg->nm_lmsg.u.ms_resultp == NULL); 2768 } 2769 2770 DPRINTF("++ installed rule %d, static count now %d\n", 2771 rule->rulenum, static_count); 2772 } 2773 2774 /** 2775 * Free storage associated with a static rule (including derived 2776 * dynamic rules). 2777 * The caller is in charge of clearing rule pointers to avoid 2778 * dangling pointers. 2779 * @return a pointer to the next entry. 
2780 * Arguments are not checked, so they better be correct. 2781 * Must be called at splimp(). 2782 */ 2783 static struct ip_fw * 2784 ipfw_delete_rule(struct ipfw_context *ctx, 2785 struct ip_fw *prev, struct ip_fw *rule) 2786 { 2787 struct ip_fw *n; 2788 struct ip_fw_stub *stub; 2789 2790 ctx->ipfw_gen++; 2791 2792 /* STATE flag should have been cleared before we reach here */ 2793 KKASSERT((rule->rule_flags & IPFW_RULE_F_STATE) == 0); 2794 2795 stub = rule->stub; 2796 n = rule->next; 2797 if (prev == NULL) 2798 ctx->ipfw_layer3_chain = n; 2799 else 2800 prev->next = n; 2801 2802 /* Mark the rule as invalid */ 2803 rule->rule_flags |= IPFW_RULE_F_INVALID; 2804 rule->next_rule = NULL; 2805 rule->sibling = NULL; 2806 rule->stub = NULL; 2807 #ifdef foo 2808 /* Don't reset cpuid here; keep various assertion working */ 2809 rule->cpuid = -1; 2810 #endif 2811 2812 /* Statistics only need to be updated once */ 2813 if (mycpuid == 0) 2814 ipfw_dec_static_count(rule); 2815 2816 /* Free 'stub' on the last CPU */ 2817 if (stub != NULL && mycpuid == ncpus - 1) 2818 kfree(stub, M_IPFW); 2819 2820 /* Try to free this rule */ 2821 ipfw_free_rule(rule); 2822 2823 /* Return the next rule */ 2824 return n; 2825 } 2826 2827 static void 2828 ipfw_flush_dispatch(struct netmsg *nmsg) 2829 { 2830 struct lwkt_msg *lmsg = &nmsg->nm_lmsg; 2831 int kill_default = lmsg->u.ms_result; 2832 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2833 struct ip_fw *rule; 2834 2835 ipfw_flush_rule_ptrs(ctx); /* more efficient to do outside the loop */ 2836 2837 while ((rule = ctx->ipfw_layer3_chain) != NULL && 2838 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE)) 2839 ipfw_delete_rule(ctx, NULL, rule); 2840 2841 ifnet_forwardmsg(lmsg, mycpuid + 1); 2842 } 2843 2844 static void 2845 ipfw_disable_rule_state_dispatch(struct netmsg *nmsg) 2846 { 2847 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 2848 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2849 struct ip_fw *rule; 2850 2851 ctx->ipfw_gen++; 
2852 2853 rule = dmsg->start_rule; 2854 if (rule != NULL) { 2855 KKASSERT(rule->cpuid == mycpuid); 2856 2857 /* 2858 * Move to the position on the next CPU 2859 * before the msg is forwarded. 2860 */ 2861 dmsg->start_rule = rule->sibling; 2862 } else { 2863 KKASSERT(dmsg->rulenum == 0); 2864 rule = ctx->ipfw_layer3_chain; 2865 } 2866 2867 while (rule != NULL) { 2868 if (dmsg->rulenum && rule->rulenum != dmsg->rulenum) 2869 break; 2870 rule->rule_flags &= ~IPFW_RULE_F_STATE; 2871 rule = rule->next; 2872 } 2873 2874 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 2875 } 2876 2877 /* 2878 * Deletes all rules from a chain (including the default rule 2879 * if the second argument is set). 2880 * Must be called at splimp(). 2881 */ 2882 static void 2883 ipfw_flush(int kill_default) 2884 { 2885 struct netmsg_del dmsg; 2886 struct netmsg nmsg; 2887 struct lwkt_msg *lmsg; 2888 struct ip_fw *rule; 2889 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2890 2891 IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 2892 2893 /* 2894 * If 'kill_default' then caller has done the necessary 2895 * msgport syncing; unnecessary to do it again. 2896 */ 2897 if (!kill_default) { 2898 /* 2899 * Let ipfw_chk() know the rules are going to 2900 * be flushed, so it could jump directly to 2901 * the default rule. 2902 */ 2903 ipfw_flushing = 1; 2904 netmsg_service_sync(); 2905 } 2906 2907 /* 2908 * Clear STATE flag on rules, so no more states (dyn rules) 2909 * will be created. 2910 */ 2911 bzero(&dmsg, sizeof(dmsg)); 2912 netmsg_init(&dmsg.nmsg, &curthread->td_msgport, 0, 2913 ipfw_disable_rule_state_dispatch); 2914 ifnet_domsg(&dmsg.nmsg.nm_lmsg, 0); 2915 2916 /* 2917 * This actually nukes all states (dyn rules) 2918 */ 2919 lockmgr(&dyn_lock, LK_EXCLUSIVE); 2920 for (rule = ctx->ipfw_layer3_chain; rule != NULL; rule = rule->next) { 2921 /* 2922 * Can't check IPFW_RULE_F_STATE here, 2923 * since it has been cleared previously. 2924 * Check 'stub' instead. 
2925 */ 2926 if (rule->stub != NULL) { 2927 /* Force removal */ 2928 remove_dyn_rule_locked(rule, NULL); 2929 } 2930 } 2931 lockmgr(&dyn_lock, LK_RELEASE); 2932 2933 /* 2934 * Press the 'flush' button 2935 */ 2936 bzero(&nmsg, sizeof(nmsg)); 2937 netmsg_init(&nmsg, &curthread->td_msgport, 0, ipfw_flush_dispatch); 2938 lmsg = &nmsg.nm_lmsg; 2939 lmsg->u.ms_result = kill_default; 2940 ifnet_domsg(lmsg, 0); 2941 2942 KASSERT(dyn_count == 0, ("%u dyn rule remains\n", dyn_count)); 2943 2944 if (kill_default) { 2945 if (ipfw_dyn_v != NULL) { 2946 /* 2947 * Free dynamic rules(state) hash table 2948 */ 2949 kfree(ipfw_dyn_v, M_IPFW); 2950 ipfw_dyn_v = NULL; 2951 } 2952 2953 KASSERT(static_count == 0, 2954 ("%u static rules remains\n", static_count)); 2955 KASSERT(static_ioc_len == 0, 2956 ("%u bytes of static rules remains\n", static_ioc_len)); 2957 } else { 2958 KASSERT(static_count == 1, 2959 ("%u static rules remains\n", static_count)); 2960 KASSERT(static_ioc_len == IOC_RULESIZE(ctx->ipfw_default_rule), 2961 ("%u bytes of static rules remains, should be %u\n", 2962 static_ioc_len, IOC_RULESIZE(ctx->ipfw_default_rule))); 2963 } 2964 2965 /* Flush is done */ 2966 ipfw_flushing = 0; 2967 } 2968 2969 static void 2970 ipfw_alt_delete_rule_dispatch(struct netmsg *nmsg) 2971 { 2972 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 2973 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 2974 struct ip_fw *rule, *prev; 2975 2976 rule = dmsg->start_rule; 2977 KKASSERT(rule->cpuid == mycpuid); 2978 dmsg->start_rule = rule->sibling; 2979 2980 prev = dmsg->prev_rule; 2981 if (prev != NULL) { 2982 KKASSERT(prev->cpuid == mycpuid); 2983 2984 /* 2985 * Move to the position on the next CPU 2986 * before the msg is forwarded. 2987 */ 2988 dmsg->prev_rule = prev->sibling; 2989 } 2990 2991 /* 2992 * flush pointers outside the loop, then delete all matching 2993 * rules. 'prev' remains the same throughout the cycle. 
2994 */ 2995 ipfw_flush_rule_ptrs(ctx); 2996 while (rule && rule->rulenum == dmsg->rulenum) 2997 rule = ipfw_delete_rule(ctx, prev, rule); 2998 2999 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3000 } 3001 3002 static int 3003 ipfw_alt_delete_rule(uint16_t rulenum) 3004 { 3005 struct ip_fw *prev, *rule, *f; 3006 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3007 struct netmsg_del dmsg; 3008 struct netmsg *nmsg; 3009 int state; 3010 3011 /* 3012 * Locate first rule to delete 3013 */ 3014 for (prev = NULL, rule = ctx->ipfw_layer3_chain; 3015 rule && rule->rulenum < rulenum; 3016 prev = rule, rule = rule->next) 3017 ; /* EMPTY */ 3018 if (rule->rulenum != rulenum) 3019 return EINVAL; 3020 3021 /* 3022 * Check whether any rules with the given number will 3023 * create states. 3024 */ 3025 state = 0; 3026 for (f = rule; f && f->rulenum == rulenum; f = f->next) { 3027 if (f->rule_flags & IPFW_RULE_F_STATE) { 3028 state = 1; 3029 break; 3030 } 3031 } 3032 3033 if (state) { 3034 /* 3035 * Clear the STATE flag, so no more states will be 3036 * created based the rules numbered 'rulenum'. 3037 */ 3038 bzero(&dmsg, sizeof(dmsg)); 3039 nmsg = &dmsg.nmsg; 3040 netmsg_init(nmsg, &curthread->td_msgport, 0, 3041 ipfw_disable_rule_state_dispatch); 3042 dmsg.start_rule = rule; 3043 dmsg.rulenum = rulenum; 3044 3045 ifnet_domsg(&nmsg->nm_lmsg, 0); 3046 KKASSERT(dmsg.start_rule == NULL); 3047 3048 /* 3049 * Nuke all related states 3050 */ 3051 lockmgr(&dyn_lock, LK_EXCLUSIVE); 3052 for (f = rule; f && f->rulenum == rulenum; f = f->next) { 3053 /* 3054 * Can't check IPFW_RULE_F_STATE here, 3055 * since it has been cleared previously. 3056 * Check 'stub' instead. 
3057 */ 3058 if (f->stub != NULL) { 3059 /* Force removal */ 3060 remove_dyn_rule_locked(f, NULL); 3061 } 3062 } 3063 lockmgr(&dyn_lock, LK_RELEASE); 3064 } 3065 3066 /* 3067 * Get rid of the rule duplications on all CPUs 3068 */ 3069 bzero(&dmsg, sizeof(dmsg)); 3070 nmsg = &dmsg.nmsg; 3071 netmsg_init(nmsg, &curthread->td_msgport, 0, 3072 ipfw_alt_delete_rule_dispatch); 3073 dmsg.prev_rule = prev; 3074 dmsg.start_rule = rule; 3075 dmsg.rulenum = rulenum; 3076 3077 ifnet_domsg(&nmsg->nm_lmsg, 0); 3078 KKASSERT(dmsg.prev_rule == NULL && dmsg.start_rule == NULL); 3079 return 0; 3080 } 3081 3082 static void 3083 ipfw_alt_delete_ruleset_dispatch(struct netmsg *nmsg) 3084 { 3085 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 3086 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3087 struct ip_fw *prev, *rule; 3088 #ifdef INVARIANTS 3089 int del = 0; 3090 #endif 3091 3092 ipfw_flush_rule_ptrs(ctx); 3093 3094 prev = NULL; 3095 rule = ctx->ipfw_layer3_chain; 3096 while (rule != NULL) { 3097 if (rule->set == dmsg->from_set) { 3098 rule = ipfw_delete_rule(ctx, prev, rule); 3099 #ifdef INVARIANTS 3100 del = 1; 3101 #endif 3102 } else { 3103 prev = rule; 3104 rule = rule->next; 3105 } 3106 } 3107 KASSERT(del, ("no match set?!\n")); 3108 3109 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3110 } 3111 3112 static void 3113 ipfw_disable_ruleset_state_dispatch(struct netmsg *nmsg) 3114 { 3115 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 3116 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3117 struct ip_fw *rule; 3118 #ifdef INVARIANTS 3119 int cleared = 0; 3120 #endif 3121 3122 ctx->ipfw_gen++; 3123 3124 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3125 if (rule->set == dmsg->from_set) { 3126 #ifdef INVARIANTS 3127 cleared = 1; 3128 #endif 3129 rule->rule_flags &= ~IPFW_RULE_F_STATE; 3130 } 3131 } 3132 KASSERT(cleared, ("no match set?!\n")); 3133 3134 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3135 } 3136 3137 static int 3138 
ipfw_alt_delete_ruleset(uint8_t set) 3139 { 3140 struct netmsg_del dmsg; 3141 struct netmsg *nmsg; 3142 int state, del; 3143 struct ip_fw *rule; 3144 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3145 3146 /* 3147 * Check whether the 'set' exists. If it exists, 3148 * then check whether any rules within the set will 3149 * try to create states. 3150 */ 3151 state = 0; 3152 del = 0; 3153 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3154 if (rule->set == set) { 3155 del = 1; 3156 if (rule->rule_flags & IPFW_RULE_F_STATE) { 3157 state = 1; 3158 break; 3159 } 3160 } 3161 } 3162 if (!del) 3163 return 0; /* XXX EINVAL? */ 3164 3165 if (state) { 3166 /* 3167 * Clear the STATE flag, so no more states will be 3168 * created based the rules in this set. 3169 */ 3170 bzero(&dmsg, sizeof(dmsg)); 3171 nmsg = &dmsg.nmsg; 3172 netmsg_init(nmsg, &curthread->td_msgport, 0, 3173 ipfw_disable_ruleset_state_dispatch); 3174 dmsg.from_set = set; 3175 3176 ifnet_domsg(&nmsg->nm_lmsg, 0); 3177 3178 /* 3179 * Nuke all related states 3180 */ 3181 lockmgr(&dyn_lock, LK_EXCLUSIVE); 3182 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3183 if (rule->set != set) 3184 continue; 3185 3186 /* 3187 * Can't check IPFW_RULE_F_STATE here, 3188 * since it has been cleared previously. 3189 * Check 'stub' instead. 
3190 */ 3191 if (rule->stub != NULL) { 3192 /* Force removal */ 3193 remove_dyn_rule_locked(rule, NULL); 3194 } 3195 } 3196 lockmgr(&dyn_lock, LK_RELEASE); 3197 } 3198 3199 /* 3200 * Delete this set 3201 */ 3202 bzero(&dmsg, sizeof(dmsg)); 3203 nmsg = &dmsg.nmsg; 3204 netmsg_init(nmsg, &curthread->td_msgport, 0, 3205 ipfw_alt_delete_ruleset_dispatch); 3206 dmsg.from_set = set; 3207 3208 ifnet_domsg(&nmsg->nm_lmsg, 0); 3209 return 0; 3210 } 3211 3212 static void 3213 ipfw_alt_move_rule_dispatch(struct netmsg *nmsg) 3214 { 3215 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 3216 struct ip_fw *rule; 3217 3218 rule = dmsg->start_rule; 3219 KKASSERT(rule->cpuid == mycpuid); 3220 3221 /* 3222 * Move to the position on the next CPU 3223 * before the msg is forwarded. 3224 */ 3225 dmsg->start_rule = rule->sibling; 3226 3227 while (rule && rule->rulenum <= dmsg->rulenum) { 3228 if (rule->rulenum == dmsg->rulenum) 3229 rule->set = dmsg->to_set; 3230 rule = rule->next; 3231 } 3232 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3233 } 3234 3235 static int 3236 ipfw_alt_move_rule(uint16_t rulenum, uint8_t set) 3237 { 3238 struct netmsg_del dmsg; 3239 struct netmsg *nmsg; 3240 struct ip_fw *rule; 3241 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3242 3243 /* 3244 * Locate first rule to move 3245 */ 3246 for (rule = ctx->ipfw_layer3_chain; rule && rule->rulenum <= rulenum; 3247 rule = rule->next) { 3248 if (rule->rulenum == rulenum && rule->set != set) 3249 break; 3250 } 3251 if (rule == NULL || rule->rulenum > rulenum) 3252 return 0; /* XXX error? 
*/ 3253 3254 bzero(&dmsg, sizeof(dmsg)); 3255 nmsg = &dmsg.nmsg; 3256 netmsg_init(nmsg, &curthread->td_msgport, 0, 3257 ipfw_alt_move_rule_dispatch); 3258 dmsg.start_rule = rule; 3259 dmsg.rulenum = rulenum; 3260 dmsg.to_set = set; 3261 3262 ifnet_domsg(&nmsg->nm_lmsg, 0); 3263 KKASSERT(dmsg.start_rule == NULL); 3264 return 0; 3265 } 3266 3267 static void 3268 ipfw_alt_move_ruleset_dispatch(struct netmsg *nmsg) 3269 { 3270 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 3271 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3272 struct ip_fw *rule; 3273 3274 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3275 if (rule->set == dmsg->from_set) 3276 rule->set = dmsg->to_set; 3277 } 3278 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3279 } 3280 3281 static int 3282 ipfw_alt_move_ruleset(uint8_t from_set, uint8_t to_set) 3283 { 3284 struct netmsg_del dmsg; 3285 struct netmsg *nmsg; 3286 3287 bzero(&dmsg, sizeof(dmsg)); 3288 nmsg = &dmsg.nmsg; 3289 netmsg_init(nmsg, &curthread->td_msgport, 0, 3290 ipfw_alt_move_ruleset_dispatch); 3291 dmsg.from_set = from_set; 3292 dmsg.to_set = to_set; 3293 3294 ifnet_domsg(&nmsg->nm_lmsg, 0); 3295 return 0; 3296 } 3297 3298 static void 3299 ipfw_alt_swap_ruleset_dispatch(struct netmsg *nmsg) 3300 { 3301 struct netmsg_del *dmsg = (struct netmsg_del *)nmsg; 3302 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3303 struct ip_fw *rule; 3304 3305 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3306 if (rule->set == dmsg->from_set) 3307 rule->set = dmsg->to_set; 3308 else if (rule->set == dmsg->to_set) 3309 rule->set = dmsg->from_set; 3310 } 3311 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3312 } 3313 3314 static int 3315 ipfw_alt_swap_ruleset(uint8_t set1, uint8_t set2) 3316 { 3317 struct netmsg_del dmsg; 3318 struct netmsg *nmsg; 3319 3320 bzero(&dmsg, sizeof(dmsg)); 3321 nmsg = &dmsg.nmsg; 3322 netmsg_init(nmsg, &curthread->td_msgport, 0, 3323 ipfw_alt_swap_ruleset_dispatch); 3324 dmsg.from_set = set1; 
3325 dmsg.to_set = set2; 3326 3327 ifnet_domsg(&nmsg->nm_lmsg, 0); 3328 return 0; 3329 } 3330 3331 /** 3332 * Remove all rules with given number, and also do set manipulation. 3333 * 3334 * The argument is an uint32_t. The low 16 bit are the rule or set number, 3335 * the next 8 bits are the new set, the top 8 bits are the command: 3336 * 3337 * 0 delete rules with given number 3338 * 1 delete rules with given set number 3339 * 2 move rules with given number to new set 3340 * 3 move rules with given set number to new set 3341 * 4 swap sets with given numbers 3342 */ 3343 static int 3344 ipfw_ctl_alter(uint32_t arg) 3345 { 3346 uint16_t rulenum; 3347 uint8_t cmd, new_set; 3348 int error = 0; 3349 3350 rulenum = arg & 0xffff; 3351 cmd = (arg >> 24) & 0xff; 3352 new_set = (arg >> 16) & 0xff; 3353 3354 if (cmd > 4) 3355 return EINVAL; 3356 if (new_set >= IPFW_DEFAULT_SET) 3357 return EINVAL; 3358 if (cmd == 0 || cmd == 2) { 3359 if (rulenum == IPFW_DEFAULT_RULE) 3360 return EINVAL; 3361 } else { 3362 if (rulenum >= IPFW_DEFAULT_SET) 3363 return EINVAL; 3364 } 3365 3366 switch (cmd) { 3367 case 0: /* delete rules with given number */ 3368 error = ipfw_alt_delete_rule(rulenum); 3369 break; 3370 3371 case 1: /* delete all rules with given set number */ 3372 error = ipfw_alt_delete_ruleset(rulenum); 3373 break; 3374 3375 case 2: /* move rules with given number to new set */ 3376 error = ipfw_alt_move_rule(rulenum, new_set); 3377 break; 3378 3379 case 3: /* move rules with given set number to new set */ 3380 error = ipfw_alt_move_ruleset(rulenum, new_set); 3381 break; 3382 3383 case 4: /* swap two sets */ 3384 error = ipfw_alt_swap_ruleset(rulenum, new_set); 3385 break; 3386 } 3387 return error; 3388 } 3389 3390 /* 3391 * Clear counters for a specific rule. 
3392 */ 3393 static void 3394 clear_counters(struct ip_fw *rule, int log_only) 3395 { 3396 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); 3397 3398 if (log_only == 0) { 3399 rule->bcnt = rule->pcnt = 0; 3400 rule->timestamp = 0; 3401 } 3402 if (l->o.opcode == O_LOG) 3403 l->log_left = l->max_log; 3404 } 3405 3406 static void 3407 ipfw_zero_entry_dispatch(struct netmsg *nmsg) 3408 { 3409 struct netmsg_zent *zmsg = (struct netmsg_zent *)nmsg; 3410 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3411 struct ip_fw *rule; 3412 3413 if (zmsg->rulenum == 0) { 3414 KKASSERT(zmsg->start_rule == NULL); 3415 3416 ctx->ipfw_norule_counter = 0; 3417 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 3418 clear_counters(rule, zmsg->log_only); 3419 } else { 3420 struct ip_fw *start = zmsg->start_rule; 3421 3422 KKASSERT(start->cpuid == mycpuid); 3423 KKASSERT(start->rulenum == zmsg->rulenum); 3424 3425 /* 3426 * We can have multiple rules with the same number, so we 3427 * need to clear them all. 3428 */ 3429 for (rule = start; rule && rule->rulenum == zmsg->rulenum; 3430 rule = rule->next) 3431 clear_counters(rule, zmsg->log_only); 3432 3433 /* 3434 * Move to the position on the next CPU 3435 * before the msg is forwarded. 3436 */ 3437 zmsg->start_rule = start->sibling; 3438 } 3439 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 3440 } 3441 3442 /** 3443 * Reset some or all counters on firewall rules. 3444 * @arg frwl is null to clear all entries, or contains a specific 3445 * rule number. 3446 * @arg log_only is 1 if we only want to reset logs, zero otherwise. 
3447 */ 3448 static int 3449 ipfw_ctl_zero_entry(int rulenum, int log_only) 3450 { 3451 struct netmsg_zent zmsg; 3452 struct netmsg *nmsg; 3453 const char *msg; 3454 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3455 3456 bzero(&zmsg, sizeof(zmsg)); 3457 nmsg = &zmsg.nmsg; 3458 netmsg_init(nmsg, &curthread->td_msgport, 0, ipfw_zero_entry_dispatch); 3459 zmsg.log_only = log_only; 3460 3461 if (rulenum == 0) { 3462 msg = log_only ? "ipfw: All logging counts reset.\n" 3463 : "ipfw: Accounting cleared.\n"; 3464 } else { 3465 struct ip_fw *rule; 3466 3467 /* 3468 * Locate the first rule with 'rulenum' 3469 */ 3470 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) { 3471 if (rule->rulenum == rulenum) 3472 break; 3473 } 3474 if (rule == NULL) /* we did not find any matching rules */ 3475 return (EINVAL); 3476 zmsg.start_rule = rule; 3477 zmsg.rulenum = rulenum; 3478 3479 msg = log_only ? "ipfw: Entry %d logging count reset.\n" 3480 : "ipfw: Entry %d cleared.\n"; 3481 } 3482 ifnet_domsg(&nmsg->nm_lmsg, 0); 3483 KKASSERT(zmsg.start_rule == NULL); 3484 3485 if (fw_verbose) 3486 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 3487 return (0); 3488 } 3489 3490 /* 3491 * Check validity of the structure before insert. 3492 * Fortunately rules are simple, so this mostly need to check rule sizes. 
3493 */ 3494 static int 3495 ipfw_check_ioc_rule(struct ipfw_ioc_rule *rule, int size, uint32_t *rule_flags) 3496 { 3497 int l, cmdlen = 0; 3498 int have_action = 0; 3499 ipfw_insn *cmd; 3500 3501 *rule_flags = 0; 3502 3503 /* Check for valid size */ 3504 if (size < sizeof(*rule)) { 3505 kprintf("ipfw: rule too short\n"); 3506 return EINVAL; 3507 } 3508 l = IOC_RULESIZE(rule); 3509 if (l != size) { 3510 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 3511 return EINVAL; 3512 } 3513 3514 /* Check rule number */ 3515 if (rule->rulenum == IPFW_DEFAULT_RULE) { 3516 kprintf("ipfw: invalid rule number\n"); 3517 return EINVAL; 3518 } 3519 3520 /* 3521 * Now go for the individual checks. Very simple ones, basically only 3522 * instruction sizes. 3523 */ 3524 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 3525 l -= cmdlen, cmd += cmdlen) { 3526 cmdlen = F_LEN(cmd); 3527 if (cmdlen > l) { 3528 kprintf("ipfw: opcode %d size truncated\n", 3529 cmd->opcode); 3530 return EINVAL; 3531 } 3532 3533 DPRINTF("ipfw: opcode %d\n", cmd->opcode); 3534 3535 if (cmd->opcode == O_KEEP_STATE || cmd->opcode == O_LIMIT) { 3536 /* This rule will create states */ 3537 *rule_flags |= IPFW_RULE_F_STATE; 3538 } 3539 3540 switch (cmd->opcode) { 3541 case O_NOP: 3542 case O_PROBE_STATE: 3543 case O_KEEP_STATE: 3544 case O_PROTO: 3545 case O_IP_SRC_ME: 3546 case O_IP_DST_ME: 3547 case O_LAYER2: 3548 case O_IN: 3549 case O_FRAG: 3550 case O_IPOPT: 3551 case O_IPLEN: 3552 case O_IPID: 3553 case O_IPTOS: 3554 case O_IPPRECEDENCE: 3555 case O_IPTTL: 3556 case O_IPVER: 3557 case O_TCPWIN: 3558 case O_TCPFLAGS: 3559 case O_TCPOPTS: 3560 case O_ESTAB: 3561 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 3562 goto bad_size; 3563 break; 3564 3565 case O_UID: 3566 case O_GID: 3567 case O_IP_SRC: 3568 case O_IP_DST: 3569 case O_TCPSEQ: 3570 case O_TCPACK: 3571 case O_PROB: 3572 case O_ICMPTYPE: 3573 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 3574 goto bad_size; 3575 break; 3576 3577 case O_LIMIT: 3578 if 
(cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 3579 goto bad_size; 3580 break; 3581 3582 case O_LOG: 3583 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 3584 goto bad_size; 3585 3586 ((ipfw_insn_log *)cmd)->log_left = 3587 ((ipfw_insn_log *)cmd)->max_log; 3588 3589 break; 3590 3591 case O_IP_SRC_MASK: 3592 case O_IP_DST_MASK: 3593 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 3594 goto bad_size; 3595 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 3596 kprintf("ipfw: opcode %d, useless rule\n", 3597 cmd->opcode); 3598 return EINVAL; 3599 } 3600 break; 3601 3602 case O_IP_SRC_SET: 3603 case O_IP_DST_SET: 3604 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 3605 kprintf("ipfw: invalid set size %d\n", 3606 cmd->arg1); 3607 return EINVAL; 3608 } 3609 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 3610 (cmd->arg1+31)/32 ) 3611 goto bad_size; 3612 break; 3613 3614 case O_MACADDR2: 3615 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 3616 goto bad_size; 3617 break; 3618 3619 case O_MAC_TYPE: 3620 case O_IP_SRCPORT: 3621 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 3622 if (cmdlen < 2 || cmdlen > 31) 3623 goto bad_size; 3624 break; 3625 3626 case O_RECV: 3627 case O_XMIT: 3628 case O_VIA: 3629 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 3630 goto bad_size; 3631 break; 3632 3633 case O_PIPE: 3634 case O_QUEUE: 3635 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 3636 goto bad_size; 3637 goto check_action; 3638 3639 case O_FORWARD_IP: 3640 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) { 3641 goto bad_size; 3642 } else { 3643 in_addr_t fwd_addr; 3644 3645 fwd_addr = ((ipfw_insn_sa *)cmd)-> 3646 sa.sin_addr.s_addr; 3647 if (IN_MULTICAST(ntohl(fwd_addr))) { 3648 kprintf("ipfw: try forwarding to " 3649 "multicast address\n"); 3650 return EINVAL; 3651 } 3652 } 3653 goto check_action; 3654 3655 case O_FORWARD_MAC: /* XXX not implemented yet */ 3656 case O_CHECK_STATE: 3657 case O_COUNT: 3658 case O_ACCEPT: 3659 case O_DENY: 3660 case O_REJECT: 3661 case O_SKIPTO: 3662 case O_DIVERT: 3663 case O_TEE: 3664 if 
(cmdlen != F_INSN_SIZE(ipfw_insn)) 3665 goto bad_size; 3666 check_action: 3667 if (have_action) { 3668 kprintf("ipfw: opcode %d, multiple actions" 3669 " not allowed\n", 3670 cmd->opcode); 3671 return EINVAL; 3672 } 3673 have_action = 1; 3674 if (l != cmdlen) { 3675 kprintf("ipfw: opcode %d, action must be" 3676 " last opcode\n", 3677 cmd->opcode); 3678 return EINVAL; 3679 } 3680 break; 3681 default: 3682 kprintf("ipfw: opcode %d, unknown opcode\n", 3683 cmd->opcode); 3684 return EINVAL; 3685 } 3686 } 3687 if (have_action == 0) { 3688 kprintf("ipfw: missing action\n"); 3689 return EINVAL; 3690 } 3691 return 0; 3692 3693 bad_size: 3694 kprintf("ipfw: opcode %d size %d wrong\n", 3695 cmd->opcode, cmdlen); 3696 return EINVAL; 3697 } 3698 3699 static int 3700 ipfw_ctl_add_rule(struct sockopt *sopt) 3701 { 3702 struct ipfw_ioc_rule *ioc_rule; 3703 size_t size; 3704 uint32_t rule_flags; 3705 int error; 3706 3707 size = sopt->sopt_valsize; 3708 if (size > (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX) || 3709 size < sizeof(*ioc_rule)) { 3710 return EINVAL; 3711 } 3712 if (size != (sizeof(uint32_t) * IPFW_RULE_SIZE_MAX)) { 3713 sopt->sopt_val = krealloc(sopt->sopt_val, sizeof(uint32_t) * 3714 IPFW_RULE_SIZE_MAX, M_TEMP, M_WAITOK); 3715 } 3716 ioc_rule = sopt->sopt_val; 3717 3718 error = ipfw_check_ioc_rule(ioc_rule, size, &rule_flags); 3719 if (error) 3720 return error; 3721 3722 ipfw_add_rule(ioc_rule, rule_flags); 3723 3724 if (sopt->sopt_dir == SOPT_GET) 3725 sopt->sopt_valsize = IOC_RULESIZE(ioc_rule); 3726 return 0; 3727 } 3728 3729 static void * 3730 ipfw_copy_rule(const struct ip_fw *rule, struct ipfw_ioc_rule *ioc_rule) 3731 { 3732 const struct ip_fw *sibling; 3733 #ifdef INVARIANTS 3734 int i; 3735 #endif 3736 3737 KKASSERT(rule->cpuid == IPFW_CFGCPUID); 3738 3739 ioc_rule->act_ofs = rule->act_ofs; 3740 ioc_rule->cmd_len = rule->cmd_len; 3741 ioc_rule->rulenum = rule->rulenum; 3742 ioc_rule->set = rule->set; 3743 ioc_rule->usr_flags = rule->usr_flags; 3744 3745 
ioc_rule->set_disable = ipfw_ctx[mycpuid]->ipfw_set_disable; 3746 ioc_rule->static_count = static_count; 3747 ioc_rule->static_len = static_ioc_len; 3748 3749 /* 3750 * Visit (read-only) all of the rule's duplications to get 3751 * the necessary statistics 3752 */ 3753 #ifdef INVARIANTS 3754 i = 0; 3755 #endif 3756 ioc_rule->pcnt = 0; 3757 ioc_rule->bcnt = 0; 3758 ioc_rule->timestamp = 0; 3759 for (sibling = rule; sibling != NULL; sibling = sibling->sibling) { 3760 ioc_rule->pcnt += sibling->pcnt; 3761 ioc_rule->bcnt += sibling->bcnt; 3762 if (sibling->timestamp > ioc_rule->timestamp) 3763 ioc_rule->timestamp = sibling->timestamp; 3764 #ifdef INVARIANTS 3765 ++i; 3766 #endif 3767 } 3768 KASSERT(i == ncpus, ("static rule is not duplicated on every cpu\n")); 3769 3770 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 3771 3772 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 3773 } 3774 3775 static void 3776 ipfw_copy_state(const ipfw_dyn_rule *dyn_rule, 3777 struct ipfw_ioc_state *ioc_state) 3778 { 3779 const struct ipfw_flow_id *id; 3780 struct ipfw_ioc_flowid *ioc_id; 3781 3782 ioc_state->expire = TIME_LEQ(dyn_rule->expire, time_second) ? 
3783 0 : dyn_rule->expire - time_second; 3784 ioc_state->pcnt = dyn_rule->pcnt; 3785 ioc_state->bcnt = dyn_rule->bcnt; 3786 3787 ioc_state->dyn_type = dyn_rule->dyn_type; 3788 ioc_state->count = dyn_rule->count; 3789 3790 ioc_state->rulenum = dyn_rule->stub->rule[mycpuid]->rulenum; 3791 3792 id = &dyn_rule->id; 3793 ioc_id = &ioc_state->id; 3794 3795 ioc_id->type = ETHERTYPE_IP; 3796 ioc_id->u.ip.dst_ip = id->dst_ip; 3797 ioc_id->u.ip.src_ip = id->src_ip; 3798 ioc_id->u.ip.dst_port = id->dst_port; 3799 ioc_id->u.ip.src_port = id->src_port; 3800 ioc_id->u.ip.proto = id->proto; 3801 } 3802 3803 static int 3804 ipfw_ctl_get_rules(struct sockopt *sopt) 3805 { 3806 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3807 struct ip_fw *rule; 3808 void *bp; 3809 size_t size; 3810 uint32_t dcount = 0; 3811 3812 /* 3813 * pass up a copy of the current rules. Static rules 3814 * come first (the last of which has number IPFW_DEFAULT_RULE), 3815 * followed by a possibly empty list of dynamic rule. 3816 */ 3817 3818 size = static_ioc_len; /* size of static rules */ 3819 if (ipfw_dyn_v) { /* add size of dyn.rules */ 3820 dcount = dyn_count; 3821 size += dcount * sizeof(struct ipfw_ioc_state); 3822 } 3823 3824 if (sopt->sopt_valsize < size) { 3825 /* short length, no need to return incomplete rules */ 3826 /* XXX: if superuser, no need to zero buffer */ 3827 bzero(sopt->sopt_val, sopt->sopt_valsize); 3828 return 0; 3829 } 3830 bp = sopt->sopt_val; 3831 3832 for (rule = ctx->ipfw_layer3_chain; rule; rule = rule->next) 3833 bp = ipfw_copy_rule(rule, bp); 3834 3835 if (ipfw_dyn_v && dcount != 0) { 3836 struct ipfw_ioc_state *ioc_state = bp; 3837 uint32_t dcount2 = 0; 3838 #ifdef INVARIANTS 3839 size_t old_size = size; 3840 #endif 3841 int i; 3842 3843 lockmgr(&dyn_lock, LK_SHARED); 3844 3845 /* Check 'ipfw_dyn_v' again with lock held */ 3846 if (ipfw_dyn_v == NULL) 3847 goto skip; 3848 3849 for (i = 0; i < curr_dyn_buckets; i++) { 3850 ipfw_dyn_rule *p; 3851 3852 /* 3853 * The # of 
dynamic rules may have grown after the 3854 * snapshot of 'dyn_count' was taken, so we will have 3855 * to check 'dcount' (snapshot of dyn_count) here to 3856 * make sure that we don't overflow the pre-allocated 3857 * buffer. 3858 */ 3859 for (p = ipfw_dyn_v[i]; p != NULL && dcount != 0; 3860 p = p->next, ioc_state++, dcount--, dcount2++) 3861 ipfw_copy_state(p, ioc_state); 3862 } 3863 skip: 3864 lockmgr(&dyn_lock, LK_RELEASE); 3865 3866 /* 3867 * The # of dynamic rules may be shrinked after the 3868 * snapshot of 'dyn_count' was taken. To give user a 3869 * correct dynamic rule count, we use the 'dcount2' 3870 * calculated above (with shared lockmgr lock held). 3871 */ 3872 size = static_ioc_len + 3873 (dcount2 * sizeof(struct ipfw_ioc_state)); 3874 KKASSERT(size <= old_size); 3875 } 3876 3877 sopt->sopt_valsize = size; 3878 return 0; 3879 } 3880 3881 static void 3882 ipfw_set_disable_dispatch(struct netmsg *nmsg) 3883 { 3884 struct lwkt_msg *lmsg = &nmsg->nm_lmsg; 3885 struct ipfw_context *ctx = ipfw_ctx[mycpuid]; 3886 3887 ctx->ipfw_gen++; 3888 ctx->ipfw_set_disable = lmsg->u.ms_result32; 3889 3890 ifnet_forwardmsg(lmsg, mycpuid + 1); 3891 } 3892 3893 static void 3894 ipfw_ctl_set_disable(uint32_t disable, uint32_t enable) 3895 { 3896 struct netmsg nmsg; 3897 struct lwkt_msg *lmsg; 3898 uint32_t set_disable; 3899 3900 /* IPFW_DEFAULT_SET is always enabled */ 3901 enable |= (1 << IPFW_DEFAULT_SET); 3902 set_disable = (ipfw_ctx[mycpuid]->ipfw_set_disable | disable) & ~enable; 3903 3904 bzero(&nmsg, sizeof(nmsg)); 3905 netmsg_init(&nmsg, &curthread->td_msgport, 0, ipfw_set_disable_dispatch); 3906 lmsg = &nmsg.nm_lmsg; 3907 lmsg->u.ms_result32 = set_disable; 3908 3909 ifnet_domsg(lmsg, 0); 3910 } 3911 3912 /** 3913 * {set|get}sockopt parser. 
3914 */ 3915 static int 3916 ipfw_ctl(struct sockopt *sopt) 3917 { 3918 int error, rulenum; 3919 uint32_t *masks; 3920 size_t size; 3921 3922 error = 0; 3923 3924 switch (sopt->sopt_name) { 3925 case IP_FW_GET: 3926 error = ipfw_ctl_get_rules(sopt); 3927 break; 3928 3929 case IP_FW_FLUSH: 3930 ipfw_flush(0 /* keep default rule */); 3931 break; 3932 3933 case IP_FW_ADD: 3934 error = ipfw_ctl_add_rule(sopt); 3935 break; 3936 3937 case IP_FW_DEL: 3938 /* 3939 * IP_FW_DEL is used for deleting single rules or sets, 3940 * and (ab)used to atomically manipulate sets. 3941 * Argument size is used to distinguish between the two: 3942 * sizeof(uint32_t) 3943 * delete single rule or set of rules, 3944 * or reassign rules (or sets) to a different set. 3945 * 2 * sizeof(uint32_t) 3946 * atomic disable/enable sets. 3947 * first uint32_t contains sets to be disabled, 3948 * second uint32_t contains sets to be enabled. 3949 */ 3950 masks = sopt->sopt_val; 3951 size = sopt->sopt_valsize; 3952 if (size == sizeof(*masks)) { 3953 /* 3954 * Delete or reassign static rule 3955 */ 3956 error = ipfw_ctl_alter(masks[0]); 3957 } else if (size == (2 * sizeof(*masks))) { 3958 /* 3959 * Set enable/disable 3960 */ 3961 ipfw_ctl_set_disable(masks[0], masks[1]); 3962 } else { 3963 error = EINVAL; 3964 } 3965 break; 3966 3967 case IP_FW_ZERO: 3968 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 3969 rulenum = 0; 3970 3971 if (sopt->sopt_val != 0) { 3972 error = soopt_to_kbuf(sopt, &rulenum, 3973 sizeof(int), sizeof(int)); 3974 if (error) 3975 break; 3976 } 3977 error = ipfw_ctl_zero_entry(rulenum, 3978 sopt->sopt_name == IP_FW_RESETLOG); 3979 break; 3980 3981 default: 3982 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name); 3983 error = EINVAL; 3984 } 3985 return error; 3986 } 3987 3988 /* 3989 * This procedure is only used to handle keepalives. 
It is invoked 3990 * every dyn_keepalive_period 3991 */ 3992 static void 3993 ipfw_tick_dispatch(struct netmsg *nmsg) 3994 { 3995 time_t keep_alive; 3996 uint32_t gen; 3997 int i; 3998 3999 IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 4000 KKASSERT(IPFW_LOADED); 4001 4002 /* Reply ASAP */ 4003 crit_enter(); 4004 lwkt_replymsg(&nmsg->nm_lmsg, 0); 4005 crit_exit(); 4006 4007 if (ipfw_dyn_v == NULL || dyn_count == 0) 4008 goto done; 4009 4010 keep_alive = time_second; 4011 4012 lockmgr(&dyn_lock, LK_EXCLUSIVE); 4013 again: 4014 if (ipfw_dyn_v == NULL || dyn_count == 0) { 4015 lockmgr(&dyn_lock, LK_RELEASE); 4016 goto done; 4017 } 4018 gen = dyn_buckets_gen; 4019 4020 for (i = 0; i < curr_dyn_buckets; i++) { 4021 ipfw_dyn_rule *q, *prev; 4022 4023 for (prev = NULL, q = ipfw_dyn_v[i]; q != NULL;) { 4024 uint32_t ack_rev, ack_fwd; 4025 struct ipfw_flow_id id; 4026 4027 if (q->dyn_type == O_LIMIT_PARENT) 4028 goto next; 4029 4030 if (TIME_LEQ(q->expire, time_second)) { 4031 /* State expired */ 4032 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 4033 continue; 4034 } 4035 4036 /* 4037 * Keep alive processing 4038 */ 4039 4040 if (!dyn_keepalive) 4041 goto next; 4042 if (q->id.proto != IPPROTO_TCP) 4043 goto next; 4044 if ((q->state & BOTH_SYN) != BOTH_SYN) 4045 goto next; 4046 if (TIME_LEQ(time_second + dyn_keepalive_interval, 4047 q->expire)) 4048 goto next; /* too early */ 4049 if (q->keep_alive == keep_alive) 4050 goto next; /* alreay done */ 4051 4052 /* 4053 * Save necessary information, so that they could 4054 * survive after possible blocking in send_pkt() 4055 */ 4056 id = q->id; 4057 ack_rev = q->ack_rev; 4058 ack_fwd = q->ack_fwd; 4059 4060 /* Sending has been started */ 4061 q->keep_alive = keep_alive; 4062 4063 /* Release lock to avoid possible dead lock */ 4064 lockmgr(&dyn_lock, LK_RELEASE); 4065 send_pkt(&id, ack_rev - 1, ack_fwd, TH_SYN); 4066 send_pkt(&id, ack_fwd - 1, ack_rev, 0); 4067 lockmgr(&dyn_lock, LK_EXCLUSIVE); 4068 4069 if (gen != dyn_buckets_gen) { 
4070 /* 4071 * Dyn bucket array has been changed during 4072 * the above two sending; reiterate. 4073 */ 4074 goto again; 4075 } 4076 next: 4077 prev = q; 4078 q = q->next; 4079 } 4080 } 4081 lockmgr(&dyn_lock, LK_RELEASE); 4082 done: 4083 callout_reset(&ipfw_timeout_h, dyn_keepalive_period * hz, 4084 ipfw_tick, NULL); 4085 } 4086 4087 /* 4088 * This procedure is only used to handle keepalives. It is invoked 4089 * every dyn_keepalive_period 4090 */ 4091 static void 4092 ipfw_tick(void *dummy __unused) 4093 { 4094 struct lwkt_msg *lmsg = &ipfw_timeout_netmsg.nm_lmsg; 4095 4096 KKASSERT(mycpuid == IPFW_CFGCPUID); 4097 4098 crit_enter(); 4099 4100 KKASSERT(lmsg->ms_flags & MSGF_DONE); 4101 if (IPFW_LOADED) { 4102 lwkt_sendmsg(IPFW_CFGPORT, lmsg); 4103 /* ipfw_timeout_netmsg's handler reset this callout */ 4104 } 4105 4106 crit_exit(); 4107 } 4108 4109 static int 4110 ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 4111 { 4112 struct ip_fw_args args; 4113 struct mbuf *m = *m0; 4114 struct m_tag *mtag; 4115 int tee = 0, error = 0, ret; 4116 4117 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 4118 /* Extract info from dummynet tag */ 4119 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 4120 KKASSERT(mtag != NULL); 4121 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 4122 KKASSERT(args.rule != NULL); 4123 4124 m_tag_delete(m, mtag); 4125 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 4126 } else { 4127 args.rule = NULL; 4128 } 4129 4130 args.eh = NULL; 4131 args.oif = NULL; 4132 args.m = m; 4133 ret = ipfw_chk(&args); 4134 m = args.m; 4135 4136 if (m == NULL) { 4137 error = EACCES; 4138 goto back; 4139 } 4140 4141 switch (ret) { 4142 case IP_FW_PASS: 4143 break; 4144 4145 case IP_FW_DENY: 4146 m_freem(m); 4147 m = NULL; 4148 error = EACCES; 4149 break; 4150 4151 case IP_FW_DUMMYNET: 4152 /* Send packet to the appropriate pipe */ 4153 ipfw_dummynet_io(m, args.cookie, DN_TO_IP_IN, &args); 4154 break; 4155 4156 case IP_FW_TEE: 4157 tee 
= 1; 4158 /* FALL THROUGH */ 4159 4160 case IP_FW_DIVERT: 4161 if (ip_divert_p != NULL) { 4162 m = ip_divert_p(m, tee, 1); 4163 } else { 4164 m_freem(m); 4165 m = NULL; 4166 /* not sure this is the right error msg */ 4167 error = EACCES; 4168 } 4169 break; 4170 4171 default: 4172 panic("unknown ipfw return value: %d\n", ret); 4173 } 4174 back: 4175 *m0 = m; 4176 return error; 4177 } 4178 4179 static int 4180 ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir) 4181 { 4182 struct ip_fw_args args; 4183 struct mbuf *m = *m0; 4184 struct m_tag *mtag; 4185 int tee = 0, error = 0, ret; 4186 4187 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 4188 /* Extract info from dummynet tag */ 4189 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 4190 KKASSERT(mtag != NULL); 4191 args.rule = ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 4192 KKASSERT(args.rule != NULL); 4193 4194 m_tag_delete(m, mtag); 4195 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 4196 } else { 4197 args.rule = NULL; 4198 } 4199 4200 args.eh = NULL; 4201 args.m = m; 4202 args.oif = ifp; 4203 ret = ipfw_chk(&args); 4204 m = args.m; 4205 4206 if (m == NULL) { 4207 error = EACCES; 4208 goto back; 4209 } 4210 4211 switch (ret) { 4212 case IP_FW_PASS: 4213 break; 4214 4215 case IP_FW_DENY: 4216 m_freem(m); 4217 m = NULL; 4218 error = EACCES; 4219 break; 4220 4221 case IP_FW_DUMMYNET: 4222 ipfw_dummynet_io(m, args.cookie, DN_TO_IP_OUT, &args); 4223 break; 4224 4225 case IP_FW_TEE: 4226 tee = 1; 4227 /* FALL THROUGH */ 4228 4229 case IP_FW_DIVERT: 4230 if (ip_divert_p != NULL) { 4231 m = ip_divert_p(m, tee, 0); 4232 } else { 4233 m_freem(m); 4234 m = NULL; 4235 /* not sure this is the right error msg */ 4236 error = EACCES; 4237 } 4238 break; 4239 4240 default: 4241 panic("unknown ipfw return value: %d\n", ret); 4242 } 4243 back: 4244 *m0 = m; 4245 return error; 4246 } 4247 4248 static void 4249 ipfw_hook(void) 4250 { 4251 struct pfil_head *pfh; 4252 4253 
IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 4254 4255 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 4256 if (pfh == NULL) 4257 return; 4258 4259 pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_MPSAFE, pfh); 4260 pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_MPSAFE, pfh); 4261 } 4262 4263 static void 4264 ipfw_dehook(void) 4265 { 4266 struct pfil_head *pfh; 4267 4268 IPFW_ASSERT_CFGPORT(&curthread->td_msgport); 4269 4270 pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); 4271 if (pfh == NULL) 4272 return; 4273 4274 pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN, pfh); 4275 pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT, pfh); 4276 } 4277 4278 static void 4279 ipfw_sysctl_enable_dispatch(struct netmsg *nmsg) 4280 { 4281 struct lwkt_msg *lmsg = &nmsg->nm_lmsg; 4282 int enable = lmsg->u.ms_result; 4283 4284 if (fw_enable == enable) 4285 goto reply; 4286 4287 fw_enable = enable; 4288 if (fw_enable) 4289 ipfw_hook(); 4290 else 4291 ipfw_dehook(); 4292 reply: 4293 lwkt_replymsg(lmsg, 0); 4294 } 4295 4296 static int 4297 ipfw_sysctl_enable(SYSCTL_HANDLER_ARGS) 4298 { 4299 struct netmsg nmsg; 4300 struct lwkt_msg *lmsg; 4301 int enable, error; 4302 4303 enable = fw_enable; 4304 error = sysctl_handle_int(oidp, &enable, 0, req); 4305 if (error || req->newptr == NULL) 4306 return error; 4307 4308 netmsg_init(&nmsg, &curthread->td_msgport, 0, 4309 ipfw_sysctl_enable_dispatch); 4310 lmsg = &nmsg.nm_lmsg; 4311 lmsg->u.ms_result = enable; 4312 4313 return lwkt_domsg(IPFW_CFGPORT, lmsg, 0); 4314 } 4315 4316 static int 4317 ipfw_sysctl_autoinc_step(SYSCTL_HANDLER_ARGS) 4318 { 4319 return sysctl_int_range(oidp, arg1, arg2, req, 4320 IPFW_AUTOINC_STEP_MIN, IPFW_AUTOINC_STEP_MAX); 4321 } 4322 4323 static int 4324 ipfw_sysctl_dyn_buckets(SYSCTL_HANDLER_ARGS) 4325 { 4326 int error, value; 4327 4328 lockmgr(&dyn_lock, LK_EXCLUSIVE); 4329 4330 value = dyn_buckets; 4331 error = sysctl_handle_int(oidp, &value, 0, req); 4332 if (error || !req->newptr) 4333 goto back; 4334 4335 /* 4336 * Make 
sure we have a power of 2 and 4337 * do not allow more than 64k entries. 4338 */ 4339 error = EINVAL; 4340 if (value <= 1 || value > 65536) 4341 goto back; 4342 if ((value & (value - 1)) != 0) 4343 goto back; 4344 4345 error = 0; 4346 dyn_buckets = value; 4347 back: 4348 lockmgr(&dyn_lock, LK_RELEASE); 4349 return error; 4350 } 4351 4352 static int 4353 ipfw_sysctl_dyn_fin(SYSCTL_HANDLER_ARGS) 4354 { 4355 return sysctl_int_range(oidp, arg1, arg2, req, 4356 1, dyn_keepalive_period - 1); 4357 } 4358 4359 static int 4360 ipfw_sysctl_dyn_rst(SYSCTL_HANDLER_ARGS) 4361 { 4362 return sysctl_int_range(oidp, arg1, arg2, req, 4363 1, dyn_keepalive_period - 1); 4364 } 4365 4366 static void 4367 ipfw_ctx_init_dispatch(struct netmsg *nmsg) 4368 { 4369 struct netmsg_ipfw *fwmsg = (struct netmsg_ipfw *)nmsg; 4370 struct ipfw_context *ctx; 4371 struct ip_fw *def_rule; 4372 4373 ctx = kmalloc(sizeof(*ctx), M_IPFW, M_WAITOK | M_ZERO); 4374 ipfw_ctx[mycpuid] = ctx; 4375 4376 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 4377 4378 def_rule->act_ofs = 0; 4379 def_rule->rulenum = IPFW_DEFAULT_RULE; 4380 def_rule->cmd_len = 1; 4381 def_rule->set = IPFW_DEFAULT_SET; 4382 4383 def_rule->cmd[0].len = 1; 4384 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 4385 def_rule->cmd[0].opcode = O_ACCEPT; 4386 #else 4387 def_rule->cmd[0].opcode = O_DENY; 4388 #endif 4389 4390 def_rule->refcnt = 1; 4391 def_rule->cpuid = mycpuid; 4392 4393 /* Install the default rule */ 4394 ctx->ipfw_default_rule = def_rule; 4395 ctx->ipfw_layer3_chain = def_rule; 4396 4397 /* Link rule CPU sibling */ 4398 ipfw_link_sibling(fwmsg, def_rule); 4399 4400 /* Statistics only need to be updated once */ 4401 if (mycpuid == 0) 4402 ipfw_inc_static_count(def_rule); 4403 4404 ifnet_forwardmsg(&nmsg->nm_lmsg, mycpuid + 1); 4405 } 4406 4407 static void 4408 ipfw_init_dispatch(struct netmsg *nmsg) 4409 { 4410 struct netmsg_ipfw fwmsg; 4411 int error = 0; 4412 4413 if (IPFW_LOADED) { 4414 kprintf("IP firewall already 
loaded\n"); 4415 error = EEXIST; 4416 goto reply; 4417 } 4418 4419 bzero(&fwmsg, sizeof(fwmsg)); 4420 netmsg_init(&fwmsg.nmsg, &curthread->td_msgport, 0, 4421 ipfw_ctx_init_dispatch); 4422 ifnet_domsg(&fwmsg.nmsg.nm_lmsg, 0); 4423 4424 ip_fw_chk_ptr = ipfw_chk; 4425 ip_fw_ctl_ptr = ipfw_ctl; 4426 ip_fw_dn_io_ptr = ipfw_dummynet_io; 4427 4428 kprintf("ipfw2 initialized, default to %s, logging ", 4429 ipfw_ctx[mycpuid]->ipfw_default_rule->cmd[0].opcode == 4430 O_ACCEPT ? "accept" : "deny"); 4431 4432 #ifdef IPFIREWALL_VERBOSE 4433 fw_verbose = 1; 4434 #endif 4435 #ifdef IPFIREWALL_VERBOSE_LIMIT 4436 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 4437 #endif 4438 if (fw_verbose == 0) { 4439 kprintf("disabled\n"); 4440 } else if (verbose_limit == 0) { 4441 kprintf("unlimited\n"); 4442 } else { 4443 kprintf("limited to %d packets/entry by default\n", 4444 verbose_limit); 4445 } 4446 4447 callout_init_mp(&ipfw_timeout_h); 4448 netmsg_init(&ipfw_timeout_netmsg, &netisr_adone_rport, 4449 MSGF_MPSAFE | MSGF_DROPABLE, ipfw_tick_dispatch); 4450 lockinit(&dyn_lock, "ipfw_dyn", 0, 0); 4451 4452 ip_fw_loaded = 1; 4453 callout_reset(&ipfw_timeout_h, hz, ipfw_tick, NULL); 4454 4455 if (fw_enable) 4456 ipfw_hook(); 4457 reply: 4458 lwkt_replymsg(&nmsg->nm_lmsg, error); 4459 } 4460 4461 static int 4462 ipfw_init(void) 4463 { 4464 struct netmsg smsg; 4465 4466 netmsg_init(&smsg, &curthread->td_msgport, 0, ipfw_init_dispatch); 4467 return lwkt_domsg(IPFW_CFGPORT, &smsg.nm_lmsg, 0); 4468 } 4469 4470 #ifdef KLD_MODULE 4471 4472 static void 4473 ipfw_fini_dispatch(struct netmsg *nmsg) 4474 { 4475 int error = 0, cpu; 4476 4477 if (ipfw_refcnt != 0) { 4478 error = EBUSY; 4479 goto reply; 4480 } 4481 4482 ip_fw_loaded = 0; 4483 4484 ipfw_dehook(); 4485 callout_stop(&ipfw_timeout_h); 4486 4487 netmsg_service_sync(); 4488 4489 crit_enter(); 4490 if ((ipfw_timeout_netmsg.nm_lmsg.ms_flags & MSGF_DONE) == 0) { 4491 /* 4492 * Callout message is pending; drop it 4493 */ 4494 
lwkt_dropmsg(&ipfw_timeout_netmsg.nm_lmsg); 4495 } 4496 crit_exit(); 4497 4498 ip_fw_chk_ptr = NULL; 4499 ip_fw_ctl_ptr = NULL; 4500 ip_fw_dn_io_ptr = NULL; 4501 ipfw_flush(1 /* kill default rule */); 4502 4503 /* Free pre-cpu context */ 4504 for (cpu = 0; cpu < ncpus; ++cpu) 4505 kfree(ipfw_ctx[cpu], M_IPFW); 4506 4507 kprintf("IP firewall unloaded\n"); 4508 reply: 4509 lwkt_replymsg(&nmsg->nm_lmsg, error); 4510 } 4511 4512 static int 4513 ipfw_fini(void) 4514 { 4515 struct netmsg smsg; 4516 4517 netmsg_init(&smsg, &curthread->td_msgport, 0, ipfw_fini_dispatch); 4518 return lwkt_domsg(IPFW_CFGPORT, &smsg.nm_lmsg, 0); 4519 } 4520 4521 #endif /* KLD_MODULE */ 4522 4523 static int 4524 ipfw_modevent(module_t mod, int type, void *unused) 4525 { 4526 int err = 0; 4527 4528 switch (type) { 4529 case MOD_LOAD: 4530 err = ipfw_init(); 4531 break; 4532 4533 case MOD_UNLOAD: 4534 #ifndef KLD_MODULE 4535 kprintf("ipfw statically compiled, cannot unload\n"); 4536 err = EBUSY; 4537 #else 4538 err = ipfw_fini(); 4539 #endif 4540 break; 4541 default: 4542 break; 4543 } 4544 return err; 4545 } 4546 4547 static moduledata_t ipfwmod = { 4548 "ipfw", 4549 ipfw_modevent, 4550 0 4551 }; 4552 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 4553 MODULE_VERSION(ipfw, 1); 4554