1 /* 2 * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.12 2003/04/08 10:42:32 maxim Exp $ 26 * $DragonFly: src/sys/net/ipfw/ip_fw2.c,v 1.41 2008/03/07 11:34:20 sephe Exp $ 27 */ 28 29 #define DEB(x) 30 #define DDB(x) x 31 32 /* 33 * Implement IP packet firewall (new version) 34 */ 35 36 #ifndef KLD_MODULE 37 #include "opt_ipfw.h" 38 #include "opt_ipdn.h" 39 #include "opt_ipdivert.h" 40 #include "opt_inet.h" 41 #ifndef INET 42 #error IPFIREWALL requires INET. 
43 #endif /* INET */ 44 #endif 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/kernel.h> 51 #include <sys/proc.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/sysctl.h> 55 #include <sys/syslog.h> 56 #include <sys/thread2.h> 57 #include <sys/ucred.h> 58 #include <sys/in_cksum.h> 59 #include <net/if.h> 60 #include <net/route.h> 61 #include <netinet/in.h> 62 #include <netinet/in_systm.h> 63 #include <netinet/in_var.h> 64 #include <netinet/in_pcb.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip_var.h> 67 #include <netinet/ip_icmp.h> 68 #include "ip_fw.h" 69 #include <net/dummynet/ip_dummynet.h> 70 #include <netinet/tcp.h> 71 #include <netinet/tcp_timer.h> 72 #include <netinet/tcp_var.h> 73 #include <netinet/tcpip.h> 74 #include <netinet/udp.h> 75 #include <netinet/udp_var.h> 76 77 #include <netinet/if_ether.h> /* XXX for ETHERTYPE_IP */ 78 79 /* 80 * set_disable contains one bit per set value (0..31). 81 * If the bit is set, all rules with the corresponding set 82 * are disabled. Set 31 is reserved for the default rule 83 * and CANNOT be disabled. 
 */
static uint32_t set_disable;

/* Logging knobs; exported below as sysctls "verbose" and "verbose_limit". */
static int fw_verbose;
static int verbose_limit;

#ifdef KLD_MODULE
/* Reference count adjusted by ipfw_ref_rule()/ipfw_unref_rule(). */
static int ipfw_refcnt;
#endif

static struct callout ipfw_timeout_h;
#define IPFW_DEFAULT_RULE	65535

/*
 * list of rules for layer 3
 */
static struct ip_fw *layer3_chain;

MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");

static int fw_debug = 1;
static int autoinc_step = 100; /* bounded to 1..1000 in ipfw_add_rule() */

#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, CTLFLAG_RW,
    &fw_enable, 0, "Enable ipfw");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
    &autoinc_step, 0, "Rule number autincrement step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO,one_pass,CTLFLAG_RW,
    &fw_one_pass, 0,
    "Only do a single pass through ipfw when using dummynet(4)");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, CTLFLAG_RW,
    &fw_debug, 0, "Enable printing of debug ip_fw statements");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, CTLFLAG_RW,
    &fw_verbose, 0, "Log matches to ipfw rules");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW,
    &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged");

/*
 * Description of dynamic rules.
 *
 * Dynamic rules are stored in lists accessed through a hash table
 * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can
 * be modified through the sysctl variable dyn_buckets which is
 * updated when the table becomes empty.
 *
 * XXX currently there is only one list, ipfw_dyn.
 *
 * When a packet is received, its address fields are first masked
 * with the mask defined for the rule, then hashed, then matched
 * against the entries in the corresponding list.
 * Dynamic rules can be used for different purposes:
 *  + stateful rules;
 *  + enforcing limits on the number of sessions;
 *  + in-kernel NAT (not implemented yet)
 *
 * The lifetime of dynamic rules is regulated by dyn_*_lifetime,
 * measured in seconds and depending on the flags.
 *
 * The total number of dynamic rules is stored in dyn_count.
 * The max number of dynamic rules is dyn_max. When we reach
 * the maximum number of rules we do not create any more. This is
 * done to avoid consuming too much memory, but also too much
 * time when searching on each packet (ideally, we should try instead
 * to put a limit on the length of the list on each bucket...).
 *
 * Each dynamic rule holds a pointer to the parent ipfw rule so
 * we know what action to perform. Dynamic rules are removed when
 * the parent rule is deleted. XXX we should make them survive.
 *
 * There are some limitations with dynamic rules -- we do not
 * obey the 'randomized match', and we do not do multiple
 * passes through the firewall. XXX check the latter!!!
 */
static ipfw_dyn_rule **ipfw_dyn_v = NULL;
static uint32_t dyn_buckets = 256; /* must be power of 2 */
static uint32_t curr_dyn_buckets = 256; /* must be power of 2 */

/*
 * Timeouts (in seconds) for various events in handling dynamic rules.
 */
static uint32_t dyn_ack_lifetime = 300;
static uint32_t dyn_syn_lifetime = 20;
static uint32_t dyn_fin_lifetime = 1;
static uint32_t dyn_rst_lifetime = 1;
static uint32_t dyn_udp_lifetime = 10;
static uint32_t dyn_short_lifetime = 5;

/*
 * Keepalives are sent if dyn_keepalive is set. They are sent every
 * dyn_keepalive_period seconds, in the last dyn_keepalive_interval
 * seconds of lifetime of a rule.
 * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower
 * than dyn_keepalive_period.
179 */ 180 181 static uint32_t dyn_keepalive_interval = 20; 182 static uint32_t dyn_keepalive_period = 5; 183 static uint32_t dyn_keepalive = 1; /* do send keepalives */ 184 185 static uint32_t static_count; /* # of static rules */ 186 static uint32_t static_ioc_len; /* bytes of static rules */ 187 static uint32_t dyn_count; /* # of dynamic rules */ 188 static uint32_t dyn_max = 4096; /* max # of dynamic rules */ 189 190 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, 191 &dyn_buckets, 0, "Number of dyn. buckets"); 192 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, 193 &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); 194 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, 195 &dyn_count, 0, "Number of dyn. rules"); 196 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, 197 &dyn_max, 0, "Max number of dyn. rules"); 198 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, 199 &static_count, 0, "Number of static rules"); 200 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, 201 &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); 202 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, 203 &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); 204 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, 205 &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); 206 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, 207 &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); 208 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, 209 &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); 210 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, 211 &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); 212 SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, 213 &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); 214 215 #endif /* SYSCTL_NODE */ 216 217 /** 218 * dummynet needs a reference to the default rule, because rules can be 219 * deleted while packets hold a reference to them. When this happens, 220 * dummynet changes the reference to the default rule (it could well be a 221 * NULL pointer, but this way we do not need to check for the special 222 * case, plus here he have info on the default behaviour). 223 */ 224 struct ip_fw *ip_fw_default_rule; 225 226 static ip_fw_chk_t ipfw_chk; 227 228 static __inline int 229 ipfw_free_rule(struct ip_fw *rule) 230 { 231 KASSERT(rule->refcnt > 0, ("invalid refcnt %u\n", rule->refcnt)); 232 atomic_subtract_int(&rule->refcnt, 1); 233 if (atomic_cmpset_int(&rule->refcnt, 0, 1)) { 234 kfree(rule, M_IPFW); 235 return 1; 236 } 237 return 0; 238 } 239 240 static void 241 ipfw_unref_rule(void *priv) 242 { 243 ipfw_free_rule(priv); 244 #ifdef KLD_MODULE 245 atomic_subtract_int(&ipfw_refcnt, 1); 246 #endif 247 } 248 249 static __inline void 250 ipfw_ref_rule(struct ip_fw *rule) 251 { 252 #ifdef KLD_MODULE 253 atomic_add_int(&ipfw_refcnt, 1); 254 #endif 255 atomic_add_int(&rule->refcnt, 1); 256 } 257 258 /* 259 * This macro maps an ip pointer into a layer3 header pointer of type T 260 */ 261 #define L3HDR(T, ip) ((T *)((uint32_t *)(ip) + (ip)->ip_hl)) 262 263 static __inline int 264 icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd) 265 { 266 int type = L3HDR(struct icmp,ip)->icmp_type; 267 268 return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); 269 } 270 271 #define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ 272 (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) 273 274 static int 275 is_icmp_query(struct ip *ip) 276 { 277 int type = L3HDR(struct icmp, ip)->icmp_type; 278 return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); 279 } 280 #undef TT 281 282 /* 283 * The following checks use two arrays of 8 or 16 bits to store the 284 * bits that we want set or clear, respectively. 
They are in the 285 * low and high half of cmd->arg1 or cmd->d[0]. 286 * 287 * We scan options and store the bits we find set. We succeed if 288 * 289 * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear 290 * 291 * The code is sometimes optimized not to store additional variables. 292 */ 293 294 static int 295 flags_match(ipfw_insn *cmd, uint8_t bits) 296 { 297 u_char want_clear; 298 bits = ~bits; 299 300 if ( ((cmd->arg1 & 0xff) & bits) != 0) 301 return 0; /* some bits we want set were clear */ 302 want_clear = (cmd->arg1 >> 8) & 0xff; 303 if ( (want_clear & bits) != want_clear) 304 return 0; /* some bits we want clear were set */ 305 return 1; 306 } 307 308 static int 309 ipopts_match(struct ip *ip, ipfw_insn *cmd) 310 { 311 int optlen, bits = 0; 312 u_char *cp = (u_char *)(ip + 1); 313 int x = (ip->ip_hl << 2) - sizeof (struct ip); 314 315 for (; x > 0; x -= optlen, cp += optlen) { 316 int opt = cp[IPOPT_OPTVAL]; 317 318 if (opt == IPOPT_EOL) 319 break; 320 if (opt == IPOPT_NOP) 321 optlen = 1; 322 else { 323 optlen = cp[IPOPT_OLEN]; 324 if (optlen <= 0 || optlen > x) 325 return 0; /* invalid or truncated */ 326 } 327 switch (opt) { 328 329 default: 330 break; 331 332 case IPOPT_LSRR: 333 bits |= IP_FW_IPOPT_LSRR; 334 break; 335 336 case IPOPT_SSRR: 337 bits |= IP_FW_IPOPT_SSRR; 338 break; 339 340 case IPOPT_RR: 341 bits |= IP_FW_IPOPT_RR; 342 break; 343 344 case IPOPT_TS: 345 bits |= IP_FW_IPOPT_TS; 346 break; 347 } 348 } 349 return (flags_match(cmd, bits)); 350 } 351 352 static int 353 tcpopts_match(struct ip *ip, ipfw_insn *cmd) 354 { 355 int optlen, bits = 0; 356 struct tcphdr *tcp = L3HDR(struct tcphdr,ip); 357 u_char *cp = (u_char *)(tcp + 1); 358 int x = (tcp->th_off << 2) - sizeof(struct tcphdr); 359 360 for (; x > 0; x -= optlen, cp += optlen) { 361 int opt = cp[0]; 362 if (opt == TCPOPT_EOL) 363 break; 364 if (opt == TCPOPT_NOP) 365 optlen = 1; 366 else { 367 optlen = cp[1]; 368 if (optlen <= 0) 369 break; 370 } 371 372 switch (opt) { 373 
374 default: 375 break; 376 377 case TCPOPT_MAXSEG: 378 bits |= IP_FW_TCPOPT_MSS; 379 break; 380 381 case TCPOPT_WINDOW: 382 bits |= IP_FW_TCPOPT_WINDOW; 383 break; 384 385 case TCPOPT_SACK_PERMITTED: 386 case TCPOPT_SACK: 387 bits |= IP_FW_TCPOPT_SACK; 388 break; 389 390 case TCPOPT_TIMESTAMP: 391 bits |= IP_FW_TCPOPT_TS; 392 break; 393 394 case TCPOPT_CC: 395 case TCPOPT_CCNEW: 396 case TCPOPT_CCECHO: 397 bits |= IP_FW_TCPOPT_CC; 398 break; 399 } 400 } 401 return (flags_match(cmd, bits)); 402 } 403 404 static int 405 iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) 406 { 407 if (ifp == NULL) /* no iface with this packet, match fails */ 408 return 0; 409 /* Check by name or by IP address */ 410 if (cmd->name[0] != '\0') { /* match by name */ 411 /* Check name */ 412 if (cmd->p.glob) { 413 if (kfnmatch(cmd->name, ifp->if_xname, 0) == 0) 414 return(1); 415 } else { 416 if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) 417 return(1); 418 } 419 } else { 420 struct ifaddr_container *ifac; 421 422 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 423 struct ifaddr *ia = ifac->ifa; 424 425 if (ia->ifa_addr == NULL) 426 continue; 427 if (ia->ifa_addr->sa_family != AF_INET) 428 continue; 429 if (cmd->p.ip.s_addr == ((struct sockaddr_in *) 430 (ia->ifa_addr))->sin_addr.s_addr) 431 return(1); /* match */ 432 } 433 } 434 return(0); /* no match, fail ... */ 435 } 436 437 static uint64_t norule_counter; /* counter for ipfw_log(NULL...) */ 438 439 #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 440 #define SNP(buf) buf, sizeof(buf) 441 442 /* 443 * We enter here when we have a rule with O_LOG. 444 * XXX this function alone takes about 2Kbytes of code! 
445 */ 446 static void 447 ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, 448 struct mbuf *m, struct ifnet *oif) 449 { 450 char *action; 451 int limit_reached = 0; 452 char action2[40], proto[48], fragment[28]; 453 454 fragment[0] = '\0'; 455 proto[0] = '\0'; 456 457 if (f == NULL) { /* bogus pkt */ 458 if (verbose_limit != 0 && norule_counter >= verbose_limit) 459 return; 460 norule_counter++; 461 if (norule_counter == verbose_limit) 462 limit_reached = verbose_limit; 463 action = "Refuse"; 464 } else { /* O_LOG is the first action, find the real one */ 465 ipfw_insn *cmd = ACTION_PTR(f); 466 ipfw_insn_log *l = (ipfw_insn_log *)cmd; 467 468 if (l->max_log != 0 && l->log_left == 0) 469 return; 470 l->log_left--; 471 if (l->log_left == 0) 472 limit_reached = l->max_log; 473 cmd += F_LEN(cmd); /* point to first action */ 474 if (cmd->opcode == O_PROB) 475 cmd += F_LEN(cmd); 476 477 action = action2; 478 switch (cmd->opcode) { 479 case O_DENY: 480 action = "Deny"; 481 break; 482 483 case O_REJECT: 484 if (cmd->arg1==ICMP_REJECT_RST) 485 action = "Reset"; 486 else if (cmd->arg1==ICMP_UNREACH_HOST) 487 action = "Reject"; 488 else 489 ksnprintf(SNPARGS(action2, 0), "Unreach %d", 490 cmd->arg1); 491 break; 492 493 case O_ACCEPT: 494 action = "Accept"; 495 break; 496 case O_COUNT: 497 action = "Count"; 498 break; 499 case O_DIVERT: 500 ksnprintf(SNPARGS(action2, 0), "Divert %d", 501 cmd->arg1); 502 break; 503 case O_TEE: 504 ksnprintf(SNPARGS(action2, 0), "Tee %d", 505 cmd->arg1); 506 break; 507 case O_SKIPTO: 508 ksnprintf(SNPARGS(action2, 0), "SkipTo %d", 509 cmd->arg1); 510 break; 511 case O_PIPE: 512 ksnprintf(SNPARGS(action2, 0), "Pipe %d", 513 cmd->arg1); 514 break; 515 case O_QUEUE: 516 ksnprintf(SNPARGS(action2, 0), "Queue %d", 517 cmd->arg1); 518 break; 519 case O_FORWARD_IP: { 520 ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; 521 int len; 522 523 len = ksnprintf(SNPARGS(action2, 0), "Forward to %s", 524 inet_ntoa(sa->sa.sin_addr)); 525 if 
(sa->sa.sin_port) 526 ksnprintf(SNPARGS(action2, len), ":%d", 527 sa->sa.sin_port); 528 } 529 break; 530 default: 531 action = "UNKNOWN"; 532 break; 533 } 534 } 535 536 if (hlen == 0) { /* non-ip */ 537 ksnprintf(SNPARGS(proto, 0), "MAC"); 538 } else { 539 struct ip *ip = mtod(m, struct ip *); 540 /* these three are all aliases to the same thing */ 541 struct icmp *const icmp = L3HDR(struct icmp, ip); 542 struct tcphdr *const tcp = (struct tcphdr *)icmp; 543 struct udphdr *const udp = (struct udphdr *)icmp; 544 545 int ip_off, offset, ip_len; 546 547 int len; 548 549 if (eh != NULL) { /* layer 2 packets are as on the wire */ 550 ip_off = ntohs(ip->ip_off); 551 ip_len = ntohs(ip->ip_len); 552 } else { 553 ip_off = ip->ip_off; 554 ip_len = ip->ip_len; 555 } 556 offset = ip_off & IP_OFFMASK; 557 switch (ip->ip_p) { 558 case IPPROTO_TCP: 559 len = ksnprintf(SNPARGS(proto, 0), "TCP %s", 560 inet_ntoa(ip->ip_src)); 561 if (offset == 0) 562 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 563 ntohs(tcp->th_sport), 564 inet_ntoa(ip->ip_dst), 565 ntohs(tcp->th_dport)); 566 else 567 ksnprintf(SNPARGS(proto, len), " %s", 568 inet_ntoa(ip->ip_dst)); 569 break; 570 571 case IPPROTO_UDP: 572 len = ksnprintf(SNPARGS(proto, 0), "UDP %s", 573 inet_ntoa(ip->ip_src)); 574 if (offset == 0) 575 ksnprintf(SNPARGS(proto, len), ":%d %s:%d", 576 ntohs(udp->uh_sport), 577 inet_ntoa(ip->ip_dst), 578 ntohs(udp->uh_dport)); 579 else 580 ksnprintf(SNPARGS(proto, len), " %s", 581 inet_ntoa(ip->ip_dst)); 582 break; 583 584 case IPPROTO_ICMP: 585 if (offset == 0) 586 len = ksnprintf(SNPARGS(proto, 0), 587 "ICMP:%u.%u ", 588 icmp->icmp_type, icmp->icmp_code); 589 else 590 len = ksnprintf(SNPARGS(proto, 0), "ICMP "); 591 len += ksnprintf(SNPARGS(proto, len), "%s", 592 inet_ntoa(ip->ip_src)); 593 ksnprintf(SNPARGS(proto, len), " %s", 594 inet_ntoa(ip->ip_dst)); 595 break; 596 597 default: 598 len = ksnprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, 599 inet_ntoa(ip->ip_src)); 600 ksnprintf(SNPARGS(proto, 
len), " %s", 601 inet_ntoa(ip->ip_dst)); 602 break; 603 } 604 605 if (ip_off & (IP_MF | IP_OFFMASK)) 606 ksnprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", 607 ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), 608 offset << 3, 609 (ip_off & IP_MF) ? "+" : ""); 610 } 611 if (oif || m->m_pkthdr.rcvif) 612 log(LOG_SECURITY | LOG_INFO, 613 "ipfw: %d %s %s %s via %s%s\n", 614 f ? f->rulenum : -1, 615 action, proto, oif ? "out" : "in", 616 oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, 617 fragment); 618 else 619 log(LOG_SECURITY | LOG_INFO, 620 "ipfw: %d %s %s [no if info]%s\n", 621 f ? f->rulenum : -1, 622 action, proto, fragment); 623 if (limit_reached) 624 log(LOG_SECURITY | LOG_NOTICE, 625 "ipfw: limit %d reached on entry %d\n", 626 limit_reached, f ? f->rulenum : -1); 627 } 628 629 /* 630 * IMPORTANT: the hash function for dynamic rules must be commutative 631 * in source and destination (ip,port), because rules are bidirectional 632 * and we want to find both in the same bucket. 633 */ 634 static __inline int 635 hash_packet(struct ipfw_flow_id *id) 636 { 637 uint32_t i; 638 639 i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); 640 i &= (curr_dyn_buckets - 1); 641 return i; 642 } 643 644 /** 645 * unlink a dynamic rule from a chain. prev is a pointer to 646 * the previous one, q is a pointer to the rule to delete, 647 * head is a pointer to the head of the queue. 648 * Modifies q and potentially also head. 
649 */ 650 #define UNLINK_DYN_RULE(prev, head, q) { \ 651 ipfw_dyn_rule *old_q = q; \ 652 \ 653 /* remove a refcount to the parent */ \ 654 if (q->dyn_type == O_LIMIT) \ 655 q->parent->count--; \ 656 DEB(kprintf("-- unlink entry 0x%08x %d -> 0x%08x %d, %d left\n", \ 657 (q->id.src_ip), (q->id.src_port), \ 658 (q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); ) \ 659 if (prev != NULL) \ 660 prev->next = q = q->next; \ 661 else \ 662 head = q = q->next; \ 663 KASSERT(dyn_count > 0, ("invalid dyn count %u\n", dyn_count)); \ 664 dyn_count--; \ 665 kfree(old_q, M_IPFW); } 666 667 #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) 668 669 /** 670 * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. 671 * 672 * If keep_me == NULL, rules are deleted even if not expired, 673 * otherwise only expired rules are removed. 674 * 675 * The value of the second parameter is also used to point to identify 676 * a rule we absolutely do not want to remove (e.g. because we are 677 * holding a reference to it -- this is the case with O_LIMIT_PARENT 678 * rules). The pointer is only used for comparison, so any non-null 679 * value will do. 680 */ 681 static void 682 remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) 683 { 684 static uint32_t last_remove = 0; 685 686 #define FORCE (keep_me == NULL) 687 688 ipfw_dyn_rule *prev, *q; 689 int i, pass = 0, max_pass = 0; 690 691 if (ipfw_dyn_v == NULL || dyn_count == 0) 692 return; 693 /* do not expire more than once per second, it is useless */ 694 if (!FORCE && last_remove == time_second) 695 return; 696 last_remove = time_second; 697 698 /* 699 * because O_LIMIT refer to parent rules, during the first pass only 700 * remove child and mark any pending LIMIT_PARENT, and remove 701 * them in a second pass. 702 */ 703 next_pass: 704 for (i = 0 ; i < curr_dyn_buckets ; i++) { 705 for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { 706 /* 707 * Logic can become complex here, so we split tests. 
708 */ 709 if (q == keep_me) 710 goto next; 711 if (rule != NULL && rule != q->rule) 712 goto next; /* not the one we are looking for */ 713 if (q->dyn_type == O_LIMIT_PARENT) { 714 /* 715 * handle parent in the second pass, 716 * record we need one. 717 */ 718 max_pass = 1; 719 if (pass == 0) 720 goto next; 721 if (FORCE && q->count != 0 ) { 722 /* XXX should not happen! */ 723 kprintf( "OUCH! cannot remove rule," 724 " count %d\n", q->count); 725 } 726 } else { 727 if (!FORCE && 728 !TIME_LEQ( q->expire, time_second )) 729 goto next; 730 } 731 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 732 continue; 733 next: 734 prev=q; 735 q=q->next; 736 } 737 } 738 if (pass++ < max_pass) 739 goto next_pass; 740 } 741 742 743 /** 744 * lookup a dynamic rule. 745 */ 746 static ipfw_dyn_rule * 747 lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, 748 struct tcphdr *tcp) 749 { 750 /* 751 * stateful ipfw extensions. 752 * Lookup into dynamic session queue 753 */ 754 #define MATCH_REVERSE 0 755 #define MATCH_FORWARD 1 756 #define MATCH_NONE 2 757 #define MATCH_UNKNOWN 3 758 int i, dir = MATCH_NONE; 759 ipfw_dyn_rule *prev, *q=NULL; 760 761 if (ipfw_dyn_v == NULL) 762 goto done; /* not found */ 763 i = hash_packet( pkt ); 764 for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { 765 if (q->dyn_type == O_LIMIT_PARENT) 766 goto next; 767 if (TIME_LEQ( q->expire, time_second)) { /* expire entry */ 768 UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); 769 continue; 770 } 771 if ( pkt->proto == q->id.proto) { 772 if (pkt->src_ip == q->id.src_ip && 773 pkt->dst_ip == q->id.dst_ip && 774 pkt->src_port == q->id.src_port && 775 pkt->dst_port == q->id.dst_port ) { 776 dir = MATCH_FORWARD; 777 break; 778 } 779 if (pkt->src_ip == q->id.dst_ip && 780 pkt->dst_ip == q->id.src_ip && 781 pkt->src_port == q->id.dst_port && 782 pkt->dst_port == q->id.src_port ) { 783 dir = MATCH_REVERSE; 784 break; 785 } 786 } 787 next: 788 prev = q; 789 q = q->next; 790 } 791 if (q == NULL) 792 goto done; /* q = 
NULL, not found */ 793 794 if ( prev != NULL) { /* found and not in front */ 795 prev->next = q->next; 796 q->next = ipfw_dyn_v[i]; 797 ipfw_dyn_v[i] = q; 798 } 799 if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ 800 u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); 801 802 #define BOTH_SYN (TH_SYN | (TH_SYN << 8)) 803 #define BOTH_FIN (TH_FIN | (TH_FIN << 8)) 804 q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); 805 switch (q->state) { 806 case TH_SYN: /* opening */ 807 q->expire = time_second + dyn_syn_lifetime; 808 break; 809 810 case BOTH_SYN: /* move to established */ 811 case BOTH_SYN | TH_FIN : /* one side tries to close */ 812 case BOTH_SYN | (TH_FIN << 8) : 813 if (tcp) { 814 #define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) 815 uint32_t ack = ntohl(tcp->th_ack); 816 if (dir == MATCH_FORWARD) { 817 if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) 818 q->ack_fwd = ack; 819 else { /* ignore out-of-sequence */ 820 break; 821 } 822 } else { 823 if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) 824 q->ack_rev = ack; 825 else { /* ignore out-of-sequence */ 826 break; 827 } 828 } 829 } 830 q->expire = time_second + dyn_ack_lifetime; 831 break; 832 833 case BOTH_SYN | BOTH_FIN: /* both sides closed */ 834 if (dyn_fin_lifetime >= dyn_keepalive_period) 835 dyn_fin_lifetime = dyn_keepalive_period - 1; 836 q->expire = time_second + dyn_fin_lifetime; 837 break; 838 839 default: 840 #if 0 841 /* 842 * reset or some invalid combination, but can also 843 * occur if we use keep-state the wrong way. 
844 */ 845 if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) 846 kprintf("invalid state: 0x%x\n", q->state); 847 #endif 848 if (dyn_rst_lifetime >= dyn_keepalive_period) 849 dyn_rst_lifetime = dyn_keepalive_period - 1; 850 q->expire = time_second + dyn_rst_lifetime; 851 break; 852 } 853 } else if (pkt->proto == IPPROTO_UDP) { 854 q->expire = time_second + dyn_udp_lifetime; 855 } else { 856 /* other protocols */ 857 q->expire = time_second + dyn_short_lifetime; 858 } 859 done: 860 if (match_direction) 861 *match_direction = dir; 862 return q; 863 } 864 865 static void 866 realloc_dynamic_table(void) 867 { 868 /* 869 * Try reallocation, make sure we have a power of 2 and do 870 * not allow more than 64k entries. In case of overflow, 871 * default to 1024. 872 */ 873 874 if (dyn_buckets > 65536) 875 dyn_buckets = 1024; 876 if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ 877 dyn_buckets = curr_dyn_buckets; /* reset */ 878 return; 879 } 880 curr_dyn_buckets = dyn_buckets; 881 if (ipfw_dyn_v != NULL) 882 kfree(ipfw_dyn_v, M_IPFW); 883 for (;;) { 884 ipfw_dyn_v = kmalloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), 885 M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 886 if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) 887 break; 888 curr_dyn_buckets /= 2; 889 } 890 } 891 892 /** 893 * Install state of type 'type' for a dynamic session. 894 * The hash table contains two type of rules: 895 * - regular rules (O_KEEP_STATE) 896 * - rules for sessions with limited number of sess per user 897 * (O_LIMIT). When they are created, the parent is 898 * increased by 1, and decreased on delete. In this case, 899 * the third parameter is the parent rule and not the chain. 900 * - "parent" rules for the above (O_LIMIT_PARENT). 
901 */ 902 static ipfw_dyn_rule * 903 add_dyn_rule(struct ipfw_flow_id *id, uint8_t dyn_type, struct ip_fw *rule) 904 { 905 ipfw_dyn_rule *r; 906 int i; 907 908 if (ipfw_dyn_v == NULL || 909 (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { 910 realloc_dynamic_table(); 911 if (ipfw_dyn_v == NULL) 912 return NULL; /* failed ! */ 913 } 914 i = hash_packet(id); 915 916 r = kmalloc(sizeof *r, M_IPFW, M_INTWAIT | M_NULLOK | M_ZERO); 917 if (r == NULL) { 918 kprintf ("sorry cannot allocate state\n"); 919 return NULL; 920 } 921 922 /* increase refcount on parent, and set pointer */ 923 if (dyn_type == O_LIMIT) { 924 ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; 925 if ( parent->dyn_type != O_LIMIT_PARENT) 926 panic("invalid parent"); 927 parent->count++; 928 r->parent = parent; 929 rule = parent->rule; 930 } 931 932 r->id = *id; 933 r->expire = time_second + dyn_syn_lifetime; 934 r->rule = rule; 935 r->dyn_type = dyn_type; 936 r->pcnt = r->bcnt = 0; 937 r->count = 0; 938 939 r->bucket = i; 940 r->next = ipfw_dyn_v[i]; 941 ipfw_dyn_v[i] = r; 942 dyn_count++; 943 DEB(kprintf("-- add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", 944 dyn_type, 945 (r->id.src_ip), (r->id.src_port), 946 (r->id.dst_ip), (r->id.dst_port), 947 dyn_count ); ) 948 return r; 949 } 950 951 /** 952 * lookup dynamic parent rule using pkt and rule as search keys. 953 * If the lookup fails, then install one. 
954 */ 955 static ipfw_dyn_rule * 956 lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) 957 { 958 ipfw_dyn_rule *q; 959 int i; 960 961 if (ipfw_dyn_v) { 962 i = hash_packet( pkt ); 963 for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) 964 if (q->dyn_type == O_LIMIT_PARENT && 965 rule== q->rule && 966 pkt->proto == q->id.proto && 967 pkt->src_ip == q->id.src_ip && 968 pkt->dst_ip == q->id.dst_ip && 969 pkt->src_port == q->id.src_port && 970 pkt->dst_port == q->id.dst_port) { 971 q->expire = time_second + dyn_short_lifetime; 972 DEB(kprintf("lookup_dyn_parent found 0x%p\n",q);) 973 return q; 974 } 975 } 976 return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); 977 } 978 979 /** 980 * Install dynamic state for rule type cmd->o.opcode 981 * 982 * Returns 1 (failure) if state is not installed because of errors or because 983 * session limitations are enforced. 984 */ 985 static int 986 install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, 987 struct ip_fw_args *args) 988 { 989 static int last_log; 990 991 ipfw_dyn_rule *q; 992 993 DEB(kprintf("-- install state type %d 0x%08x %u -> 0x%08x %u\n", 994 cmd->o.opcode, 995 (args->f_id.src_ip), (args->f_id.src_port), 996 (args->f_id.dst_ip), (args->f_id.dst_port) );) 997 998 q = lookup_dyn_rule(&args->f_id, NULL, NULL); 999 1000 if (q != NULL) { /* should never occur */ 1001 if (last_log != time_second) { 1002 last_log = time_second; 1003 kprintf(" install_state: entry already present, done\n"); 1004 } 1005 return 0; 1006 } 1007 1008 if (dyn_count >= dyn_max) 1009 /* 1010 * Run out of slots, try to remove any expired rule. 
1011 */ 1012 remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); 1013 1014 if (dyn_count >= dyn_max) { 1015 if (last_log != time_second) { 1016 last_log = time_second; 1017 kprintf("install_state: Too many dynamic rules\n"); 1018 } 1019 return 1; /* cannot install, notify caller */ 1020 } 1021 1022 switch (cmd->o.opcode) { 1023 case O_KEEP_STATE: /* bidir rule */ 1024 add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); 1025 break; 1026 1027 case O_LIMIT: /* limit number of sessions */ 1028 { 1029 uint16_t limit_mask = cmd->limit_mask; 1030 struct ipfw_flow_id id; 1031 ipfw_dyn_rule *parent; 1032 1033 DEB(kprintf("installing dyn-limit rule %d\n", cmd->conn_limit);) 1034 1035 id.dst_ip = id.src_ip = 0; 1036 id.dst_port = id.src_port = 0; 1037 id.proto = args->f_id.proto; 1038 1039 if (limit_mask & DYN_SRC_ADDR) 1040 id.src_ip = args->f_id.src_ip; 1041 if (limit_mask & DYN_DST_ADDR) 1042 id.dst_ip = args->f_id.dst_ip; 1043 if (limit_mask & DYN_SRC_PORT) 1044 id.src_port = args->f_id.src_port; 1045 if (limit_mask & DYN_DST_PORT) 1046 id.dst_port = args->f_id.dst_port; 1047 parent = lookup_dyn_parent(&id, rule); 1048 if (parent == NULL) { 1049 kprintf("add parent failed\n"); 1050 return 1; 1051 } 1052 if (parent->count >= cmd->conn_limit) { 1053 /* 1054 * See if we can remove some expired rule. 1055 */ 1056 remove_dyn_rule(rule, parent); 1057 if (parent->count >= cmd->conn_limit) { 1058 if (fw_verbose && last_log != time_second) { 1059 last_log = time_second; 1060 log(LOG_SECURITY | LOG_DEBUG, 1061 "drop session, too many entries\n"); 1062 } 1063 return 1; 1064 } 1065 } 1066 add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); 1067 } 1068 break; 1069 default: 1070 kprintf("unknown dynamic rule type %u\n", cmd->o.opcode); 1071 return 1; 1072 } 1073 lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */ 1074 return 0; 1075 } 1076 1077 /* 1078 * Transmit a TCP packet, containing either a RST or a keepalive. 
1079 * When flags & TH_RST, we are sending a RST packet, because of a 1080 * "reset" action matched the packet. 1081 * Otherwise we are sending a keepalive, and flags & TH_ 1082 */ 1083 static void 1084 send_pkt(struct ipfw_flow_id *id, uint32_t seq, uint32_t ack, int flags) 1085 { 1086 struct mbuf *m; 1087 struct ip *ip; 1088 struct tcphdr *tcp; 1089 struct route sro; /* fake route */ 1090 1091 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1092 if (m == 0) 1093 return; 1094 m->m_pkthdr.rcvif = (struct ifnet *)0; 1095 m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); 1096 m->m_data += max_linkhdr; 1097 1098 ip = mtod(m, struct ip *); 1099 bzero(ip, m->m_len); 1100 tcp = (struct tcphdr *)(ip + 1); /* no IP options */ 1101 ip->ip_p = IPPROTO_TCP; 1102 tcp->th_off = 5; 1103 /* 1104 * Assume we are sending a RST (or a keepalive in the reverse 1105 * direction), swap src and destination addresses and ports. 1106 */ 1107 ip->ip_src.s_addr = htonl(id->dst_ip); 1108 ip->ip_dst.s_addr = htonl(id->src_ip); 1109 tcp->th_sport = htons(id->dst_port); 1110 tcp->th_dport = htons(id->src_port); 1111 if (flags & TH_RST) { /* we are sending a RST */ 1112 if (flags & TH_ACK) { 1113 tcp->th_seq = htonl(ack); 1114 tcp->th_ack = htonl(0); 1115 tcp->th_flags = TH_RST; 1116 } else { 1117 if (flags & TH_SYN) 1118 seq++; 1119 tcp->th_seq = htonl(0); 1120 tcp->th_ack = htonl(seq); 1121 tcp->th_flags = TH_RST | TH_ACK; 1122 } 1123 } else { 1124 /* 1125 * We are sending a keepalive. flags & TH_SYN determines 1126 * the direction, forward if set, reverse if clear. 1127 * NOTE: seq and ack are always assumed to be correct 1128 * as set by the caller. This may be confusing... 1129 */ 1130 if (flags & TH_SYN) { 1131 /* 1132 * we have to rewrite the correct addresses! 
1133 */ 1134 ip->ip_dst.s_addr = htonl(id->dst_ip); 1135 ip->ip_src.s_addr = htonl(id->src_ip); 1136 tcp->th_dport = htons(id->dst_port); 1137 tcp->th_sport = htons(id->src_port); 1138 } 1139 tcp->th_seq = htonl(seq); 1140 tcp->th_ack = htonl(ack); 1141 tcp->th_flags = TH_ACK; 1142 } 1143 /* 1144 * set ip_len to the payload size so we can compute 1145 * the tcp checksum on the pseudoheader 1146 * XXX check this, could save a couple of words ? 1147 */ 1148 ip->ip_len = htons(sizeof(struct tcphdr)); 1149 tcp->th_sum = in_cksum(m, m->m_pkthdr.len); 1150 /* 1151 * now fill fields left out earlier 1152 */ 1153 ip->ip_ttl = ip_defttl; 1154 ip->ip_len = m->m_pkthdr.len; 1155 bzero (&sro, sizeof (sro)); 1156 ip_rtaddr(ip->ip_dst, &sro); 1157 m->m_pkthdr.fw_flags |= IPFW_MBUF_GENERATED; 1158 ip_output(m, NULL, &sro, 0, NULL, NULL); 1159 if (sro.ro_rt) 1160 RTFREE(sro.ro_rt); 1161 } 1162 1163 /* 1164 * sends a reject message, consuming the mbuf passed as an argument. 1165 */ 1166 static void 1167 send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) 1168 { 1169 1170 if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ 1171 /* We need the IP header in host order for icmp_error(). 
*/ 1172 if (args->eh != NULL) { 1173 struct ip *ip = mtod(args->m, struct ip *); 1174 ip->ip_len = ntohs(ip->ip_len); 1175 ip->ip_off = ntohs(ip->ip_off); 1176 } 1177 icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); 1178 } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { 1179 struct tcphdr *const tcp = 1180 L3HDR(struct tcphdr, mtod(args->m, struct ip *)); 1181 if ( (tcp->th_flags & TH_RST) == 0) 1182 send_pkt(&(args->f_id), ntohl(tcp->th_seq), 1183 ntohl(tcp->th_ack), 1184 tcp->th_flags | TH_RST); 1185 m_freem(args->m); 1186 } else 1187 m_freem(args->m); 1188 args->m = NULL; 1189 } 1190 1191 /** 1192 * 1193 * Given an ip_fw *, lookup_next_rule will return a pointer 1194 * to the next rule, which can be either the jump 1195 * target (for skipto instructions) or the next one in the list (in 1196 * all other cases including a missing jump target). 1197 * The result is also written in the "next_rule" field of the rule. 1198 * Backward jumps are not allowed, so start looking from the next 1199 * rule... 1200 * 1201 * This never returns NULL -- in case we do not have an exact match, 1202 * the next rule is returned. When the ruleset is changed, 1203 * pointers are flushed so we are always correct. 1204 */ 1205 1206 static struct ip_fw * 1207 lookup_next_rule(struct ip_fw *me) 1208 { 1209 struct ip_fw *rule = NULL; 1210 ipfw_insn *cmd; 1211 1212 /* look for action, in case it is a skipto */ 1213 cmd = ACTION_PTR(me); 1214 if (cmd->opcode == O_LOG) 1215 cmd += F_LEN(cmd); 1216 if ( cmd->opcode == O_SKIPTO ) 1217 for (rule = me->next; rule ; rule = rule->next) 1218 if (rule->rulenum >= cmd->arg1) 1219 break; 1220 if (rule == NULL) /* failure or not a skipto */ 1221 rule = me->next; 1222 me->next_rule = rule; 1223 return rule; 1224 } 1225 1226 /* 1227 * The main check routine for the firewall. 1228 * 1229 * All arguments are in args so we can modify them and return them 1230 * back to the caller. 
1231 * 1232 * Parameters: 1233 * 1234 * args->m (in/out) The packet; we set to NULL when/if we nuke it. 1235 * Starts with the IP header. 1236 * args->eh (in) Mac header if present, or NULL for layer3 packet. 1237 * args->oif Outgoing interface, or NULL if packet is incoming. 1238 * The incoming interface is in the mbuf. (in) 1239 * 1240 * args->rule Pointer to the last matching rule (in/out) 1241 * args->next_hop Socket we are forwarding to (out). 1242 * args->f_id Addresses grabbed from the packet (out) 1243 * 1244 * Return value: 1245 * 1246 * IP_FW_PORT_DENY_FLAG the packet must be dropped. 1247 * 0 The packet is to be accepted and routed normally OR 1248 * the packet was denied/rejected and has been dropped; 1249 * in the latter case, *m is equal to NULL upon return. 1250 * port Divert the packet to port, with these caveats: 1251 * 1252 * - If IP_FW_PORT_TEE_FLAG is set, tee the packet instead 1253 * of diverting it (ie, 'ipfw tee'). 1254 * 1255 * - If IP_FW_PORT_DYNT_FLAG is set, interpret the lower 1256 * 16 bits as a dummynet pipe number instead of diverting 1257 */ 1258 1259 static int 1260 ipfw_chk(struct ip_fw_args *args) 1261 { 1262 /* 1263 * Local variables hold state during the processing of a packet. 1264 * 1265 * IMPORTANT NOTE: to speed up the processing of rules, there 1266 * are some assumption on the values of the variables, which 1267 * are documented here. Should you change them, please check 1268 * the implementation of the various instructions to make sure 1269 * that they still work. 1270 * 1271 * args->eh The MAC header. It is non-null for a layer2 1272 * packet, it is NULL for a layer-3 packet. 1273 * 1274 * m | args->m Pointer to the mbuf, as received from the caller. 1275 * It may change if ipfw_chk() does an m_pullup, or if it 1276 * consumes the packet because it calls send_reject(). 1277 * XXX This has to change, so that ipfw_chk() never modifies 1278 * or consumes the buffer. 
1279 * ip is simply an alias of the value of m, and it is kept 1280 * in sync with it (the packet is supposed to start with 1281 * the ip header). 1282 */ 1283 struct mbuf *m = args->m; 1284 struct ip *ip = mtod(m, struct ip *); 1285 1286 /* 1287 * oif | args->oif If NULL, ipfw_chk has been called on the 1288 * inbound path (ether_input, ip_input). 1289 * If non-NULL, ipfw_chk has been called on the outbound path 1290 * (ether_output, ip_output). 1291 */ 1292 struct ifnet *oif = args->oif; 1293 1294 struct ip_fw *f = NULL; /* matching rule */ 1295 int retval = 0; 1296 struct m_tag *mtag; 1297 1298 /* 1299 * hlen The length of the IPv4 header. 1300 * hlen >0 means we have an IPv4 packet. 1301 */ 1302 u_int hlen = 0; /* hlen >0 means we have an IP pkt */ 1303 1304 /* 1305 * offset The offset of a fragment. offset != 0 means that 1306 * we have a fragment at this offset of an IPv4 packet. 1307 * offset == 0 means that (if this is an IPv4 packet) 1308 * this is the first or only fragment. 1309 */ 1310 u_short offset = 0; 1311 1312 /* 1313 * Local copies of addresses. They are only valid if we have 1314 * an IP packet. 1315 * 1316 * proto The protocol. Set to 0 for non-ip packets, 1317 * or to the protocol read from the packet otherwise. 1318 * proto != 0 means that we have an IPv4 packet. 1319 * 1320 * src_port, dst_port port numbers, in HOST format. Only 1321 * valid for TCP and UDP packets. 1322 * 1323 * src_ip, dst_ip ip addresses, in NETWORK format. 1324 * Only valid for IPv4 packets. 
1325 */ 1326 uint8_t proto; 1327 uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ 1328 struct in_addr src_ip, dst_ip; /* NOTE: network format */ 1329 uint16_t ip_len=0; 1330 int dyn_dir = MATCH_UNKNOWN; 1331 ipfw_dyn_rule *q = NULL; 1332 1333 if (m->m_pkthdr.fw_flags & IPFW_MBUF_GENERATED) 1334 return 0; /* accept */ 1335 /* 1336 * dyn_dir = MATCH_UNKNOWN when rules unchecked, 1337 * MATCH_NONE when checked and not matched (q = NULL), 1338 * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) 1339 */ 1340 1341 if (args->eh == NULL || /* layer 3 packet */ 1342 ( m->m_pkthdr.len >= sizeof(struct ip) && 1343 ntohs(args->eh->ether_type) == ETHERTYPE_IP)) 1344 hlen = ip->ip_hl << 2; 1345 1346 /* 1347 * Collect parameters into local variables for faster matching. 1348 */ 1349 if (hlen == 0) { /* do not grab addresses for non-ip pkts */ 1350 proto = args->f_id.proto = 0; /* mark f_id invalid */ 1351 goto after_ip_checks; 1352 } 1353 1354 proto = args->f_id.proto = ip->ip_p; 1355 src_ip = ip->ip_src; 1356 dst_ip = ip->ip_dst; 1357 if (args->eh != NULL) { /* layer 2 packets are as on the wire */ 1358 offset = ntohs(ip->ip_off) & IP_OFFMASK; 1359 ip_len = ntohs(ip->ip_len); 1360 } else { 1361 offset = ip->ip_off & IP_OFFMASK; 1362 ip_len = ip->ip_len; 1363 } 1364 1365 #define PULLUP_TO(len) \ 1366 do { \ 1367 if ((m)->m_len < (len)) { \ 1368 args->m = m = m_pullup(m, (len)); \ 1369 if (m == 0) \ 1370 goto pullup_failed; \ 1371 ip = mtod(m, struct ip *); \ 1372 } \ 1373 } while (0) 1374 1375 if (offset == 0) { 1376 switch (proto) { 1377 case IPPROTO_TCP: 1378 { 1379 struct tcphdr *tcp; 1380 1381 PULLUP_TO(hlen + sizeof(struct tcphdr)); 1382 tcp = L3HDR(struct tcphdr, ip); 1383 dst_port = tcp->th_dport; 1384 src_port = tcp->th_sport; 1385 args->f_id.flags = tcp->th_flags; 1386 } 1387 break; 1388 1389 case IPPROTO_UDP: 1390 { 1391 struct udphdr *udp; 1392 1393 PULLUP_TO(hlen + sizeof(struct udphdr)); 1394 udp = L3HDR(struct udphdr, ip); 1395 dst_port = 
udp->uh_dport; 1396 src_port = udp->uh_sport; 1397 } 1398 break; 1399 1400 case IPPROTO_ICMP: 1401 PULLUP_TO(hlen + 4); /* type, code and checksum. */ 1402 args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; 1403 break; 1404 1405 default: 1406 break; 1407 } 1408 #undef PULLUP_TO 1409 } 1410 1411 args->f_id.src_ip = ntohl(src_ip.s_addr); 1412 args->f_id.dst_ip = ntohl(dst_ip.s_addr); 1413 args->f_id.src_port = src_port = ntohs(src_port); 1414 args->f_id.dst_port = dst_port = ntohs(dst_port); 1415 1416 after_ip_checks: 1417 if (args->rule) { 1418 /* 1419 * Packet has already been tagged. Look for the next rule 1420 * to restart processing. 1421 * 1422 * If fw_one_pass != 0 then just accept it. 1423 * XXX should not happen here, but optimized out in 1424 * the caller. 1425 */ 1426 if (fw_one_pass) 1427 return 0; 1428 1429 /* This rule was deleted */ 1430 if (args->rule->rule_flags & IPFW_RULE_F_INVALID) 1431 return IP_FW_PORT_DENY_FLAG; 1432 1433 f = args->rule->next_rule; 1434 if (f == NULL) 1435 f = lookup_next_rule(args->rule); 1436 } else { 1437 /* 1438 * Find the starting rule. It can be either the first 1439 * one, or the one after divert_rule if asked so. 1440 */ 1441 int skipto; 1442 1443 mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL); 1444 if (mtag != NULL) 1445 skipto = *(uint16_t *)m_tag_data(mtag); 1446 else 1447 skipto = 0; 1448 1449 f = layer3_chain; 1450 if (args->eh == NULL && skipto != 0) { 1451 if (skipto >= IPFW_DEFAULT_RULE) 1452 return(IP_FW_PORT_DENY_FLAG); /* invalid */ 1453 while (f && f->rulenum <= skipto) 1454 f = f->next; 1455 if (f == NULL) /* drop packet */ 1456 return(IP_FW_PORT_DENY_FLAG); 1457 } 1458 } 1459 if ((mtag = m_tag_find(m, PACKET_TAG_IPFW_DIVERT, NULL)) != NULL) 1460 m_tag_delete(m, mtag); 1461 1462 /* 1463 * Now scan the rules, and parse microinstructions for each rule. 
1464 */ 1465 for (; f; f = f->next) { 1466 int l, cmdlen; 1467 ipfw_insn *cmd; 1468 int skip_or; /* skip rest of OR block */ 1469 1470 again: 1471 if (set_disable & (1 << f->set) ) 1472 continue; 1473 1474 skip_or = 0; 1475 for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; 1476 l -= cmdlen, cmd += cmdlen) { 1477 int match; 1478 1479 /* 1480 * check_body is a jump target used when we find a 1481 * CHECK_STATE, and need to jump to the body of 1482 * the target rule. 1483 */ 1484 1485 check_body: 1486 cmdlen = F_LEN(cmd); 1487 /* 1488 * An OR block (insn_1 || .. || insn_n) has the 1489 * F_OR bit set in all but the last instruction. 1490 * The first match will set "skip_or", and cause 1491 * the following instructions to be skipped until 1492 * past the one with the F_OR bit clear. 1493 */ 1494 if (skip_or) { /* skip this instruction */ 1495 if ((cmd->len & F_OR) == 0) 1496 skip_or = 0; /* next one is good */ 1497 continue; 1498 } 1499 match = 0; /* set to 1 if we succeed */ 1500 1501 switch (cmd->opcode) { 1502 /* 1503 * The first set of opcodes compares the packet's 1504 * fields with some pattern, setting 'match' if a 1505 * match is found. At the end of the loop there is 1506 * logic to deal with F_NOT and F_OR flags associated 1507 * with the opcode. 1508 */ 1509 case O_NOP: 1510 match = 1; 1511 break; 1512 1513 case O_FORWARD_MAC: 1514 kprintf("ipfw: opcode %d unimplemented\n", 1515 cmd->opcode); 1516 break; 1517 1518 case O_GID: 1519 case O_UID: 1520 /* 1521 * We only check offset == 0 && proto != 0, 1522 * as this ensures that we have an IPv4 1523 * packet with the ports info. 1524 */ 1525 if (offset!=0) 1526 break; 1527 { 1528 struct inpcbinfo *pi; 1529 int wildcard; 1530 struct inpcb *pcb; 1531 1532 if (proto == IPPROTO_TCP) { 1533 wildcard = 0; 1534 pi = &tcbinfo[mycpu->gd_cpuid]; 1535 } else if (proto == IPPROTO_UDP) { 1536 wildcard = 1; 1537 pi = &udbinfo; 1538 } else 1539 break; 1540 1541 pcb = (oif) ? 
1542 in_pcblookup_hash(pi, 1543 dst_ip, htons(dst_port), 1544 src_ip, htons(src_port), 1545 wildcard, oif) : 1546 in_pcblookup_hash(pi, 1547 src_ip, htons(src_port), 1548 dst_ip, htons(dst_port), 1549 wildcard, NULL); 1550 1551 if (pcb == NULL || pcb->inp_socket == NULL) 1552 break; 1553 #if defined(__DragonFly__) || (defined(__FreeBSD__) && __FreeBSD_version < 500034) 1554 #define socheckuid(a,b) ((a)->so_cred->cr_uid != (b)) 1555 #endif 1556 if (cmd->opcode == O_UID) { 1557 match = 1558 !socheckuid(pcb->inp_socket, 1559 (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); 1560 } else { 1561 match = groupmember( 1562 (uid_t)((ipfw_insn_u32 *)cmd)->d[0], 1563 pcb->inp_socket->so_cred); 1564 } 1565 } 1566 break; 1567 1568 case O_RECV: 1569 match = iface_match(m->m_pkthdr.rcvif, 1570 (ipfw_insn_if *)cmd); 1571 break; 1572 1573 case O_XMIT: 1574 match = iface_match(oif, (ipfw_insn_if *)cmd); 1575 break; 1576 1577 case O_VIA: 1578 match = iface_match(oif ? oif : 1579 m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); 1580 break; 1581 1582 case O_MACADDR2: 1583 if (args->eh != NULL) { /* have MAC header */ 1584 uint32_t *want = (uint32_t *) 1585 ((ipfw_insn_mac *)cmd)->addr; 1586 uint32_t *mask = (uint32_t *) 1587 ((ipfw_insn_mac *)cmd)->mask; 1588 uint32_t *hdr = (uint32_t *)args->eh; 1589 1590 match = 1591 ( want[0] == (hdr[0] & mask[0]) && 1592 want[1] == (hdr[1] & mask[1]) && 1593 want[2] == (hdr[2] & mask[2]) ); 1594 } 1595 break; 1596 1597 case O_MAC_TYPE: 1598 if (args->eh != NULL) { 1599 uint16_t t = 1600 ntohs(args->eh->ether_type); 1601 uint16_t *p = 1602 ((ipfw_insn_u16 *)cmd)->ports; 1603 int i; 1604 1605 for (i = cmdlen - 1; !match && i>0; 1606 i--, p += 2) 1607 match = (t>=p[0] && t<=p[1]); 1608 } 1609 break; 1610 1611 case O_FRAG: 1612 match = (hlen > 0 && offset != 0); 1613 break; 1614 1615 case O_IN: /* "out" is "not in" */ 1616 match = (oif == NULL); 1617 break; 1618 1619 case O_LAYER2: 1620 match = (args->eh != NULL); 1621 break; 1622 1623 case O_PROTO: 1624 /* 1625 * We do 
not allow an arg of 0 so the 1626 * check of "proto" only suffices. 1627 */ 1628 match = (proto == cmd->arg1); 1629 break; 1630 1631 case O_IP_SRC: 1632 match = (hlen > 0 && 1633 ((ipfw_insn_ip *)cmd)->addr.s_addr == 1634 src_ip.s_addr); 1635 break; 1636 1637 case O_IP_SRC_MASK: 1638 match = (hlen > 0 && 1639 ((ipfw_insn_ip *)cmd)->addr.s_addr == 1640 (src_ip.s_addr & 1641 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 1642 break; 1643 1644 case O_IP_SRC_ME: 1645 if (hlen > 0) { 1646 struct ifnet *tif; 1647 1648 INADDR_TO_IFP(src_ip, tif); 1649 match = (tif != NULL); 1650 } 1651 break; 1652 1653 case O_IP_DST_SET: 1654 case O_IP_SRC_SET: 1655 if (hlen > 0) { 1656 uint32_t *d = (uint32_t *)(cmd+1); 1657 uint32_t addr = 1658 cmd->opcode == O_IP_DST_SET ? 1659 args->f_id.dst_ip : 1660 args->f_id.src_ip; 1661 1662 if (addr < d[0]) 1663 break; 1664 addr -= d[0]; /* subtract base */ 1665 match = (addr < cmd->arg1) && 1666 ( d[ 1 + (addr>>5)] & 1667 (1<<(addr & 0x1f)) ); 1668 } 1669 break; 1670 1671 case O_IP_DST: 1672 match = (hlen > 0 && 1673 ((ipfw_insn_ip *)cmd)->addr.s_addr == 1674 dst_ip.s_addr); 1675 break; 1676 1677 case O_IP_DST_MASK: 1678 match = (hlen > 0) && 1679 (((ipfw_insn_ip *)cmd)->addr.s_addr == 1680 (dst_ip.s_addr & 1681 ((ipfw_insn_ip *)cmd)->mask.s_addr)); 1682 break; 1683 1684 case O_IP_DST_ME: 1685 if (hlen > 0) { 1686 struct ifnet *tif; 1687 1688 INADDR_TO_IFP(dst_ip, tif); 1689 match = (tif != NULL); 1690 } 1691 break; 1692 1693 case O_IP_SRCPORT: 1694 case O_IP_DSTPORT: 1695 /* 1696 * offset == 0 && proto != 0 is enough 1697 * to guarantee that we have an IPv4 1698 * packet with port info. 1699 */ 1700 if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) 1701 && offset == 0) { 1702 uint16_t x = 1703 (cmd->opcode == O_IP_SRCPORT) ? 
1704 src_port : dst_port ; 1705 uint16_t *p = 1706 ((ipfw_insn_u16 *)cmd)->ports; 1707 int i; 1708 1709 for (i = cmdlen - 1; !match && i>0; 1710 i--, p += 2) 1711 match = (x>=p[0] && x<=p[1]); 1712 } 1713 break; 1714 1715 case O_ICMPTYPE: 1716 match = (offset == 0 && proto==IPPROTO_ICMP && 1717 icmptype_match(ip, (ipfw_insn_u32 *)cmd) ); 1718 break; 1719 1720 case O_IPOPT: 1721 match = (hlen > 0 && ipopts_match(ip, cmd) ); 1722 break; 1723 1724 case O_IPVER: 1725 match = (hlen > 0 && cmd->arg1 == ip->ip_v); 1726 break; 1727 1728 case O_IPTTL: 1729 match = (hlen > 0 && cmd->arg1 == ip->ip_ttl); 1730 break; 1731 1732 case O_IPID: 1733 match = (hlen > 0 && 1734 cmd->arg1 == ntohs(ip->ip_id)); 1735 break; 1736 1737 case O_IPLEN: 1738 match = (hlen > 0 && cmd->arg1 == ip_len); 1739 break; 1740 1741 case O_IPPRECEDENCE: 1742 match = (hlen > 0 && 1743 (cmd->arg1 == (ip->ip_tos & 0xe0)) ); 1744 break; 1745 1746 case O_IPTOS: 1747 match = (hlen > 0 && 1748 flags_match(cmd, ip->ip_tos)); 1749 break; 1750 1751 case O_TCPFLAGS: 1752 match = (proto == IPPROTO_TCP && offset == 0 && 1753 flags_match(cmd, 1754 L3HDR(struct tcphdr,ip)->th_flags)); 1755 break; 1756 1757 case O_TCPOPTS: 1758 match = (proto == IPPROTO_TCP && offset == 0 && 1759 tcpopts_match(ip, cmd)); 1760 break; 1761 1762 case O_TCPSEQ: 1763 match = (proto == IPPROTO_TCP && offset == 0 && 1764 ((ipfw_insn_u32 *)cmd)->d[0] == 1765 L3HDR(struct tcphdr,ip)->th_seq); 1766 break; 1767 1768 case O_TCPACK: 1769 match = (proto == IPPROTO_TCP && offset == 0 && 1770 ((ipfw_insn_u32 *)cmd)->d[0] == 1771 L3HDR(struct tcphdr,ip)->th_ack); 1772 break; 1773 1774 case O_TCPWIN: 1775 match = (proto == IPPROTO_TCP && offset == 0 && 1776 cmd->arg1 == 1777 L3HDR(struct tcphdr,ip)->th_win); 1778 break; 1779 1780 case O_ESTAB: 1781 /* reject packets which have SYN only */ 1782 /* XXX should i also check for TH_ACK ? 
*/ 1783 match = (proto == IPPROTO_TCP && offset == 0 && 1784 (L3HDR(struct tcphdr,ip)->th_flags & 1785 (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); 1786 break; 1787 1788 case O_LOG: 1789 if (fw_verbose) 1790 ipfw_log(f, hlen, args->eh, m, oif); 1791 match = 1; 1792 break; 1793 1794 case O_PROB: 1795 match = (krandom() < 1796 ((ipfw_insn_u32 *)cmd)->d[0]); 1797 break; 1798 1799 /* 1800 * The second set of opcodes represents 'actions', 1801 * i.e. the terminal part of a rule once the packet 1802 * matches all previous patterns. 1803 * Typically there is only one action for each rule, 1804 * and the opcode is stored at the end of the rule 1805 * (but there are exceptions -- see below). 1806 * 1807 * In general, here we set retval and terminate the 1808 * outer loop (would be a 'break 3' in some language, 1809 * but we need to do a 'goto done'). 1810 * 1811 * Exceptions: 1812 * O_COUNT and O_SKIPTO actions: 1813 * instead of terminating, we jump to the next rule 1814 * ('goto next_rule', equivalent to a 'break 2'), 1815 * or to the SKIPTO target ('goto again' after 1816 * having set f, cmd and l), respectively. 1817 * 1818 * O_LIMIT and O_KEEP_STATE: these opcodes are 1819 * not real 'actions', and are stored right 1820 * before the 'action' part of the rule. 1821 * These opcodes try to install an entry in the 1822 * state tables; if successful, we continue with 1823 * the next opcode (match=1; break;), otherwise 1824 * the packet * must be dropped 1825 * ('goto done' after setting retval); 1826 * 1827 * O_PROBE_STATE and O_CHECK_STATE: these opcodes 1828 * cause a lookup of the state table, and a jump 1829 * to the 'action' part of the parent rule 1830 * ('goto check_body') if an entry is found, or 1831 * (CHECK_STATE only) a jump to the next rule if 1832 * the entry is not found ('goto next_rule'). 1833 * The result of the lookup is cached to make 1834 * further instances of these opcodes are 1835 * effectively NOPs. 
1836 */ 1837 case O_LIMIT: 1838 case O_KEEP_STATE: 1839 if (install_state(f, 1840 (ipfw_insn_limit *)cmd, args)) { 1841 retval = IP_FW_PORT_DENY_FLAG; 1842 goto done; /* error/limit violation */ 1843 } 1844 match = 1; 1845 break; 1846 1847 case O_PROBE_STATE: 1848 case O_CHECK_STATE: 1849 /* 1850 * dynamic rules are checked at the first 1851 * keep-state or check-state occurrence, 1852 * with the result being stored in dyn_dir. 1853 * The compiler introduces a PROBE_STATE 1854 * instruction for us when we have a 1855 * KEEP_STATE (because PROBE_STATE needs 1856 * to be run first). 1857 */ 1858 if (dyn_dir == MATCH_UNKNOWN && 1859 (q = lookup_dyn_rule(&args->f_id, 1860 &dyn_dir, proto == IPPROTO_TCP ? 1861 L3HDR(struct tcphdr, ip) : NULL)) 1862 != NULL) { 1863 /* 1864 * Found dynamic entry, update stats 1865 * and jump to the 'action' part of 1866 * the parent rule. 1867 */ 1868 q->pcnt++; 1869 q->bcnt += ip_len; 1870 f = q->rule; 1871 cmd = ACTION_PTR(f); 1872 l = f->cmd_len - f->act_ofs; 1873 goto check_body; 1874 } 1875 /* 1876 * Dynamic entry not found. If CHECK_STATE, 1877 * skip to next rule, if PROBE_STATE just 1878 * ignore and continue with next opcode. 1879 */ 1880 if (cmd->opcode == O_CHECK_STATE) 1881 goto next_rule; 1882 match = 1; 1883 break; 1884 1885 case O_ACCEPT: 1886 retval = 0; /* accept */ 1887 goto done; 1888 1889 case O_PIPE: 1890 case O_QUEUE: 1891 args->rule = f; /* report matching rule */ 1892 retval = cmd->arg1 | IP_FW_PORT_DYNT_FLAG; 1893 goto done; 1894 1895 case O_DIVERT: 1896 case O_TEE: 1897 if (args->eh) /* not on layer 2 */ 1898 break; 1899 1900 mtag = m_tag_get(PACKET_TAG_IPFW_DIVERT, 1901 sizeof(uint16_t), MB_DONTWAIT); 1902 if (mtag == NULL) { 1903 retval = IP_FW_PORT_DENY_FLAG; 1904 goto done; 1905 } 1906 *(uint16_t *)m_tag_data(mtag) = f->rulenum; 1907 m_tag_prepend(m, mtag); 1908 retval = (cmd->opcode == O_DIVERT) ? 
1909 cmd->arg1 : 1910 cmd->arg1 | IP_FW_PORT_TEE_FLAG; 1911 goto done; 1912 1913 case O_COUNT: 1914 case O_SKIPTO: 1915 f->pcnt++; /* update stats */ 1916 f->bcnt += ip_len; 1917 f->timestamp = time_second; 1918 if (cmd->opcode == O_COUNT) 1919 goto next_rule; 1920 /* handle skipto */ 1921 if (f->next_rule == NULL) 1922 lookup_next_rule(f); 1923 f = f->next_rule; 1924 goto again; 1925 1926 case O_REJECT: 1927 /* 1928 * Drop the packet and send a reject notice 1929 * if the packet is not ICMP (or is an ICMP 1930 * query), and it is not multicast/broadcast. 1931 */ 1932 if (hlen > 0 && 1933 (proto != IPPROTO_ICMP || 1934 is_icmp_query(ip)) && 1935 !(m->m_flags & (M_BCAST|M_MCAST)) && 1936 !IN_MULTICAST(ntohl(dst_ip.s_addr))) { 1937 send_reject(args, cmd->arg1, 1938 offset,ip_len); 1939 m = args->m; 1940 } 1941 /* FALLTHROUGH */ 1942 case O_DENY: 1943 retval = IP_FW_PORT_DENY_FLAG; 1944 goto done; 1945 1946 case O_FORWARD_IP: 1947 if (args->eh) /* not valid on layer2 pkts */ 1948 break; 1949 if (!q || dyn_dir == MATCH_FORWARD) 1950 args->next_hop = 1951 &((ipfw_insn_sa *)cmd)->sa; 1952 retval = 0; 1953 goto done; 1954 1955 default: 1956 panic("-- unknown opcode %d\n", cmd->opcode); 1957 } /* end of switch() on opcodes */ 1958 1959 if (cmd->len & F_NOT) 1960 match = !match; 1961 1962 if (match) { 1963 if (cmd->len & F_OR) 1964 skip_or = 1; 1965 } else { 1966 if (!(cmd->len & F_OR)) /* not an OR block, */ 1967 break; /* try next rule */ 1968 } 1969 1970 } /* end of inner for, scan opcodes */ 1971 1972 next_rule:; /* try next rule */ 1973 1974 } /* end of outer for, scan rules */ 1975 kprintf("+++ ipfw: ouch!, skip past end of rules, denying packet\n"); 1976 return(IP_FW_PORT_DENY_FLAG); 1977 1978 done: 1979 /* Update statistics */ 1980 f->pcnt++; 1981 f->bcnt += ip_len; 1982 f->timestamp = time_second; 1983 return retval; 1984 1985 pullup_failed: 1986 if (fw_verbose) 1987 kprintf("pullup failed\n"); 1988 return(IP_FW_PORT_DENY_FLAG); 1989 } 1990 1991 static void 1992 
ipfw_dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 1993 { 1994 struct m_tag *mtag; 1995 struct dn_pkt *pkt; 1996 ipfw_insn *cmd; 1997 const struct ipfw_flow_id *id; 1998 struct dn_flow_id *fid; 1999 2000 M_ASSERTPKTHDR(m); 2001 2002 mtag = m_tag_get(PACKET_TAG_DUMMYNET, sizeof(*pkt), MB_DONTWAIT); 2003 if (mtag == NULL) { 2004 m_freem(m); 2005 return; 2006 } 2007 m_tag_prepend(m, mtag); 2008 2009 pkt = m_tag_data(mtag); 2010 bzero(pkt, sizeof(*pkt)); 2011 2012 cmd = fwa->rule->cmd + fwa->rule->act_ofs; 2013 if (cmd->opcode == O_LOG) 2014 cmd += F_LEN(cmd); 2015 KASSERT(cmd->opcode == O_PIPE || cmd->opcode == O_QUEUE, 2016 ("Rule is not PIPE or QUEUE, opcode %d\n", cmd->opcode)); 2017 2018 pkt->dn_m = m; 2019 pkt->dn_flags = (dir & DN_FLAGS_DIR_MASK); 2020 pkt->ifp = fwa->oif; 2021 pkt->cpuid = mycpu->gd_cpuid; 2022 pkt->pipe_nr = pipe_nr; 2023 2024 id = &fwa->f_id; 2025 fid = &pkt->id; 2026 fid->fid_dst_ip = id->dst_ip; 2027 fid->fid_src_ip = id->src_ip; 2028 fid->fid_dst_port = id->dst_port; 2029 fid->fid_src_port = id->src_port; 2030 fid->fid_proto = id->proto; 2031 fid->fid_flags = id->flags; 2032 2033 ipfw_ref_rule(fwa->rule); 2034 pkt->dn_priv = fwa->rule; 2035 pkt->dn_unref_priv = ipfw_unref_rule; 2036 2037 if (cmd->opcode == O_PIPE) 2038 pkt->dn_flags |= DN_FLAGS_IS_PIPE; 2039 2040 if (dir == DN_TO_IP_OUT) { 2041 /* 2042 * We need to copy *ro because for ICMP pkts (and maybe 2043 * others) the caller passed a pointer into the stack; 2044 * dst might also be a pointer into *ro so it needs to 2045 * be updated. 
2046 */ 2047 pkt->ro = *(fwa->ro); 2048 if (fwa->ro->ro_rt) 2049 fwa->ro->ro_rt->rt_refcnt++; 2050 if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) { 2051 /* 'dst' points into 'ro' */ 2052 fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst); 2053 } 2054 pkt->dn_dst = fwa->dst; 2055 pkt->flags = fwa->flags; 2056 } 2057 2058 m->m_pkthdr.fw_flags |= DUMMYNET_MBUF_TAGGED; 2059 ip_dn_queue(m); 2060 } 2061 2062 /* 2063 * When a rule is added/deleted, clear the next_rule pointers in all rules. 2064 * These will be reconstructed on the fly as packets are matched. 2065 * Must be called at splimp(). 2066 */ 2067 static void 2068 flush_rule_ptrs(void) 2069 { 2070 struct ip_fw *rule; 2071 2072 for (rule = layer3_chain; rule; rule = rule->next) 2073 rule->next_rule = NULL; 2074 } 2075 2076 static __inline void 2077 ipfw_inc_static_count(struct ip_fw *rule) 2078 { 2079 static_count++; 2080 static_ioc_len += IOC_RULESIZE(rule); 2081 } 2082 2083 static __inline void 2084 ipfw_dec_static_count(struct ip_fw *rule) 2085 { 2086 int l = IOC_RULESIZE(rule); 2087 2088 KASSERT(static_count > 0, ("invalid static count %u\n", static_count)); 2089 static_count--; 2090 2091 KASSERT(static_ioc_len >= l, 2092 ("invalid static len %u\n", static_ioc_len)); 2093 static_ioc_len -= l; 2094 } 2095 2096 static struct ip_fw * 2097 ipfw_create_rule(const struct ipfw_ioc_rule *ioc_rule) 2098 { 2099 struct ip_fw *rule; 2100 2101 rule = kmalloc(RULESIZE(ioc_rule), M_IPFW, M_WAITOK | M_ZERO); 2102 2103 rule->act_ofs = ioc_rule->act_ofs; 2104 rule->cmd_len = ioc_rule->cmd_len; 2105 rule->rulenum = ioc_rule->rulenum; 2106 rule->set = ioc_rule->set; 2107 rule->usr_flags = ioc_rule->usr_flags; 2108 2109 bcopy(ioc_rule->cmd, rule->cmd, rule->cmd_len * 4 /* XXX */); 2110 2111 rule->refcnt = 1; 2112 2113 return rule; 2114 } 2115 2116 /* 2117 * Add a new rule to the list. Copy the rule into a malloc'ed area, then 2118 * possibly create a rule number and add the rule to the list. 
2119 * Update the rule_number in the input struct so the caller knows it as well. 2120 */ 2121 static int 2122 ipfw_add_rule(struct ip_fw **head, struct ipfw_ioc_rule *ioc_rule) 2123 { 2124 struct ip_fw *rule, *f, *prev; 2125 2126 KKASSERT(*head != NULL); 2127 2128 rule = ipfw_create_rule(ioc_rule); 2129 2130 crit_enter(); 2131 2132 /* 2133 * If rulenum is 0, find highest numbered rule before the 2134 * default rule, and add autoinc_step 2135 */ 2136 if (autoinc_step < 1) 2137 autoinc_step = 1; 2138 else if (autoinc_step > 1000) 2139 autoinc_step = 1000; 2140 if (rule->rulenum == 0) { 2141 /* 2142 * locate the highest numbered rule before default 2143 */ 2144 for (f = *head; f; f = f->next) { 2145 if (f->rulenum == IPFW_DEFAULT_RULE) 2146 break; 2147 rule->rulenum = f->rulenum; 2148 } 2149 if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) 2150 rule->rulenum += autoinc_step; 2151 ioc_rule->rulenum = rule->rulenum; 2152 } 2153 2154 /* 2155 * Now insert the new rule in the right place in the sorted list. 2156 */ 2157 for (prev = NULL, f = *head; f; prev = f, f = f->next) { 2158 if (f->rulenum > rule->rulenum) { /* found the location */ 2159 if (prev) { 2160 rule->next = f; 2161 prev->next = rule; 2162 } else { /* head insert */ 2163 rule->next = *head; 2164 *head = rule; 2165 } 2166 break; 2167 } 2168 } 2169 2170 flush_rule_ptrs(); 2171 ipfw_inc_static_count(rule); 2172 2173 crit_exit(); 2174 2175 DEB(kprintf("++ installed rule %d, static count now %d\n", 2176 rule->rulenum, static_count);) 2177 return (0); 2178 } 2179 2180 /** 2181 * Free storage associated with a static rule (including derived 2182 * dynamic rules). 2183 * The caller is in charge of clearing rule pointers to avoid 2184 * dangling pointers. 2185 * @return a pointer to the next entry. 2186 * Arguments are not checked, so they better be correct. 2187 * Must be called at splimp(). 
2188 */ 2189 static struct ip_fw * 2190 delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule) 2191 { 2192 struct ip_fw *n; 2193 2194 n = rule->next; 2195 remove_dyn_rule(rule, NULL /* force removal */); 2196 if (prev == NULL) 2197 *head = n; 2198 else 2199 prev->next = n; 2200 ipfw_dec_static_count(rule); 2201 2202 /* Mark the rule as invalid */ 2203 rule->rule_flags |= IPFW_RULE_F_INVALID; 2204 rule->next_rule = NULL; 2205 2206 /* Try to free this rule */ 2207 ipfw_free_rule(rule); 2208 2209 return n; 2210 } 2211 2212 /* 2213 * Deletes all rules from a chain (including the default rule 2214 * if the second argument is set). 2215 * Must be called at splimp(). 2216 */ 2217 static void 2218 free_chain(struct ip_fw **chain, int kill_default) 2219 { 2220 struct ip_fw *rule; 2221 2222 flush_rule_ptrs(); /* more efficient to do outside the loop */ 2223 2224 while ( (rule = *chain) != NULL && 2225 (kill_default || rule->rulenum != IPFW_DEFAULT_RULE) ) 2226 delete_rule(chain, NULL, rule); 2227 2228 KASSERT(dyn_count == 0, ("%u dyn rule remains\n", dyn_count)); 2229 2230 if (kill_default) { 2231 ip_fw_default_rule = NULL; /* Reset default rule */ 2232 2233 if (ipfw_dyn_v != NULL) { 2234 /* 2235 * Free dynamic rules(state) hash table 2236 */ 2237 kfree(ipfw_dyn_v, M_IPFW); 2238 ipfw_dyn_v = NULL; 2239 } 2240 2241 KASSERT(static_count == 0, 2242 ("%u static rules remains\n", static_count)); 2243 KASSERT(static_ioc_len == 0, 2244 ("%u bytes of static rules remains\n", static_ioc_len)); 2245 } else { 2246 KASSERT(static_count == 1, 2247 ("%u static rules remains\n", static_count)); 2248 KASSERT(static_ioc_len == IOC_RULESIZE(ip_fw_default_rule), 2249 ("%u bytes of static rules remains, should be %u\n", 2250 static_ioc_len, IOC_RULESIZE(ip_fw_default_rule))); 2251 } 2252 } 2253 2254 /** 2255 * Remove all rules with given number, and also do set manipulation. 2256 * 2257 * The argument is an uint32_t. 
The low 16 bit are the rule or set number, 2258 * the next 8 bits are the new set, the top 8 bits are the command: 2259 * 2260 * 0 delete rules with given number 2261 * 1 delete rules with given set number 2262 * 2 move rules with given number to new set 2263 * 3 move rules with given set number to new set 2264 * 4 swap sets with given numbers 2265 */ 2266 static int 2267 del_entry(struct ip_fw **chain, uint32_t arg) 2268 { 2269 struct ip_fw *prev, *rule; 2270 uint16_t rulenum; 2271 uint8_t cmd, new_set; 2272 2273 rulenum = arg & 0xffff; 2274 cmd = (arg >> 24) & 0xff; 2275 new_set = (arg >> 16) & 0xff; 2276 2277 if (cmd > 4) 2278 return EINVAL; 2279 if (new_set > 30) 2280 return EINVAL; 2281 if (cmd == 0 || cmd == 2) { 2282 if (rulenum == IPFW_DEFAULT_RULE) 2283 return EINVAL; 2284 } else { 2285 if (rulenum > 30) 2286 return EINVAL; 2287 } 2288 2289 switch (cmd) { 2290 case 0: /* delete rules with given number */ 2291 /* 2292 * locate first rule to delete 2293 */ 2294 for (prev = NULL, rule = *chain; 2295 rule && rule->rulenum < rulenum; 2296 prev = rule, rule = rule->next) 2297 ; 2298 if (rule->rulenum != rulenum) 2299 return EINVAL; 2300 2301 crit_enter(); /* no access to rules while removing */ 2302 /* 2303 * flush pointers outside the loop, then delete all matching 2304 * rules. prev remains the same throughout the cycle. 
2305 */ 2306 flush_rule_ptrs(); 2307 while (rule && rule->rulenum == rulenum) 2308 rule = delete_rule(chain, prev, rule); 2309 crit_exit(); 2310 break; 2311 2312 case 1: /* delete all rules with given set number */ 2313 crit_enter(); 2314 flush_rule_ptrs(); 2315 for (prev = NULL, rule = *chain; rule ; ) 2316 if (rule->set == rulenum) 2317 rule = delete_rule(chain, prev, rule); 2318 else { 2319 prev = rule; 2320 rule = rule->next; 2321 } 2322 crit_exit(); 2323 break; 2324 2325 case 2: /* move rules with given number to new set */ 2326 crit_enter(); 2327 for (rule = *chain; rule ; rule = rule->next) 2328 if (rule->rulenum == rulenum) 2329 rule->set = new_set; 2330 crit_exit(); 2331 break; 2332 2333 case 3: /* move rules with given set number to new set */ 2334 crit_enter(); 2335 for (rule = *chain; rule ; rule = rule->next) 2336 if (rule->set == rulenum) 2337 rule->set = new_set; 2338 crit_exit(); 2339 break; 2340 2341 case 4: /* swap two sets */ 2342 crit_enter(); 2343 for (rule = *chain; rule ; rule = rule->next) 2344 if (rule->set == rulenum) 2345 rule->set = new_set; 2346 else if (rule->set == new_set) 2347 rule->set = rulenum; 2348 crit_exit(); 2349 break; 2350 } 2351 return 0; 2352 } 2353 2354 /* 2355 * Clear counters for a specific rule. 2356 */ 2357 static void 2358 clear_counters(struct ip_fw *rule, int log_only) 2359 { 2360 ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); 2361 2362 if (log_only == 0) { 2363 rule->bcnt = rule->pcnt = 0; 2364 rule->timestamp = 0; 2365 } 2366 if (l->o.opcode == O_LOG) 2367 l->log_left = l->max_log; 2368 } 2369 2370 /** 2371 * Reset some or all counters on firewall rules. 2372 * @arg frwl is null to clear all entries, or contains a specific 2373 * rule number. 2374 * @arg log_only is 1 if we only want to reset logs, zero otherwise. 
2375 */ 2376 static int 2377 zero_entry(int rulenum, int log_only) 2378 { 2379 struct ip_fw *rule; 2380 char *msg; 2381 2382 if (rulenum == 0) { 2383 crit_enter(); 2384 norule_counter = 0; 2385 for (rule = layer3_chain; rule; rule = rule->next) 2386 clear_counters(rule, log_only); 2387 crit_exit(); 2388 msg = log_only ? "ipfw: All logging counts reset.\n" : 2389 "ipfw: Accounting cleared.\n"; 2390 } else { 2391 int cleared = 0; 2392 /* 2393 * We can have multiple rules with the same number, so we 2394 * need to clear them all. 2395 */ 2396 for (rule = layer3_chain; rule; rule = rule->next) 2397 if (rule->rulenum == rulenum) { 2398 crit_enter(); 2399 while (rule && rule->rulenum == rulenum) { 2400 clear_counters(rule, log_only); 2401 rule = rule->next; 2402 } 2403 crit_exit(); 2404 cleared = 1; 2405 break; 2406 } 2407 if (!cleared) /* we did not find any matching rules */ 2408 return (EINVAL); 2409 msg = log_only ? "ipfw: Entry %d logging count reset.\n" : 2410 "ipfw: Entry %d cleared.\n"; 2411 } 2412 if (fw_verbose) 2413 log(LOG_SECURITY | LOG_NOTICE, msg, rulenum); 2414 return (0); 2415 } 2416 2417 /* 2418 * Check validity of the structure before insert. 2419 * Fortunately rules are simple, so this mostly need to check rule sizes. 2420 */ 2421 static int 2422 ipfw_ctl_check_rule(struct ipfw_ioc_rule *rule, int size) 2423 { 2424 int l, cmdlen = 0; 2425 int have_action = 0; 2426 ipfw_insn *cmd; 2427 2428 /* Check for valid size */ 2429 if (size < sizeof(*rule)) { 2430 kprintf("ipfw: rule too short\n"); 2431 return EINVAL; 2432 } 2433 l = IOC_RULESIZE(rule); 2434 if (l != size) { 2435 kprintf("ipfw: size mismatch (have %d want %d)\n", size, l); 2436 return EINVAL; 2437 } 2438 2439 /* 2440 * Now go for the individual checks. Very simple ones, basically only 2441 * instruction sizes. 
2442 */ 2443 for (l = rule->cmd_len, cmd = rule->cmd; l > 0; 2444 l -= cmdlen, cmd += cmdlen) { 2445 cmdlen = F_LEN(cmd); 2446 if (cmdlen > l) { 2447 kprintf("ipfw: opcode %d size truncated\n", 2448 cmd->opcode); 2449 return EINVAL; 2450 } 2451 DEB(kprintf("ipfw: opcode %d\n", cmd->opcode);) 2452 switch (cmd->opcode) { 2453 case O_NOP: 2454 case O_PROBE_STATE: 2455 case O_KEEP_STATE: 2456 case O_PROTO: 2457 case O_IP_SRC_ME: 2458 case O_IP_DST_ME: 2459 case O_LAYER2: 2460 case O_IN: 2461 case O_FRAG: 2462 case O_IPOPT: 2463 case O_IPLEN: 2464 case O_IPID: 2465 case O_IPTOS: 2466 case O_IPPRECEDENCE: 2467 case O_IPTTL: 2468 case O_IPVER: 2469 case O_TCPWIN: 2470 case O_TCPFLAGS: 2471 case O_TCPOPTS: 2472 case O_ESTAB: 2473 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 2474 goto bad_size; 2475 break; 2476 2477 case O_UID: 2478 case O_GID: 2479 case O_IP_SRC: 2480 case O_IP_DST: 2481 case O_TCPSEQ: 2482 case O_TCPACK: 2483 case O_PROB: 2484 case O_ICMPTYPE: 2485 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) 2486 goto bad_size; 2487 break; 2488 2489 case O_LIMIT: 2490 if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) 2491 goto bad_size; 2492 break; 2493 2494 case O_LOG: 2495 if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) 2496 goto bad_size; 2497 2498 ((ipfw_insn_log *)cmd)->log_left = 2499 ((ipfw_insn_log *)cmd)->max_log; 2500 2501 break; 2502 2503 case O_IP_SRC_MASK: 2504 case O_IP_DST_MASK: 2505 if (cmdlen != F_INSN_SIZE(ipfw_insn_ip)) 2506 goto bad_size; 2507 if (((ipfw_insn_ip *)cmd)->mask.s_addr == 0) { 2508 kprintf("ipfw: opcode %d, useless rule\n", 2509 cmd->opcode); 2510 return EINVAL; 2511 } 2512 break; 2513 2514 case O_IP_SRC_SET: 2515 case O_IP_DST_SET: 2516 if (cmd->arg1 == 0 || cmd->arg1 > 256) { 2517 kprintf("ipfw: invalid set size %d\n", 2518 cmd->arg1); 2519 return EINVAL; 2520 } 2521 if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 2522 (cmd->arg1+31)/32 ) 2523 goto bad_size; 2524 break; 2525 2526 case O_MACADDR2: 2527 if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) 2528 goto bad_size; 
2529 break; 2530 2531 case O_MAC_TYPE: 2532 case O_IP_SRCPORT: 2533 case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ 2534 if (cmdlen < 2 || cmdlen > 31) 2535 goto bad_size; 2536 break; 2537 2538 case O_RECV: 2539 case O_XMIT: 2540 case O_VIA: 2541 if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) 2542 goto bad_size; 2543 break; 2544 2545 case O_PIPE: 2546 case O_QUEUE: 2547 if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) 2548 goto bad_size; 2549 goto check_action; 2550 2551 case O_FORWARD_IP: 2552 if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) 2553 goto bad_size; 2554 goto check_action; 2555 2556 case O_FORWARD_MAC: /* XXX not implemented yet */ 2557 case O_CHECK_STATE: 2558 case O_COUNT: 2559 case O_ACCEPT: 2560 case O_DENY: 2561 case O_REJECT: 2562 case O_SKIPTO: 2563 case O_DIVERT: 2564 case O_TEE: 2565 if (cmdlen != F_INSN_SIZE(ipfw_insn)) 2566 goto bad_size; 2567 check_action: 2568 if (have_action) { 2569 kprintf("ipfw: opcode %d, multiple actions" 2570 " not allowed\n", 2571 cmd->opcode); 2572 return EINVAL; 2573 } 2574 have_action = 1; 2575 if (l != cmdlen) { 2576 kprintf("ipfw: opcode %d, action must be" 2577 " last opcode\n", 2578 cmd->opcode); 2579 return EINVAL; 2580 } 2581 break; 2582 default: 2583 kprintf("ipfw: opcode %d, unknown opcode\n", 2584 cmd->opcode); 2585 return EINVAL; 2586 } 2587 } 2588 if (have_action == 0) { 2589 kprintf("ipfw: missing action\n"); 2590 return EINVAL; 2591 } 2592 return 0; 2593 2594 bad_size: 2595 kprintf("ipfw: opcode %d size %d wrong\n", 2596 cmd->opcode, cmdlen); 2597 return EINVAL; 2598 } 2599 2600 static int 2601 ipfw_ctl_add_rule(struct sockopt *sopt) 2602 { 2603 struct ipfw_ioc_rule *ioc_rule; 2604 uint32_t rule_buf[IPFW_RULE_SIZE_MAX]; 2605 size_t size; 2606 int error; 2607 2608 ioc_rule = (struct ipfw_ioc_rule *)rule_buf; 2609 error = sooptcopyin(sopt, ioc_rule, sizeof(rule_buf), 2610 sizeof(*ioc_rule)); 2611 if (error) 2612 return error; 2613 2614 size = sopt->sopt_valsize; 2615 error = ipfw_ctl_check_rule(ioc_rule, 
size); 2616 if (error) 2617 return error; 2618 2619 error = ipfw_add_rule(&layer3_chain, ioc_rule); 2620 if (error) 2621 return error; 2622 2623 if (sopt->sopt_dir == SOPT_GET) 2624 error = sooptcopyout(sopt, ioc_rule, IOC_RULESIZE(ioc_rule)); 2625 return error; 2626 } 2627 2628 static void * 2629 ipfw_copy_rule(const struct ip_fw *rule, struct ipfw_ioc_rule *ioc_rule) 2630 { 2631 ioc_rule->act_ofs = rule->act_ofs; 2632 ioc_rule->cmd_len = rule->cmd_len; 2633 ioc_rule->rulenum = rule->rulenum; 2634 ioc_rule->set = rule->set; 2635 ioc_rule->usr_flags = rule->usr_flags; 2636 2637 ioc_rule->set_disable = set_disable; 2638 ioc_rule->static_count = static_count; 2639 ioc_rule->static_len = static_ioc_len; 2640 2641 ioc_rule->pcnt = rule->pcnt; 2642 ioc_rule->bcnt = rule->bcnt; 2643 ioc_rule->timestamp = rule->timestamp; 2644 2645 bcopy(rule->cmd, ioc_rule->cmd, ioc_rule->cmd_len * 4 /* XXX */); 2646 2647 return ((uint8_t *)ioc_rule + IOC_RULESIZE(ioc_rule)); 2648 } 2649 2650 static void 2651 ipfw_copy_state(const ipfw_dyn_rule *dyn_rule, 2652 struct ipfw_ioc_state *ioc_state) 2653 { 2654 const struct ipfw_flow_id *id; 2655 struct ipfw_ioc_flowid *ioc_id; 2656 2657 ioc_state->expire = TIME_LEQ(dyn_rule->expire, time_second) ? 
2658 0 : dyn_rule->expire - time_second; 2659 ioc_state->pcnt = dyn_rule->pcnt; 2660 ioc_state->bcnt = dyn_rule->bcnt; 2661 2662 ioc_state->dyn_type = dyn_rule->dyn_type; 2663 ioc_state->count = dyn_rule->count; 2664 2665 ioc_state->rulenum = dyn_rule->rule->rulenum; 2666 2667 id = &dyn_rule->id; 2668 ioc_id = &ioc_state->id; 2669 2670 ioc_id->type = ETHERTYPE_IP; 2671 ioc_id->u.ip.dst_ip = id->dst_ip; 2672 ioc_id->u.ip.src_ip = id->src_ip; 2673 ioc_id->u.ip.dst_port = id->dst_port; 2674 ioc_id->u.ip.src_port = id->src_port; 2675 ioc_id->u.ip.proto = id->proto; 2676 } 2677 2678 static int 2679 ipfw_ctl_get_rules(struct sockopt *sopt) 2680 { 2681 struct ip_fw *rule; 2682 void *buf, *bp; 2683 size_t size; 2684 int error; 2685 2686 /* 2687 * pass up a copy of the current rules. Static rules 2688 * come first (the last of which has number IPFW_DEFAULT_RULE), 2689 * followed by a possibly empty list of dynamic rule. 2690 */ 2691 crit_enter(); 2692 2693 size = static_ioc_len; /* size of static rules */ 2694 if (ipfw_dyn_v) /* add size of dyn.rules */ 2695 size += (dyn_count * sizeof(struct ipfw_ioc_state)); 2696 2697 /* 2698 * XXX todo: if the user passes a short length just to know 2699 * how much room is needed, do not bother filling up the 2700 * buffer, just jump to the sooptcopyout. 2701 */ 2702 bp = buf = kmalloc(size, M_TEMP, M_WAITOK | M_ZERO); 2703 2704 for (rule = layer3_chain; rule; rule = rule->next) 2705 bp = ipfw_copy_rule(rule, bp); 2706 2707 if (ipfw_dyn_v) { 2708 struct ipfw_ioc_state *ioc_state; 2709 int i; 2710 2711 ioc_state = bp; 2712 for (i = 0; i < curr_dyn_buckets; i++) { 2713 ipfw_dyn_rule *p; 2714 2715 for (p = ipfw_dyn_v[i]; p != NULL; 2716 p = p->next, ioc_state++) 2717 ipfw_copy_state(p, ioc_state); 2718 } 2719 } 2720 2721 crit_exit(); 2722 2723 error = sooptcopyout(sopt, buf, size); 2724 kfree(buf, M_TEMP); 2725 return error; 2726 } 2727 2728 /** 2729 * {set|get}sockopt parser. 
2730 */ 2731 static int 2732 ipfw_ctl(struct sockopt *sopt) 2733 { 2734 int error, rulenum; 2735 uint32_t masks[2]; 2736 size_t size; 2737 2738 /* 2739 * Disallow modifications in really-really secure mode, but still allow 2740 * the logging counters to be reset. 2741 */ 2742 if (sopt->sopt_name == IP_FW_ADD || 2743 (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { 2744 if (securelevel >= 3) 2745 return EPERM; 2746 } 2747 2748 error = 0; 2749 2750 switch (sopt->sopt_name) { 2751 case IP_FW_GET: 2752 error = ipfw_ctl_get_rules(sopt); 2753 break; 2754 2755 case IP_FW_FLUSH: 2756 /* 2757 * Normally we cannot release the lock on each iteration. 2758 * We could do it here only because we start from the head all 2759 * the times so there is no risk of missing some entries. 2760 * On the other hand, the risk is that we end up with 2761 * a very inconsistent ruleset, so better keep the lock 2762 * around the whole cycle. 2763 * 2764 * XXX this code can be improved by resetting the head of 2765 * the list to point to the default rule, and then freeing 2766 * the old list without the need for a lock. 2767 */ 2768 2769 crit_enter(); 2770 free_chain(&layer3_chain, 0 /* keep default rule */); 2771 crit_exit(); 2772 break; 2773 2774 case IP_FW_ADD: 2775 error = ipfw_ctl_add_rule(sopt); 2776 break; 2777 2778 case IP_FW_DEL: 2779 /* 2780 * IP_FW_DEL is used for deleting single rules or sets, 2781 * and (ab)used to atomically manipulate sets. Argument size 2782 * is used to distinguish between the two: 2783 * sizeof(uint32_t) 2784 * delete single rule or set of rules, 2785 * or reassign rules (or sets) to a different set. 2786 * 2*sizeof(uint32_t) 2787 * atomic disable/enable sets. 2788 * first uint32_t contains sets to be disabled, 2789 * second uint32_t contains sets to be enabled. 
2790 */ 2791 error = sooptcopyin(sopt, masks, 2792 sizeof(masks), sizeof(masks[0])); 2793 if (error) 2794 break; 2795 2796 size = sopt->sopt_valsize; 2797 if (size == sizeof(masks[0])) { 2798 /* 2799 * Delete or reassign static rule 2800 */ 2801 error = del_entry(&layer3_chain, masks[0]); 2802 } else if (size == sizeof(masks)) { 2803 /* 2804 * Set enable/disable 2805 */ 2806 crit_enter(); 2807 2808 set_disable = 2809 (set_disable | masks[0]) & ~masks[1] & 2810 ~(1 << 31); /* set 31 always enabled */ 2811 2812 crit_exit(); 2813 } else { 2814 error = EINVAL; 2815 } 2816 break; 2817 2818 case IP_FW_ZERO: 2819 case IP_FW_RESETLOG: /* argument is an int, the rule number */ 2820 rulenum=0; 2821 2822 if (sopt->sopt_val != 0) { 2823 error = sooptcopyin(sopt, &rulenum, 2824 sizeof(int), sizeof(int)); 2825 if (error) 2826 break; 2827 } 2828 error = zero_entry(rulenum, sopt->sopt_name == IP_FW_RESETLOG); 2829 break; 2830 2831 default: 2832 kprintf("ipfw_ctl invalid option %d\n", sopt->sopt_name); 2833 error = EINVAL; 2834 } 2835 2836 return error; 2837 } 2838 2839 /* 2840 * This procedure is only used to handle keepalives. 
It is invoked 2841 * every dyn_keepalive_period 2842 */ 2843 static void 2844 ipfw_tick(void * __unused unused) 2845 { 2846 int i; 2847 ipfw_dyn_rule *q; 2848 2849 if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) 2850 goto done; 2851 2852 crit_enter(); 2853 for (i = 0 ; i < curr_dyn_buckets ; i++) { 2854 for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { 2855 if (q->dyn_type == O_LIMIT_PARENT) 2856 continue; 2857 if (q->id.proto != IPPROTO_TCP) 2858 continue; 2859 if ( (q->state & BOTH_SYN) != BOTH_SYN) 2860 continue; 2861 if (TIME_LEQ( time_second+dyn_keepalive_interval, 2862 q->expire)) 2863 continue; /* too early */ 2864 if (TIME_LEQ(q->expire, time_second)) 2865 continue; /* too late, rule expired */ 2866 2867 send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); 2868 send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0); 2869 } 2870 } 2871 crit_exit(); 2872 done: 2873 callout_reset(&ipfw_timeout_h, dyn_keepalive_period * hz, 2874 ipfw_tick, NULL); 2875 } 2876 2877 static void 2878 ipfw_init_default_rule(struct ip_fw **head) 2879 { 2880 struct ip_fw *def_rule; 2881 2882 KKASSERT(*head == NULL); 2883 2884 def_rule = kmalloc(sizeof(*def_rule), M_IPFW, M_WAITOK | M_ZERO); 2885 2886 def_rule->act_ofs = 0; 2887 def_rule->rulenum = IPFW_DEFAULT_RULE; 2888 def_rule->cmd_len = 1; 2889 def_rule->set = 31; 2890 2891 def_rule->cmd[0].len = 1; 2892 #ifdef IPFIREWALL_DEFAULT_TO_ACCEPT 2893 def_rule->cmd[0].opcode = O_ACCEPT; 2894 #else 2895 def_rule->cmd[0].opcode = O_DENY; 2896 #endif 2897 2898 def_rule->refcnt = 1; 2899 2900 *head = def_rule; 2901 ipfw_inc_static_count(def_rule); 2902 2903 /* Install the default rule */ 2904 ip_fw_default_rule = def_rule; 2905 } 2906 2907 static void 2908 ipfw_init(void) 2909 { 2910 ip_fw_chk_ptr = ipfw_chk; 2911 ip_fw_ctl_ptr = ipfw_ctl; 2912 ip_fw_dn_io_ptr = ipfw_dummynet_io; 2913 2914 layer3_chain = NULL; 2915 ipfw_init_default_rule(&layer3_chain); 2916 2917 kprintf("ipfw2 initialized, divert %s, " 2918 "rule-based forwarding 
enabled, default to %s, logging ", 2919 #ifdef IPDIVERT 2920 "enabled", 2921 #else 2922 "disabled", 2923 #endif 2924 ip_fw_default_rule->cmd[0].opcode == O_ACCEPT ? 2925 "accept" : "deny"); 2926 2927 #ifdef IPFIREWALL_VERBOSE 2928 fw_verbose = 1; 2929 #endif 2930 #ifdef IPFIREWALL_VERBOSE_LIMIT 2931 verbose_limit = IPFIREWALL_VERBOSE_LIMIT; 2932 #endif 2933 if (fw_verbose == 0) 2934 kprintf("disabled\n"); 2935 else if (verbose_limit == 0) 2936 kprintf("unlimited\n"); 2937 else 2938 kprintf("limited to %d packets/entry by default\n", 2939 verbose_limit); 2940 callout_init(&ipfw_timeout_h); 2941 callout_reset(&ipfw_timeout_h, hz, ipfw_tick, NULL); 2942 } 2943 2944 static int 2945 ipfw_modevent(module_t mod, int type, void *unused) 2946 { 2947 int err = 0; 2948 2949 switch (type) { 2950 case MOD_LOAD: 2951 crit_enter(); 2952 if (IPFW_LOADED) { 2953 crit_exit(); 2954 kprintf("IP firewall already loaded\n"); 2955 err = EEXIST; 2956 } else { 2957 ipfw_init(); 2958 crit_exit(); 2959 } 2960 break; 2961 2962 case MOD_UNLOAD: 2963 #ifndef KLD_MODULE 2964 kprintf("ipfw statically compiled, cannot unload\n"); 2965 err = EBUSY; 2966 #else 2967 if (ipfw_refcnt != 0) { 2968 err = EBUSY; 2969 break; 2970 } 2971 2972 crit_enter(); 2973 callout_stop(&ipfw_timeout_h); 2974 ip_fw_chk_ptr = NULL; 2975 ip_fw_ctl_ptr = NULL; 2976 ip_fw_dn_io_ptr = NULL; 2977 free_chain(&layer3_chain, 1 /* kill default rule */); 2978 crit_exit(); 2979 kprintf("IP firewall unloaded\n"); 2980 #endif 2981 break; 2982 default: 2983 break; 2984 } 2985 return err; 2986 } 2987 2988 static moduledata_t ipfwmod = { 2989 "ipfw", 2990 ipfw_modevent, 2991 0 2992 }; 2993 DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_END, SI_ORDER_ANY); 2994 MODULE_VERSION(ipfw, 1); 2995