1 /* 2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Bill Yuan <bycn82@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "opt_ipfw.h" 36 #include "opt_inet.h" 37 #ifndef INET 38 #error IPFIREWALL3 requires INET. 39 #endif /* INET */ 40 41 #include <sys/param.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/systimer.h> 48 #include <sys/in_cksum.h> 49 #include <sys/systm.h> 50 #include <sys/proc.h> 51 #include <sys/socket.h> 52 #include <sys/syslog.h> 53 #include <sys/ucred.h> 54 #include <sys/lock.h> 55 56 #include <net/ethernet.h> 57 #include <net/netmsg2.h> 58 #include <net/netisr2.h> 59 #include <net/route.h> 60 #include <net/if.h> 61 62 #include <netinet/in.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_icmp.h> 65 #include <netinet/tcp.h> 66 #include <netinet/tcp_timer.h> 67 #include <netinet/tcp_var.h> 68 #include <netinet/tcpip.h> 69 #include <netinet/udp.h> 70 #include <netinet/udp_var.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/ip_divert.h> 76 #include <net/ipfw3/ip_fw.h> 77 78 #include "ip_fw3_nat.h" 79 80 MALLOC_DEFINE(M_IPFW3_NAT, "IP_FW3_NAT", "ipfw3_nat module"); 81 82 /* 83 * Highspeed Lockless Kernel NAT 84 * 85 * Kernel NAT 86 * The network address translation (NAT) will replace the `src` of the packet 87 * with an `alias` (alias_addr & alias_port). Accordingt to the configuration, 88 * The alias will be randomly picked from the configured range. 89 * 90 * Highspeed 91 * The first outgoing packet should trigger the creation of the `net_state`, 92 * and the `net_state` will keep in a RB-Tree for the subsequent outgoing 93 * packets. 94 * The first returning packet will trigger the creation of the `net_state2`, 95 * which will be stored in a multidimensional array of points ( of net_state2 ). 96 * 97 * Lockless 98 * The `net_state` for outgoing packet will be stored in the nat_context of 99 * current CPU. But due to the nature of the NAT, the returning packet may be 100 * handled by another CPU. Hence, The `net_state2` for the returning packet 101 * will be prepared and stored into the nat_context of the right CPU. 102 */ 103 104 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU]; 105 static struct callout ip_fw3_nat_cleanup_callout; 106 extern struct ipfw3_context *fw3_ctx[MAXCPU]; 107 extern ip_fw_ctl_t *ip_fw3_ctl_nat_ptr; 108 109 static int sysctl_var_cleanup_interval = 1; 110 static int sysctl_var_icmp_timeout = 10; 111 static int sysctl_var_tcp_timeout = 60; 112 static int sysctl_var_udp_timeout = 30; 113 114 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT"); 115 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW, 116 &sysctl_var_cleanup_interval, 0, "default life time"); 117 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, icmp_timeout, CTLFLAG_RW, 118 &sysctl_var_icmp_timeout, 0, "default icmp state life time"); 119 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, tcp_timeout, CTLFLAG_RW, 120 &sysctl_var_tcp_timeout, 0, "default tcp state life time"); 121 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, udp_timeout, CTLFLAG_RW, 122 &sysctl_var_udp_timeout, 0, "default udp state life time"); 123 124 RB_PROTOTYPE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp); 125 RB_GENERATE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp); 126 127 static __inline uint16_t 128 fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp) 129 { 130 uint32_t tmp; 131 132 if (is_udp && !cksum) 133 return (0x0000); 134 tmp = cksum + old_info - new_info; 135 tmp = (tmp >> 16) + (tmp & 65535); 136 tmp = tmp & 65535; 137 if (is_udp && !tmp) 138 return (0xFFFF); 139 return tmp; 140 } 141 142 void 143 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args, 144 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len) 145 { 146 if ((*args)->eh != NULL) { 147 *cmd_ctl = IP_FW_CTL_NO; 148 *cmd_val = IP_FW_NOT_MATCH; 149 return; 150 } 151 152 struct ip_fw3_nat_context *nat_ctx; 153 struct cfg_nat *nat; 154 int nat_id; 155 156 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 157 (*args)->rule = *f; 158 nat = ((ipfw_insn_nat *)cmd)->nat; 159 if (nat == NULL) { 160 nat_id = cmd->arg1; 161 nat = nat_ctx->nats[nat_id - 1]; 162 if (nat == NULL) { 163 *cmd_val = IP_FW_DENY; 164 *cmd_ctl = IP_FW_CTL_DONE; 165 return; 166 } 167 ((ipfw_insn_nat *)cmd)->nat = nat; 168 } 169 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m); 170 *cmd_ctl = IP_FW_CTL_NAT; 171 } 172 173 int 174 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m) 175 { 176 struct state_tree *tree_out = NULL; 177 struct nat_state *s = NULL, *dup, *k, key; 178 struct nat_state2 *s2 = NULL; 179 struct ip *ip = mtod(m, struct ip *); 180 struct in_addr *old_addr = NULL, new_addr; 181 uint16_t *old_port = NULL, new_port; 182 uint16_t *csum = NULL, dlen = 0; 183 uint8_t udp = 0; 184 boolean_t pseudo = FALSE, need_return_state = FALSE; 185 struct cfg_alias *alias; 186 int i = 0, rand_n = 0; 187 188 k = &key; 189 memset(k, 0, LEN_NAT_STATE); 190 if (args->oif == NULL) { 191 old_addr = &ip->ip_dst; 192 k->dst_addr = ntohl(args->f_id.dst_ip); 193 LIST_FOREACH(alias, &nat->alias, next) { 194 if (alias->ip.s_addr == ntohl(args->f_id.dst_ip)) { 195 break; 196 } 197 } 198 if (alias == NULL) { 199 goto oops; 200 } 201 switch (ip->ip_p) { 202 case IPPROTO_TCP: 203 old_port = &L3HDR(struct tcphdr, ip)->th_dport; 204 s2 = alias->tcp_in[*old_port - ALIAS_BEGIN]; 205 csum = &L3HDR(struct tcphdr, ip)->th_sum; 206 break; 207 case IPPROTO_UDP: 208 old_port = &L3HDR(struct udphdr, ip)->uh_dport; 209 s2 = alias->udp_in[*old_port - ALIAS_BEGIN]; 210 csum = &L3HDR(struct udphdr, ip)->uh_sum; 211 udp = 1; 212 break; 213 case IPPROTO_ICMP: 214 old_port = &L3HDR(struct icmp, ip)->icmp_id; 215 s2 = alias->icmp_in[*old_port]; 216 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 217 break; 218 default: 219 panic("ipfw3: unsupported proto %u", ip->ip_p); 220 } 221 if (s2 == NULL) { 222 goto oops; 223 } 224 } else { 225 old_addr = &ip->ip_src; 226 k->src_addr = args->f_id.src_ip; 227 k->dst_addr = args->f_id.dst_ip; 228 switch (ip->ip_p) { 229 case IPPROTO_TCP: 230 k->src_port = args->f_id.src_port; 231 k->dst_port = args->f_id.dst_port; 232 m->m_pkthdr.csum_flags = CSUM_TCP; 233 tree_out = &nat->rb_tcp_out; 234 old_port = &L3HDR(struct tcphdr, ip)->th_sport; 235 csum = &L3HDR(struct tcphdr, ip)->th_sum; 236 break; 237 case IPPROTO_UDP: 238 k->src_port = args->f_id.src_port; 239 k->dst_port = args->f_id.dst_port; 240 m->m_pkthdr.csum_flags = CSUM_UDP; 241 tree_out = &nat->rb_udp_out; 242 old_port = &L3HDR(struct udphdr, ip)->uh_sport; 243 csum = &L3HDR(struct udphdr, ip)->uh_sum; 244 udp = 1; 245 break; 246 case IPPROTO_ICMP: 247 k->src_port = L3HDR(struct icmp, ip)->icmp_id; 248 k->dst_port = k->src_port; 249 tree_out = &nat->rb_icmp_out; 250 old_port = &L3HDR(struct icmp, ip)->icmp_id; 251 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 252 break; 253 default: 254 panic("ipfw3: unsupported proto %u", ip->ip_p); 255 } 256 s = RB_FIND(state_tree, tree_out, k); 257 if (s == NULL) { 258 /* pick an alias ip randomly when there are multiple */ 259 if (nat->count > 1) { 260 rand_n = krandom() % nat->count; 261 } 262 LIST_FOREACH(alias, &nat->alias, next) { 263 if (i++ == rand_n) { 264 break; 265 } 266 } 267 switch (ip->ip_p) { 268 case IPPROTO_TCP: 269 m->m_pkthdr.csum_flags = CSUM_TCP; 270 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 271 M_INTWAIT | M_NULLOK | M_ZERO); 272 273 s->src_addr = args->f_id.src_ip; 274 s->src_port = args->f_id.src_port; 275 276 s->dst_addr = args->f_id.dst_ip; 277 s->dst_port = args->f_id.dst_port; 278 279 s->alias_addr = alias->ip.s_addr; 280 pick_alias_port(s, tree_out); 281 dup = RB_INSERT(state_tree, tree_out, s); 282 need_return_state = TRUE; 283 break; 284 case IPPROTO_UDP: 285 m->m_pkthdr.csum_flags = CSUM_UDP; 286 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 287 M_INTWAIT | M_NULLOK | M_ZERO); 288 289 s->src_addr = args->f_id.src_ip; 290 s->src_port = args->f_id.src_port; 291 292 s->dst_addr = args->f_id.dst_ip; 293 s->dst_port = args->f_id.dst_port; 294 295 s->alias_addr = alias->ip.s_addr; 296 pick_alias_port(s, tree_out); 297 dup = RB_INSERT(state_tree, tree_out, s); 298 need_return_state = TRUE; 299 break; 300 case IPPROTO_ICMP: 301 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 302 M_INTWAIT | M_NULLOK | M_ZERO); 303 s->src_addr = args->f_id.src_ip; 304 s->dst_addr = args->f_id.dst_ip; 305 306 s->src_port = *old_port; 307 s->dst_port = *old_port; 308 309 s->alias_addr = alias->ip.s_addr; 310 s->alias_port = htons(s->src_addr * 311 s->dst_addr % ALIAS_RANGE); 312 dup = RB_INSERT(state_tree, tree_out, s); 313 314 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 315 M_INTWAIT | M_NULLOK | M_ZERO); 316 317 s2->src_addr = args->f_id.dst_ip; 318 s2->dst_addr = alias->ip.s_addr; 319 320 s2->src_port = s->alias_port; 321 s2->dst_port = s->alias_port; 322 323 s2->alias_addr = htonl(args->f_id.src_ip); 324 s2->alias_port = *old_port; 325 326 alias->icmp_in[s->alias_port] = s2; 327 break; 328 default : 329 goto oops; 330 } 331 } 332 } 333 if (args->oif == NULL) { 334 if (ip->ip_p == IPPROTO_ICMP) { 335 new_addr.s_addr = s2->alias_addr; 336 new_port = s2->alias_port; 337 } else { 338 new_addr.s_addr = s2->src_addr; 339 new_port = s2->src_port; 340 } 341 s2->timestamp = time_uptime; 342 } else { 343 new_addr.s_addr = s->alias_addr; 344 new_port = s->alias_port; 345 s->timestamp = time_uptime; 346 } 347 348 /* replace src/dst and fix the checksum */ 349 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 350 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 351 dlen = ip->ip_len - (ip->ip_hl << 2); 352 } 353 pseudo = TRUE; 354 } 355 if (!pseudo) { 356 const uint16_t *oaddr, *naddr; 357 oaddr = (const uint16_t *)&old_addr->s_addr; 358 naddr = (const uint16_t *)&new_addr.s_addr; 359 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0); 360 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0); 361 if (ip->ip_p != IPPROTO_ICMP) { 362 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp); 363 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp); 364 } 365 } 366 old_addr->s_addr = new_addr.s_addr; 367 if (!pseudo) { 368 *csum = fix_cksum(*csum, *old_port, new_port, udp); 369 } 370 *old_port = new_port; 371 372 if (pseudo) { 373 *csum = in_pseudo(ip->ip_src.s_addr, 374 ip->ip_dst.s_addr, htons(dlen + ip->ip_p)); 375 } 376 377 /* prepare the state for return traffic */ 378 if (need_return_state) { 379 ip->ip_len = htons(ip->ip_len); 380 ip->ip_off = htons(ip->ip_off); 381 382 m->m_flags &= ~M_HASH; 383 ip_hashfn(&m, 0); 384 385 ip->ip_len = ntohs(ip->ip_len); 386 ip->ip_off = ntohs(ip->ip_off); 387 388 int nextcpu = netisr_hashcpu(m->m_pkthdr.hash); 389 if (nextcpu != mycpuid) { 390 struct netmsg_nat_state_add *msg; 391 msg = kmalloc(LEN_NMSG_NAT_STATE_ADD, 392 M_LWKTMSG, M_NOWAIT | M_ZERO); 393 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 394 0, nat_state_add_dispatch); 395 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 396 M_INTWAIT | M_NULLOK | M_ZERO); 397 398 s2->src_addr = args->f_id.dst_ip; 399 s2->src_port = args->f_id.dst_port; 400 401 s2->dst_addr = alias->ip.s_addr; 402 s2->dst_port = s->alias_port; 403 404 s2->src_addr = htonl(args->f_id.src_ip); 405 s2->src_port = htons(args->f_id.src_port); 406 407 s2->timestamp = s->timestamp; 408 msg->alias_addr.s_addr = alias->ip.s_addr; 409 msg->alias_port = s->alias_port; 410 msg->state = s2; 411 msg->nat_id = nat->id; 412 msg->proto = ip->ip_p; 413 netisr_sendmsg(&msg->base, nextcpu); 414 } else { 415 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 416 M_INTWAIT | M_NULLOK | M_ZERO); 417 418 s2->src_addr = args->f_id.dst_ip; 419 s2->dst_addr = alias->ip.s_addr; 420 421 s2->src_port = s->alias_port; 422 s2->dst_port = s->alias_port; 423 424 s2->src_addr = htonl(args->f_id.src_ip); 425 s2->src_port = htons(args->f_id.src_port); 426 427 s2->timestamp = s->timestamp; 428 if (ip->ip_p == IPPROTO_TCP) { 429 alias->tcp_in[s->alias_port - ALIAS_BEGIN] = s2; 430 } else { 431 alias->udp_in[s->alias_port - ALIAS_BEGIN] = s2; 432 } 433 } 434 } 435 return IP_FW_NAT; 436 oops: 437 IPFW3_DEBUG1("oops\n"); 438 return IP_FW_DENY; 439 } 440 441 void 442 pick_alias_port(struct nat_state *s, struct state_tree *tree) 443 { 444 do { 445 s->alias_port = htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN); 446 } while (RB_FIND(state_tree, tree, s) != NULL); 447 } 448 449 int 450 ip_fw3_nat_state_cmp(struct nat_state *s1, struct nat_state *s2) 451 { 452 if (s1->src_addr > s2->src_addr) 453 return 1; 454 if (s1->src_addr < s2->src_addr) 455 return -1; 456 457 if (s1->dst_addr > s2->dst_addr) 458 return 1; 459 if (s1->dst_addr < s2->dst_addr) 460 return -1; 461 462 if (s1->src_port > s2->src_port) 463 return 1; 464 if (s1->src_port < s2->src_port) 465 return -1; 466 467 if (s1->dst_port > s2->dst_port) 468 return 1; 469 if (s1->dst_port < s2->dst_port) 470 return -1; 471 472 return 0; 473 } 474 475 int 476 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt) 477 { 478 struct ip_fw3_nat_context *nat_ctx; 479 struct ioc_nat *ioc; 480 struct cfg_nat *nat; 481 struct cfg_alias *alias; 482 struct in_addr *ip; 483 size_t valsize; 484 int i, len; 485 486 len = 0; 487 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 488 valsize = sopt->sopt_valsize; 489 ioc = (struct ioc_nat *)sopt->sopt_val; 490 491 for (i = 0; i < NAT_ID_MAX; i++) { 492 nat = nat_ctx->nats[i]; 493 if (nat != NULL) { 494 len += LEN_IOC_NAT; 495 if (len >= valsize) { 496 goto nospace; 497 } 498 ioc->id = nat->id; 499 ioc->count = nat->count; 500 ip = &ioc->ip; 501 LIST_FOREACH(alias, &nat->alias, next) { 502 len += LEN_IN_ADDR; 503 if (len > valsize) { 504 goto nospace; 505 } 506 bcopy(&alias->ip, ip, LEN_IN_ADDR); 507 ip++; 508 } 509 } 510 } 511 sopt->sopt_valsize = len; 512 return 0; 513 nospace: 514 bzero(sopt->sopt_val, sopt->sopt_valsize); 515 sopt->sopt_valsize = 0; 516 return 0; 517 } 518 519 int 520 ip_fw3_ctl_nat_get_record(struct sockopt *sopt) 521 { 522 struct ip_fw3_nat_context *nat_ctx; 523 struct cfg_nat *the; 524 size_t sopt_size, total_len = 0; 525 struct ioc_nat_state *ioc; 526 int ioc_nat_id, i, n, cpu; 527 struct nat_state *s; 528 struct nat_state2 *s2; 529 struct cfg_alias *a1; 530 531 ioc_nat_id = *((int *)(sopt->sopt_val)); 532 sopt_size = sopt->sopt_valsize; 533 ioc = (struct ioc_nat_state *)sopt->sopt_val; 534 /* icmp states only in CPU 0 */ 535 cpu = 0; 536 nat_ctx = ip_fw3_nat_ctx[cpu]; 537 for (n = 0; n < NAT_ID_MAX; n++) { 538 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 539 if (nat_ctx->nats[n] == NULL) 540 break; 541 the = nat_ctx->nats[n]; 542 RB_FOREACH(s, state_tree, &the->rb_icmp_out) { 543 total_len += LEN_IOC_NAT_STATE; 544 if (total_len > sopt_size) 545 goto nospace; 546 ioc->src_addr.s_addr = ntohl(s->src_addr); 547 ioc->dst_addr.s_addr = s->dst_addr; 548 ioc->alias_addr.s_addr = s->alias_addr; 549 ioc->src_port = s->src_port; 550 ioc->dst_port = s->dst_port; 551 ioc->alias_port = s->alias_port; 552 ioc->nat_id = n + 1; 553 ioc->cpu_id = cpu; 554 ioc->proto = IPPROTO_ICMP; 555 ioc->direction = 1; 556 ioc->life = s->timestamp + 557 sysctl_var_icmp_timeout - time_uptime; 558 ioc++; 559 } 560 561 LIST_FOREACH(a1, &the->alias, next) { 562 for (i = 0; i < ALIAS_RANGE; i++) { 563 s2 = a1->icmp_in[i]; 564 if (s2 == NULL) { 565 continue; 566 } 567 568 total_len += LEN_IOC_NAT_STATE; 569 if (total_len > sopt_size) 570 goto nospace; 571 572 ioc->src_addr.s_addr = ntohl(s2->src_addr); 573 ioc->dst_addr.s_addr = s2->dst_addr; 574 ioc->alias_addr.s_addr = s2->alias_addr; 575 ioc->src_port = s2->src_port; 576 ioc->dst_port = s2->dst_port; 577 ioc->alias_port = s2->alias_port; 578 ioc->nat_id = n + 1; 579 ioc->cpu_id = cpu; 580 ioc->proto = IPPROTO_ICMP; 581 ioc->direction = 0; 582 ioc->life = s2->timestamp + 583 sysctl_var_icmp_timeout - time_uptime; 584 ioc++; 585 } 586 } 587 } 588 } 589 590 /* tcp states */ 591 for (cpu = 0; cpu < ncpus; cpu++) { 592 nat_ctx = ip_fw3_nat_ctx[cpu]; 593 for (n = 0; n < NAT_ID_MAX; n++) { 594 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 595 if (nat_ctx->nats[n] == NULL) 596 break; 597 the = nat_ctx->nats[n]; 598 RB_FOREACH(s, state_tree, &the->rb_tcp_out) { 599 total_len += LEN_IOC_NAT_STATE; 600 if (total_len > sopt_size) 601 goto nospace; 602 ioc->src_addr.s_addr = ntohl(s->src_addr); 603 ioc->dst_addr.s_addr = ntohl(s->dst_addr); 604 ioc->alias_addr.s_addr = s->alias_addr; 605 ioc->src_port = ntohs(s->src_port); 606 ioc->dst_port = ntohs(s->dst_port); 607 ioc->alias_port = s->alias_port; 608 ioc->nat_id = n + 1; 609 ioc->cpu_id = cpu; 610 ioc->proto = IPPROTO_TCP; 611 ioc->direction = 1; 612 ioc->life = s->timestamp + 613 sysctl_var_tcp_timeout - time_uptime; 614 ioc++; 615 } 616 LIST_FOREACH(a1, &the->alias, next) { 617 for (i = 0; i < ALIAS_RANGE; i++) { 618 s2 = a1->tcp_in[i]; 619 if (s2 == NULL) { 620 continue; 621 } 622 623 total_len += LEN_IOC_NAT_STATE; 624 if (total_len > sopt_size) 625 goto nospace; 626 627 ioc->src_addr.s_addr = ntohl(s2->src_addr); 628 ioc->dst_addr.s_addr = s2->dst_addr; 629 ioc->alias_addr.s_addr = s2->alias_addr; 630 ioc->src_port = s2->src_port; 631 ioc->dst_port = s2->dst_port; 632 ioc->alias_port = s2->alias_port; 633 ioc->nat_id = n + 1; 634 ioc->cpu_id = cpu; 635 ioc->proto = IPPROTO_TCP; 636 ioc->direction = 0; 637 ioc->life = s2->timestamp + 638 sysctl_var_icmp_timeout - time_uptime; 639 ioc++; 640 } 641 } 642 } 643 } 644 } 645 646 /* udp states */ 647 for (cpu = 0; cpu < ncpus; cpu++) { 648 nat_ctx = ip_fw3_nat_ctx[cpu]; 649 for (n = 0; n < NAT_ID_MAX; n++) { 650 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 651 if (nat_ctx->nats[n] == NULL) 652 break; 653 the = nat_ctx->nats[n]; 654 RB_FOREACH(s, state_tree, &the->rb_udp_out) { 655 total_len += LEN_IOC_NAT_STATE; 656 if (total_len > sopt_size) 657 goto nospace; 658 ioc->src_addr.s_addr = ntohl(s->src_addr); 659 ioc->dst_addr.s_addr = s->dst_addr; 660 ioc->alias_addr.s_addr = s->alias_addr; 661 ioc->src_port = s->src_port; 662 ioc->dst_port = s->dst_port; 663 ioc->alias_port = s->alias_port; 664 ioc->nat_id = n + 1; 665 ioc->cpu_id = cpu; 666 ioc->proto = IPPROTO_UDP; 667 ioc->direction = 1; 668 ioc->life = s->timestamp + 669 sysctl_var_udp_timeout - time_uptime; 670 ioc++; 671 } 672 LIST_FOREACH(a1, &the->alias, next) { 673 for (i = 0; i < ALIAS_RANGE; i++) { 674 s2 = a1->udp_in[i]; 675 if (s2 == NULL) { 676 continue; 677 } 678 679 total_len += LEN_IOC_NAT_STATE; 680 if (total_len > sopt_size) 681 goto nospace; 682 683 ioc->src_addr.s_addr = ntohl(s2->src_addr); 684 ioc->dst_addr.s_addr = s2->dst_addr; 685 ioc->alias_addr.s_addr = s2->alias_addr; 686 ioc->src_port = s2->src_port; 687 ioc->dst_port = s2->dst_port; 688 ioc->alias_port = s2->alias_port; 689 ioc->nat_id = n + 1; 690 ioc->cpu_id = cpu; 691 ioc->proto = IPPROTO_UDP; 692 ioc->direction = 0; 693 ioc->life = s2->timestamp + 694 sysctl_var_icmp_timeout - time_uptime; 695 ioc++; 696 } 697 } 698 } 699 } 700 } 701 sopt->sopt_valsize = total_len; 702 return 0; 703 nospace: 704 return 0; 705 } 706 707 void 708 nat_state_add_dispatch(netmsg_t add_msg) 709 { 710 struct ip_fw3_nat_context *nat_ctx; 711 struct netmsg_nat_state_add *msg; 712 struct cfg_nat *nat; 713 struct nat_state2 *s2; 714 struct cfg_alias *alias; 715 716 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 717 msg = (struct netmsg_nat_state_add *)add_msg; 718 nat = nat_ctx->nats[msg->nat_id - 1]; 719 720 LIST_FOREACH(alias, &nat->alias, next) { 721 if (alias->ip.s_addr == msg->alias_addr.s_addr) { 722 break; 723 } 724 } 725 s2 = msg->state; 726 if (msg->proto == IPPROTO_TCP) { 727 alias->tcp_in[msg->alias_port - ALIAS_BEGIN] = s2; 728 } else { 729 alias->udp_in[msg->alias_port - ALIAS_BEGIN] = s2; 730 } 731 } 732 733 /* 734 * Init the RB trees only when the NAT is configured. 735 */ 736 void 737 nat_add_dispatch(netmsg_t nat_add_msg) 738 { 739 struct ip_fw3_nat_context *nat_ctx; 740 struct netmsg_nat_add *msg; 741 struct ioc_nat *ioc; 742 struct cfg_nat *nat; 743 struct cfg_alias *alias; 744 struct in_addr *ip; 745 int n; 746 747 msg = (struct netmsg_nat_add *)nat_add_msg; 748 ioc = &msg->ioc_nat; 749 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 750 751 if (nat_ctx->nats[ioc->id - 1] == NULL) { 752 /* op = set, and nat not exists */ 753 nat = kmalloc(LEN_CFG_NAT, M_IPFW3_NAT, M_WAITOK | M_ZERO); 754 LIST_INIT(&nat->alias); 755 RB_INIT(&nat->rb_tcp_out); 756 RB_INIT(&nat->rb_udp_out); 757 if (mycpuid == 0) { 758 RB_INIT(&nat->rb_icmp_out); 759 } 760 nat->id = ioc->id; 761 nat->count = ioc->count; 762 ip = &ioc->ip; 763 for (n = 0; n < ioc->count; n++) { 764 alias = kmalloc(LEN_CFG_ALIAS, 765 M_IPFW3_NAT, M_WAITOK | M_ZERO); 766 memcpy(&alias->ip, ip, LEN_IN_ADDR); 767 LIST_INSERT_HEAD((&nat->alias), alias, next); 768 ip++; 769 } 770 nat_ctx->nats[ioc->id - 1] = nat; 771 } 772 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 773 } 774 775 int 776 ip_fw3_ctl_nat_add(struct sockopt *sopt) 777 { 778 struct netmsg_nat_add nat_add_msg, *msg; 779 struct ioc_nat *ioc; 780 msg = &nat_add_msg; 781 782 ioc = (struct ioc_nat *)(sopt->sopt_val); 783 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize, 784 sizeof(struct ioc_nat)); 785 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0, 786 nat_add_dispatch); 787 netisr_domsg(&msg->base, 0); 788 return 0; 789 } 790 791 void 792 nat_del_dispatch(netmsg_t nat_del_msg) 793 { 794 struct ip_fw3_nat_context *nat_ctx; 795 struct netmsg_nat_del *msg; 796 struct cfg_nat *nat; 797 struct nat_state *s, *tmp; 798 struct cfg_alias *alias, *tmp3; 799 800 msg = (struct netmsg_nat_del *)nat_del_msg; 801 802 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 803 nat = nat_ctx->nats[msg->id - 1]; 804 if (nat != NULL) { 805 /* the icmp states will only stored in cpu 0 */ 806 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 807 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 808 if (s != NULL) { 809 kfree(s, M_IPFW3_NAT); 810 } 811 } 812 /* 813 LIST_FOREACH_MUTABLE(s2, &nat->alias->icmp_in, next, tmp2) { 814 LIST_REMOVE(s2, next); 815 if (s != NULL) { 816 kfree(s, M_IPFW3_NAT); 817 } 818 } 819 */ 820 821 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 822 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 823 if (s != NULL) { 824 kfree(s, M_IPFW3_NAT); 825 } 826 } 827 /* 828 LIST_FOREACH_MUTABLE(s2, &nat->alias->tcp_in, next, tmp2) { 829 LIST_REMOVE(s2, next); 830 if (s != NULL) { 831 kfree(s, M_IPFW3_NAT); 832 } 833 } 834 */ 835 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 836 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 837 if (s != NULL) { 838 kfree(s, M_IPFW3_NAT); 839 } 840 } 841 /* 842 LIST_FOREACH_MUTABLE(s2, &nat->alias->udp_in, next, tmp2) { 843 LIST_REMOVE(s2, next); 844 if (s != NULL) { 845 kfree(s, M_IPFW3_NAT); 846 } 847 } 848 */ 849 LIST_FOREACH_MUTABLE(alias, &nat->alias, next, tmp3) { 850 kfree(alias, M_IPFW3_NAT); 851 } 852 kfree(nat, M_IPFW3_NAT); 853 nat_ctx->nats[msg->id - 1] = NULL; 854 } 855 netisr_forwardmsg_all(&nat_del_msg->base, mycpuid + 1); 856 } 857 int 858 ip_fw3_ctl_nat_del(struct sockopt *sopt) 859 { 860 struct netmsg_nat_del nat_del_msg, *msg; 861 862 msg = &nat_del_msg; 863 msg->id = *((int *)sopt->sopt_val); 864 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 865 0, nat_del_dispatch); 866 867 netisr_domsg(&msg->base, 0); 868 return 0; 869 } 870 int 871 ip_fw3_ctl_nat_flush(struct sockopt *sopt) 872 { 873 struct netmsg_nat_del nat_del_msg, *msg; 874 int i; 875 msg = &nat_del_msg; 876 for (i = 0; i < NAT_ID_MAX; i++) { 877 msg->id = i + 1; 878 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 879 0, nat_del_dispatch); 880 881 netisr_domsg(&msg->base, 0); 882 } 883 return 0; 884 } 885 886 int 887 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt) 888 { 889 int error = 0; 890 switch (sopt->sopt_name) { 891 case IP_FW_NAT_ADD: 892 error = ip_fw3_ctl_nat_add(sopt); 893 break; 894 case IP_FW_NAT_DEL: 895 error = ip_fw3_ctl_nat_del(sopt); 896 break; 897 case IP_FW_NAT_FLUSH: 898 error = ip_fw3_ctl_nat_flush(sopt); 899 break; 900 case IP_FW_NAT_GET: 901 error = ip_fw3_ctl_nat_get_cfg(sopt); 902 break; 903 case IP_FW_NAT_GET_RECORD: 904 error = ip_fw3_ctl_nat_get_record(sopt); 905 break; 906 default: 907 kprintf("ipfw3 nat invalid socket option %d\n", 908 sopt->sopt_name); 909 } 910 return error; 911 } 912 913 void 914 nat_init_ctx_dispatch(netmsg_t msg) 915 { 916 struct ip_fw3_nat_context *tmp; 917 tmp = kmalloc(sizeof(struct ip_fw3_nat_context), 918 M_IPFW3_NAT, M_WAITOK | M_ZERO); 919 920 ip_fw3_nat_ctx[mycpuid] = tmp; 921 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 922 } 923 924 void 925 nat_fnit_ctx_dispatch(netmsg_t msg) 926 { 927 kfree(ip_fw3_nat_ctx[mycpuid], M_IPFW3_NAT); 928 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 929 } 930 931 static void 932 nat_cleanup_func_dispatch(netmsg_t nmsg) 933 { 934 struct nat_state *s, *tmp; 935 struct ip_fw3_nat_context *nat_ctx; 936 struct cfg_nat *nat; 937 struct cfg_alias *a1, *tmp2; 938 struct nat_state2 *s2; 939 int i, j; 940 941 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 942 for (j = 0; j < NAT_ID_MAX; j++) { 943 nat = nat_ctx->nats[j]; 944 if (nat == NULL) 945 continue; 946 /* check the nat_states, remove the expired state */ 947 /* the icmp states will only stored in cpu 0 */ 948 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 949 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) { 950 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 951 kfree(s, M_IPFW3_NAT); 952 } 953 } 954 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 955 for (i = 0; i < ALIAS_RANGE; i++) { 956 s2 = a1->icmp_in[i]; 957 if (s2 != NULL) { 958 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 959 a1->icmp_in[i] = NULL; 960 kfree(s2, M_IPFW3_NAT); 961 } 962 } 963 964 } 965 } 966 967 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 968 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) { 969 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 970 kfree(s, M_IPFW3_NAT); 971 } 972 } 973 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 974 for (i = 0; i < ALIAS_RANGE; i++) { 975 s2 = a1->tcp_in[i]; 976 if (s2 != NULL) { 977 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 978 a1->tcp_in[i] = NULL; 979 kfree(s2, M_IPFW3_NAT); 980 } 981 } 982 983 } 984 } 985 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 986 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) { 987 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 988 kfree(s, M_IPFW3_NAT); 989 } 990 } 991 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 992 for (i = 0; i < ALIAS_RANGE; i++) { 993 s2 = a1->udp_in[i]; 994 if (s2 != NULL) { 995 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 996 a1->udp_in[i] = NULL; 997 kfree(s2, M_IPFW3_NAT); 998 } 999 } 1000 1001 } 1002 } 1003 } 1004 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 1005 } 1006 1007 static void 1008 ip_fw3_nat_cleanup_func(void *dummy __unused) 1009 { 1010 struct netmsg_base msg; 1011 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, 1012 nat_cleanup_func_dispatch); 1013 netisr_domsg(&msg, 0); 1014 1015 callout_reset(&ip_fw3_nat_cleanup_callout, 1016 sysctl_var_cleanup_interval * hz, 1017 ip_fw3_nat_cleanup_func, NULL); 1018 } 1019 1020 static 1021 int ip_fw3_nat_init(void) 1022 { 1023 struct netmsg_base msg; 1024 ip_fw3_register_module(MODULE_NAT_ID, MODULE_NAT_NAME); 1025 ip_fw3_register_filter_funcs(MODULE_NAT_ID, O_NAT_NAT, 1026 (filter_func)check_nat); 1027 ip_fw3_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt; 1028 netmsg_init(&msg, NULL, &curthread->td_msgport, 1029 0, nat_init_ctx_dispatch); 1030 netisr_domsg(&msg, 0); 1031 1032 callout_init_mp(&ip_fw3_nat_cleanup_callout); 1033 callout_reset(&ip_fw3_nat_cleanup_callout, 1034 sysctl_var_cleanup_interval * hz, 1035 ip_fw3_nat_cleanup_func, 1036 NULL); 1037 return 0; 1038 } 1039 1040 static int 1041 ip_fw3_nat_fini(void) 1042 { 1043 struct netmsg_base msg; 1044 struct netmsg_nat_del nat_del_msg, *msg1; 1045 int i; 1046 1047 callout_stop(&ip_fw3_nat_cleanup_callout); 1048 1049 msg1 = &nat_del_msg; 1050 for (i = 0; i < NAT_ID_MAX; i++) { 1051 msg1->id = i + 1; 1052 netmsg_init(&msg1->base, NULL, &curthread->td_msgport, 1053 0, nat_del_dispatch); 1054 1055 netisr_domsg(&msg1->base, 0); 1056 } 1057 1058 netmsg_init(&msg, NULL, &curthread->td_msgport, 1059 0, nat_fnit_ctx_dispatch); 1060 netisr_domsg(&msg, 0); 1061 1062 return ip_fw3_unregister_module(MODULE_NAT_ID); 1063 } 1064 1065 static int 1066 ip_fw3_nat_modevent(module_t mod, int type, void *data) 1067 { 1068 switch (type) { 1069 case MOD_LOAD: 1070 return ip_fw3_nat_init(); 1071 case MOD_UNLOAD: 1072 return ip_fw3_nat_fini(); 1073 default: 1074 break; 1075 } 1076 return 0; 1077 } 1078 1079 moduledata_t ip_fw3_nat_mod = { 1080 "ipfw3_nat", 1081 ip_fw3_nat_modevent, 1082 NULL 1083 }; 1084 1085 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod, 1086 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 1087 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1); 1088 MODULE_VERSION(ipfw3_nat, 1); 1089