1 /* 2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Bill Yuan <bycn82@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "opt_ipfw.h" 36 #include "opt_inet.h" 37 #ifndef INET 38 #error IPFIREWALL3 requires INET. 39 #endif /* INET */ 40 41 #include <sys/param.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/socketvar.h> 46 #include <sys/sysctl.h> 47 #include <sys/systimer.h> 48 #include <sys/in_cksum.h> 49 #include <sys/systm.h> 50 #include <sys/proc.h> 51 #include <sys/socket.h> 52 #include <sys/syslog.h> 53 #include <sys/ucred.h> 54 #include <sys/lock.h> 55 56 #include <net/ethernet.h> 57 #include <net/netmsg2.h> 58 #include <net/netisr2.h> 59 #include <net/route.h> 60 #include <net/if.h> 61 62 #include <netinet/in.h> 63 #include <netinet/ip.h> 64 #include <netinet/ip_icmp.h> 65 #include <netinet/tcp.h> 66 #include <netinet/tcp_timer.h> 67 #include <netinet/tcp_var.h> 68 #include <netinet/tcpip.h> 69 #include <netinet/udp.h> 70 #include <netinet/udp_var.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/in_pcb.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/ip_divert.h> 76 #include <net/ipfw3/ip_fw.h> 77 78 #include "ip_fw3_nat.h" 79 80 MALLOC_DEFINE(M_IPFW3_NAT, "IP_FW3_NAT", "ipfw3_nat module"); 81 82 /* 83 * Highspeed Lockless Kernel NAT 84 * 85 * Kernel NAT 86 * The network address translation (NAT) will replace the `src` of the packet 87 * with an `alias` (alias_addr & alias_port). Accordingt to the configuration, 88 * The alias will be randomly picked from the configured range. 89 * 90 * Highspeed 91 * The first outgoing packet should trigger the creation of the `net_state`, 92 * and the `net_state` will keep in a RB-Tree for the subsequent outgoing 93 * packets. 94 * The first returning packet will trigger the creation of the `net_state2`, 95 * which will be stored in a multidimensional array of points ( of net_state2 ). 96 * 97 * Lockless 98 * The `net_state` for outgoing packet will be stored in the nat_context of 99 * current CPU. But due to the nature of the NAT, the returning packet may be 100 * handled by another CPU. Hence, The `net_state2` for the returning packet 101 * will be prepared and stored into the nat_context of the right CPU. 102 */ 103 104 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU]; 105 static struct callout ip_fw3_nat_cleanup_callout; 106 extern struct ipfw3_context *fw3_ctx[MAXCPU]; 107 extern ip_fw_ctl_t *ip_fw3_ctl_nat_ptr; 108 109 static int sysctl_var_cleanup_interval = 1; 110 static int sysctl_var_icmp_timeout = 10; 111 static int sysctl_var_tcp_timeout = 60; 112 static int sysctl_var_udp_timeout = 30; 113 114 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT"); 115 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW, 116 &sysctl_var_cleanup_interval, 0, "default life time"); 117 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, icmp_timeout, CTLFLAG_RW, 118 &sysctl_var_icmp_timeout, 0, "default icmp state life time"); 119 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, tcp_timeout, CTLFLAG_RW, 120 &sysctl_var_tcp_timeout, 0, "default tcp state life time"); 121 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, udp_timeout, CTLFLAG_RW, 122 &sysctl_var_udp_timeout, 0, "default udp state life time"); 123 124 RB_PROTOTYPE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp); 125 RB_GENERATE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp); 126 127 static __inline uint16_t 128 fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp) 129 { 130 uint32_t tmp; 131 132 if (is_udp && !cksum) 133 return (0x0000); 134 tmp = cksum + old_info - new_info; 135 tmp = (tmp >> 16) + (tmp & 65535); 136 tmp = tmp & 65535; 137 if (is_udp && !tmp) 138 return (0xFFFF); 139 return tmp; 140 } 141 142 void 143 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args, 144 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len) 145 { 146 if ((*args)->eh != NULL) { 147 *cmd_ctl = IP_FW_CTL_NO; 148 *cmd_val = IP_FW_NOT_MATCH; 149 return; 150 } 151 152 struct ip_fw3_nat_context *nat_ctx; 153 struct cfg_nat *nat; 154 int nat_id; 155 156 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 157 (*args)->rule = *f; 158 nat = ((ipfw_insn_nat *)cmd)->nat; 159 if (nat == NULL) { 160 nat_id = cmd->arg1; 161 nat = nat_ctx->nats[nat_id - 1]; 162 if (nat == NULL) { 163 *cmd_val = IP_FW_DENY; 164 *cmd_ctl = IP_FW_CTL_DONE; 165 return; 166 } 167 ((ipfw_insn_nat *)cmd)->nat = nat; 168 } 169 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m); 170 *cmd_ctl = IP_FW_CTL_NAT; 171 } 172 173 int 174 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m) 175 { 176 struct state_tree *tree_out = NULL; 177 struct nat_state *s = NULL, *dup, *k, key; 178 struct nat_state2 *s2 = NULL; 179 struct ip *ip = mtod(m, struct ip *); 180 struct in_addr *old_addr = NULL, new_addr; 181 uint16_t *old_port = NULL, new_port; 182 uint16_t *csum = NULL, dlen = 0; 183 uint8_t udp = 0; 184 boolean_t pseudo = FALSE, need_return_state = FALSE; 185 struct cfg_alias *alias; 186 int i = 0, rand_n = 0; 187 188 k = &key; 189 memset(k, 0, LEN_NAT_STATE); 190 if (args->oif == NULL) { 191 old_addr = &ip->ip_dst; 192 k->dst_addr = ntohl(args->f_id.dst_ip); 193 LIST_FOREACH(alias, &nat->alias, next) { 194 if (alias->ip.s_addr == ntohl(args->f_id.dst_ip)) { 195 break; 196 } 197 } 198 if (alias == NULL) { 199 goto oops; 200 } 201 switch (ip->ip_p) { 202 case IPPROTO_TCP: 203 old_port = &L3HDR(struct tcphdr, ip)->th_dport; 204 s2 = alias->tcp_in[*old_port - ALIAS_BEGIN]; 205 csum = &L3HDR(struct tcphdr, ip)->th_sum; 206 break; 207 case IPPROTO_UDP: 208 old_port = &L3HDR(struct udphdr, ip)->uh_dport; 209 s2 = alias->udp_in[*old_port - ALIAS_BEGIN]; 210 csum = &L3HDR(struct udphdr, ip)->uh_sum; 211 udp = 1; 212 break; 213 case IPPROTO_ICMP: 214 old_port = &L3HDR(struct icmp, ip)->icmp_id; 215 s2 = alias->icmp_in[*old_port]; 216 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 217 break; 218 default: 219 panic("ipfw3: unsupported proto %u", ip->ip_p); 220 } 221 if (s2 == NULL) { 222 goto oops; 223 } 224 } else { 225 old_addr = &ip->ip_src; 226 k->src_addr = args->f_id.src_ip; 227 k->dst_addr = args->f_id.dst_ip; 228 switch (ip->ip_p) { 229 case IPPROTO_TCP: 230 k->src_port = args->f_id.src_port; 231 k->dst_port = args->f_id.dst_port; 232 m->m_pkthdr.csum_flags = CSUM_TCP; 233 tree_out = &nat->rb_tcp_out; 234 old_port = &L3HDR(struct tcphdr, ip)->th_sport; 235 csum = &L3HDR(struct tcphdr, ip)->th_sum; 236 break; 237 case IPPROTO_UDP: 238 k->src_port = args->f_id.src_port; 239 k->dst_port = args->f_id.dst_port; 240 m->m_pkthdr.csum_flags = CSUM_UDP; 241 tree_out = &nat->rb_udp_out; 242 old_port = &L3HDR(struct udphdr, ip)->uh_sport; 243 csum = &L3HDR(struct udphdr, ip)->uh_sum; 244 udp = 1; 245 break; 246 case IPPROTO_ICMP: 247 k->src_port = L3HDR(struct icmp, ip)->icmp_id; 248 k->dst_port = k->src_port; 249 tree_out = &nat->rb_icmp_out; 250 old_port = &L3HDR(struct icmp, ip)->icmp_id; 251 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 252 break; 253 default: 254 panic("ipfw3: unsupported proto %u", ip->ip_p); 255 } 256 s = RB_FIND(state_tree, tree_out, k); 257 if (s == NULL) { 258 /* pick an alias ip randomly when there are multiple */ 259 if (nat->count > 1) { 260 rand_n = krandom() % nat->count; 261 } 262 LIST_FOREACH(alias, &nat->alias, next) { 263 if (i++ == rand_n) { 264 break; 265 } 266 } 267 switch (ip->ip_p) { 268 case IPPROTO_TCP: 269 m->m_pkthdr.csum_flags = CSUM_TCP; 270 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 271 M_INTWAIT | M_NULLOK | M_ZERO); 272 273 s->src_addr = args->f_id.src_ip; 274 s->src_port = args->f_id.src_port; 275 276 s->dst_addr = args->f_id.dst_ip; 277 s->dst_port = args->f_id.dst_port; 278 279 s->alias_addr = alias->ip.s_addr; 280 pick_alias_port(s, tree_out); 281 dup = RB_INSERT(state_tree, tree_out, s); 282 need_return_state = TRUE; 283 break; 284 case IPPROTO_UDP: 285 m->m_pkthdr.csum_flags = CSUM_UDP; 286 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 287 M_INTWAIT | M_NULLOK | M_ZERO); 288 289 s->src_addr = args->f_id.src_ip; 290 s->src_port = args->f_id.src_port; 291 292 s->dst_addr = args->f_id.dst_ip; 293 s->dst_port = args->f_id.dst_port; 294 295 s->alias_addr = alias->ip.s_addr; 296 pick_alias_port(s, tree_out); 297 dup = RB_INSERT(state_tree, tree_out, s); 298 need_return_state = TRUE; 299 break; 300 case IPPROTO_ICMP: 301 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT, 302 M_INTWAIT | M_NULLOK | M_ZERO); 303 s->src_addr = args->f_id.src_ip; 304 s->dst_addr = args->f_id.dst_ip; 305 306 s->src_port = *old_port; 307 s->dst_port = *old_port; 308 309 s->alias_addr = alias->ip.s_addr; 310 s->alias_port = htons(s->src_addr * 311 s->dst_addr % ALIAS_RANGE); 312 dup = RB_INSERT(state_tree, tree_out, s); 313 314 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 315 M_INTWAIT | M_NULLOK | M_ZERO); 316 317 s2->src_addr = args->f_id.dst_ip; 318 s2->dst_addr = alias->ip.s_addr; 319 320 s2->src_port = s->alias_port; 321 s2->dst_port = s->alias_port; 322 323 s2->alias_addr = htonl(args->f_id.src_ip); 324 s2->alias_port = *old_port; 325 326 alias->icmp_in[s->alias_port] = s2; 327 break; 328 default : 329 goto oops; 330 } 331 } 332 } 333 if (args->oif == NULL) { 334 if (ip->ip_p == IPPROTO_ICMP) { 335 new_addr.s_addr = s2->alias_addr; 336 new_port = s2->alias_port; 337 } else { 338 new_addr.s_addr = s2->src_addr; 339 new_port = s2->src_port; 340 } 341 s2->timestamp = time_uptime; 342 } else { 343 new_addr.s_addr = s->alias_addr; 344 new_port = s->alias_port; 345 s->timestamp = time_uptime; 346 } 347 348 /* replace src/dst and fix the checksum */ 349 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 350 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 351 dlen = ntohs(ip->ip_len) - (ip->ip_hl << 2); 352 } 353 pseudo = TRUE; 354 } 355 if (!pseudo) { 356 const uint16_t *oaddr, *naddr; 357 oaddr = (const uint16_t *)&old_addr->s_addr; 358 naddr = (const uint16_t *)&new_addr.s_addr; 359 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0); 360 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0); 361 if (ip->ip_p != IPPROTO_ICMP) { 362 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp); 363 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp); 364 } 365 } 366 old_addr->s_addr = new_addr.s_addr; 367 if (!pseudo) { 368 *csum = fix_cksum(*csum, *old_port, new_port, udp); 369 } 370 *old_port = new_port; 371 372 if (pseudo) { 373 *csum = in_pseudo(ip->ip_src.s_addr, 374 ip->ip_dst.s_addr, htons(dlen + ip->ip_p)); 375 } 376 377 /* prepare the state for return traffic */ 378 if (need_return_state) { 379 m->m_flags &= ~M_HASH; 380 ip_hashfn(&m, 0); 381 382 int nextcpu = netisr_hashcpu(m->m_pkthdr.hash); 383 if (nextcpu != mycpuid) { 384 struct netmsg_nat_state_add *msg; 385 msg = kmalloc(LEN_NMSG_NAT_STATE_ADD, 386 M_LWKTMSG, M_NOWAIT | M_ZERO); 387 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 388 0, nat_state_add_dispatch); 389 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 390 M_INTWAIT | M_NULLOK | M_ZERO); 391 392 s2->src_addr = args->f_id.dst_ip; 393 s2->src_port = args->f_id.dst_port; 394 395 s2->dst_addr = alias->ip.s_addr; 396 s2->dst_port = s->alias_port; 397 398 s2->src_addr = htonl(args->f_id.src_ip); 399 s2->src_port = htons(args->f_id.src_port); 400 401 s2->timestamp = s->timestamp; 402 msg->alias_addr.s_addr = alias->ip.s_addr; 403 msg->alias_port = s->alias_port; 404 msg->state = s2; 405 msg->nat_id = nat->id; 406 msg->proto = ip->ip_p; 407 netisr_sendmsg(&msg->base, nextcpu); 408 } else { 409 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT, 410 M_INTWAIT | M_NULLOK | M_ZERO); 411 412 s2->src_addr = args->f_id.dst_ip; 413 s2->dst_addr = alias->ip.s_addr; 414 415 s2->src_port = s->alias_port; 416 s2->dst_port = s->alias_port; 417 418 s2->src_addr = htonl(args->f_id.src_ip); 419 s2->src_port = htons(args->f_id.src_port); 420 421 s2->timestamp = s->timestamp; 422 if (ip->ip_p == IPPROTO_TCP) { 423 alias->tcp_in[s->alias_port - ALIAS_BEGIN] = s2; 424 } else { 425 alias->udp_in[s->alias_port - ALIAS_BEGIN] = s2; 426 } 427 } 428 } 429 return IP_FW_NAT; 430 oops: 431 IPFW3_DEBUG1("oops\n"); 432 return IP_FW_DENY; 433 } 434 435 void 436 pick_alias_port(struct nat_state *s, struct state_tree *tree) 437 { 438 do { 439 s->alias_port = htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN); 440 } while (RB_FIND(state_tree, tree, s) != NULL); 441 } 442 443 int 444 ip_fw3_nat_state_cmp(struct nat_state *s1, struct nat_state *s2) 445 { 446 if (s1->src_addr > s2->src_addr) 447 return 1; 448 if (s1->src_addr < s2->src_addr) 449 return -1; 450 451 if (s1->dst_addr > s2->dst_addr) 452 return 1; 453 if (s1->dst_addr < s2->dst_addr) 454 return -1; 455 456 if (s1->src_port > s2->src_port) 457 return 1; 458 if (s1->src_port < s2->src_port) 459 return -1; 460 461 if (s1->dst_port > s2->dst_port) 462 return 1; 463 if (s1->dst_port < s2->dst_port) 464 return -1; 465 466 return 0; 467 } 468 469 int 470 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt) 471 { 472 struct ip_fw3_nat_context *nat_ctx; 473 struct ioc_nat *ioc; 474 struct cfg_nat *nat; 475 struct cfg_alias *alias; 476 struct in_addr *ip; 477 size_t valsize; 478 int i, len; 479 480 len = 0; 481 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 482 valsize = sopt->sopt_valsize; 483 ioc = (struct ioc_nat *)sopt->sopt_val; 484 485 for (i = 0; i < NAT_ID_MAX; i++) { 486 nat = nat_ctx->nats[i]; 487 if (nat != NULL) { 488 len += LEN_IOC_NAT; 489 if (len >= valsize) { 490 goto nospace; 491 } 492 ioc->id = nat->id; 493 ioc->count = nat->count; 494 ip = &ioc->ip; 495 LIST_FOREACH(alias, &nat->alias, next) { 496 len += LEN_IN_ADDR; 497 if (len > valsize) { 498 goto nospace; 499 } 500 bcopy(&alias->ip, ip, LEN_IN_ADDR); 501 ip++; 502 } 503 } 504 } 505 sopt->sopt_valsize = len; 506 return 0; 507 nospace: 508 bzero(sopt->sopt_val, sopt->sopt_valsize); 509 sopt->sopt_valsize = 0; 510 return 0; 511 } 512 513 int 514 ip_fw3_ctl_nat_get_record(struct sockopt *sopt) 515 { 516 struct ip_fw3_nat_context *nat_ctx; 517 struct cfg_nat *the; 518 size_t sopt_size, total_len = 0; 519 struct ioc_nat_state *ioc; 520 int ioc_nat_id, i, n, cpu; 521 struct nat_state *s; 522 struct nat_state2 *s2; 523 struct cfg_alias *a1; 524 525 ioc_nat_id = *((int *)(sopt->sopt_val)); 526 sopt_size = sopt->sopt_valsize; 527 ioc = (struct ioc_nat_state *)sopt->sopt_val; 528 /* icmp states only in CPU 0 */ 529 cpu = 0; 530 nat_ctx = ip_fw3_nat_ctx[cpu]; 531 for (n = 0; n < NAT_ID_MAX; n++) { 532 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 533 if (nat_ctx->nats[n] == NULL) 534 break; 535 the = nat_ctx->nats[n]; 536 RB_FOREACH(s, state_tree, &the->rb_icmp_out) { 537 total_len += LEN_IOC_NAT_STATE; 538 if (total_len > sopt_size) 539 goto nospace; 540 ioc->src_addr.s_addr = ntohl(s->src_addr); 541 ioc->dst_addr.s_addr = s->dst_addr; 542 ioc->alias_addr.s_addr = s->alias_addr; 543 ioc->src_port = s->src_port; 544 ioc->dst_port = s->dst_port; 545 ioc->alias_port = s->alias_port; 546 ioc->nat_id = n + 1; 547 ioc->cpu_id = cpu; 548 ioc->proto = IPPROTO_ICMP; 549 ioc->direction = 1; 550 ioc->life = s->timestamp + 551 sysctl_var_icmp_timeout - time_uptime; 552 ioc++; 553 } 554 555 LIST_FOREACH(a1, &the->alias, next) { 556 for (i = 0; i < ALIAS_RANGE; i++) { 557 s2 = a1->icmp_in[i]; 558 if (s2 == NULL) { 559 continue; 560 } 561 562 total_len += LEN_IOC_NAT_STATE; 563 if (total_len > sopt_size) 564 goto nospace; 565 566 ioc->src_addr.s_addr = ntohl(s2->src_addr); 567 ioc->dst_addr.s_addr = s2->dst_addr; 568 ioc->alias_addr.s_addr = s2->alias_addr; 569 ioc->src_port = s2->src_port; 570 ioc->dst_port = s2->dst_port; 571 ioc->alias_port = s2->alias_port; 572 ioc->nat_id = n + 1; 573 ioc->cpu_id = cpu; 574 ioc->proto = IPPROTO_ICMP; 575 ioc->direction = 0; 576 ioc->life = s2->timestamp + 577 sysctl_var_icmp_timeout - time_uptime; 578 ioc++; 579 } 580 } 581 } 582 } 583 584 /* tcp states */ 585 for (cpu = 0; cpu < ncpus; cpu++) { 586 nat_ctx = ip_fw3_nat_ctx[cpu]; 587 for (n = 0; n < NAT_ID_MAX; n++) { 588 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 589 if (nat_ctx->nats[n] == NULL) 590 break; 591 the = nat_ctx->nats[n]; 592 RB_FOREACH(s, state_tree, &the->rb_tcp_out) { 593 total_len += LEN_IOC_NAT_STATE; 594 if (total_len > sopt_size) 595 goto nospace; 596 ioc->src_addr.s_addr = ntohl(s->src_addr); 597 ioc->dst_addr.s_addr = ntohl(s->dst_addr); 598 ioc->alias_addr.s_addr = s->alias_addr; 599 ioc->src_port = ntohs(s->src_port); 600 ioc->dst_port = ntohs(s->dst_port); 601 ioc->alias_port = s->alias_port; 602 ioc->nat_id = n + 1; 603 ioc->cpu_id = cpu; 604 ioc->proto = IPPROTO_TCP; 605 ioc->direction = 1; 606 ioc->life = s->timestamp + 607 sysctl_var_tcp_timeout - time_uptime; 608 ioc++; 609 } 610 LIST_FOREACH(a1, &the->alias, next) { 611 for (i = 0; i < ALIAS_RANGE; i++) { 612 s2 = a1->tcp_in[i]; 613 if (s2 == NULL) { 614 continue; 615 } 616 617 total_len += LEN_IOC_NAT_STATE; 618 if (total_len > sopt_size) 619 goto nospace; 620 621 ioc->src_addr.s_addr = ntohl(s2->src_addr); 622 ioc->dst_addr.s_addr = s2->dst_addr; 623 ioc->alias_addr.s_addr = s2->alias_addr; 624 ioc->src_port = s2->src_port; 625 ioc->dst_port = s2->dst_port; 626 ioc->alias_port = s2->alias_port; 627 ioc->nat_id = n + 1; 628 ioc->cpu_id = cpu; 629 ioc->proto = IPPROTO_TCP; 630 ioc->direction = 0; 631 ioc->life = s2->timestamp + 632 sysctl_var_icmp_timeout - time_uptime; 633 ioc++; 634 } 635 } 636 } 637 } 638 } 639 640 /* udp states */ 641 for (cpu = 0; cpu < ncpus; cpu++) { 642 nat_ctx = ip_fw3_nat_ctx[cpu]; 643 for (n = 0; n < NAT_ID_MAX; n++) { 644 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 645 if (nat_ctx->nats[n] == NULL) 646 break; 647 the = nat_ctx->nats[n]; 648 RB_FOREACH(s, state_tree, &the->rb_udp_out) { 649 total_len += LEN_IOC_NAT_STATE; 650 if (total_len > sopt_size) 651 goto nospace; 652 ioc->src_addr.s_addr = ntohl(s->src_addr); 653 ioc->dst_addr.s_addr = s->dst_addr; 654 ioc->alias_addr.s_addr = s->alias_addr; 655 ioc->src_port = s->src_port; 656 ioc->dst_port = s->dst_port; 657 ioc->alias_port = s->alias_port; 658 ioc->nat_id = n + 1; 659 ioc->cpu_id = cpu; 660 ioc->proto = IPPROTO_UDP; 661 ioc->direction = 1; 662 ioc->life = s->timestamp + 663 sysctl_var_udp_timeout - time_uptime; 664 ioc++; 665 } 666 LIST_FOREACH(a1, &the->alias, next) { 667 for (i = 0; i < ALIAS_RANGE; i++) { 668 s2 = a1->udp_in[i]; 669 if (s2 == NULL) { 670 continue; 671 } 672 673 total_len += LEN_IOC_NAT_STATE; 674 if (total_len > sopt_size) 675 goto nospace; 676 677 ioc->src_addr.s_addr = ntohl(s2->src_addr); 678 ioc->dst_addr.s_addr = s2->dst_addr; 679 ioc->alias_addr.s_addr = s2->alias_addr; 680 ioc->src_port = s2->src_port; 681 ioc->dst_port = s2->dst_port; 682 ioc->alias_port = s2->alias_port; 683 ioc->nat_id = n + 1; 684 ioc->cpu_id = cpu; 685 ioc->proto = IPPROTO_UDP; 686 ioc->direction = 0; 687 ioc->life = s2->timestamp + 688 sysctl_var_icmp_timeout - time_uptime; 689 ioc++; 690 } 691 } 692 } 693 } 694 } 695 sopt->sopt_valsize = total_len; 696 return 0; 697 nospace: 698 return 0; 699 } 700 701 void 702 nat_state_add_dispatch(netmsg_t add_msg) 703 { 704 struct ip_fw3_nat_context *nat_ctx; 705 struct netmsg_nat_state_add *msg; 706 struct cfg_nat *nat; 707 struct nat_state2 *s2; 708 struct cfg_alias *alias; 709 710 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 711 msg = (struct netmsg_nat_state_add *)add_msg; 712 nat = nat_ctx->nats[msg->nat_id - 1]; 713 714 LIST_FOREACH(alias, &nat->alias, next) { 715 if (alias->ip.s_addr == msg->alias_addr.s_addr) { 716 break; 717 } 718 } 719 s2 = msg->state; 720 if (msg->proto == IPPROTO_TCP) { 721 alias->tcp_in[msg->alias_port - ALIAS_BEGIN] = s2; 722 } else { 723 alias->udp_in[msg->alias_port - ALIAS_BEGIN] = s2; 724 } 725 } 726 727 /* 728 * Init the RB trees only when the NAT is configured. 729 */ 730 void 731 nat_add_dispatch(netmsg_t nat_add_msg) 732 { 733 struct ip_fw3_nat_context *nat_ctx; 734 struct netmsg_nat_add *msg; 735 struct ioc_nat *ioc; 736 struct cfg_nat *nat; 737 struct cfg_alias *alias; 738 struct in_addr *ip; 739 int n; 740 741 msg = (struct netmsg_nat_add *)nat_add_msg; 742 ioc = &msg->ioc_nat; 743 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 744 745 if (nat_ctx->nats[ioc->id - 1] == NULL) { 746 /* op = set, and nat not exists */ 747 nat = kmalloc(LEN_CFG_NAT, M_IPFW3_NAT, M_WAITOK | M_ZERO); 748 LIST_INIT(&nat->alias); 749 RB_INIT(&nat->rb_tcp_out); 750 RB_INIT(&nat->rb_udp_out); 751 if (mycpuid == 0) { 752 RB_INIT(&nat->rb_icmp_out); 753 } 754 nat->id = ioc->id; 755 nat->count = ioc->count; 756 ip = &ioc->ip; 757 for (n = 0; n < ioc->count; n++) { 758 alias = kmalloc(LEN_CFG_ALIAS, 759 M_IPFW3_NAT, M_WAITOK | M_ZERO); 760 memcpy(&alias->ip, ip, LEN_IN_ADDR); 761 LIST_INSERT_HEAD((&nat->alias), alias, next); 762 ip++; 763 } 764 nat_ctx->nats[ioc->id - 1] = nat; 765 } 766 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 767 } 768 769 int 770 ip_fw3_ctl_nat_add(struct sockopt *sopt) 771 { 772 struct netmsg_nat_add nat_add_msg, *msg; 773 struct ioc_nat *ioc; 774 msg = &nat_add_msg; 775 776 ioc = (struct ioc_nat *)(sopt->sopt_val); 777 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize, 778 sizeof(struct ioc_nat)); 779 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0, 780 nat_add_dispatch); 781 netisr_domsg(&msg->base, 0); 782 return 0; 783 } 784 785 void 786 nat_del_dispatch(netmsg_t nat_del_msg) 787 { 788 struct ip_fw3_nat_context *nat_ctx; 789 struct netmsg_nat_del *msg; 790 struct cfg_nat *nat; 791 struct nat_state *s, *tmp; 792 struct cfg_alias *alias, *tmp3; 793 794 msg = (struct netmsg_nat_del *)nat_del_msg; 795 796 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 797 nat = nat_ctx->nats[msg->id - 1]; 798 if (nat != NULL) { 799 /* the icmp states will only stored in cpu 0 */ 800 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 801 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 802 if (s != NULL) { 803 kfree(s, M_IPFW3_NAT); 804 } 805 } 806 /* 807 LIST_FOREACH_MUTABLE(s2, &nat->alias->icmp_in, next, tmp2) { 808 LIST_REMOVE(s2, next); 809 if (s != NULL) { 810 kfree(s, M_IPFW3_NAT); 811 } 812 } 813 */ 814 815 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 816 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 817 if (s != NULL) { 818 kfree(s, M_IPFW3_NAT); 819 } 820 } 821 /* 822 LIST_FOREACH_MUTABLE(s2, &nat->alias->tcp_in, next, tmp2) { 823 LIST_REMOVE(s2, next); 824 if (s != NULL) { 825 kfree(s, M_IPFW3_NAT); 826 } 827 } 828 */ 829 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 830 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 831 if (s != NULL) { 832 kfree(s, M_IPFW3_NAT); 833 } 834 } 835 /* 836 LIST_FOREACH_MUTABLE(s2, &nat->alias->udp_in, next, tmp2) { 837 LIST_REMOVE(s2, next); 838 if (s != NULL) { 839 kfree(s, M_IPFW3_NAT); 840 } 841 } 842 */ 843 LIST_FOREACH_MUTABLE(alias, &nat->alias, next, tmp3) { 844 kfree(alias, M_IPFW3_NAT); 845 } 846 kfree(nat, M_IPFW3_NAT); 847 nat_ctx->nats[msg->id - 1] = NULL; 848 } 849 netisr_forwardmsg_all(&nat_del_msg->base, mycpuid + 1); 850 } 851 int 852 ip_fw3_ctl_nat_del(struct sockopt *sopt) 853 { 854 struct netmsg_nat_del nat_del_msg, *msg; 855 856 msg = &nat_del_msg; 857 msg->id = *((int *)sopt->sopt_val); 858 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 859 0, nat_del_dispatch); 860 861 netisr_domsg(&msg->base, 0); 862 return 0; 863 } 864 int 865 ip_fw3_ctl_nat_flush(struct sockopt *sopt) 866 { 867 struct netmsg_nat_del nat_del_msg, *msg; 868 int i; 869 msg = &nat_del_msg; 870 for (i = 0; i < NAT_ID_MAX; i++) { 871 msg->id = i + 1; 872 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 873 0, nat_del_dispatch); 874 875 netisr_domsg(&msg->base, 0); 876 } 877 return 0; 878 } 879 880 int 881 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt) 882 { 883 int error = 0; 884 switch (sopt->sopt_name) { 885 case IP_FW_NAT_ADD: 886 error = ip_fw3_ctl_nat_add(sopt); 887 break; 888 case IP_FW_NAT_DEL: 889 error = ip_fw3_ctl_nat_del(sopt); 890 break; 891 case IP_FW_NAT_FLUSH: 892 error = ip_fw3_ctl_nat_flush(sopt); 893 break; 894 case IP_FW_NAT_GET: 895 error = ip_fw3_ctl_nat_get_cfg(sopt); 896 break; 897 case IP_FW_NAT_GET_RECORD: 898 error = ip_fw3_ctl_nat_get_record(sopt); 899 break; 900 default: 901 kprintf("ipfw3 nat invalid socket option %d\n", 902 sopt->sopt_name); 903 } 904 return error; 905 } 906 907 void 908 nat_init_ctx_dispatch(netmsg_t msg) 909 { 910 struct ip_fw3_nat_context *tmp; 911 tmp = kmalloc(sizeof(struct ip_fw3_nat_context), 912 M_IPFW3_NAT, M_WAITOK | M_ZERO); 913 914 ip_fw3_nat_ctx[mycpuid] = tmp; 915 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 916 } 917 918 void 919 nat_fnit_ctx_dispatch(netmsg_t msg) 920 { 921 kfree(ip_fw3_nat_ctx[mycpuid], M_IPFW3_NAT); 922 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 923 } 924 925 static void 926 nat_cleanup_func_dispatch(netmsg_t nmsg) 927 { 928 struct nat_state *s, *tmp; 929 struct ip_fw3_nat_context *nat_ctx; 930 struct cfg_nat *nat; 931 struct cfg_alias *a1, *tmp2; 932 struct nat_state2 *s2; 933 int i, j; 934 935 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 936 for (j = 0; j < NAT_ID_MAX; j++) { 937 nat = nat_ctx->nats[j]; 938 if (nat == NULL) 939 continue; 940 /* check the nat_states, remove the expired state */ 941 /* the icmp states will only stored in cpu 0 */ 942 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 943 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) { 944 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 945 kfree(s, M_IPFW3_NAT); 946 } 947 } 948 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 949 for (i = 0; i < ALIAS_RANGE; i++) { 950 s2 = a1->icmp_in[i]; 951 if (s2 != NULL) { 952 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 953 a1->icmp_in[i] = NULL; 954 kfree(s2, M_IPFW3_NAT); 955 } 956 } 957 958 } 959 } 960 961 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 962 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) { 963 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 964 kfree(s, M_IPFW3_NAT); 965 } 966 } 967 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 968 for (i = 0; i < ALIAS_RANGE; i++) { 969 s2 = a1->tcp_in[i]; 970 if (s2 != NULL) { 971 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 972 a1->tcp_in[i] = NULL; 973 kfree(s2, M_IPFW3_NAT); 974 } 975 } 976 977 } 978 } 979 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 980 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) { 981 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 982 kfree(s, M_IPFW3_NAT); 983 } 984 } 985 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) { 986 for (i = 0; i < ALIAS_RANGE; i++) { 987 s2 = a1->udp_in[i]; 988 if (s2 != NULL) { 989 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) { 990 a1->udp_in[i] = NULL; 991 kfree(s2, M_IPFW3_NAT); 992 } 993 } 994 995 } 996 } 997 } 998 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 999 } 1000 1001 static void 1002 ip_fw3_nat_cleanup_func(void *dummy __unused) 1003 { 1004 struct netmsg_base msg; 1005 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, 1006 nat_cleanup_func_dispatch); 1007 netisr_domsg(&msg, 0); 1008 1009 callout_reset(&ip_fw3_nat_cleanup_callout, 1010 sysctl_var_cleanup_interval * hz, 1011 ip_fw3_nat_cleanup_func, NULL); 1012 } 1013 1014 static 1015 int ip_fw3_nat_init(void) 1016 { 1017 struct netmsg_base msg; 1018 ip_fw3_register_module(MODULE_NAT_ID, MODULE_NAT_NAME); 1019 ip_fw3_register_filter_funcs(MODULE_NAT_ID, O_NAT_NAT, 1020 (filter_func)check_nat); 1021 ip_fw3_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt; 1022 netmsg_init(&msg, NULL, &curthread->td_msgport, 1023 0, nat_init_ctx_dispatch); 1024 netisr_domsg(&msg, 0); 1025 1026 callout_init_mp(&ip_fw3_nat_cleanup_callout); 1027 callout_reset(&ip_fw3_nat_cleanup_callout, 1028 sysctl_var_cleanup_interval * hz, 1029 ip_fw3_nat_cleanup_func, 1030 NULL); 1031 return 0; 1032 } 1033 1034 static int 1035 ip_fw3_nat_fini(void) 1036 { 1037 struct netmsg_base msg; 1038 struct netmsg_nat_del nat_del_msg, *msg1; 1039 int i; 1040 1041 callout_stop(&ip_fw3_nat_cleanup_callout); 1042 1043 msg1 = &nat_del_msg; 1044 for (i = 0; i < NAT_ID_MAX; i++) { 1045 msg1->id = i + 1; 1046 netmsg_init(&msg1->base, NULL, &curthread->td_msgport, 1047 0, nat_del_dispatch); 1048 1049 netisr_domsg(&msg1->base, 0); 1050 } 1051 1052 netmsg_init(&msg, NULL, &curthread->td_msgport, 1053 0, nat_fnit_ctx_dispatch); 1054 netisr_domsg(&msg, 0); 1055 1056 return ip_fw3_unregister_module(MODULE_NAT_ID); 1057 } 1058 1059 static int 1060 ip_fw3_nat_modevent(module_t mod, int type, void *data) 1061 { 1062 switch (type) { 1063 case MOD_LOAD: 1064 return ip_fw3_nat_init(); 1065 case MOD_UNLOAD: 1066 return ip_fw3_nat_fini(); 1067 default: 1068 break; 1069 } 1070 return 0; 1071 } 1072 1073 moduledata_t ip_fw3_nat_mod = { 1074 "ipfw3_nat", 1075 ip_fw3_nat_modevent, 1076 NULL 1077 }; 1078 1079 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod, 1080 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 1081 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1); 1082 MODULE_VERSION(ipfw3_nat, 1); 1083