1 /* 2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Bill Yuan <bycn82@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/kernel.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/socketvar.h> 40 #include <sys/sysctl.h> 41 #include <sys/systimer.h> 42 #include <sys/thread2.h> 43 #include <sys/in_cksum.h> 44 #include <sys/systm.h> 45 #include <sys/proc.h> 46 #include <sys/socket.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/lock.h> 50 #include <sys/mplock2.h> 51 52 #include <net/ethernet.h> 53 #include <net/netmsg2.h> 54 #include <net/netisr2.h> 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_timer.h> 63 #include <netinet/tcp_var.h> 64 #include <netinet/tcpip.h> 65 #include <netinet/udp.h> 66 #include <netinet/udp_var.h> 67 #include <netinet/in_systm.h> 68 #include <netinet/in_var.h> 69 #include <netinet/in_pcb.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/ip_divert.h> 72 #include <net/ipfw3/ip_fw.h> 73 74 #include "ip_fw3_nat.h" 75 76 /* 77 * Lockless Kernel NAT 78 * 79 * The `src` will be replaced by `alias` when a packet is leaving the system. 80 * Hence, the packet is from `src` to `dst` before been translated. And after 81 * been translated, the packet is from `alias` to `dst`. 82 * 83 * The state for outgoing packet will be stored in the nat_context of current 84 * CPU. But due to the nature of the NAT, the returning packet may be handled 85 * by another CPU. Hence, a state for the returning packet will be prepared and 86 * store into the nat_context of the right CPU. 87 */ 88 89 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU]; 90 static struct callout ip_fw3_nat_cleanup_callout; 91 extern struct ipfw_context *ipfw_ctx[MAXCPU]; 92 extern ip_fw_ctl_t *ipfw_ctl_nat_ptr; 93 94 static int sysctl_var_cleanup_interval = 1; 95 static int sysctl_var_icmp_timeout = 10; 96 static int sysctl_var_tcp_timeout = 60; 97 static int sysctl_var_udp_timeout = 30; 98 99 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT"); 100 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW, 101 &sysctl_var_cleanup_interval, 0, "default life time"); 102 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, icmp_timeout, CTLFLAG_RW, 103 &sysctl_var_icmp_timeout, 0, "default icmp state life time"); 104 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, tcp_timeout, CTLFLAG_RW, 105 &sysctl_var_tcp_timeout, 0, "default tcp state life time"); 106 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, udp_timeout, CTLFLAG_RW, 107 &sysctl_var_udp_timeout, 0, "default udp state life time"); 108 109 RB_PROTOTYPE(state_tree, nat_state, entries, nat_state_cmp); 110 RB_GENERATE(state_tree, nat_state, entries, nat_state_cmp); 111 112 static __inline uint16_t 113 fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp) 114 { 115 uint32_t tmp; 116 117 if (is_udp && !cksum) 118 return (0x0000); 119 tmp = cksum + old_info - new_info; 120 tmp = (tmp >> 16) + (tmp & 65535); 121 tmp = tmp & 65535; 122 if (is_udp && !tmp) 123 return (0xFFFF); 124 return tmp; 125 } 126 127 void 128 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args, 129 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len) 130 { 131 if ((*args)->eh != NULL) { 132 *cmd_ctl = IP_FW_CTL_NO; 133 *cmd_val = IP_FW_NOT_MATCH; 134 return; 135 } 136 137 struct ip_fw3_nat_context *nat_ctx; 138 struct cfg_nat *nat; 139 int nat_id; 140 141 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 142 (*args)->rule = *f; 143 nat = ((ipfw_insn_nat *)cmd)->nat; 144 if (nat == NULL) { 145 nat_id = cmd->arg1; 146 nat = nat_ctx->nats[nat_id - 1]; 147 if (nat == NULL) { 148 *cmd_val = IP_FW_DENY; 149 *cmd_ctl = IP_FW_CTL_DONE; 150 return; 151 } 152 ((ipfw_insn_nat *)cmd)->nat = nat; 153 } 154 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m); 155 *cmd_ctl = IP_FW_CTL_NAT; 156 } 157 158 int 159 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m) 160 { 161 struct state_tree *tree_in = NULL, *tree_out = NULL; 162 struct nat_state *s, *s2, *dup, *k, key; 163 struct ip *ip = mtod(m, struct ip *); 164 struct in_addr *old_addr = NULL, new_addr; 165 uint16_t *old_port = NULL, new_port; 166 uint16_t *csum = NULL, dlen = 0; 167 uint8_t udp = 0; 168 boolean_t pseudo = FALSE, need_return_state = FALSE; 169 struct cfg_alias *alias; 170 int i = 0, rand_n = 0; 171 172 k = &key; 173 memset(k, 0, LEN_NAT_STATE); 174 if (args->oif == NULL) { 175 old_addr = &ip->ip_dst; 176 k->src_addr = args->f_id.src_ip; 177 k->dst_addr = ntohl(args->f_id.dst_ip); 178 switch (ip->ip_p) { 179 case IPPROTO_TCP: 180 k->src_port = args->f_id.src_port; 181 k->dst_port = ntohs(args->f_id.dst_port); 182 tree_in = &nat->rb_tcp_in; 183 old_port = &L3HDR(struct tcphdr, ip)->th_dport; 184 csum = &L3HDR(struct tcphdr, ip)->th_sum; 185 break; 186 case IPPROTO_UDP: 187 k->src_port = args->f_id.src_port; 188 k->dst_port = ntohs(args->f_id.dst_port); 189 tree_in = &nat->rb_udp_in; 190 old_port = &L3HDR(struct udphdr, ip)->uh_dport; 191 csum = &L3HDR(struct udphdr, ip)->uh_sum; 192 udp = 1; 193 break; 194 case IPPROTO_ICMP: 195 k->src_port = L3HDR(struct icmp, ip)->icmp_id;; 196 k->dst_port = L3HDR(struct icmp, ip)->icmp_id;; 197 tree_in = &nat->rb_icmp_in; 198 old_port = &L3HDR(struct icmp, ip)->icmp_id; 199 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 200 break; 201 default: 202 panic("ipfw3: unsupported proto %u", ip->ip_p); 203 } 204 s = RB_FIND(state_tree, tree_in, k); 205 if (s == NULL) { 206 goto oops; 207 } 208 } else { 209 old_addr = &ip->ip_src; 210 k->src_addr = args->f_id.src_ip; 211 k->dst_addr = args->f_id.dst_ip; 212 switch (ip->ip_p) { 213 case IPPROTO_TCP: 214 k->src_port = args->f_id.src_port; 215 k->dst_port = args->f_id.dst_port; 216 m->m_pkthdr.csum_flags = CSUM_TCP; 217 tree_out = &nat->rb_tcp_out; 218 old_port = &L3HDR(struct tcphdr, ip)->th_sport; 219 csum = &L3HDR(struct tcphdr, ip)->th_sum; 220 break; 221 case IPPROTO_UDP: 222 k->src_port = args->f_id.src_port; 223 k->dst_port = args->f_id.dst_port; 224 m->m_pkthdr.csum_flags = CSUM_UDP; 225 tree_out = &nat->rb_udp_out; 226 old_port = &L3HDR(struct udphdr, ip)->uh_sport; 227 csum = &L3HDR(struct udphdr, ip)->uh_sum; 228 udp = 1; 229 break; 230 case IPPROTO_ICMP: 231 k->src_port = L3HDR(struct icmp, ip)->icmp_id; 232 k->dst_port = k->src_port; 233 tree_out = &nat->rb_icmp_out; 234 old_port = &L3HDR(struct icmp, ip)->icmp_id; 235 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 236 break; 237 default: 238 panic("ipfw3: unsupported proto %u", ip->ip_p); 239 } 240 s = RB_FIND(state_tree, tree_out, k); 241 if (s == NULL) { 242 /* pick an alias ip randomly when there are multiple */ 243 if (nat->count > 1) { 244 rand_n = krandom() % nat->count; 245 } 246 LIST_FOREACH(alias, &nat->alias, next) { 247 if (i++ == rand_n) { 248 break; 249 } 250 } 251 switch (ip->ip_p) { 252 case IPPROTO_TCP: 253 m->m_pkthdr.csum_flags = CSUM_TCP; 254 s = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 255 M_INTWAIT | M_NULLOK | M_ZERO); 256 257 s->src_addr = args->f_id.src_ip; 258 s->src_port = args->f_id.src_port; 259 260 s->dst_addr = args->f_id.dst_ip; 261 s->dst_port = args->f_id.dst_port; 262 263 s->alias_addr = alias->ip.s_addr; 264 pick_alias_port(s, tree_out); 265 dup = RB_INSERT(state_tree, tree_out, s); 266 need_return_state = TRUE; 267 break; 268 case IPPROTO_UDP: 269 m->m_pkthdr.csum_flags = CSUM_UDP; 270 s = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 271 M_INTWAIT | M_NULLOK | M_ZERO); 272 273 s->src_addr = args->f_id.src_ip; 274 s->src_port = args->f_id.src_port; 275 276 s->dst_addr = args->f_id.dst_ip; 277 s->dst_port = args->f_id.dst_port; 278 279 s->alias_addr = alias->ip.s_addr; 280 pick_alias_port(s, tree_out); 281 dup = RB_INSERT(state_tree, tree_out, s); 282 need_return_state = TRUE; 283 break; 284 case IPPROTO_ICMP: 285 s = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 286 M_INTWAIT | M_NULLOK | M_ZERO); 287 s->src_addr = args->f_id.src_ip; 288 s->dst_addr = args->f_id.dst_ip; 289 290 s->src_port = *old_port; 291 s->dst_port = *old_port; 292 293 s->alias_addr = alias->ip.s_addr; 294 s->alias_port = htons(s->src_addr % ALIAS_RANGE); 295 dup = RB_INSERT(state_tree, tree_out, s); 296 297 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 298 M_INTWAIT | M_NULLOK | M_ZERO); 299 300 s2->src_addr = args->f_id.dst_ip; 301 s2->dst_addr = alias->ip.s_addr; 302 303 s2->src_port = s->alias_port; 304 s2->dst_port = s->alias_port; 305 306 s2->alias_addr = htonl(args->f_id.src_ip); 307 s2->alias_port = *old_port; 308 tree_in = &nat->rb_icmp_in; 309 dup = RB_INSERT(state_tree, tree_in, s2); 310 break; 311 default : 312 goto oops; 313 } 314 } 315 } 316 new_addr.s_addr = s->alias_addr; 317 new_port = s->alias_port; 318 s->timestamp = time_uptime; 319 320 /* replace src/dst and fix the checksum */ 321 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 322 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 323 dlen = ip->ip_len - (ip->ip_hl << 2); 324 } 325 pseudo = TRUE; 326 } 327 if (!pseudo) { 328 const uint16_t *oaddr, *naddr; 329 oaddr = (const uint16_t *)&old_addr->s_addr; 330 naddr = (const uint16_t *)&new_addr.s_addr; 331 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0); 332 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0); 333 if (ip->ip_p != IPPROTO_ICMP) { 334 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp); 335 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp); 336 } 337 } 338 old_addr->s_addr = new_addr.s_addr; 339 if (!pseudo) { 340 *csum = fix_cksum(*csum, *old_port, new_port, udp); 341 } 342 *old_port = new_port; 343 344 if (pseudo) { 345 *csum = in_pseudo(ip->ip_src.s_addr, 346 ip->ip_dst.s_addr, htons(dlen + ip->ip_p)); 347 } 348 349 /* prepare the state for return traffic */ 350 if (need_return_state) { 351 ip->ip_len = htons(ip->ip_len); 352 ip->ip_off = htons(ip->ip_off); 353 354 m->m_flags &= ~M_HASH; 355 ip_hashfn(&m, 0); 356 357 ip->ip_len = ntohs(ip->ip_len); 358 ip->ip_off = ntohs(ip->ip_off); 359 360 int nextcpu = netisr_hashcpu(m->m_pkthdr.hash); 361 if (nextcpu != mycpuid) { 362 struct netmsg_nat_state_add *msg; 363 msg = kmalloc(LEN_NMSG_NAT_STATE_ADD, 364 M_LWKTMSG, M_NOWAIT | M_ZERO); 365 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 366 0, nat_state_add_dispatch); 367 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 368 M_INTWAIT | M_NULLOK | M_ZERO); 369 370 s2->src_addr = args->f_id.dst_ip; 371 s2->src_port = s->dst_port; 372 373 s2->dst_addr = alias->ip.s_addr; 374 s2->dst_port = s->alias_port; 375 376 s2->alias_addr = htonl(args->f_id.src_ip); 377 s2->alias_port = htons(args->f_id.src_port); 378 379 s2->timestamp = s->timestamp; 380 381 msg->state = s2; 382 msg->nat_id = nat->id; 383 msg->proto = ip->ip_p; 384 netisr_sendmsg(&msg->base, nextcpu); 385 } else { 386 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 387 M_INTWAIT | M_NULLOK | M_ZERO); 388 389 s2->src_addr = args->f_id.dst_ip; 390 s2->src_port = s->dst_port; 391 392 s2->dst_addr = alias->ip.s_addr; 393 s2->dst_port = s->alias_port; 394 395 s2->alias_addr = htonl(args->f_id.src_ip); 396 s2->alias_port = htons(args->f_id.src_port); 397 398 s2->timestamp = s->timestamp; 399 if (ip->ip_p == IPPROTO_TCP) { 400 tree_in = &nat->rb_tcp_in; 401 } else { 402 tree_in = &nat->rb_udp_in; 403 } 404 dup = RB_INSERT(state_tree, tree_in, s2); 405 } 406 } 407 return IP_FW_NAT; 408 oops: 409 return IP_FW_DENY; 410 } 411 412 void 413 pick_alias_port(struct nat_state *s, struct state_tree *tree) 414 { 415 do { 416 s->alias_port = htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN); 417 } while (RB_FIND(state_tree, tree, s) != NULL); 418 } 419 420 int 421 nat_state_cmp(struct nat_state *s1, struct nat_state *s2) 422 { 423 if (s1->src_addr > s2->src_addr) 424 return 1; 425 if (s1->src_addr < s2->src_addr) 426 return -1; 427 428 if (s1->dst_addr > s2->dst_addr) 429 return 1; 430 if (s1->dst_addr < s2->dst_addr) 431 return -1; 432 433 if (s1->src_port > s2->src_port) 434 return 1; 435 if (s1->src_port < s2->src_port) 436 return -1; 437 438 if (s1->dst_port > s2->dst_port) 439 return 1; 440 if (s1->dst_port < s2->dst_port) 441 return -1; 442 443 return 0; 444 } 445 446 int 447 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt) 448 { 449 struct ip_fw3_nat_context *nat_ctx; 450 struct ioc_nat *ioc; 451 struct cfg_nat *nat; 452 struct cfg_alias *alias; 453 struct in_addr *ip; 454 size_t valsize; 455 int i, len; 456 457 len = 0; 458 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 459 valsize = sopt->sopt_valsize; 460 ioc = (struct ioc_nat *)sopt->sopt_val; 461 462 for (i = 0; i < NAT_ID_MAX; i++) { 463 nat = nat_ctx->nats[i]; 464 if (nat != NULL) { 465 len += LEN_IOC_NAT; 466 if (len >= valsize) { 467 goto nospace; 468 } 469 ioc->id = nat->id; 470 ioc->count = nat->count; 471 ip = &ioc->ip; 472 LIST_FOREACH(alias, &nat->alias, next) { 473 len += LEN_IN_ADDR; 474 if (len > valsize) { 475 goto nospace; 476 } 477 bcopy(&alias->ip, ip, LEN_IN_ADDR); 478 ip++; 479 } 480 } 481 } 482 sopt->sopt_valsize = len; 483 return 0; 484 nospace: 485 bzero(sopt->sopt_val, sopt->sopt_valsize); 486 sopt->sopt_valsize = 0; 487 return 0; 488 } 489 490 int 491 ip_fw3_ctl_nat_get_record(struct sockopt *sopt) 492 { 493 struct ip_fw3_nat_context *nat_ctx; 494 struct cfg_nat *the; 495 size_t sopt_size, total_len = 0; 496 struct ioc_nat_state *ioc; 497 int ioc_nat_id, n, cpu; 498 struct nat_state *s; 499 500 ioc_nat_id = *((int *)(sopt->sopt_val)); 501 sopt_size = sopt->sopt_valsize; 502 ioc = (struct ioc_nat_state *)sopt->sopt_val; 503 /* icmp states only in CPU 0 */ 504 cpu = 0; 505 nat_ctx = ip_fw3_nat_ctx[cpu]; 506 for (n = 0; n < NAT_ID_MAX; n++) { 507 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 508 if (nat_ctx->nats[n] == NULL) 509 break; 510 the = nat_ctx->nats[n]; 511 RB_FOREACH(s, state_tree, &the->rb_icmp_out) { 512 total_len += LEN_IOC_NAT_STATE; 513 if (total_len > sopt_size) 514 goto nospace; 515 ioc->src_addr.s_addr = ntohl(s->src_addr); 516 ioc->dst_addr.s_addr = s->dst_addr; 517 ioc->alias_addr.s_addr = s->alias_addr; 518 ioc->src_port = s->src_port; 519 ioc->dst_port = s->dst_port; 520 ioc->alias_port = s->alias_port; 521 ioc->nat_id = n + 1; 522 ioc->cpu_id = cpu; 523 ioc->proto = IPPROTO_ICMP; 524 ioc->direction = 1; 525 ioc->life = s->timestamp + 526 sysctl_var_icmp_timeout - time_uptime; 527 ioc++; 528 } 529 RB_FOREACH(s, state_tree, &the->rb_icmp_in) { 530 total_len += LEN_IOC_NAT_STATE; 531 if (total_len > sopt_size) 532 goto nospace; 533 ioc->src_addr.s_addr = ntohl(s->src_addr); 534 ioc->dst_addr.s_addr = s->dst_addr; 535 ioc->alias_addr.s_addr = s->alias_addr; 536 ioc->src_port = s->src_port; 537 ioc->dst_port = s->dst_port; 538 ioc->alias_port = s->alias_port; 539 ioc->nat_id = n + 1; 540 ioc->cpu_id = cpu; 541 ioc->proto = IPPROTO_ICMP; 542 ioc->direction = 0; 543 ioc->life = s->timestamp + 544 sysctl_var_icmp_timeout - time_uptime; 545 ioc++; 546 } 547 } 548 } 549 550 /* tcp states */ 551 for (cpu = 0; cpu < ncpus; cpu++) { 552 nat_ctx = ip_fw3_nat_ctx[cpu]; 553 for (n = 0; n < NAT_ID_MAX; n++) { 554 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 555 if (nat_ctx->nats[n] == NULL) 556 break; 557 the = nat_ctx->nats[n]; 558 RB_FOREACH(s, state_tree, &the->rb_tcp_out) { 559 total_len += LEN_IOC_NAT_STATE; 560 if (total_len > sopt_size) 561 goto nospace; 562 ioc->src_addr.s_addr = ntohl(s->src_addr); 563 ioc->dst_addr.s_addr = ntohl(s->dst_addr); 564 ioc->alias_addr.s_addr = s->alias_addr; 565 ioc->src_port = ntohs(s->src_port); 566 ioc->dst_port = ntohs(s->dst_port); 567 ioc->alias_port = s->alias_port; 568 ioc->nat_id = n + 1; 569 ioc->cpu_id = cpu; 570 ioc->proto = IPPROTO_TCP; 571 ioc->direction = 1; 572 ioc->life = s->timestamp + 573 sysctl_var_tcp_timeout - time_uptime; 574 ioc++; 575 } 576 RB_FOREACH(s, state_tree, &the->rb_tcp_in) { 577 total_len += LEN_IOC_NAT_STATE; 578 if (total_len > sopt_size) 579 goto nospace; 580 ioc->src_addr.s_addr = ntohl(s->src_addr); 581 ioc->dst_addr.s_addr = s->dst_addr; 582 ioc->alias_addr.s_addr = s->alias_addr; 583 ioc->src_port = ntohs(s->src_port); 584 ioc->dst_port = s->dst_port; 585 ioc->alias_port = s->alias_port; 586 ioc->nat_id = n + 1; 587 ioc->cpu_id = cpu; 588 ioc->proto = IPPROTO_TCP; 589 ioc->direction = 0; 590 ioc->life = s->timestamp + 591 sysctl_var_tcp_timeout - time_uptime; 592 ioc++; 593 } 594 } 595 } 596 } 597 598 /* udp states */ 599 for (cpu = 0; cpu < ncpus; cpu++) { 600 nat_ctx = ip_fw3_nat_ctx[cpu]; 601 for (n = 0; n < NAT_ID_MAX; n++) { 602 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) { 603 if (nat_ctx->nats[n] == NULL) 604 break; 605 the = nat_ctx->nats[n]; 606 RB_FOREACH(s, state_tree, &the->rb_udp_out) { 607 total_len += LEN_IOC_NAT_STATE; 608 if (total_len > sopt_size) 609 goto nospace; 610 ioc->src_addr.s_addr = ntohl(s->src_addr); 611 ioc->dst_addr.s_addr = s->dst_addr; 612 ioc->alias_addr.s_addr = s->alias_addr; 613 ioc->src_port = s->src_port; 614 ioc->dst_port = s->dst_port; 615 ioc->alias_port = s->alias_port; 616 ioc->nat_id = n + 1; 617 ioc->cpu_id = cpu; 618 ioc->proto = IPPROTO_UDP; 619 ioc->direction = 1; 620 ioc->life = s->timestamp + 621 sysctl_var_udp_timeout - time_uptime; 622 ioc++; 623 } 624 RB_FOREACH(s, state_tree, &the->rb_udp_in) { 625 total_len += LEN_IOC_NAT_STATE; 626 if (total_len > sopt_size) 627 goto nospace; 628 ioc->src_addr.s_addr = ntohl(s->src_addr); 629 ioc->dst_addr.s_addr = s->dst_addr; 630 ioc->alias_addr.s_addr = s->alias_addr; 631 ioc->src_port = s->src_port; 632 ioc->dst_port = s->dst_port; 633 ioc->alias_port = s->alias_port; 634 ioc->nat_id = n + 1; 635 ioc->cpu_id = cpu; 636 ioc->proto = IPPROTO_UDP; 637 ioc->direction = 0; 638 ioc->life = s->timestamp + 639 sysctl_var_udp_timeout - time_uptime; 640 ioc++; 641 } 642 } 643 } 644 } 645 sopt->sopt_valsize = total_len; 646 return 0; 647 nospace: 648 return 0; 649 } 650 651 void 652 nat_state_add_dispatch(netmsg_t add_msg) 653 { 654 struct ip_fw3_nat_context *nat_ctx; 655 struct netmsg_nat_state_add *msg; 656 struct cfg_nat *nat; 657 struct state_tree *tree_in = NULL; 658 struct nat_state *s2; 659 660 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 661 msg = (struct netmsg_nat_state_add *)add_msg; 662 nat = nat_ctx->nats[msg->nat_id - 1]; 663 if (msg->proto == IPPROTO_TCP) { 664 tree_in = &nat->rb_tcp_in; 665 } else { 666 tree_in = &nat->rb_udp_in; 667 } 668 s2 = msg->state; 669 RB_INSERT(state_tree, tree_in, msg->state); 670 } 671 672 /* 673 * Init the RB trees only when the NAT is configured. 674 */ 675 void 676 nat_add_dispatch(netmsg_t nat_add_msg) 677 { 678 struct ip_fw3_nat_context *nat_ctx; 679 struct netmsg_nat_add *msg; 680 struct ioc_nat *ioc; 681 struct cfg_nat *nat; 682 struct cfg_alias *alias; 683 struct in_addr *ip; 684 int n; 685 686 msg = (struct netmsg_nat_add *)nat_add_msg; 687 ioc = &msg->ioc_nat; 688 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 689 690 if (nat_ctx->nats[ioc->id - 1] == NULL) { 691 /* op = set, and nat not exists */ 692 nat = kmalloc(LEN_CFG_NAT, M_IP_FW3_NAT, M_WAITOK | M_ZERO); 693 LIST_INIT(&nat->alias); 694 RB_INIT(&nat->rb_tcp_in); 695 RB_INIT(&nat->rb_tcp_out); 696 RB_INIT(&nat->rb_udp_in); 697 RB_INIT(&nat->rb_udp_out); 698 if (mycpuid == 0) { 699 RB_INIT(&nat->rb_icmp_in); 700 RB_INIT(&nat->rb_icmp_out); 701 } 702 nat->id = ioc->id; 703 nat->count = ioc->count; 704 ip = &ioc->ip; 705 for (n = 0; n < ioc->count; n++) { 706 alias = kmalloc(LEN_CFG_ALIAS, 707 M_IP_FW3_NAT, M_WAITOK | M_ZERO); 708 memcpy(&alias->ip, ip, LEN_IN_ADDR); 709 LIST_INSERT_HEAD((&nat->alias), alias, next); 710 ip++; 711 } 712 nat_ctx->nats[ioc->id - 1] = nat; 713 } 714 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 715 } 716 717 int 718 ip_fw3_ctl_nat_add(struct sockopt *sopt) 719 { 720 struct netmsg_nat_add nat_add_msg, *msg; 721 struct ioc_nat *ioc; 722 msg = &nat_add_msg; 723 724 ioc = (struct ioc_nat *)(sopt->sopt_val); 725 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize, 726 sizeof(struct ioc_nat)); 727 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0, 728 nat_add_dispatch); 729 netisr_domsg(&msg->base, 0); 730 return 0; 731 } 732 733 void 734 nat_del_dispatch(netmsg_t nat_del_msg) 735 { 736 struct ip_fw3_nat_context *nat_ctx; 737 struct netmsg_nat_del *msg; 738 struct cfg_nat *nat; 739 struct nat_state *s, *tmp; 740 struct cfg_alias *alias, *tmp2; 741 742 msg = (struct netmsg_nat_del *)nat_del_msg; 743 744 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 745 nat = nat_ctx->nats[msg->id - 1]; 746 if (nat != NULL) { 747 /* the icmp states will only stored in cpu 0 */ 748 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_in, tmp) { 749 RB_REMOVE(state_tree, &nat->rb_icmp_in, s); 750 if (s != NULL) { 751 kfree(s, M_IP_FW3_NAT); 752 } 753 } 754 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 755 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 756 if (s != NULL) { 757 kfree(s, M_IP_FW3_NAT); 758 } 759 } 760 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_in, tmp) { 761 RB_REMOVE(state_tree, &nat->rb_tcp_in, s); 762 if (s != NULL) { 763 kfree(s, M_IP_FW3_NAT); 764 } 765 } 766 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 767 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 768 if (s != NULL) { 769 kfree(s, M_IP_FW3_NAT); 770 } 771 } 772 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_in, tmp) { 773 RB_REMOVE(state_tree, &nat->rb_udp_in, s); 774 if (s != NULL) { 775 kfree(s, M_IP_FW3_NAT); 776 } 777 } 778 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 779 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 780 if (s != NULL) { 781 kfree(s, M_IP_FW3_NAT); 782 } 783 } 784 LIST_FOREACH_MUTABLE(alias, &nat->alias, next, tmp2) { 785 kfree(alias, M_IP_FW3_NAT); 786 } 787 kfree(nat, M_IP_FW3_NAT); 788 nat_ctx->nats[msg->id - 1] = NULL; 789 } 790 netisr_forwardmsg_all(&nat_del_msg->base, mycpuid + 1); 791 } 792 int 793 ip_fw3_ctl_nat_del(struct sockopt *sopt) 794 { 795 struct netmsg_nat_del nat_del_msg, *msg; 796 797 msg = &nat_del_msg; 798 msg->id = *((int *)sopt->sopt_val); 799 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 800 0, nat_del_dispatch); 801 802 netisr_domsg(&msg->base, 0); 803 return 0; 804 } 805 int 806 ip_fw3_ctl_nat_flush(struct sockopt *sopt) 807 { 808 struct netmsg_nat_del nat_del_msg, *msg; 809 int i; 810 msg = &nat_del_msg; 811 for (i = 0; i < NAT_ID_MAX; i++) { 812 msg->id = i + 1; 813 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 814 0, nat_del_dispatch); 815 816 netisr_domsg(&msg->base, 0); 817 } 818 return 0; 819 } 820 int 821 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt) 822 { 823 int error = 0; 824 switch (sopt->sopt_name) { 825 case IP_FW_NAT_ADD: 826 error = ip_fw3_ctl_nat_add(sopt); 827 break; 828 case IP_FW_NAT_DEL: 829 error = ip_fw3_ctl_nat_del(sopt); 830 break; 831 case IP_FW_NAT_FLUSH: 832 error = ip_fw3_ctl_nat_flush(sopt); 833 break; 834 case IP_FW_NAT_GET: 835 error = ip_fw3_ctl_nat_get_cfg(sopt); 836 break; 837 case IP_FW_NAT_GET_RECORD: 838 error = ip_fw3_ctl_nat_get_record(sopt); 839 break; 840 default: 841 kprintf("ipfw3 nat invalid socket option %d\n", 842 sopt->sopt_name); 843 } 844 return error; 845 } 846 847 void 848 nat_init_ctx_dispatch(netmsg_t msg) 849 { 850 struct ip_fw3_nat_context *tmp; 851 tmp = kmalloc(sizeof(struct ip_fw3_nat_context), 852 M_IP_FW3_NAT, M_WAITOK | M_ZERO); 853 854 ip_fw3_nat_ctx[mycpuid] = tmp; 855 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 856 } 857 858 void 859 nat_fnit_ctx_dispatch(netmsg_t msg) 860 { 861 kfree(ip_fw3_nat_ctx[mycpuid], M_IP_FW3_NAT); 862 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 863 } 864 865 static void 866 ip_fw3_nat_cleanup_func_dispatch(netmsg_t nmsg) 867 { 868 struct nat_state *s, *tmp; 869 struct ip_fw3_nat_context *nat_ctx; 870 struct cfg_nat *nat; 871 int i; 872 873 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 874 for (i = 0; i < NAT_ID_MAX; i++) { 875 nat = nat_ctx->nats[i]; 876 if (nat == NULL) 877 continue; 878 /* check the nat_states, remove the expired state */ 879 /* the icmp states will only stored in cpu 0 */ 880 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_in, tmp) { 881 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) { 882 RB_REMOVE(state_tree, &nat->rb_icmp_in, s); 883 kfree(s, M_IP_FW3_NAT); 884 } 885 } 886 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) { 887 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) { 888 RB_REMOVE(state_tree, &nat->rb_icmp_out, s); 889 kfree(s, M_IP_FW3_NAT); 890 } 891 } 892 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_in, tmp) { 893 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) { 894 RB_REMOVE(state_tree, &nat->rb_tcp_in, s); 895 kfree(s, M_IP_FW3_NAT); 896 } 897 } 898 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) { 899 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) { 900 RB_REMOVE(state_tree, &nat->rb_tcp_out, s); 901 kfree(s, M_IP_FW3_NAT); 902 } 903 } 904 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_in, tmp) { 905 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) { 906 RB_REMOVE(state_tree, &nat->rb_udp_in, s); 907 kfree(s, M_IP_FW3_NAT); 908 } 909 } 910 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) { 911 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) { 912 RB_REMOVE(state_tree, &nat->rb_udp_out, s); 913 kfree(s, M_IP_FW3_NAT); 914 } 915 } 916 } 917 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 918 } 919 920 static void 921 ip_fw3_nat_cleanup_func(void *dummy __unused) 922 { 923 struct netmsg_base msg; 924 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, 925 ip_fw3_nat_cleanup_func_dispatch); 926 netisr_domsg(&msg, 0); 927 928 callout_reset(&ip_fw3_nat_cleanup_callout, 929 sysctl_var_cleanup_interval * hz, 930 ip_fw3_nat_cleanup_func, NULL); 931 } 932 933 static 934 int ip_fw3_nat_init(void) 935 { 936 struct netmsg_base msg; 937 register_ipfw_module(MODULE_NAT_ID, MODULE_NAT_NAME); 938 register_ipfw_filter_funcs(MODULE_NAT_ID, O_NAT_NAT, 939 (filter_func)check_nat); 940 ipfw_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt; 941 netmsg_init(&msg, NULL, &curthread->td_msgport, 942 0, nat_init_ctx_dispatch); 943 netisr_domsg(&msg, 0); 944 945 callout_init_mp(&ip_fw3_nat_cleanup_callout); 946 callout_reset(&ip_fw3_nat_cleanup_callout, 947 sysctl_var_cleanup_interval * hz, 948 ip_fw3_nat_cleanup_func, 949 NULL); 950 return 0; 951 } 952 953 static int 954 ip_fw3_nat_fini(void) 955 { 956 struct netmsg_base msg; 957 struct netmsg_nat_del nat_del_msg, *msg1; 958 int i; 959 960 callout_stop(&ip_fw3_nat_cleanup_callout); 961 962 msg1 = &nat_del_msg; 963 for (i = 0; i < NAT_ID_MAX; i++) { 964 msg1->id = i + 1; 965 netmsg_init(&msg1->base, NULL, &curthread->td_msgport, 966 0, nat_del_dispatch); 967 968 netisr_domsg(&msg1->base, 0); 969 } 970 971 netmsg_init(&msg, NULL, &curthread->td_msgport, 972 0, nat_fnit_ctx_dispatch); 973 netisr_domsg(&msg, 0); 974 975 return unregister_ipfw_module(MODULE_NAT_ID); 976 } 977 978 static int 979 ip_fw3_nat_modevent(module_t mod, int type, void *data) 980 { 981 switch (type) { 982 case MOD_LOAD: 983 return ip_fw3_nat_init(); 984 case MOD_UNLOAD: 985 return ip_fw3_nat_fini(); 986 default: 987 break; 988 } 989 return 0; 990 } 991 992 moduledata_t ip_fw3_nat_mod = { 993 "ipfw3_nat", 994 ip_fw3_nat_modevent, 995 NULL 996 }; 997 998 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod, 999 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 1000 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1); 1001 MODULE_VERSION(ipfw3_nat, 1); 1002