1 /* 2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Bill Yuan <bycn82@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/kernel.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/socketvar.h> 40 #include <sys/sysctl.h> 41 #include <sys/systimer.h> 42 #include <sys/thread2.h> 43 #include <sys/in_cksum.h> 44 #include <sys/systm.h> 45 #include <sys/proc.h> 46 #include <sys/socket.h> 47 #include <sys/syslog.h> 48 #include <sys/ucred.h> 49 #include <sys/lock.h> 50 #include <sys/mplock2.h> 51 52 #include <net/ethernet.h> 53 #include <net/netmsg2.h> 54 #include <net/netisr2.h> 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_timer.h> 63 #include <netinet/tcp_var.h> 64 #include <netinet/tcpip.h> 65 #include <netinet/udp.h> 66 #include <netinet/udp_var.h> 67 #include <netinet/in_systm.h> 68 #include <netinet/in_var.h> 69 #include <netinet/in_pcb.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/ip_divert.h> 72 73 #include <net/ipfw3/ip_fw.h> 74 #include "ip_fw3_nat.h" 75 76 /* 77 * Lockless Kernel NAT 78 * 79 * The `src` will be replaced by `alias` when a packet is leaving the system. 80 * Hence, the packet is from `src` to `dst` before been translated. And after 81 * been translated, the packet is from `alias` to `dst`. 82 * 83 * The state for outgoing packet will be stored in the nat_context of current 84 * CPU. But due to the nature of the NAT, the returning packet may be handled 85 * by another CPU. Hence, a state for the returning packet will be prepared and 86 * store into the nat_context of the right CPU. 87 */ 88 89 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU]; 90 static struct callout ip_fw3_nat_cleanup_callout; 91 extern struct ipfw_context *ipfw_ctx[MAXCPU]; 92 extern ip_fw_ctl_t *ipfw_ctl_nat_ptr; 93 static int fw3_nat_cleanup_interval = 1; 94 95 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT"); 96 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW, 97 &fw3_nat_cleanup_interval, 0, "default life time"); 98 99 RB_PROTOTYPE(state_tree, nat_state, entries, nat_state_cmp); 100 RB_GENERATE(state_tree, nat_state, entries, nat_state_cmp); 101 102 static __inline uint16_t 103 fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp) 104 { 105 uint32_t tmp; 106 107 if (is_udp && !cksum) 108 return (0x0000); 109 tmp = cksum + old_info - new_info; 110 tmp = (tmp >> 16) + (tmp & 65535); 111 tmp = tmp & 65535; 112 if (is_udp && !tmp) 113 return (0xFFFF); 114 return tmp; 115 } 116 117 void 118 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args, 119 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len) 120 { 121 if ((*args)->eh != NULL) { 122 *cmd_ctl = IP_FW_CTL_NO; 123 *cmd_val = IP_FW_NOT_MATCH; 124 return; 125 } 126 127 struct ip_fw3_nat_context *nat_ctx; 128 struct cfg_nat *nat; 129 int nat_id; 130 131 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 132 (*args)->rule = *f; 133 nat = ((ipfw_insn_nat *)cmd)->nat; 134 if (nat == NULL) { 135 nat_id = cmd->arg1; 136 nat = nat_ctx->nats[nat_id - 1]; 137 if (nat == NULL) { 138 *cmd_val = IP_FW_DENY; 139 *cmd_ctl = IP_FW_CTL_DONE; 140 return; 141 } 142 ((ipfw_insn_nat *)cmd)->nat = nat; 143 } 144 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m); 145 *cmd_ctl = IP_FW_CTL_NAT; 146 } 147 148 int 149 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m) 150 { 151 struct nat_state *s, *k; 152 struct ip *ip = mtod(m, struct ip *); 153 struct in_addr *addr; 154 struct in_addr *old_addr = NULL; 155 uint16_t *old_port = NULL; 156 uint16_t *port = NULL, *csum = NULL, dlen = 0; 157 uint8_t udp = 0; 158 boolean_t pseudo = FALSE; 159 struct state_tree *tree_in = NULL, *tree_out = NULL; 160 struct nat_state *s1 = NULL, *s2, *dup; 161 162 struct in_addr oaddr; 163 uint16_t oport; 164 165 old_addr = &oaddr; 166 old_port = &oport; 167 168 k = &nat->tmp; 169 if (args->oif == NULL) { 170 /* for outgoing packets */ 171 addr = &ip->ip_dst; 172 k->saddr = args->f_id.src_ip; 173 k->daddr = ntohl(args->f_id.dst_ip); 174 k->proto = args->f_id.proto; 175 switch (ip->ip_p) { 176 case IPPROTO_TCP: 177 k->sport = args->f_id.src_port; 178 k->dport = ntohs(args->f_id.dst_port); 179 tree_in = &nat->tree_tcp_in; 180 port = &L3HDR(struct tcphdr, ip)->th_dport; 181 csum = &L3HDR(struct tcphdr, ip)->th_sum; 182 break; 183 case IPPROTO_UDP: 184 k->sport = args->f_id.src_port; 185 k->dport = ntohs(args->f_id.dst_port); 186 tree_in = &nat->tree_udp_in; 187 port = &L3HDR(struct udphdr, ip)->uh_dport; 188 csum = &L3HDR(struct udphdr, ip)->uh_sum; 189 udp = 1; 190 break; 191 case IPPROTO_ICMP: 192 k->sport = L3HDR(struct icmp, ip)->icmp_id;; 193 k->dport = L3HDR(struct icmp, ip)->icmp_id;; 194 tree_in = &nat->tree_icmp_in; 195 port = &L3HDR(struct icmp, ip)->icmp_id; 196 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 197 break; 198 default: 199 panic("ipfw3: unsupported proto %u", ip->ip_p); 200 } 201 s = RB_FIND(state_tree, tree_in, k); 202 if (s == NULL) { 203 goto oops; 204 } 205 } else { 206 /* for incoming packets */ 207 addr = &ip->ip_src; 208 k->saddr = args->f_id.src_ip; 209 k->daddr = args->f_id.dst_ip; 210 k->proto = args->f_id.proto; 211 switch (ip->ip_p) { 212 case IPPROTO_TCP: 213 k->sport = args->f_id.src_port; 214 k->dport = args->f_id.dst_port; 215 m->m_pkthdr.csum_flags = CSUM_TCP; 216 tree_in = &nat->tree_tcp_in; 217 tree_out = &nat->tree_tcp_out; 218 port = &L3HDR(struct tcphdr, ip)->th_sport; 219 csum = &L3HDR(struct tcphdr, ip)->th_sum; 220 break; 221 case IPPROTO_UDP: 222 k->sport = args->f_id.src_port; 223 k->dport = args->f_id.dst_port; 224 m->m_pkthdr.csum_flags = CSUM_UDP; 225 tree_in = &nat->tree_udp_in; 226 tree_out = &nat->tree_udp_out; 227 port = &L3HDR(struct udphdr, ip)->uh_sport; 228 csum = &L3HDR(struct udphdr, ip)->uh_sum; 229 udp = 1; 230 break; 231 case IPPROTO_ICMP: 232 k->sport = 0; 233 k->dport = 0; 234 tree_in = &nat->tree_icmp_in; 235 tree_out = &nat->tree_icmp_out; 236 port = &L3HDR(struct icmp, ip)->icmp_id; 237 csum = &L3HDR(struct icmp, ip)->icmp_cksum; 238 break; 239 default: 240 panic("ipfw3: unsupported proto %u", ip->ip_p); 241 } 242 s = RB_FIND(state_tree, tree_out, k); 243 if (s == NULL) { 244 switch (ip->ip_p) { 245 case IPPROTO_TCP: 246 m->m_pkthdr.csum_flags = CSUM_TCP; 247 s1 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 248 M_INTWAIT | M_NULLOK | M_ZERO); 249 s1->saddr = args->f_id.src_ip; 250 s1->daddr = args->f_id.dst_ip; 251 s1->proto = args->f_id.proto; 252 253 s1->sport = args->f_id.src_port; 254 s1->dport = args->f_id.dst_port; 255 256 nat_state_get_alias(s1, nat, tree_out); 257 /* TODO */ 258 dup = RB_INSERT(state_tree, tree_out, s1); 259 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 260 M_INTWAIT | M_NULLOK | M_ZERO); 261 s2->saddr = args->f_id.dst_ip; 262 s2->daddr = nat->ip.s_addr; 263 s2->proto = args->f_id.proto; 264 265 s2->sport = s1->dport; 266 s2->dport = s1->alias_port; 267 s2->alias_addr = htonl(args->f_id.src_ip); 268 s2->alias_port = htons(args->f_id.src_port); 269 dup = RB_INSERT(state_tree, tree_in, s2); 270 break; 271 case IPPROTO_UDP: 272 m->m_pkthdr.csum_flags = CSUM_UDP; 273 s1 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 274 M_INTWAIT | M_NULLOK | M_ZERO); 275 s1->saddr = args->f_id.src_ip; 276 s1->daddr = args->f_id.dst_ip; 277 s1->proto = args->f_id.proto; 278 279 s1->sport = args->f_id.src_port; 280 s1->dport = args->f_id.dst_port; 281 282 nat_state_get_alias(s1, nat, tree_out); 283 dup = RB_INSERT(state_tree, tree_out, s1); 284 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 285 M_INTWAIT | M_NULLOK | M_ZERO); 286 s2->saddr = args->f_id.dst_ip; 287 s2->daddr = nat->ip.s_addr; 288 s2->proto = args->f_id.proto; 289 290 s2->sport = s1->dport; 291 s2->dport = s1->alias_port; 292 293 s2->alias_addr = htonl(args->f_id.src_ip); 294 s2->alias_port = htons(args->f_id.src_port); 295 dup = RB_INSERT(state_tree, tree_in, s2); 296 break; 297 case IPPROTO_ICMP: 298 s1 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 299 M_INTWAIT | M_NULLOK | M_ZERO); 300 s1->saddr = args->f_id.src_ip; 301 s1->daddr = args->f_id.dst_ip; 302 s1->proto = args->f_id.proto; 303 304 s1->sport = *port; 305 s1->dport = *port; 306 307 s1->alias_addr = nat->ip.s_addr; 308 s1->alias_port = htons(s1->saddr % 1024); 309 310 dup = RB_INSERT(state_tree, tree_out, s1); 311 312 s2 = kmalloc(LEN_NAT_STATE, M_IP_FW3_NAT, 313 M_INTWAIT | M_NULLOK | M_ZERO); 314 s2->saddr = args->f_id.dst_ip; 315 s2->daddr = nat->ip.s_addr; 316 s2->proto = args->f_id.proto; 317 318 s2->sport = s1->alias_port; 319 s2->dport = s1->alias_port; 320 321 s2->alias_addr = htonl(args->f_id.src_ip); 322 s2->alias_port = *port; 323 324 dup = RB_INSERT(state_tree, tree_in, s2); 325 break; 326 default : 327 goto oops; 328 } 329 s = s1; 330 } 331 } 332 *old_addr = *addr; 333 *old_port = *port; 334 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) { 335 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { 336 dlen = ip->ip_len - (ip->ip_hl << 2); 337 } 338 pseudo = TRUE; 339 } 340 if (!pseudo) { 341 const uint16_t *oaddr, *naddr; 342 oaddr = (const uint16_t *)&old_addr->s_addr; 343 naddr = (const uint16_t *)&s->alias_addr; 344 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0); 345 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0); 346 if (ip->ip_p != IPPROTO_ICMP) { 347 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp); 348 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp); 349 } 350 } 351 addr->s_addr = s->alias_addr; 352 if (!pseudo) { 353 *csum = fix_cksum(*csum, *port, s->alias_port, udp); 354 } 355 *port = s->alias_port; 356 357 if (pseudo) { 358 *csum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(dlen + ip->ip_p)); 359 } 360 return IP_FW_NAT; 361 oops: 362 return IP_FW_DENY; 363 } 364 365 int 366 nat_state_cmp(struct nat_state *s1, struct nat_state *s2) 367 { 368 if (s1->saddr > s2->saddr) 369 return 1; 370 if (s1->saddr < s2->saddr) 371 return -1; 372 373 if (s1->daddr > s2->daddr) 374 return 1; 375 if (s1->daddr < s2->daddr) 376 return -1; 377 378 if (s1->sport > s2->sport) 379 return 1; 380 if (s1->sport < s2->sport) 381 return -1; 382 383 if (s1->dport > s2->dport) 384 return 1; 385 if (s1->dport < s2->dport) 386 return -1; 387 388 return 0; 389 } 390 391 int 392 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt) 393 { 394 /* TODO */ 395 return 0; 396 } 397 398 int 399 ip_fw3_ctl_nat_get_record(struct sockopt *sopt) 400 { 401 /* TODO */ 402 return 0; 403 } 404 405 /* 406 * Init the RB trees only when the NAT is configured. 407 */ 408 void 409 nat_add_dispatch(netmsg_t nat_add_msg) 410 { 411 struct ip_fw3_nat_context *nat_ctx; 412 struct netmsg_nat_add *msg; 413 struct ioc_nat *ioc; 414 struct cfg_nat *nat; 415 416 msg = (struct netmsg_nat_add *)nat_add_msg; 417 ioc = &msg->ioc_nat; 418 nat_ctx = ip_fw3_nat_ctx[mycpuid]; 419 420 if (nat_ctx->nats[ioc->id - 1] == NULL) { 421 nat = kmalloc(LEN_CFG_NAT, M_IP_FW3_NAT, M_WAITOK | M_ZERO); 422 RB_INIT(&nat->tree_tcp_in); 423 RB_INIT(&nat->tree_tcp_out); 424 RB_INIT(&nat->tree_udp_in); 425 RB_INIT(&nat->tree_udp_out); 426 RB_INIT(&nat->tree_icmp_in); 427 RB_INIT(&nat->tree_icmp_out); 428 nat->id = ioc->id; 429 memcpy(&nat->ip, &ioc->ip, LEN_IN_ADDR); 430 nat_ctx->nats[ioc->id - 1] = nat; 431 } 432 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 433 } 434 435 int 436 ip_fw3_ctl_nat_add(struct sockopt *sopt) 437 { 438 struct netmsg_nat_add nat_add_msg, *msg; 439 struct ioc_nat *ioc; 440 441 msg = &nat_add_msg; 442 ioc = (struct ioc_nat *)(sopt->sopt_val); 443 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize, 444 sizeof(struct ioc_nat)); 445 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0, 446 nat_add_dispatch); 447 netisr_domsg(&msg->base, 0); 448 return 0; 449 } 450 451 void 452 nat_del_dispatch(netmsg_t nat_del_msg) 453 { 454 /* TODO */ 455 } 456 457 int 458 ip_fw3_ctl_nat_del(struct sockopt *sopt) 459 { 460 struct netmsg_nat_del nat_del_msg; 461 struct netmsg_nat_del *msg; 462 463 /* TODO */ 464 msg = &nat_del_msg; 465 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 466 0, nat_del_dispatch); 467 468 netisr_domsg(&msg->base, 0); 469 return 0; 470 } 471 472 int 473 ip_fw3_ctl_nat_flush(struct sockopt *sopt) 474 { 475 /* TODO */ 476 return 0; 477 } 478 479 int 480 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt) 481 { 482 int error = 0; 483 switch (sopt->sopt_name) { 484 case IP_FW_NAT_ADD: 485 error = ip_fw3_ctl_nat_add(sopt); 486 break; 487 case IP_FW_NAT_DEL: 488 error = ip_fw3_ctl_nat_del(sopt); 489 break; 490 case IP_FW_NAT_FLUSH: 491 error = ip_fw3_ctl_nat_flush(sopt); 492 break; 493 case IP_FW_NAT_GET: 494 error = ip_fw3_ctl_nat_get_cfg(sopt); 495 break; 496 case IP_FW_NAT_GET_RECORD: 497 error = ip_fw3_ctl_nat_get_record(sopt); 498 break; 499 default: 500 kprintf("ipfw3 nat invalid socket option %d\n", 501 sopt->sopt_name); 502 } 503 return error; 504 } 505 506 void 507 nat_init_ctx_dispatch(netmsg_t msg) 508 { 509 struct ip_fw3_nat_context *tmp; 510 tmp = kmalloc(sizeof(struct ip_fw3_nat_context), 511 M_IP_FW3_NAT, M_WAITOK | M_ZERO); 512 ip_fw3_nat_ctx[mycpuid] = tmp; 513 netisr_forwardmsg_all(&msg->base, mycpuid + 1); 514 } 515 516 static void 517 ipfw3_nat_cleanup_func_dispatch(netmsg_t nmsg) 518 { 519 /* TODO */ 520 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 521 } 522 523 static void 524 ipfw3_nat_cleanup_func(void *dummy __unused) 525 { 526 struct netmsg_base msg; 527 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, 528 ipfw3_nat_cleanup_func_dispatch); 529 netisr_domsg(&msg, 0); 530 531 callout_reset(&ip_fw3_nat_cleanup_callout, 532 fw3_nat_cleanup_interval * hz, 533 ipfw3_nat_cleanup_func, 534 NULL); 535 } 536 537 static int 538 ip_fw3_nat_init(void) 539 { 540 struct netmsg_base msg; 541 register_ipfw_module(MODULE_NAT_ID, MODULE_NAT_NAME); 542 register_ipfw_filter_funcs(MODULE_NAT_ID, O_NAT_NAT, 543 (filter_func)check_nat); 544 ipfw_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt; 545 netmsg_init(&msg, NULL, &curthread->td_msgport, 546 0, nat_init_ctx_dispatch); 547 netisr_domsg(&msg, 0); 548 549 callout_init_mp(&ip_fw3_nat_cleanup_callout); 550 callout_reset(&ip_fw3_nat_cleanup_callout, 551 fw3_nat_cleanup_interval * hz, 552 ipfw3_nat_cleanup_func, 553 NULL); 554 return 0; 555 } 556 557 static int 558 ip_fw3_nat_fini(void) 559 { 560 /* TODO */ 561 callout_stop(&ip_fw3_nat_cleanup_callout); 562 return unregister_ipfw_module(MODULE_NAT_ID); 563 } 564 565 static int 566 ip_fw3_nat_modevent(module_t mod, int type, void *data) 567 { 568 switch (type) { 569 case MOD_LOAD: 570 return ip_fw3_nat_init(); 571 case MOD_UNLOAD: 572 return ip_fw3_nat_fini(); 573 default: 574 break; 575 } 576 return 0; 577 } 578 579 moduledata_t ip_fw3_nat_mod = { 580 "ipfw3_nat", 581 ip_fw3_nat_modevent, 582 NULL 583 }; 584 585 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod, 586 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 587 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1); 588 MODULE_VERSION(ipfw3_nat, 1); 589