1 /*
2 * Copyright (c) 2014 - 2018 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Bill Yuan <bycn82@dragonflybsd.org>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include "opt_ipfw.h"
36 #include "opt_inet.h"
37 #ifndef INET
38 #error IPFIREWALL3 requires INET.
39 #endif /* INET */
40
41 #include <sys/param.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/systimer.h>
48 #include <sys/in_cksum.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/socket.h>
52 #include <sys/syslog.h>
53 #include <sys/ucred.h>
54 #include <sys/lock.h>
55
56 #include <net/ethernet.h>
57 #include <net/netmsg2.h>
58 #include <net/netisr2.h>
59 #include <net/route.h>
60 #include <net/if.h>
61
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_icmp.h>
65 #include <netinet/tcp.h>
66 #include <netinet/tcp_timer.h>
67 #include <netinet/tcp_var.h>
68 #include <netinet/tcpip.h>
69 #include <netinet/udp.h>
70 #include <netinet/udp_var.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in_var.h>
73 #include <netinet/in_pcb.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/ip_divert.h>
76 #include <net/ipfw3/ip_fw.h>
77
78 #include "ip_fw3_nat.h"
79
80 MALLOC_DEFINE(M_IPFW3_NAT, "IP_FW3_NAT", "ipfw3_nat module");
81
82 /*
83 * Highspeed Lockless Kernel NAT
84 *
85 * Kernel NAT
 * The network address translation (NAT) will replace the `src` of the packet
 * with an `alias` (alias_addr & alias_port). According to the configuration,
 * the alias will be randomly picked from the configured range.
89 *
90 * Highspeed
 * The first outgoing packet triggers the creation of the `nat_state`,
 * which is kept in an RB-Tree for the subsequent outgoing packets.
 * The `nat_state2` used by the returning packets is prepared at the same
 * time and stored in a per-alias, per-protocol array of pointers
 * (to nat_state2) indexed by the alias port.
96 *
97 * Lockless
 * The `nat_state` for an outgoing packet is stored in the nat_context of
 * the current CPU. But due to the nature of NAT, the returning packet may be
 * handled by another CPU. Hence, the `nat_state2` for the returning packet
 * is prepared and stored into the nat_context of the right CPU.
102 */
103
104 struct ip_fw3_nat_context *ip_fw3_nat_ctx[MAXCPU];
105 static struct callout ip_fw3_nat_cleanup_callout;
106 extern struct ipfw3_context *fw3_ctx[MAXCPU];
107 extern ip_fw_ctl_t *ip_fw3_ctl_nat_ptr;
108
109 static int sysctl_var_cleanup_interval = 1;
110 static int sysctl_var_icmp_timeout = 10;
111 static int sysctl_var_tcp_timeout = 60;
112 static int sysctl_var_udp_timeout = 30;
113
114 SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw3_nat, CTLFLAG_RW, 0, "ipfw3 NAT");
115 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, cleanup_interval, CTLFLAG_RW,
116 &sysctl_var_cleanup_interval, 0, "default life time");
117 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, icmp_timeout, CTLFLAG_RW,
118 &sysctl_var_icmp_timeout, 0, "default icmp state life time");
119 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, tcp_timeout, CTLFLAG_RW,
120 &sysctl_var_tcp_timeout, 0, "default tcp state life time");
121 SYSCTL_INT(_net_inet_ip_fw3_nat, OID_AUTO, udp_timeout, CTLFLAG_RW,
122 &sysctl_var_udp_timeout, 0, "default udp state life time");
123
124 RB_PROTOTYPE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp);
125 RB_GENERATE(state_tree, nat_state, entries, ip_fw3_nat_state_cmp);
126
/*
 * Incrementally adjust a 16-bit checksum after one 16-bit word covered by
 * it changed from `old_info` to `new_info`.
 *
 * cksum    - checksum currently stored in the header
 * old_info - value of the word before the change
 * new_info - value of the word after the change
 * is_udp   - non-zero for UDP: a stored checksum of 0 is returned unchanged
 *            and a computed 0 is returned as 0xFFFF
 *
 * Returns the adjusted checksum.
 */
static __inline uint16_t
fix_cksum(uint16_t cksum, uint16_t old_info, uint16_t new_info, uint8_t is_udp)
{
	uint32_t sum;

	if (is_udp && cksum == 0)
		return (0x0000);

	/* Add the old word, take out the new one, then fold the carry once. */
	sum = cksum + old_info - new_info;
	sum = ((sum >> 16) + (sum & 0xFFFF)) & 0xFFFF;

	if (is_udp && sum == 0)
		return (0xFFFF);
	return (sum);
}
141
142 void
check_nat(int * cmd_ctl,int * cmd_val,struct ip_fw_args ** args,struct ip_fw ** f,ipfw_insn * cmd,uint16_t ip_len)143 check_nat(int *cmd_ctl, int *cmd_val, struct ip_fw_args **args,
144 struct ip_fw **f, ipfw_insn *cmd, uint16_t ip_len)
145 {
146 if ((*args)->eh != NULL) {
147 *cmd_ctl = IP_FW_CTL_NO;
148 *cmd_val = IP_FW_NOT_MATCH;
149 return;
150 }
151
152 struct ip_fw3_nat_context *nat_ctx;
153 struct cfg_nat *nat;
154 int nat_id;
155
156 nat_ctx = ip_fw3_nat_ctx[mycpuid];
157 (*args)->rule = *f;
158 nat = ((ipfw_insn_nat *)cmd)->nat;
159 if (nat == NULL) {
160 nat_id = cmd->arg1;
161 nat = nat_ctx->nats[nat_id - 1];
162 if (nat == NULL) {
163 *cmd_val = IP_FW_DENY;
164 *cmd_ctl = IP_FW_CTL_DONE;
165 return;
166 }
167 ((ipfw_insn_nat *)cmd)->nat = nat;
168 }
169 *cmd_val = ip_fw3_nat(*args, nat, (*args)->m);
170 *cmd_ctl = IP_FW_CTL_NAT;
171 }
172
173 int
ip_fw3_nat(struct ip_fw_args * args,struct cfg_nat * nat,struct mbuf * m)174 ip_fw3_nat(struct ip_fw_args *args, struct cfg_nat *nat, struct mbuf *m)
175 {
176 struct state_tree *tree_out = NULL;
177 struct nat_state *s = NULL, *dup, *k, key;
178 struct nat_state2 *s2 = NULL;
179 struct ip *ip = mtod(m, struct ip *);
180 struct in_addr *old_addr = NULL, new_addr;
181 uint16_t *old_port = NULL, new_port;
182 uint16_t *csum = NULL, dlen = 0;
183 uint8_t udp = 0;
184 boolean_t pseudo = FALSE, need_return_state = FALSE;
185 struct cfg_alias *alias;
186 int i = 0, rand_n = 0;
187
188 k = &key;
189 memset(k, 0, LEN_NAT_STATE);
190 if (args->oif == NULL) {
191 old_addr = &ip->ip_dst;
192 k->dst_addr = ntohl(args->f_id.dst_ip);
193 LIST_FOREACH(alias, &nat->alias, next) {
194 if (alias->ip.s_addr == ntohl(args->f_id.dst_ip)) {
195 break;
196 }
197 }
198 if (alias == NULL) {
199 goto oops;
200 }
201 switch (ip->ip_p) {
202 case IPPROTO_TCP:
203 old_port = &L3HDR(struct tcphdr, ip)->th_dport;
204 s2 = alias->tcp_in[*old_port - ALIAS_BEGIN];
205 csum = &L3HDR(struct tcphdr, ip)->th_sum;
206 break;
207 case IPPROTO_UDP:
208 old_port = &L3HDR(struct udphdr, ip)->uh_dport;
209 s2 = alias->udp_in[*old_port - ALIAS_BEGIN];
210 csum = &L3HDR(struct udphdr, ip)->uh_sum;
211 udp = 1;
212 break;
213 case IPPROTO_ICMP:
214 old_port = &L3HDR(struct icmp, ip)->icmp_id;
215 s2 = alias->icmp_in[*old_port];
216 csum = &L3HDR(struct icmp, ip)->icmp_cksum;
217 break;
218 default:
219 panic("ipfw3: unsupported proto %u", ip->ip_p);
220 }
221 if (s2 == NULL) {
222 goto oops;
223 }
224 } else {
225 old_addr = &ip->ip_src;
226 k->src_addr = args->f_id.src_ip;
227 k->dst_addr = args->f_id.dst_ip;
228 switch (ip->ip_p) {
229 case IPPROTO_TCP:
230 k->src_port = args->f_id.src_port;
231 k->dst_port = args->f_id.dst_port;
232 m->m_pkthdr.csum_flags = CSUM_TCP;
233 tree_out = &nat->rb_tcp_out;
234 old_port = &L3HDR(struct tcphdr, ip)->th_sport;
235 csum = &L3HDR(struct tcphdr, ip)->th_sum;
236 break;
237 case IPPROTO_UDP:
238 k->src_port = args->f_id.src_port;
239 k->dst_port = args->f_id.dst_port;
240 m->m_pkthdr.csum_flags = CSUM_UDP;
241 tree_out = &nat->rb_udp_out;
242 old_port = &L3HDR(struct udphdr, ip)->uh_sport;
243 csum = &L3HDR(struct udphdr, ip)->uh_sum;
244 udp = 1;
245 break;
246 case IPPROTO_ICMP:
247 k->src_port = L3HDR(struct icmp, ip)->icmp_id;
248 k->dst_port = k->src_port;
249 tree_out = &nat->rb_icmp_out;
250 old_port = &L3HDR(struct icmp, ip)->icmp_id;
251 csum = &L3HDR(struct icmp, ip)->icmp_cksum;
252 break;
253 default:
254 panic("ipfw3: unsupported proto %u", ip->ip_p);
255 }
256 s = RB_FIND(state_tree, tree_out, k);
257 if (s == NULL) {
258 /* pick an alias ip randomly when there are multiple */
259 if (nat->count > 1) {
260 rand_n = krandom() % nat->count;
261 }
262 LIST_FOREACH(alias, &nat->alias, next) {
263 if (i++ == rand_n) {
264 break;
265 }
266 }
267 switch (ip->ip_p) {
268 case IPPROTO_TCP:
269 m->m_pkthdr.csum_flags = CSUM_TCP;
270 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
271 M_INTWAIT | M_NULLOK | M_ZERO);
272
273 s->src_addr = args->f_id.src_ip;
274 s->src_port = args->f_id.src_port;
275
276 s->dst_addr = args->f_id.dst_ip;
277 s->dst_port = args->f_id.dst_port;
278
279 s->alias_addr = alias->ip.s_addr;
280 pick_alias_port(s, tree_out);
281 dup = RB_INSERT(state_tree, tree_out, s);
282 need_return_state = TRUE;
283 break;
284 case IPPROTO_UDP:
285 m->m_pkthdr.csum_flags = CSUM_UDP;
286 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
287 M_INTWAIT | M_NULLOK | M_ZERO);
288
289 s->src_addr = args->f_id.src_ip;
290 s->src_port = args->f_id.src_port;
291
292 s->dst_addr = args->f_id.dst_ip;
293 s->dst_port = args->f_id.dst_port;
294
295 s->alias_addr = alias->ip.s_addr;
296 pick_alias_port(s, tree_out);
297 dup = RB_INSERT(state_tree, tree_out, s);
298 need_return_state = TRUE;
299 break;
300 case IPPROTO_ICMP:
301 s = kmalloc(LEN_NAT_STATE, M_IPFW3_NAT,
302 M_INTWAIT | M_NULLOK | M_ZERO);
303 s->src_addr = args->f_id.src_ip;
304 s->dst_addr = args->f_id.dst_ip;
305
306 s->src_port = *old_port;
307 s->dst_port = *old_port;
308
309 s->alias_addr = alias->ip.s_addr;
310 s->alias_port = htons(s->src_addr *
311 s->dst_addr % ALIAS_RANGE);
312 dup = RB_INSERT(state_tree, tree_out, s);
313
314 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
315 M_INTWAIT | M_NULLOK | M_ZERO);
316
317 s2->src_addr = args->f_id.dst_ip;
318 s2->dst_addr = alias->ip.s_addr;
319
320 s2->src_port = s->alias_port;
321 s2->dst_port = s->alias_port;
322
323 s2->alias_addr = htonl(args->f_id.src_ip);
324 s2->alias_port = *old_port;
325
326 alias->icmp_in[s->alias_port] = s2;
327 break;
328 default :
329 goto oops;
330 }
331 }
332 }
333 if (args->oif == NULL) {
334 if (ip->ip_p == IPPROTO_ICMP) {
335 new_addr.s_addr = s2->alias_addr;
336 new_port = s2->alias_port;
337 } else {
338 new_addr.s_addr = s2->src_addr;
339 new_port = s2->src_port;
340 }
341 s2->timestamp = time_uptime;
342 } else {
343 new_addr.s_addr = s->alias_addr;
344 new_port = s->alias_port;
345 s->timestamp = time_uptime;
346 }
347
348 /* replace src/dst and fix the checksum */
349 if (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TSO)) {
350 if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
351 dlen = ntohs(ip->ip_len) - (ip->ip_hl << 2);
352 }
353 pseudo = TRUE;
354 }
355 if (!pseudo) {
356 const uint16_t *oaddr, *naddr;
357 oaddr = (const uint16_t *)&old_addr->s_addr;
358 naddr = (const uint16_t *)&new_addr.s_addr;
359 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[0], naddr[0], 0);
360 ip->ip_sum = fix_cksum(ip->ip_sum, oaddr[1], naddr[1], 0);
361 if (ip->ip_p != IPPROTO_ICMP) {
362 *csum = fix_cksum(*csum, oaddr[0], naddr[0], udp);
363 *csum = fix_cksum(*csum, oaddr[1], naddr[1], udp);
364 }
365 }
366 old_addr->s_addr = new_addr.s_addr;
367 if (!pseudo) {
368 *csum = fix_cksum(*csum, *old_port, new_port, udp);
369 }
370 *old_port = new_port;
371
372 if (pseudo) {
373 *csum = in_pseudo(ip->ip_src.s_addr,
374 ip->ip_dst.s_addr, htons(dlen + ip->ip_p));
375 }
376
377 /* prepare the state for return traffic */
378 if (need_return_state) {
379 m->m_flags &= ~M_HASH;
380 ip_hashfn(&m, 0);
381
382 int nextcpu = netisr_hashcpu(m->m_pkthdr.hash);
383 if (nextcpu != mycpuid) {
384 struct netmsg_nat_state_add *msg;
385 msg = kmalloc(LEN_NMSG_NAT_STATE_ADD,
386 M_LWKTMSG, M_NOWAIT | M_ZERO);
387 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
388 0, nat_state_add_dispatch);
389 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
390 M_INTWAIT | M_NULLOK | M_ZERO);
391
392 s2->src_addr = args->f_id.dst_ip;
393 s2->src_port = args->f_id.dst_port;
394
395 s2->dst_addr = alias->ip.s_addr;
396 s2->dst_port = s->alias_port;
397
398 s2->src_addr = htonl(args->f_id.src_ip);
399 s2->src_port = htons(args->f_id.src_port);
400
401 s2->timestamp = s->timestamp;
402 msg->alias_addr.s_addr = alias->ip.s_addr;
403 msg->alias_port = s->alias_port;
404 msg->state = s2;
405 msg->nat_id = nat->id;
406 msg->proto = ip->ip_p;
407 netisr_sendmsg(&msg->base, nextcpu);
408 } else {
409 s2 = kmalloc(LEN_NAT_STATE2, M_IPFW3_NAT,
410 M_INTWAIT | M_NULLOK | M_ZERO);
411
412 s2->src_addr = args->f_id.dst_ip;
413 s2->dst_addr = alias->ip.s_addr;
414
415 s2->src_port = s->alias_port;
416 s2->dst_port = s->alias_port;
417
418 s2->src_addr = htonl(args->f_id.src_ip);
419 s2->src_port = htons(args->f_id.src_port);
420
421 s2->timestamp = s->timestamp;
422 if (ip->ip_p == IPPROTO_TCP) {
423 alias->tcp_in[s->alias_port - ALIAS_BEGIN] = s2;
424 } else {
425 alias->udp_in[s->alias_port - ALIAS_BEGIN] = s2;
426 }
427 }
428 }
429 return IP_FW_NAT;
430 oops:
431 IPFW3_DEBUG1("oops\n");
432 return IP_FW_DENY;
433 }
434
435 void
pick_alias_port(struct nat_state * s,struct state_tree * tree)436 pick_alias_port(struct nat_state *s, struct state_tree *tree)
437 {
438 do {
439 s->alias_port = htons(krandom() % ALIAS_RANGE + ALIAS_BEGIN);
440 } while (RB_FIND(state_tree, tree, s) != NULL);
441 }
442
443 int
ip_fw3_nat_state_cmp(struct nat_state * s1,struct nat_state * s2)444 ip_fw3_nat_state_cmp(struct nat_state *s1, struct nat_state *s2)
445 {
446 if (s1->src_addr > s2->src_addr)
447 return 1;
448 if (s1->src_addr < s2->src_addr)
449 return -1;
450
451 if (s1->dst_addr > s2->dst_addr)
452 return 1;
453 if (s1->dst_addr < s2->dst_addr)
454 return -1;
455
456 if (s1->src_port > s2->src_port)
457 return 1;
458 if (s1->src_port < s2->src_port)
459 return -1;
460
461 if (s1->dst_port > s2->dst_port)
462 return 1;
463 if (s1->dst_port < s2->dst_port)
464 return -1;
465
466 return 0;
467 }
468
469 int
ip_fw3_ctl_nat_get_cfg(struct sockopt * sopt)470 ip_fw3_ctl_nat_get_cfg(struct sockopt *sopt)
471 {
472 struct ip_fw3_nat_context *nat_ctx;
473 struct ioc_nat *ioc;
474 struct cfg_nat *nat;
475 struct cfg_alias *alias;
476 struct in_addr *ip;
477 size_t valsize;
478 int i, len;
479
480 len = 0;
481 nat_ctx = ip_fw3_nat_ctx[mycpuid];
482 valsize = sopt->sopt_valsize;
483 ioc = (struct ioc_nat *)sopt->sopt_val;
484
485 for (i = 0; i < NAT_ID_MAX; i++) {
486 nat = nat_ctx->nats[i];
487 if (nat != NULL) {
488 len += LEN_IOC_NAT;
489 if (len >= valsize) {
490 goto nospace;
491 }
492 ioc->id = nat->id;
493 ioc->count = nat->count;
494 ip = &ioc->ip;
495 LIST_FOREACH(alias, &nat->alias, next) {
496 len += LEN_IN_ADDR;
497 if (len > valsize) {
498 goto nospace;
499 }
500 bcopy(&alias->ip, ip, LEN_IN_ADDR);
501 ip++;
502 }
503 }
504 }
505 sopt->sopt_valsize = len;
506 return 0;
507 nospace:
508 bzero(sopt->sopt_val, sopt->sopt_valsize);
509 sopt->sopt_valsize = 0;
510 return 0;
511 }
512
513 int
ip_fw3_ctl_nat_get_record(struct sockopt * sopt)514 ip_fw3_ctl_nat_get_record(struct sockopt *sopt)
515 {
516 struct ip_fw3_nat_context *nat_ctx;
517 struct cfg_nat *the;
518 size_t sopt_size, total_len = 0;
519 struct ioc_nat_state *ioc;
520 int ioc_nat_id, i, n, cpu;
521 struct nat_state *s;
522 struct nat_state2 *s2;
523 struct cfg_alias *a1;
524
525 ioc_nat_id = *((int *)(sopt->sopt_val));
526 sopt_size = sopt->sopt_valsize;
527 ioc = (struct ioc_nat_state *)sopt->sopt_val;
528 /* icmp states only in CPU 0 */
529 cpu = 0;
530 nat_ctx = ip_fw3_nat_ctx[cpu];
531 for (n = 0; n < NAT_ID_MAX; n++) {
532 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
533 if (nat_ctx->nats[n] == NULL)
534 break;
535 the = nat_ctx->nats[n];
536 RB_FOREACH(s, state_tree, &the->rb_icmp_out) {
537 total_len += LEN_IOC_NAT_STATE;
538 if (total_len > sopt_size)
539 goto nospace;
540 ioc->src_addr.s_addr = ntohl(s->src_addr);
541 ioc->dst_addr.s_addr = s->dst_addr;
542 ioc->alias_addr.s_addr = s->alias_addr;
543 ioc->src_port = s->src_port;
544 ioc->dst_port = s->dst_port;
545 ioc->alias_port = s->alias_port;
546 ioc->nat_id = n + 1;
547 ioc->cpu_id = cpu;
548 ioc->proto = IPPROTO_ICMP;
549 ioc->direction = 1;
550 ioc->life = s->timestamp +
551 sysctl_var_icmp_timeout - time_uptime;
552 ioc++;
553 }
554
555 LIST_FOREACH(a1, &the->alias, next) {
556 for (i = 0; i < ALIAS_RANGE; i++) {
557 s2 = a1->icmp_in[i];
558 if (s2 == NULL) {
559 continue;
560 }
561
562 total_len += LEN_IOC_NAT_STATE;
563 if (total_len > sopt_size)
564 goto nospace;
565
566 ioc->src_addr.s_addr = ntohl(s2->src_addr);
567 ioc->dst_addr.s_addr = s2->dst_addr;
568 ioc->alias_addr.s_addr = s2->alias_addr;
569 ioc->src_port = s2->src_port;
570 ioc->dst_port = s2->dst_port;
571 ioc->alias_port = s2->alias_port;
572 ioc->nat_id = n + 1;
573 ioc->cpu_id = cpu;
574 ioc->proto = IPPROTO_ICMP;
575 ioc->direction = 0;
576 ioc->life = s2->timestamp +
577 sysctl_var_icmp_timeout - time_uptime;
578 ioc++;
579 }
580 }
581 }
582 }
583
584 /* tcp states */
585 for (cpu = 0; cpu < ncpus; cpu++) {
586 nat_ctx = ip_fw3_nat_ctx[cpu];
587 for (n = 0; n < NAT_ID_MAX; n++) {
588 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
589 if (nat_ctx->nats[n] == NULL)
590 break;
591 the = nat_ctx->nats[n];
592 RB_FOREACH(s, state_tree, &the->rb_tcp_out) {
593 total_len += LEN_IOC_NAT_STATE;
594 if (total_len > sopt_size)
595 goto nospace;
596 ioc->src_addr.s_addr = ntohl(s->src_addr);
597 ioc->dst_addr.s_addr = ntohl(s->dst_addr);
598 ioc->alias_addr.s_addr = s->alias_addr;
599 ioc->src_port = ntohs(s->src_port);
600 ioc->dst_port = ntohs(s->dst_port);
601 ioc->alias_port = s->alias_port;
602 ioc->nat_id = n + 1;
603 ioc->cpu_id = cpu;
604 ioc->proto = IPPROTO_TCP;
605 ioc->direction = 1;
606 ioc->life = s->timestamp +
607 sysctl_var_tcp_timeout - time_uptime;
608 ioc++;
609 }
610 LIST_FOREACH(a1, &the->alias, next) {
611 for (i = 0; i < ALIAS_RANGE; i++) {
612 s2 = a1->tcp_in[i];
613 if (s2 == NULL) {
614 continue;
615 }
616
617 total_len += LEN_IOC_NAT_STATE;
618 if (total_len > sopt_size)
619 goto nospace;
620
621 ioc->src_addr.s_addr = ntohl(s2->src_addr);
622 ioc->dst_addr.s_addr = s2->dst_addr;
623 ioc->alias_addr.s_addr = s2->alias_addr;
624 ioc->src_port = s2->src_port;
625 ioc->dst_port = s2->dst_port;
626 ioc->alias_port = s2->alias_port;
627 ioc->nat_id = n + 1;
628 ioc->cpu_id = cpu;
629 ioc->proto = IPPROTO_TCP;
630 ioc->direction = 0;
631 ioc->life = s2->timestamp +
632 sysctl_var_icmp_timeout - time_uptime;
633 ioc++;
634 }
635 }
636 }
637 }
638 }
639
640 /* udp states */
641 for (cpu = 0; cpu < ncpus; cpu++) {
642 nat_ctx = ip_fw3_nat_ctx[cpu];
643 for (n = 0; n < NAT_ID_MAX; n++) {
644 if (ioc_nat_id == 0 || ioc_nat_id == n + 1) {
645 if (nat_ctx->nats[n] == NULL)
646 break;
647 the = nat_ctx->nats[n];
648 RB_FOREACH(s, state_tree, &the->rb_udp_out) {
649 total_len += LEN_IOC_NAT_STATE;
650 if (total_len > sopt_size)
651 goto nospace;
652 ioc->src_addr.s_addr = ntohl(s->src_addr);
653 ioc->dst_addr.s_addr = s->dst_addr;
654 ioc->alias_addr.s_addr = s->alias_addr;
655 ioc->src_port = s->src_port;
656 ioc->dst_port = s->dst_port;
657 ioc->alias_port = s->alias_port;
658 ioc->nat_id = n + 1;
659 ioc->cpu_id = cpu;
660 ioc->proto = IPPROTO_UDP;
661 ioc->direction = 1;
662 ioc->life = s->timestamp +
663 sysctl_var_udp_timeout - time_uptime;
664 ioc++;
665 }
666 LIST_FOREACH(a1, &the->alias, next) {
667 for (i = 0; i < ALIAS_RANGE; i++) {
668 s2 = a1->udp_in[i];
669 if (s2 == NULL) {
670 continue;
671 }
672
673 total_len += LEN_IOC_NAT_STATE;
674 if (total_len > sopt_size)
675 goto nospace;
676
677 ioc->src_addr.s_addr = ntohl(s2->src_addr);
678 ioc->dst_addr.s_addr = s2->dst_addr;
679 ioc->alias_addr.s_addr = s2->alias_addr;
680 ioc->src_port = s2->src_port;
681 ioc->dst_port = s2->dst_port;
682 ioc->alias_port = s2->alias_port;
683 ioc->nat_id = n + 1;
684 ioc->cpu_id = cpu;
685 ioc->proto = IPPROTO_UDP;
686 ioc->direction = 0;
687 ioc->life = s2->timestamp +
688 sysctl_var_icmp_timeout - time_uptime;
689 ioc++;
690 }
691 }
692 }
693 }
694 }
695 sopt->sopt_valsize = total_len;
696 return 0;
697 nospace:
698 return 0;
699 }
700
701 void
nat_state_add_dispatch(netmsg_t add_msg)702 nat_state_add_dispatch(netmsg_t add_msg)
703 {
704 struct ip_fw3_nat_context *nat_ctx;
705 struct netmsg_nat_state_add *msg;
706 struct cfg_nat *nat;
707 struct nat_state2 *s2;
708 struct cfg_alias *alias;
709
710 nat_ctx = ip_fw3_nat_ctx[mycpuid];
711 msg = (struct netmsg_nat_state_add *)add_msg;
712 nat = nat_ctx->nats[msg->nat_id - 1];
713
714 LIST_FOREACH(alias, &nat->alias, next) {
715 if (alias->ip.s_addr == msg->alias_addr.s_addr) {
716 break;
717 }
718 }
719 s2 = msg->state;
720 if (msg->proto == IPPROTO_TCP) {
721 alias->tcp_in[msg->alias_port - ALIAS_BEGIN] = s2;
722 } else {
723 alias->udp_in[msg->alias_port - ALIAS_BEGIN] = s2;
724 }
725 }
726
727 /*
728 * Init the RB trees only when the NAT is configured.
729 */
730 void
nat_add_dispatch(netmsg_t nat_add_msg)731 nat_add_dispatch(netmsg_t nat_add_msg)
732 {
733 struct ip_fw3_nat_context *nat_ctx;
734 struct netmsg_nat_add *msg;
735 struct ioc_nat *ioc;
736 struct cfg_nat *nat;
737 struct cfg_alias *alias;
738 struct in_addr *ip;
739 int n;
740
741 msg = (struct netmsg_nat_add *)nat_add_msg;
742 ioc = &msg->ioc_nat;
743 nat_ctx = ip_fw3_nat_ctx[mycpuid];
744
745 if (nat_ctx->nats[ioc->id - 1] == NULL) {
746 /* op = set, and nat not exists */
747 nat = kmalloc(LEN_CFG_NAT, M_IPFW3_NAT, M_WAITOK | M_ZERO);
748 LIST_INIT(&nat->alias);
749 RB_INIT(&nat->rb_tcp_out);
750 RB_INIT(&nat->rb_udp_out);
751 if (mycpuid == 0) {
752 RB_INIT(&nat->rb_icmp_out);
753 }
754 nat->id = ioc->id;
755 nat->count = ioc->count;
756 ip = &ioc->ip;
757 for (n = 0; n < ioc->count; n++) {
758 alias = kmalloc(LEN_CFG_ALIAS,
759 M_IPFW3_NAT, M_WAITOK | M_ZERO);
760 memcpy(&alias->ip, ip, LEN_IN_ADDR);
761 LIST_INSERT_HEAD((&nat->alias), alias, next);
762 ip++;
763 }
764 nat_ctx->nats[ioc->id - 1] = nat;
765 }
766 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
767 }
768
769 int
ip_fw3_ctl_nat_add(struct sockopt * sopt)770 ip_fw3_ctl_nat_add(struct sockopt *sopt)
771 {
772 struct netmsg_nat_add nat_add_msg, *msg;
773 struct ioc_nat *ioc;
774 msg = &nat_add_msg;
775
776 ioc = (struct ioc_nat *)(sopt->sopt_val);
777 sooptcopyin(sopt, &msg->ioc_nat, sopt->sopt_valsize,
778 sizeof(struct ioc_nat));
779 netmsg_init(&msg->base, NULL, &curthread->td_msgport, 0,
780 nat_add_dispatch);
781 netisr_domsg(&msg->base, 0);
782 return 0;
783 }
784
785 void
nat_del_dispatch(netmsg_t nat_del_msg)786 nat_del_dispatch(netmsg_t nat_del_msg)
787 {
788 struct ip_fw3_nat_context *nat_ctx;
789 struct netmsg_nat_del *msg;
790 struct cfg_nat *nat;
791 struct nat_state *s, *tmp;
792 struct cfg_alias *alias, *tmp3;
793
794 msg = (struct netmsg_nat_del *)nat_del_msg;
795
796 nat_ctx = ip_fw3_nat_ctx[mycpuid];
797 nat = nat_ctx->nats[msg->id - 1];
798 if (nat != NULL) {
799 /* the icmp states will only stored in cpu 0 */
800 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) {
801 RB_REMOVE(state_tree, &nat->rb_icmp_out, s);
802 if (s != NULL) {
803 kfree(s, M_IPFW3_NAT);
804 }
805 }
806 /*
807 LIST_FOREACH_MUTABLE(s2, &nat->alias->icmp_in, next, tmp2) {
808 LIST_REMOVE(s2, next);
809 if (s != NULL) {
810 kfree(s, M_IPFW3_NAT);
811 }
812 }
813 */
814
815 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) {
816 RB_REMOVE(state_tree, &nat->rb_tcp_out, s);
817 if (s != NULL) {
818 kfree(s, M_IPFW3_NAT);
819 }
820 }
821 /*
822 LIST_FOREACH_MUTABLE(s2, &nat->alias->tcp_in, next, tmp2) {
823 LIST_REMOVE(s2, next);
824 if (s != NULL) {
825 kfree(s, M_IPFW3_NAT);
826 }
827 }
828 */
829 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) {
830 RB_REMOVE(state_tree, &nat->rb_udp_out, s);
831 if (s != NULL) {
832 kfree(s, M_IPFW3_NAT);
833 }
834 }
835 /*
836 LIST_FOREACH_MUTABLE(s2, &nat->alias->udp_in, next, tmp2) {
837 LIST_REMOVE(s2, next);
838 if (s != NULL) {
839 kfree(s, M_IPFW3_NAT);
840 }
841 }
842 */
843 LIST_FOREACH_MUTABLE(alias, &nat->alias, next, tmp3) {
844 kfree(alias, M_IPFW3_NAT);
845 }
846 kfree(nat, M_IPFW3_NAT);
847 nat_ctx->nats[msg->id - 1] = NULL;
848 }
849 netisr_forwardmsg_all(&nat_del_msg->base, mycpuid + 1);
850 }
851 int
ip_fw3_ctl_nat_del(struct sockopt * sopt)852 ip_fw3_ctl_nat_del(struct sockopt *sopt)
853 {
854 struct netmsg_nat_del nat_del_msg, *msg;
855
856 msg = &nat_del_msg;
857 msg->id = *((int *)sopt->sopt_val);
858 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
859 0, nat_del_dispatch);
860
861 netisr_domsg(&msg->base, 0);
862 return 0;
863 }
864 int
ip_fw3_ctl_nat_flush(struct sockopt * sopt)865 ip_fw3_ctl_nat_flush(struct sockopt *sopt)
866 {
867 struct netmsg_nat_del nat_del_msg, *msg;
868 int i;
869 msg = &nat_del_msg;
870 for (i = 0; i < NAT_ID_MAX; i++) {
871 msg->id = i + 1;
872 netmsg_init(&msg->base, NULL, &curthread->td_msgport,
873 0, nat_del_dispatch);
874
875 netisr_domsg(&msg->base, 0);
876 }
877 return 0;
878 }
879
880 int
ip_fw3_ctl_nat_sockopt(struct sockopt * sopt)881 ip_fw3_ctl_nat_sockopt(struct sockopt *sopt)
882 {
883 int error = 0;
884 switch (sopt->sopt_name) {
885 case IP_FW_NAT_ADD:
886 error = ip_fw3_ctl_nat_add(sopt);
887 break;
888 case IP_FW_NAT_DEL:
889 error = ip_fw3_ctl_nat_del(sopt);
890 break;
891 case IP_FW_NAT_FLUSH:
892 error = ip_fw3_ctl_nat_flush(sopt);
893 break;
894 case IP_FW_NAT_GET:
895 error = ip_fw3_ctl_nat_get_cfg(sopt);
896 break;
897 case IP_FW_NAT_GET_RECORD:
898 error = ip_fw3_ctl_nat_get_record(sopt);
899 break;
900 default:
901 kprintf("ipfw3 nat invalid socket option %d\n",
902 sopt->sopt_name);
903 }
904 return error;
905 }
906
907 void
nat_init_ctx_dispatch(netmsg_t msg)908 nat_init_ctx_dispatch(netmsg_t msg)
909 {
910 struct ip_fw3_nat_context *tmp;
911 tmp = kmalloc(sizeof(struct ip_fw3_nat_context),
912 M_IPFW3_NAT, M_WAITOK | M_ZERO);
913
914 ip_fw3_nat_ctx[mycpuid] = tmp;
915 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
916 }
917
918 void
nat_fnit_ctx_dispatch(netmsg_t msg)919 nat_fnit_ctx_dispatch(netmsg_t msg)
920 {
921 kfree(ip_fw3_nat_ctx[mycpuid], M_IPFW3_NAT);
922 netisr_forwardmsg_all(&msg->base, mycpuid + 1);
923 }
924
925 static void
nat_cleanup_func_dispatch(netmsg_t nmsg)926 nat_cleanup_func_dispatch(netmsg_t nmsg)
927 {
928 struct nat_state *s, *tmp;
929 struct ip_fw3_nat_context *nat_ctx;
930 struct cfg_nat *nat;
931 struct cfg_alias *a1, *tmp2;
932 struct nat_state2 *s2;
933 int i, j;
934
935 nat_ctx = ip_fw3_nat_ctx[mycpuid];
936 for (j = 0; j < NAT_ID_MAX; j++) {
937 nat = nat_ctx->nats[j];
938 if (nat == NULL)
939 continue;
940 /* check the nat_states, remove the expired state */
941 /* the icmp states will only stored in cpu 0 */
942 RB_FOREACH_SAFE(s, state_tree, &nat->rb_icmp_out, tmp) {
943 if (time_uptime - s->timestamp > sysctl_var_icmp_timeout) {
944 RB_REMOVE(state_tree, &nat->rb_icmp_out, s);
945 kfree(s, M_IPFW3_NAT);
946 }
947 }
948 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
949 for (i = 0; i < ALIAS_RANGE; i++) {
950 s2 = a1->icmp_in[i];
951 if (s2 != NULL) {
952 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) {
953 a1->icmp_in[i] = NULL;
954 kfree(s2, M_IPFW3_NAT);
955 }
956 }
957
958 }
959 }
960
961 RB_FOREACH_SAFE(s, state_tree, &nat->rb_tcp_out, tmp) {
962 if (time_uptime - s->timestamp > sysctl_var_tcp_timeout) {
963 RB_REMOVE(state_tree, &nat->rb_tcp_out, s);
964 kfree(s, M_IPFW3_NAT);
965 }
966 }
967 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
968 for (i = 0; i < ALIAS_RANGE; i++) {
969 s2 = a1->tcp_in[i];
970 if (s2 != NULL) {
971 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) {
972 a1->tcp_in[i] = NULL;
973 kfree(s2, M_IPFW3_NAT);
974 }
975 }
976
977 }
978 }
979 RB_FOREACH_SAFE(s, state_tree, &nat->rb_udp_out, tmp) {
980 if (time_uptime - s->timestamp > sysctl_var_udp_timeout) {
981 RB_REMOVE(state_tree, &nat->rb_udp_out, s);
982 kfree(s, M_IPFW3_NAT);
983 }
984 }
985 LIST_FOREACH_MUTABLE(a1, &nat->alias, next, tmp2) {
986 for (i = 0; i < ALIAS_RANGE; i++) {
987 s2 = a1->udp_in[i];
988 if (s2 != NULL) {
989 if (time_uptime - s2->timestamp > sysctl_var_icmp_timeout) {
990 a1->udp_in[i] = NULL;
991 kfree(s2, M_IPFW3_NAT);
992 }
993 }
994
995 }
996 }
997 }
998 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
999 }
1000
1001 static void
ip_fw3_nat_cleanup_func(void * dummy __unused)1002 ip_fw3_nat_cleanup_func(void *dummy __unused)
1003 {
1004 struct netmsg_base msg;
1005 netmsg_init(&msg, NULL, &curthread->td_msgport, 0,
1006 nat_cleanup_func_dispatch);
1007 netisr_domsg(&msg, 0);
1008
1009 callout_reset(&ip_fw3_nat_cleanup_callout,
1010 sysctl_var_cleanup_interval * hz,
1011 ip_fw3_nat_cleanup_func, NULL);
1012 }
1013
1014 static
ip_fw3_nat_init(void)1015 int ip_fw3_nat_init(void)
1016 {
1017 struct netmsg_base msg;
1018 ip_fw3_register_module(MODULE_NAT_ID, MODULE_NAT_NAME);
1019 ip_fw3_register_filter_funcs(MODULE_NAT_ID, O_NAT_NAT,
1020 (filter_func)check_nat);
1021 ip_fw3_ctl_nat_ptr = ip_fw3_ctl_nat_sockopt;
1022 netmsg_init(&msg, NULL, &curthread->td_msgport,
1023 0, nat_init_ctx_dispatch);
1024 netisr_domsg(&msg, 0);
1025
1026 callout_init_mp(&ip_fw3_nat_cleanup_callout);
1027 callout_reset(&ip_fw3_nat_cleanup_callout,
1028 sysctl_var_cleanup_interval * hz,
1029 ip_fw3_nat_cleanup_func,
1030 NULL);
1031 return 0;
1032 }
1033
1034 static int
ip_fw3_nat_fini(void)1035 ip_fw3_nat_fini(void)
1036 {
1037 struct netmsg_base msg;
1038 struct netmsg_nat_del nat_del_msg, *msg1;
1039 int i;
1040
1041 callout_stop(&ip_fw3_nat_cleanup_callout);
1042
1043 msg1 = &nat_del_msg;
1044 for (i = 0; i < NAT_ID_MAX; i++) {
1045 msg1->id = i + 1;
1046 netmsg_init(&msg1->base, NULL, &curthread->td_msgport,
1047 0, nat_del_dispatch);
1048
1049 netisr_domsg(&msg1->base, 0);
1050 }
1051
1052 netmsg_init(&msg, NULL, &curthread->td_msgport,
1053 0, nat_fnit_ctx_dispatch);
1054 netisr_domsg(&msg, 0);
1055
1056 return ip_fw3_unregister_module(MODULE_NAT_ID);
1057 }
1058
1059 static int
ip_fw3_nat_modevent(module_t mod,int type,void * data)1060 ip_fw3_nat_modevent(module_t mod, int type, void *data)
1061 {
1062 switch (type) {
1063 case MOD_LOAD:
1064 return ip_fw3_nat_init();
1065 case MOD_UNLOAD:
1066 return ip_fw3_nat_fini();
1067 default:
1068 break;
1069 }
1070 return 0;
1071 }
1072
1073 moduledata_t ip_fw3_nat_mod = {
1074 "ipfw3_nat",
1075 ip_fw3_nat_modevent,
1076 NULL
1077 };
1078
1079 DECLARE_MODULE(ipfw3_nat, ip_fw3_nat_mod,
1080 SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
1081 MODULE_DEPEND(ipfw3_nat, ipfw3_basic, 1, 1, 1);
1082 MODULE_VERSION(ipfw3_nat, 1);
1083