1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2015-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/counter.h>
33 #include <sys/ck.h>
34 #include <sys/epoch.h>
35 #include <sys/errno.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/sockopt.h>
45 
46 #include <net/if.h>
47 
48 #include <netinet/in.h>
49 #include <netinet/ip.h>
50 #include <netinet/ip_var.h>
51 #include <netinet/ip_fw.h>
52 #include <netinet6/ip_fw_nat64.h>
53 
54 #include <netpfil/ipfw/ip_fw_private.h>
55 
56 #include "nat64lsn.h"
57 
/*
 * Id of the "nat64lsn" external action.  It is set from the return value
 * of ipfw_add_eaction() in nat64lsn_init() (which treats 0 as a failure)
 * and reset to 0 in nat64lsn_uninit().
 */
VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
59 
60 static struct nat64lsn_cfg *
61 nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
62 {
63 	struct nat64lsn_cfg *cfg;
64 
65 	cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
66 	    IPFW_TLV_NAT64LSN_NAME, name);
67 
68 	return (cfg);
69 }
70 
71 static void
72 nat64lsn_default_config(ipfw_nat64lsn_cfg *uc)
73 {
74 
75 	if (uc->jmaxlen == 0)
76 		uc->jmaxlen = NAT64LSN_JMAXLEN;
77 	if (uc->jmaxlen > 65536)
78 		uc->jmaxlen = 65536;
79 	if (uc->nh_delete_delay == 0)
80 		uc->nh_delete_delay = NAT64LSN_HOST_AGE;
81 	if (uc->pg_delete_delay == 0)
82 		uc->pg_delete_delay = NAT64LSN_PG_AGE;
83 	if (uc->st_syn_ttl == 0)
84 		uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE;
85 	if (uc->st_close_ttl == 0)
86 		uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE;
87 	if (uc->st_estab_ttl == 0)
88 		uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE;
89 	if (uc->st_udp_ttl == 0)
90 		uc->st_udp_ttl = NAT64LSN_UDP_AGE;
91 	if (uc->st_icmp_ttl == 0)
92 		uc->st_icmp_ttl = NAT64LSN_ICMP_AGE;
93 
94 	if (uc->states_chunks == 0)
95 		uc->states_chunks = 1;
96 	else if (uc->states_chunks >= 128)
97 		uc->states_chunks = 128;
98 	else if (!powerof2(uc->states_chunks))
99 		uc->states_chunks = 1 << fls(uc->states_chunks);
100 }
101 
102 /*
103  * Creates new nat64lsn instance.
104  * Data layout (v0)(current):
105  * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
106  *
107  * Returns 0 on success
108  */
109 static int
110 nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
111     struct sockopt_data *sd)
112 {
113 	ipfw_obj_lheader *olh;
114 	ipfw_nat64lsn_cfg *uc;
115 	struct nat64lsn_cfg *cfg;
116 	struct namedobj_instance *ni;
117 	uint32_t addr4, mask4;
118 
119 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
120 		return (EINVAL);
121 
122 	olh = (ipfw_obj_lheader *)sd->kbuf;
123 	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
124 
125 	if (ipfw_check_object_name_generic(uc->name) != 0)
126 		return (EINVAL);
127 
128 	if (uc->set >= IPFW_MAX_SETS)
129 		return (EINVAL);
130 
131 	if (uc->plen4 > 32)
132 		return (EINVAL);
133 
134 	/*
135 	 * Unspecified address has special meaning. But it must
136 	 * have valid prefix length. This length will be used to
137 	 * correctly extract and embedd IPv4 address into IPv6.
138 	 */
139 	if (nat64_check_prefix6(&uc->prefix6, uc->plen6) != 0 &&
140 	    IN6_IS_ADDR_UNSPECIFIED(&uc->prefix6) &&
141 	    nat64_check_prefixlen(uc->plen6) != 0)
142 		return (EINVAL);
143 
144 	/* XXX: Check prefix4 to be global */
145 	addr4 = ntohl(uc->prefix4.s_addr);
146 	mask4 = ~((1 << (32 - uc->plen4)) - 1);
147 	if ((addr4 & mask4) != addr4)
148 		return (EINVAL);
149 
150 	nat64lsn_default_config(uc);
151 
152 	ni = CHAIN_TO_SRV(ch);
153 	IPFW_UH_RLOCK(ch);
154 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
155 		IPFW_UH_RUNLOCK(ch);
156 		return (EEXIST);
157 	}
158 	IPFW_UH_RUNLOCK(ch);
159 
160 	cfg = nat64lsn_init_instance(ch, addr4, uc->plen4);
161 	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
162 	cfg->no.name = cfg->name;
163 	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
164 	cfg->no.set = uc->set;
165 
166 	cfg->base.plat_prefix = uc->prefix6;
167 	cfg->base.plat_plen = uc->plen6;
168 	cfg->base.flags = (uc->flags & NAT64LSN_FLAGSMASK) | NAT64_PLATPFX;
169 	if (IN6_IS_ADDR_WKPFX(&cfg->base.plat_prefix))
170 		cfg->base.flags |= NAT64_WKPFX;
171 	else if (IN6_IS_ADDR_UNSPECIFIED(&cfg->base.plat_prefix))
172 		cfg->base.flags |= NAT64LSN_ANYPREFIX;
173 
174 	cfg->states_chunks = uc->states_chunks;
175 	cfg->jmaxlen = uc->jmaxlen;
176 	cfg->host_delete_delay = uc->nh_delete_delay;
177 	cfg->pg_delete_delay = uc->pg_delete_delay;
178 	cfg->st_syn_ttl = uc->st_syn_ttl;
179 	cfg->st_close_ttl = uc->st_close_ttl;
180 	cfg->st_estab_ttl = uc->st_estab_ttl;
181 	cfg->st_udp_ttl = uc->st_udp_ttl;
182 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
183 
184 	cfg->nomatch_verdict = IP_FW_DENY;
185 
186 	IPFW_UH_WLOCK(ch);
187 
188 	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
189 		IPFW_UH_WUNLOCK(ch);
190 		nat64lsn_destroy_instance(cfg);
191 		return (EEXIST);
192 	}
193 
194 	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
195 		IPFW_UH_WUNLOCK(ch);
196 		nat64lsn_destroy_instance(cfg);
197 		return (ENOSPC);
198 	}
199 	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
200 
201 	/* Okay, let's link data */
202 	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
203 	nat64lsn_start_instance(cfg);
204 
205 	IPFW_UH_WUNLOCK(ch);
206 	return (0);
207 }
208 
209 static void
210 nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
211 {
212 
213 	IPFW_UH_WLOCK_ASSERT(ch);
214 
215 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
216 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
217 }
218 
219 /*
220  * Destroys nat64 instance.
221  * Data layout (v0)(current):
222  * Request: [ ipfw_obj_header ]
223  *
224  * Returns 0 on success
225  */
226 static int
227 nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
228     struct sockopt_data *sd)
229 {
230 	struct nat64lsn_cfg *cfg;
231 	ipfw_obj_header *oh;
232 
233 	if (sd->valsize != sizeof(*oh))
234 		return (EINVAL);
235 
236 	oh = (ipfw_obj_header *)op3;
237 
238 	IPFW_UH_WLOCK(ch);
239 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
240 	if (cfg == NULL) {
241 		IPFW_UH_WUNLOCK(ch);
242 		return (ENOENT);
243 	}
244 
245 	if (cfg->no.refcnt > 0) {
246 		IPFW_UH_WUNLOCK(ch);
247 		return (EBUSY);
248 	}
249 
250 	ipfw_reset_eaction_instance(ch, V_nat64lsn_eid, cfg->no.kidx);
251 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
252 	nat64lsn_detach_config(ch, cfg);
253 	IPFW_UH_WUNLOCK(ch);
254 
255 	nat64lsn_destroy_instance(cfg);
256 	return (0);
257 }
258 
259 #define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
260 	(_stats)->_field = NAT64STAT_FETCH(&(_cfg)->base.stats, _field)
261 static void
262 export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
263     struct ipfw_nat64lsn_stats *stats)
264 {
265 	struct nat64lsn_alias *alias;
266 	int i, j;
267 
268 	__COPY_STAT_FIELD(cfg, stats, opcnt64);
269 	__COPY_STAT_FIELD(cfg, stats, opcnt46);
270 	__COPY_STAT_FIELD(cfg, stats, ofrags);
271 	__COPY_STAT_FIELD(cfg, stats, ifrags);
272 	__COPY_STAT_FIELD(cfg, stats, oerrors);
273 	__COPY_STAT_FIELD(cfg, stats, noroute4);
274 	__COPY_STAT_FIELD(cfg, stats, noroute6);
275 	__COPY_STAT_FIELD(cfg, stats, nomatch4);
276 	__COPY_STAT_FIELD(cfg, stats, noproto);
277 	__COPY_STAT_FIELD(cfg, stats, nomem);
278 	__COPY_STAT_FIELD(cfg, stats, dropped);
279 
280 	__COPY_STAT_FIELD(cfg, stats, jcalls);
281 	__COPY_STAT_FIELD(cfg, stats, jrequests);
282 	__COPY_STAT_FIELD(cfg, stats, jhostsreq);
283 	__COPY_STAT_FIELD(cfg, stats, jportreq);
284 	__COPY_STAT_FIELD(cfg, stats, jhostfails);
285 	__COPY_STAT_FIELD(cfg, stats, jportfails);
286 	__COPY_STAT_FIELD(cfg, stats, jmaxlen);
287 	__COPY_STAT_FIELD(cfg, stats, jnomem);
288 	__COPY_STAT_FIELD(cfg, stats, jreinjected);
289 	__COPY_STAT_FIELD(cfg, stats, screated);
290 	__COPY_STAT_FIELD(cfg, stats, sdeleted);
291 	__COPY_STAT_FIELD(cfg, stats, spgcreated);
292 	__COPY_STAT_FIELD(cfg, stats, spgdeleted);
293 
294 	stats->hostcount = cfg->hosts_count;
295 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++) {
296 		alias = &cfg->aliases[i];
297 		for (j = 0; j < 32 && ISSET32(alias->tcp_chunkmask, j); j++)
298 			stats->tcpchunks += bitcount32(alias->tcp_pgmask[j]);
299 		for (j = 0; j < 32 && ISSET32(alias->udp_chunkmask, j); j++)
300 			stats->udpchunks += bitcount32(alias->udp_pgmask[j]);
301 		for (j = 0; j < 32 && ISSET32(alias->icmp_chunkmask, j); j++)
302 			stats->icmpchunks += bitcount32(alias->icmp_pgmask[j]);
303 	}
304 }
305 #undef	__COPY_STAT_FIELD
306 
307 static void
308 nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
309     ipfw_nat64lsn_cfg *uc)
310 {
311 
312 	uc->flags = cfg->base.flags & NAT64LSN_FLAGSMASK;
313 	uc->states_chunks = cfg->states_chunks;
314 	uc->jmaxlen = cfg->jmaxlen;
315 	uc->nh_delete_delay = cfg->host_delete_delay;
316 	uc->pg_delete_delay = cfg->pg_delete_delay;
317 	uc->st_syn_ttl = cfg->st_syn_ttl;
318 	uc->st_close_ttl = cfg->st_close_ttl;
319 	uc->st_estab_ttl = cfg->st_estab_ttl;
320 	uc->st_udp_ttl = cfg->st_udp_ttl;
321 	uc->st_icmp_ttl = cfg->st_icmp_ttl;
322 	uc->prefix4.s_addr = htonl(cfg->prefix4);
323 	uc->prefix6 = cfg->base.plat_prefix;
324 	uc->plen4 = cfg->plen4;
325 	uc->plen6 = cfg->base.plat_plen;
326 	uc->set = cfg->no.set;
327 	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
328 }
329 
/*
 * Argument handed to export_config_cb() through
 * ipfw_objhash_foreach_type() by nat64lsn_list().
 */
struct nat64_dump_arg {
	struct ip_fw_chain *ch;		/* chain being listed */
	struct sockopt_data *sd;	/* sockopt buffer receiving the reply */
};
334 
335 static int
336 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
337     void *arg)
338 {
339 	struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
340 	ipfw_nat64lsn_cfg *uc;
341 
342 	uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
343 	    sizeof(*uc));
344 	nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
345 	return (0);
346 }
347 
348 /*
349  * Lists all nat64 lsn instances currently available in kernel.
350  * Data layout (v0)(current):
351  * Request: [ ipfw_obj_lheader ]
352  * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
353  *
354  * Returns 0 on success
355  */
356 static int
357 nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
358     struct sockopt_data *sd)
359 {
360 	ipfw_obj_lheader *olh;
361 	struct nat64_dump_arg da;
362 
363 	/* Check minimum header size */
364 	if (sd->valsize < sizeof(ipfw_obj_lheader))
365 		return (EINVAL);
366 
367 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
368 
369 	IPFW_UH_RLOCK(ch);
370 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
371 	    IPFW_TLV_NAT64LSN_NAME);
372 	olh->objsize = sizeof(ipfw_nat64lsn_cfg);
373 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
374 
375 	if (sd->valsize < olh->size) {
376 		IPFW_UH_RUNLOCK(ch);
377 		return (ENOMEM);
378 	}
379 	memset(&da, 0, sizeof(da));
380 	da.ch = ch;
381 	da.sd = sd;
382 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
383 	    IPFW_TLV_NAT64LSN_NAME);
384 	IPFW_UH_RUNLOCK(ch);
385 
386 	return (0);
387 }
388 
389 /*
390  * Change existing nat64lsn instance configuration.
391  * Data layout (v0)(current):
392  * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
393  * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ]
394  *
395  * Returns 0 on success
396  */
397 static int
398 nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op,
399     struct sockopt_data *sd)
400 {
401 	ipfw_obj_header *oh;
402 	ipfw_nat64lsn_cfg *uc;
403 	struct nat64lsn_cfg *cfg;
404 	struct namedobj_instance *ni;
405 
406 	if (sd->valsize != sizeof(*oh) + sizeof(*uc))
407 		return (EINVAL);
408 
409 	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd,
410 	    sizeof(*oh) + sizeof(*uc));
411 	uc = (ipfw_nat64lsn_cfg *)(oh + 1);
412 
413 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
414 	    oh->ntlv.set >= IPFW_MAX_SETS)
415 		return (EINVAL);
416 
417 	ni = CHAIN_TO_SRV(ch);
418 	if (sd->sopt->sopt_dir == SOPT_GET) {
419 		IPFW_UH_RLOCK(ch);
420 		cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
421 		if (cfg == NULL) {
422 			IPFW_UH_RUNLOCK(ch);
423 			return (ENOENT);
424 		}
425 		nat64lsn_export_config(ch, cfg, uc);
426 		IPFW_UH_RUNLOCK(ch);
427 		return (0);
428 	}
429 
430 	nat64lsn_default_config(uc);
431 
432 	IPFW_UH_WLOCK(ch);
433 	cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set);
434 	if (cfg == NULL) {
435 		IPFW_UH_WUNLOCK(ch);
436 		return (ENOENT);
437 	}
438 
439 	/*
440 	 * For now allow to change only following values:
441 	 *  jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age,
442 	 *  tcp_est_age, udp_age, icmp_age, flags, states_chunks.
443 	 */
444 
445 	cfg->states_chunks = uc->states_chunks;
446 	cfg->jmaxlen = uc->jmaxlen;
447 	cfg->host_delete_delay = uc->nh_delete_delay;
448 	cfg->pg_delete_delay = uc->pg_delete_delay;
449 	cfg->st_syn_ttl = uc->st_syn_ttl;
450 	cfg->st_close_ttl = uc->st_close_ttl;
451 	cfg->st_estab_ttl = uc->st_estab_ttl;
452 	cfg->st_udp_ttl = uc->st_udp_ttl;
453 	cfg->st_icmp_ttl = uc->st_icmp_ttl;
454 	cfg->base.flags &= ~NAT64LSN_FLAGSMASK;
455 	cfg->base.flags |= uc->flags & NAT64LSN_FLAGSMASK;
456 
457 	IPFW_UH_WUNLOCK(ch);
458 
459 	return (0);
460 }
461 
462 /*
463  * Get nat64lsn statistics.
464  * Data layout (v0)(current):
465  * Request: [ ipfw_obj_header ]
466  * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
467  *
468  * Returns 0 on success
469  */
470 static int
471 nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
472     struct sockopt_data *sd)
473 {
474 	struct ipfw_nat64lsn_stats stats;
475 	struct nat64lsn_cfg *cfg;
476 	ipfw_obj_header *oh;
477 	ipfw_obj_ctlv *ctlv;
478 	size_t sz;
479 
480 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
481 	if (sd->valsize % sizeof(uint64_t))
482 		return (EINVAL);
483 	if (sd->valsize < sz)
484 		return (ENOMEM);
485 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
486 	if (oh == NULL)
487 		return (EINVAL);
488 	memset(&stats, 0, sizeof(stats));
489 
490 	IPFW_UH_RLOCK(ch);
491 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
492 	if (cfg == NULL) {
493 		IPFW_UH_RUNLOCK(ch);
494 		return (ENOENT);
495 	}
496 
497 	export_stats(ch, cfg, &stats);
498 	IPFW_UH_RUNLOCK(ch);
499 
500 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
501 	memset(ctlv, 0, sizeof(*ctlv));
502 	ctlv->head.type = IPFW_TLV_COUNTERS;
503 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
504 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
505 	ctlv->objsize = sizeof(uint64_t);
506 	ctlv->version = IPFW_NAT64_VERSION;
507 	memcpy(ctlv + 1, &stats, sizeof(stats));
508 	return (0);
509 }
510 
511 /*
512  * Reset nat64lsn statistics.
513  * Data layout (v0)(current):
514  * Request: [ ipfw_obj_header ]
515  *
516  * Returns 0 on success
517  */
518 static int
519 nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
520     struct sockopt_data *sd)
521 {
522 	struct nat64lsn_cfg *cfg;
523 	ipfw_obj_header *oh;
524 
525 	if (sd->valsize != sizeof(*oh))
526 		return (EINVAL);
527 	oh = (ipfw_obj_header *)sd->kbuf;
528 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
529 	    oh->ntlv.set >= IPFW_MAX_SETS)
530 		return (EINVAL);
531 
532 	IPFW_UH_WLOCK(ch);
533 	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
534 	if (cfg == NULL) {
535 		IPFW_UH_WUNLOCK(ch);
536 		return (ENOENT);
537 	}
538 	COUNTER_ARRAY_ZERO(cfg->base.stats.cnt, NAT64STATS);
539 	IPFW_UH_WUNLOCK(ch);
540 	return (0);
541 }
542 
/*
 * FREEMASK_COPY(pg, n, out) loads the 64-bit free mask of chunk n of
 * portgroup pg into out.  With __LP64__ a single FREEMASK_CHUNK() word
 * is read; otherwise two consecutive words are read and the second one
 * is placed into the upper 32 bits.
 */
#ifdef __LP64__
#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n))
#else
#define	FREEMASK_COPY(pg, n, out)	(out) = *FREEMASK_CHUNK((pg), (n)) | \
    ((uint64_t)*(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif
/*
 * Exports every in-use state of chunk idx->chunk of portgroup pg into
 * the sockopt buffer as ipfw_nat64lsn_state_v1 records.
 *
 * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
 *	ipfw_nat64lsn_state x count, ... ] ]
 *
 * A set bit in the free mask marks an unused slot, so the number of
 * exported states is 64 minus the number of set bits.
 *
 * Returns EINVAL when idx->chunk is out of range for pg, ENOMEM when
 * the sockopt buffer has no room for the records, and 0 otherwise.
 * *ret_count is written only when at least one state was exported.
 */
static int
nat64lsn_export_states_v1(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx,
    struct nat64lsn_pg *pg, struct sockopt_data *sd, uint32_t *ret_count)
{
	ipfw_nat64lsn_state_v1 *s;
	struct nat64lsn_state *state;
	uint64_t freemask;
	uint32_t i, count;

	/* validate user input */
	if (idx->chunk > pg->chunks_count - 1)
		return (EINVAL);

	/* Take one snapshot of the mask and use it for count and loop. */
	FREEMASK_COPY(pg, idx->chunk, freemask);
	count = 64 - bitcount64(freemask);
	if (count == 0)
		return (0);	/* Try next PG/chunk */

	DPRINTF(DP_STATE, "EXPORT PG 0x%16jx, count %d",
	    (uintmax_t)idx->index, count);

	/* Reserve room for exactly the number of records counted above. */
	s = (ipfw_nat64lsn_state_v1 *)ipfw_get_sopt_space(sd,
	    count * sizeof(ipfw_nat64lsn_state_v1));
	if (s == NULL)
		return (ENOMEM);

	for (i = 0; i < 64; i++) {
		/* Skip free slots. */
		if (ISSET64(freemask, i))
			continue;
		/*
		 * A portgroup with a single chunk keeps its states in
		 * pg->states; otherwise they are in the per-chunk array.
		 */
		state = pg->chunks_count == 1 ? &pg->states->state[i] :
		    &pg->states_chunk[idx->chunk]->state[i];

		s->host6 = state->host->addr;
		s->daddr.s_addr = htonl(state->ip_dst);
		s->dport = state->dport;
		s->sport = state->sport;
		s->aport = state->aport;
		/* Only the three low flag bits are exported. */
		s->flags = (uint8_t)(state->flags & 7);
		s->proto = state->proto;
		s->idle = GET_AGE(state->timestamp);
		s++;
	}
	*ret_count = count;
	return (0);
}
598 
599 #define	LAST_IDX	0xFF
600 static int
601 nat64lsn_next_pgidx(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg,
602     union nat64lsn_pgidx *idx)
603 {
604 
605 	/* First iterate over chunks */
606 	if (pg != NULL) {
607 		if (idx->chunk < pg->chunks_count - 1) {
608 			idx->chunk++;
609 			return (0);
610 		}
611 	}
612 	idx->chunk = 0;
613 	/* Then over PGs */
614 	if (idx->port < UINT16_MAX - 64) {
615 		idx->port += 64;
616 		return (0);
617 	}
618 	idx->port = NAT64_MIN_PORT;
619 	/* Then over supported protocols */
620 	switch (idx->proto) {
621 	case IPPROTO_ICMP:
622 		idx->proto = IPPROTO_TCP;
623 		return (0);
624 	case IPPROTO_TCP:
625 		idx->proto = IPPROTO_UDP;
626 		return (0);
627 	default:
628 		idx->proto = IPPROTO_ICMP;
629 	}
630 	/* And then over IPv4 alias addresses */
631 	if (idx->addr < cfg->pmask4) {
632 		idx->addr++;
633 		return (1);	/* New states group is needed */
634 	}
635 	idx->index = LAST_IDX;
636 	return (-1);		/* No more states */
637 }
638 
639 static struct nat64lsn_pg*
640 nat64lsn_get_pg_byidx(struct nat64lsn_cfg *cfg, union nat64lsn_pgidx *idx)
641 {
642 	struct nat64lsn_alias *alias;
643 	int pg_idx;
644 
645 	alias = &cfg->aliases[idx->addr & ((1 << (32 - cfg->plen4)) - 1)];
646 	MPASS(alias->addr == idx->addr);
647 
648 	pg_idx = (idx->port - NAT64_MIN_PORT) / 64;
649 	switch (idx->proto) {
650 	case IPPROTO_ICMP:
651 		if (ISSET32(alias->icmp_pgmask[pg_idx / 32], pg_idx % 32))
652 			return (alias->icmp[pg_idx / 32]->pgptr[pg_idx % 32]);
653 		break;
654 	case IPPROTO_TCP:
655 		if (ISSET32(alias->tcp_pgmask[pg_idx / 32], pg_idx % 32))
656 			return (alias->tcp[pg_idx / 32]->pgptr[pg_idx % 32]);
657 		break;
658 	case IPPROTO_UDP:
659 		if (ISSET32(alias->udp_pgmask[pg_idx / 32], pg_idx % 32))
660 			return (alias->udp[pg_idx / 32]->pgptr[pg_idx % 32]);
661 		break;
662 	}
663 	return (NULL);
664 }
665 
/*
 * Lists nat64lsn states.
 * Data layout (v0):
 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
 * Reply: [ ipfw_obj_header ipfw_obj_data [
 *		ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
 *
 * Not implemented: always returns EOPNOTSUPP.
 */
static int
nat64lsn_states_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{

	/* TODO: implement states listing for old ipfw(8) binaries  */
	return (EOPNOTSUPP);
}
683 
/*
 * Lists nat64lsn states.
 * Data layout (v1)(current):
 * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
 * Reply: [ ipfw_obj_header ipfw_obj_data [
 *		ipfw_nat64lsn_stg_v1 ipfw_nat64lsn_state_v1 x N] ]
 *
 * The uint64_t in the request is the iterator position: 0 starts from
 * the beginning, any other value resumes from that position.  Every
 * states group in the reply carries the position to continue from in
 * its next.index field.
 *
 * Returns 0 when at least one state was exported or the end of the
 * iteration was reached; otherwise EINVAL (malformed request or
 * position), ENOENT (no such instance) or ENOMEM (buffer too small).
 */
static int
nat64lsn_states_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
    struct sockopt_data *sd)
{
	ipfw_obj_header *oh;
	ipfw_obj_data *od;
	ipfw_nat64lsn_stg_v1 *stg;
	struct nat64lsn_cfg *cfg;
	struct nat64lsn_pg *pg;
	union nat64lsn_pgidx idx;
	size_t sz;
	uint32_t count, total;
	int ret;

	/* Size of the request: headers plus the 64-bit position. */
	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
	    sizeof(uint64_t);
	/* Check minimum header size */
	if (sd->valsize < sz)
		return (EINVAL);

	oh = (ipfw_obj_header *)sd->kbuf;
	od = (ipfw_obj_data *)(oh + 1);
	if (od->head.type != IPFW_TLV_OBJDATA ||
	    od->head.length != sz - sizeof(ipfw_obj_header))
		return (EINVAL);

	/* A non-zero position must name one of the supported protocols. */
	idx.index = *(uint64_t *)(od + 1);
	if (idx.index != 0 && idx.proto != IPPROTO_ICMP &&
	    idx.proto != IPPROTO_TCP && idx.proto != IPPROTO_UDP)
		return (EINVAL);
	/* LAST_IDX is the end marker and cannot be resumed from. */
	if (idx.index == LAST_IDX)
		return (EINVAL);

	IPFW_UH_RLOCK(ch);
	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
	if (cfg == NULL) {
		IPFW_UH_RUNLOCK(ch);
		return (ENOENT);
	}
	if (idx.index == 0) {	/* Fill in starting point */
		idx.addr = cfg->prefix4;
		idx.proto = IPPROTO_ICMP;
		idx.port = NAT64_MIN_PORT;
	}
	/* The position must lie inside the instance's address/port range. */
	if (idx.addr < cfg->prefix4 || idx.addr > cfg->pmask4 ||
	    idx.port < NAT64_MIN_PORT) {
		IPFW_UH_RUNLOCK(ch);
		return (EINVAL);
	}
	/* From here on sz tracks the size of the reply built so far. */
	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
	    sizeof(ipfw_nat64lsn_stg_v1);
	if (sd->valsize < sz) {
		IPFW_UH_RUNLOCK(ch);
		return (ENOMEM);
	}
	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
	od = (ipfw_obj_data *)(oh + 1);
	od->head.type = IPFW_TLV_OBJDATA;
	od->head.length = sz - sizeof(ipfw_obj_header);
	stg = (ipfw_nat64lsn_stg_v1 *)(od + 1);
	stg->count = total = 0;
	stg->next.index = idx.index;
	/*
	 * Acquire CALLOUT_LOCK to avoid races with expiration code.
	 * Thus states, hosts and PGs will not expire while we hold it.
	 */
	CALLOUT_LOCK(cfg);
	ret = 0;
	do {
		pg = nat64lsn_get_pg_byidx(cfg, &idx);
		if (pg != NULL) {
			count = 0;
			ret = nat64lsn_export_states_v1(cfg, &idx, pg,
			    sd, &count);
			/* On error the current position is not advanced. */
			if (ret != 0)
				break;
			if (count > 0) {
				stg->count += count;
				total += count;
				/* Update total size of reply */
				od->head.length +=
				    count * sizeof(ipfw_nat64lsn_state_v1);
				sz += count * sizeof(ipfw_nat64lsn_state_v1);
			}
			stg->alias4.s_addr = htonl(idx.addr);
		}
		/* Determine new index */
		switch (nat64lsn_next_pgidx(cfg, pg, &idx)) {
		case -1:
			ret = ENOENT; /* End of search */
			break;
		case 1: /*
			 * Next alias address, new group may be needed.
			 * If states count is zero, use this group.
			 * (continue jumps to the loop condition, so the
			 * next.index update after the switch is skipped.)
			 */
			if (stg->count == 0)
				continue;
			/* Otherwise try to create new group */
			sz += sizeof(ipfw_nat64lsn_stg_v1);
			if (sd->valsize < sz) {
				ret = ENOMEM;
				break;
			}
			/* Save next index in current group */
			stg->next.index = idx.index;
			stg = (ipfw_nat64lsn_stg_v1 *)ipfw_get_sopt_space(sd,
			    sizeof(ipfw_nat64lsn_stg_v1));
			od->head.length += sizeof(ipfw_nat64lsn_stg_v1);
			stg->count = 0;
			break;
		}
		/* Record where the caller should resume from. */
		stg->next.index = idx.index;
	} while (ret == 0);
	CALLOUT_UNLOCK(cfg);
	IPFW_UH_RUNLOCK(ch);
	/*
	 * A partial reply and a completed iteration both count as success;
	 * ret is reported only when nothing was exported before the end.
	 */
	return ((total > 0 || idx.index == LAST_IDX) ? 0: ret);
}
810 
/*
 * Socket option handlers of this module, registered in nat64lsn_init()
 * and removed in nat64lsn_uninit().  The second column appears to be
 * the request version: IP_FW_NAT64LSN_LIST_STATES has separate v0 and
 * v1 handlers.
 */
static struct ipfw_sopt_handler	scodes[] = {
	{ IP_FW_NAT64LSN_CREATE, 0,	HDIR_BOTH,	nat64lsn_create },
	{ IP_FW_NAT64LSN_DESTROY,0,	HDIR_SET,	nat64lsn_destroy },
	{ IP_FW_NAT64LSN_CONFIG, 0,	HDIR_BOTH,	nat64lsn_config },
	{ IP_FW_NAT64LSN_LIST,	 0,	HDIR_GET,	nat64lsn_list },
	{ IP_FW_NAT64LSN_STATS,	 0,	HDIR_GET,	nat64lsn_stats },
	{ IP_FW_NAT64LSN_RESET_STATS,0,	HDIR_SET,	nat64lsn_reset_stats },
	{ IP_FW_NAT64LSN_LIST_STATES,0,	HDIR_GET,	nat64lsn_states_v0 },
	{ IP_FW_NAT64LSN_LIST_STATES,1,	HDIR_GET,	nat64lsn_states_v1 },
};
821 
822 static int
823 nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
824 {
825 	ipfw_insn *icmd;
826 
827 	icmd = cmd - 1;
828 	if (icmd->opcode != O_EXTERNAL_ACTION ||
829 	    icmd->arg1 != V_nat64lsn_eid)
830 		return (1);
831 
832 	*puidx = cmd->arg1;
833 	*ptype = 0;
834 	return (0);
835 }
836 
/*
 * Update callback for the opcode rewriter (see opcodes[]): stores the
 * given index in the instruction's arg1.
 */
static void
nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
{

	cmd->arg1 = idx;
}
843 
844 static int
845 nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
846     struct named_object **pno)
847 {
848 	int err;
849 
850 	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
851 	    IPFW_TLV_NAT64LSN_NAME, pno);
852 	return (err);
853 }
854 
855 static struct named_object *
856 nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
857 {
858 	struct namedobj_instance *ni;
859 	struct named_object *no;
860 
861 	IPFW_UH_WLOCK_ASSERT(ch);
862 	ni = CHAIN_TO_SRV(ch);
863 	no = ipfw_objhash_lookup_kidx(ni, idx);
864 	KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx));
865 
866 	return (no);
867 }
868 
869 static int
870 nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
871     enum ipfw_sets_cmd cmd)
872 {
873 
874 	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME,
875 	    set, new_set, cmd));
876 }
877 
/*
 * Object rewriter description for O_EXTERNAL_INSTANCE instructions that
 * refer to nat64lsn instances; registered in nat64lsn_init() and
 * removed in nat64lsn_uninit().
 */
static struct opcode_obj_rewrite opcodes[] = {
	{
		.opcode = O_EXTERNAL_INSTANCE,
		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
		.classifier = nat64lsn_classify,
		.update = nat64lsn_update_arg1,
		.find_byname = nat64lsn_findbyname,
		.find_bykidx = nat64lsn_findbykidx,
		.manage_sets = nat64lsn_manage_sets,
	},
};
889 
890 static int
891 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
892     void *arg)
893 {
894 	struct nat64lsn_cfg *cfg;
895 	struct ip_fw_chain *ch;
896 
897 	ch = (struct ip_fw_chain *)arg;
898 	cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx);
899 	SRV_OBJECT(ch, no->kidx) = NULL;
900 	nat64lsn_detach_config(ch, cfg);
901 	nat64lsn_destroy_instance(cfg);
902 	return (0);
903 }
904 
905 int
906 nat64lsn_init(struct ip_fw_chain *ch, int first)
907 {
908 
909 	if (first != 0)
910 		nat64lsn_init_internal();
911 	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
912 	if (V_nat64lsn_eid == 0)
913 		return (ENXIO);
914 	IPFW_ADD_SOPT_HANDLER(first, scodes);
915 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
916 	return (0);
917 }
918 
/*
 * Module teardown for one chain: removes the object rewriter and the
 * sockopt handlers, deregisters the external action, destroys every
 * nat64lsn instance of the chain and resets the external action id.
 * When last is non-zero nat64lsn_uninit_internal() is called as well.
 */
void
nat64lsn_uninit(struct ip_fw_chain *ch, int last)
{

	IPFW_DEL_OBJ_REWRITER(last, opcodes);
	IPFW_DEL_SOPT_HANDLER(last, scodes);
	ipfw_del_eaction(ch, V_nat64lsn_eid);
	/*
	 * Since we have already deregistered the external action,
	 * our named objects are no longer accessible via rules, because
	 * all rules were truncated by ipfw_del_eaction().
	 * So, we can unlink and destroy our named objects without holding
	 * IPFW_WLOCK().
	 */
	IPFW_UH_WLOCK(ch);
	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
	    IPFW_TLV_NAT64LSN_NAME);
	V_nat64lsn_eid = 0;
	IPFW_UH_WUNLOCK(ch);
	if (last != 0)
		nat64lsn_uninit_internal();
}
941