xref: /freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c (revision 1d386b48)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2019 Yandex LLC
5  * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org>
6  * Copyright (c) 2016-2019 Andrey V. Elsukov <ae@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/counter.h>
34 #include <sys/ck.h>
35 #include <sys/epoch.h>
36 #include <sys/errno.h>
37 #include <sys/hash.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/module.h>
43 #include <sys/rmlock.h>
44 #include <sys/socket.h>
45 #include <sys/syslog.h>
46 #include <sys/sysctl.h>
47 
48 #include <net/if.h>
49 #include <net/if_var.h>
50 #include <net/if_pflog.h>
51 #include <net/pfil.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip_var.h>
56 #include <netinet/ip_fw.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 #include <netinet/ip_icmp.h>
60 #include <netinet/tcp.h>
61 #include <netinet/udp.h>
62 #include <netinet6/in6_var.h>
63 #include <netinet6/ip6_var.h>
64 #include <netinet6/ip_fw_nat64.h>
65 
66 #include <netpfil/ipfw/ip_fw_private.h>
67 #include <netpfil/pf/pf.h>
68 
69 #include "nat64lsn.h"
70 
/* Malloc type for this module's malloc(9) allocations (e.g. host hashes). */
MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN");

/*
 * Thin wrappers around the network epoch primitives.
 * Note that NAT64LSN_EPOCH_CALL() takes (context, callback) while
 * NET_EPOCH_CALL() is invoked with (callback, context).
 */
#define	NAT64LSN_EPOCH_ENTER(et)  NET_EPOCH_ENTER(et)
#define	NAT64LSN_EPOCH_EXIT(et)   NET_EPOCH_EXIT(et)
#define	NAT64LSN_EPOCH_ASSERT()   NET_EPOCH_ASSERT()
#define	NAT64LSN_EPOCH_CALL(c, f) NET_EPOCH_CALL((f), (c))

/* UMA zones, one per kind of object allocated by this file. */
static uma_zone_t nat64lsn_host_zone;
static uma_zone_t nat64lsn_pgchunk_zone;
static uma_zone_t nat64lsn_pg_zone;
static uma_zone_t nat64lsn_aliaslink_zone;
static uma_zone_t nat64lsn_state_zone;
static uma_zone_t nat64lsn_job_zone;

static void nat64lsn_periodic(void *data);
/* Interval between nat64lsn_periodic() runs; multiplied by hz when scheduled. */
#define	PERIODIC_DELAY		4
/* Fetch the service object referenced by (cmd)->arg1 as a nat64lsn_cfg. */
#define	NAT64_LOOKUP(chain, cmd)	\
	(struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
/*
 * Delayed job queue, used to create new hosts
 * and new portgroups.
 *
 * The job type selects which half of the union in
 * struct nat64lsn_job_item is valid.
 */
enum nat64lsn_jtype {
	JTYPE_NEWHOST = 1,	/* allocate a host (see nat64lsn_alloc_host()) */
	JTYPE_NEWPORTGROUP,	/* allocate a PG (see nat64lsn_alloc_pg()) */
	JTYPE_DESTROY,		/* free expired objects; set by nat64lsn_periodic() */
};
98 
/*
 * One unit of deferred work.  @jtype tells which member of the anonymous
 * union carries valid data.
 */
struct nat64lsn_job_item {
	STAILQ_ENTRY(nat64lsn_job_item)	entries;	/* job queue linkage */
	enum nat64lsn_jtype	jtype;

	union {
		struct { /* used by JTYPE_NEWHOST, JTYPE_NEWPORTGROUP */
			/* NOTE(review): presumably the packet that triggered the job */
			struct mbuf		*m;
			/* host found or allocated by nat64lsn_alloc_host() */
			struct nat64lsn_host	*host;
			/* NOTE(review): filled outside the code visible here */
			struct nat64lsn_state	*state;
			/* selects the bucket in cfg's hosts hash */
			uint32_t		src6_hval;
			/* STATE_HVAL() over { faddr, (dst_port << 16) | port } */
			uint32_t		state_hval;
			/* flow id; src_ip6 identifies the host */
			struct ipfw_flow_id	f_id;
			/* IPv4 remote address, first word of the state hash key */
			in_addr_t		faddr;
			/* combined with f_id.dst_port in the state hash key */
			uint16_t		port;
			uint8_t			proto;
			/* NOTE(review): not referenced in the code visible here */
			uint8_t			done;
		};
		struct { /* used by JTYPE_DESTROY */
			/* hosts unlinked by nat64lsn_expire_hosts() */
			struct nat64lsn_hosts_slist	hosts;
			/* PGs unlinked by nat64lsn_expire_portgroups() */
			struct nat64lsn_pg_slist	portgroups;
			/* result of nat64lsn_expire_pgchunk() */
			struct nat64lsn_pgchunk		*pgchunk;
			/* context handed to NAT64LSN_EPOCH_CALL() */
			struct epoch_context		epoch_ctx;
		};
	};
};
124 
/*
 * File-wide default mutex registered under the name "qlock".
 * NOTE(review): presumably serializes access to the job queue; the users of
 * JQUEUE_LOCK() are outside the code visible here.
 */
static struct mtx jmtx;
#define	JQUEUE_LOCK_INIT()	mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
#define	JQUEUE_LOCK_DESTROY()	mtx_destroy(&jmtx)
#define	JQUEUE_LOCK()		mtx_lock(&jmtx)
#define	JQUEUE_UNLOCK()		mtx_unlock(&jmtx)
130 
/* Deferred allocation and destruction helpers. */
static int nat64lsn_alloc_host(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static int nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static struct nat64lsn_job_item *nat64lsn_create_job(
    struct nat64lsn_cfg *cfg, int jtype);
static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
    struct nat64lsn_job_item *ji);
static void nat64lsn_job_destroy(epoch_context_t ctx);
static void nat64lsn_destroy_host(struct nat64lsn_host *host);
static void nat64lsn_destroy_pg(struct nat64lsn_pg *pg);

/* Packet translation entry points (IPv4->IPv6 and IPv6->IPv4). */
static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
    struct ipfw_flow_id *f_id, struct mbuf **mp);
static int nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg,
    struct mbuf **mp, struct nat64lsn_state *state, uint8_t flags);
149 
/*
 * Bit positions inside a state's flags word.  The three TCP bits hold the
 * value produced by convert_tcp_flags(); READY and STALE track the
 * life cycle of the state itself.
 */
#define	NAT64_BIT_TCP_FIN	0	/* FIN was seen */
#define	NAT64_BIT_TCP_SYN	1	/* First syn in->out */
#define	NAT64_BIT_TCP_ESTAB	2	/* Packet with Ack */
#define	NAT64_BIT_READY_IPV4	6	/* state is ready for translate4 */
#define	NAT64_BIT_STALE		7	/* state is going to be expired */

/* Mask forms of the TCP bits above. */
#define	NAT64_FLAG_FIN		(1 << NAT64_BIT_TCP_FIN)
#define	NAT64_FLAG_SYN		(1 << NAT64_BIT_TCP_SYN)
#define	NAT64_FLAG_ESTAB	(1 << NAT64_BIT_TCP_ESTAB)
#define	NAT64_FLAGS_TCP	(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)

/* Mask forms of the life-cycle bits above. */
#define	NAT64_FLAG_READY	(1 << NAT64_BIT_READY_IPV4)
#define	NAT64_FLAG_STALE	(1 << NAT64_BIT_STALE)
163 
/*
 * Fold TCP header flags into the compact form kept in a state:
 * FIN and SYN keep their positions, RST is reported in the FIN position
 * and ACK is reported two bits lower (the "established" position).
 * All other header flags are dropped.
 */
static inline uint8_t
convert_tcp_flags(uint8_t flags)
{
	uint8_t fin_syn, rst_as_fin, ack_as_estab;

	fin_syn = flags & (TH_FIN | TH_SYN);
	rst_as_fin = (flags & TH_RST) >> 2;	/* RST counts as FIN */
	ack_as_estab = (flags & TH_ACK) >> 2;	/* ACK counts as estab */

	return (fin_syn | rst_as_fin | ack_as_estab);
}
175 
176 static void
177 nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
178     struct nat64lsn_state *state)
179 {
180 
181 	memset(plog, 0, sizeof(*plog));
182 	plog->length = PFLOG_HDRLEN;
183 	plog->af = family;
184 	plog->action = PF_NAT;
185 	plog->dir = PF_IN;
186 	plog->rulenr = htonl(state->ip_src);
187 	plog->subrulenr = htonl((uint32_t)(state->aport << 16) |
188 	    (state->proto << 8) | (state->ip_dst & 0xff));
189 	plog->ruleset[0] = '\0';
190 	strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname));
191 	ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
192 }
193 
/* Hash @n 32-bit words starting at @p with seed @s. */
#define	HVAL(p, n, s)	jenkins_hash32((const uint32_t *)(p), (n), (s))
/* Hash of one IPv6 address @a, seeded per instance. */
#define	HOST_HVAL(c, a)	HVAL((a),\
    sizeof(struct in6_addr) / sizeof(uint32_t), (c)->hash_seed)
/* Bucket of the hosts hash for value @v; the mask assumes a power-of-2 size. */
#define	HOSTS(c, v)	((c)->hosts_hash[(v) & ((c)->hosts_hashsize - 1)])

/*
 * Hash two consecutive in6_addr-sized regions starting at (f)->dst_ip6.
 * NOTE(review): presumably dst_ip6 is directly followed by src_ip6 in
 * struct ipfw_flow_id - confirm against its definition.
 */
#define	ALIASLINK_HVAL(c, f)	HVAL(&(f)->dst_ip6,\
    sizeof(struct in6_addr) * 2 / sizeof(uint32_t), (c)->hash_seed)
/* Alias selected by the low (32 - plen4) bits of @v. */
#define	ALIAS_BYHASH(c, v)	\
    ((c)->aliases[(v) & ((1 << (32 - (c)->plen4)) - 1)])
203 static struct nat64lsn_aliaslink*
204 nat64lsn_get_aliaslink(struct nat64lsn_cfg *cfg __unused,
205     struct nat64lsn_host *host, const struct ipfw_flow_id *f_id __unused)
206 {
207 
208 	/*
209 	 * We can implement some different algorithms how
210 	 * select an alias address.
211 	 * XXX: for now we use first available.
212 	 */
213 	return (CK_SLIST_FIRST(&host->aliases));
214 }
215 
/* Hash of the two 32-bit words at @d, seeded per instance. */
#define	STATE_HVAL(c, d)	HVAL((d), 2, (c)->hash_seed)
/* Bucket of a host's states hash; the mask assumes a power-of-2 size. */
#define	STATE_HASH(h, v)	\
    ((h)->states_hash[(v) & ((h)->states_hashsize - 1)])
/*
 * States chunk of portgroup @p to use for @v: the single embedded chunk
 * when chunks_count is 1, otherwise the chunk chosen by CHUNK_BY_FADDR().
 */
#define	STATES_CHUNK(p, v)	\
    ((p)->chunks_count == 1 ? (p)->states : \
	((p)->states_chunk[CHUNK_BY_FADDR(p, v)]))
222 
/*
 * Free-state bitmask accessors.  On LP64 the mask chunk returned by
 * FREEMASK_CHUNK() is handled as a single 64-bit word.
 */
#ifdef __LP64__
/* 1-based index of the lowest set bit, 0 if none. */
#define	FREEMASK_FFSLL(pg, faddr)		\
    ffsll(*FREEMASK_CHUNK((pg), (faddr)))
/* Atomic bit test-and-reset. */
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Atomic bit test-and-set. */
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_64(FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Plain (non-atomic) test of a single bit. */
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET64(*FREEMASK_CHUNK((pg), (faddr)), (bit))
/* Atomic load of the whole mask chunk @n into @out. */
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_64(FREEMASK_CHUNK((pg), (n)))
#else
/*
 * 64-bit "find first set" over a mask stored as two 32-bit words,
 * least significant word first.
 * Returns the 1-based index (1..64) of the lowest set bit, or 0 when
 * both words are zero.
 */
static inline int
freemask_ffsll(uint32_t *freemask)
{
	int bit;

	bit = ffsl(freemask[0]);
	if (bit == 0) {
		/* Nothing in the low word, look at the high one. */
		bit = ffsl(freemask[1]);
		if (bit != 0)
			bit += 32;
	}
	return (bit);
}
/*
 * Non-LP64 variants: the mask chunk is accessed as two 32-bit words,
 * word (bit / 32) holding bit (bit % 32).
 */
/* 1-based index of the lowest set bit, 0 if none. */
#define	FREEMASK_FFSLL(pg, faddr)		\
    freemask_ffsll(FREEMASK_CHUNK((pg), (faddr)))
/* Atomic bit test-and-reset. */
#define	FREEMASK_BTR(pg, faddr, bit)	\
    ck_pr_btr_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
/* Atomic bit test-and-set. */
#define	FREEMASK_BTS(pg, faddr, bit)	\
    ck_pr_bts_32(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32, (bit) % 32)
/* Plain (non-atomic) test of a single bit. */
#define	FREEMASK_ISSET(pg, faddr, bit)	\
    ISSET32(*(FREEMASK_CHUNK((pg), (faddr)) + (bit) / 32), (bit) % 32)
/* Two separate 32-bit atomic loads combined into a 64-bit value in @out. */
#define	FREEMASK_COPY(pg, n, out)	\
    (out) = ck_pr_load_32(FREEMASK_CHUNK((pg), (n))) | \
	((uint64_t)ck_pr_load_32(FREEMASK_CHUNK((pg), (n)) + 1) << 32)
#endif /* !__LP64__ */
258 
259 #define	NAT64LSN_TRY_PGCNT	32
260 static struct nat64lsn_pg*
261 nat64lsn_get_pg(uint32_t *chunkmask, uint32_t *pgmask,
262     struct nat64lsn_pgchunk **chunks, struct nat64lsn_pg **pgptr,
263     uint32_t *pgidx, in_addr_t faddr)
264 {
265 	struct nat64lsn_pg *pg, *oldpg;
266 	uint32_t idx, oldidx;
267 	int cnt;
268 
269 	cnt = 0;
270 	/* First try last used PG */
271 	oldpg = pg = ck_pr_load_ptr(pgptr);
272 	idx = oldidx = ck_pr_load_32(pgidx);
273 	/* If pgidx is out of range, reset it to the first pgchunk */
274 	if (!ISSET32(*chunkmask, idx / 32))
275 		idx = 0;
276 	do {
277 		ck_pr_fence_load();
278 		if (pg != NULL && FREEMASK_BITCOUNT(pg, faddr) > 0) {
279 			/*
280 			 * If last used PG has not free states,
281 			 * try to update pointer.
282 			 * NOTE: it can be already updated by jobs handler,
283 			 *	 thus we use CAS operation.
284 			 */
285 			if (cnt > 0)
286 				ck_pr_cas_ptr(pgptr, oldpg, pg);
287 			return (pg);
288 		}
289 		/* Stop if idx is out of range */
290 		if (!ISSET32(*chunkmask, idx / 32))
291 			break;
292 
293 		if (ISSET32(pgmask[idx / 32], idx % 32))
294 			pg = ck_pr_load_ptr(
295 			    &chunks[idx / 32]->pgptr[idx % 32]);
296 		else
297 			pg = NULL;
298 
299 		idx++;
300 	} while (++cnt < NAT64LSN_TRY_PGCNT);
301 
302 	/* If pgidx is out of range, reset it to the first pgchunk */
303 	if (!ISSET32(*chunkmask, idx / 32))
304 		idx = 0;
305 	ck_pr_cas_32(pgidx, oldidx, idx);
306 	return (NULL);
307 }
308 
/*
 * Find or create the state for a flow from IPv6 host @host to the IPv4
 * destination @faddr.
 *
 * @hval is the state hash value, @port the source port (or its equivalent
 * chosen by the caller) and @proto the IPv4 protocol number.
 *
 * An existing state is returned when the host's hash bucket holds one
 * with the same proto, ip_dst, sport and dport.  Otherwise a free state
 * is claimed from a portgroup of the host's alias, initialized, linked
 * into the host's hash and marked READY.
 *
 * Returns NULL when the host has no alias link, no portgroup with free
 * states was found, or every attempt to claim a free bit failed.
 * Must be called inside the network epoch (asserted).
 */
static struct nat64lsn_state*
nat64lsn_get_state6to4(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
    const struct ipfw_flow_id *f_id, uint32_t hval, in_addr_t faddr,
    uint16_t port, uint8_t proto)
{
	struct nat64lsn_aliaslink *link;
	struct nat64lsn_state *state;
	struct nat64lsn_pg *pg;
	int i, offset;

	NAT64LSN_EPOCH_ASSERT();

	/* Check that we already have state for given arguments */
	CK_SLIST_FOREACH(state, &STATE_HASH(host, hval), entries) {
		if (state->proto == proto && state->ip_dst == faddr &&
		    state->sport == port && state->dport == f_id->dst_port)
			return (state);
	}

	link = nat64lsn_get_aliaslink(cfg, host, f_id);
	if (link == NULL)
		return (NULL);

	/* Each protocol has its own set of portgroups in the alias. */
	switch (proto) {
	case IPPROTO_TCP:
		pg = nat64lsn_get_pg(
		    &link->alias->tcp_chunkmask, link->alias->tcp_pgmask,
		    link->alias->tcp, &link->alias->tcp_pg,
		    &link->alias->tcp_pgidx, faddr);
		break;
	case IPPROTO_UDP:
		pg = nat64lsn_get_pg(
		    &link->alias->udp_chunkmask, link->alias->udp_pgmask,
		    link->alias->udp, &link->alias->udp_pg,
		    &link->alias->udp_pgidx, faddr);
		break;
	case IPPROTO_ICMP:
		pg = nat64lsn_get_pg(
		    &link->alias->icmp_chunkmask, link->alias->icmp_pgmask,
		    link->alias->icmp, &link->alias->icmp_pg,
		    &link->alias->icmp_pgidx, faddr);
		break;
	default:
		panic("%s: wrong proto %d", __func__, proto);
	}
	if (pg == NULL)
		return (NULL);

	/* Check that PG has some free states */
	state = NULL;
	/* The free count observed here bounds the number of attempts. */
	i = FREEMASK_BITCOUNT(pg, faddr);
	while (i-- > 0) {
		/* offset is 1-based; 0 means no free bit is left. */
		offset = FREEMASK_FFSLL(pg, faddr);
		if (offset == 0) {
			/*
			 * We lost the race.
			 * No more free states in this PG.
			 */
			break;
		}

		/* Lets try to atomically grab the state */
		if (FREEMASK_BTR(pg, faddr, offset - 1)) {
			state = &STATES_CHUNK(pg, faddr)->state[offset - 1];
			/* Initialize */
			state->flags = proto != IPPROTO_TCP ? 0 :
			    convert_tcp_flags(f_id->_flags);
			state->proto = proto;
			/* The alias port follows from the state's slot. */
			state->aport = pg->base_port + offset - 1;
			state->dport = f_id->dst_port;
			state->sport = port;
			state->ip6_dst = f_id->dst_ip6;
			state->ip_dst = faddr;
			state->ip_src = link->alias->addr;
			state->hval = hval;
			state->host = host;
			SET_AGE(state->timestamp);

			/* Insert new state into host's hash table */
			HOST_LOCK(host);
			CK_SLIST_INSERT_HEAD(&STATE_HASH(host, hval),
			    state, entries);
			host->states_count++;
			/*
			 * XXX: In case if host is going to be expired,
			 * reset NAT64LSN_DEADHOST flag.
			 */
			host->flags &= ~NAT64LSN_DEADHOST;
			HOST_UNLOCK(host);
			NAT64STAT_INC(&cfg->base.stats, screated);
			/* Mark the state as ready for translate4 */
			/* The fence orders the stores above before READY. */
			ck_pr_fence_store();
			ck_pr_bts_32(&state->flags, NAT64_BIT_READY_IPV4);
			break;
		}
		/* Somebody else took this bit; retry with the next one. */
	}
	return (state);
}
407 
/*
 * Inspects icmp packets to see if the message contains different
 * packet header so we need to alter @addr and @port.
 *
 * For echo request/reply only @port is set (to the ICMP id).
 * For ICMP_UNREACH and ICMP_TIMXCEED the embedded (inner) packet is used:
 * @addr becomes its source address and @port its source port (TCP/UDP,
 * in which case @proto is also replaced) or its ICMP id (echo request).
 * Values are stored in host byte order.
 *
 * @mp may be replaced by m_pullup(); it is NULL when ENOMEM is returned.
 *
 * Returns 0 on success, ENOMEM if m_pullup() failed, EINVAL if the packet
 * is too short to hold an inner header, EOPNOTSUPP for any other ICMP
 * type or inner protocol.
 */
static int
inspect_icmp_mbuf(struct mbuf **mp, uint8_t *proto, uint32_t *addr,
    uint16_t *port)
{
	struct icmp *icmp;
	struct ip *ip;
	int off;
	uint8_t inner_proto;

	ip = mtod(*mp, struct ip *); /* Outer IP header */
	off = (ip->ip_hl << 2) + ICMP_MINLEN;
	if ((*mp)->m_len < off)
		*mp = m_pullup(*mp, off);
	if (*mp == NULL)
		return (ENOMEM);

	/* Re-read the pointer: m_pullup() may have changed the mbuf. */
	ip = mtod(*mp, struct ip *); /* Outer IP header */
	icmp = L3HDR(ip, struct icmp *);
	switch (icmp->icmp_type) {
	case ICMP_ECHO:
	case ICMP_ECHOREPLY:
		/* Use icmp ID as distinguisher */
		*port = ntohs(icmp->icmp_id);
		return (0);
	case ICMP_UNREACH:
	case ICMP_TIMXCEED:
		break;
	default:
		return (EOPNOTSUPP);
	}
	/*
	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
	 * of ULP header.
	 */
	if ((*mp)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
		return (EINVAL);
	if ((*mp)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + sizeof(struct ip) + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	ip = mtodo(*mp, off); /* Inner IP header */
	/* Saved now: ip is not refreshed after the pullup below. */
	inner_proto = ip->ip_p;
	/* NOTE(review): the inner ip_hl is used without validation. */
	off += ip->ip_hl << 2; /* Skip inner IP header */
	*addr = ntohl(ip->ip_src.s_addr);
	if ((*mp)->m_len < off + ICMP_MINLEN)
		*mp = m_pullup(*mp, off + ICMP_MINLEN);
	if (*mp == NULL)
		return (ENOMEM);
	switch (inner_proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Copy source port from the header */
		*port = ntohs(*((uint16_t *)mtodo(*mp, off)));
		*proto = inner_proto;
		return (0);
	case IPPROTO_ICMP:
		/*
		 * We will translate only ICMP errors for our ICMP
		 * echo requests.
		 */
		icmp = mtodo(*mp, off);
		if (icmp->icmp_type != ICMP_ECHO)
			return (EOPNOTSUPP);
		*port = ntohs(icmp->icmp_id);
		return (0);
	};
	return (EOPNOTSUPP);
}
480 
481 static struct nat64lsn_state*
482 nat64lsn_get_state4to6(struct nat64lsn_cfg *cfg, struct nat64lsn_alias *alias,
483     in_addr_t faddr, uint16_t port, uint8_t proto)
484 {
485 	struct nat64lsn_state *state;
486 	struct nat64lsn_pg *pg;
487 	int chunk_idx, pg_idx, state_idx;
488 
489 	NAT64LSN_EPOCH_ASSERT();
490 
491 	if (port < NAT64_MIN_PORT)
492 		return (NULL);
493 	/*
494 	 * Alias keeps 32 pgchunks for each protocol.
495 	 * Each pgchunk has 32 pointers to portgroup.
496 	 * Each portgroup has 64 states for ports.
497 	 */
498 	port -= NAT64_MIN_PORT;
499 	chunk_idx = port / 2048;
500 
501 	port -= chunk_idx * 2048;
502 	pg_idx = port / 64;
503 	state_idx = port % 64;
504 
505 	/*
506 	 * First check in proto_chunkmask that we have allocated PG chunk.
507 	 * Then check in proto_pgmask that we have valid PG pointer.
508 	 */
509 	pg = NULL;
510 	switch (proto) {
511 	case IPPROTO_TCP:
512 		if (ISSET32(alias->tcp_chunkmask, chunk_idx) &&
513 		    ISSET32(alias->tcp_pgmask[chunk_idx], pg_idx)) {
514 			pg = alias->tcp[chunk_idx]->pgptr[pg_idx];
515 			break;
516 		}
517 		return (NULL);
518 	case IPPROTO_UDP:
519 		if (ISSET32(alias->udp_chunkmask, chunk_idx) &&
520 		    ISSET32(alias->udp_pgmask[chunk_idx], pg_idx)) {
521 			pg = alias->udp[chunk_idx]->pgptr[pg_idx];
522 			break;
523 		}
524 		return (NULL);
525 	case IPPROTO_ICMP:
526 		if (ISSET32(alias->icmp_chunkmask, chunk_idx) &&
527 		    ISSET32(alias->icmp_pgmask[chunk_idx], pg_idx)) {
528 			pg = alias->icmp[chunk_idx]->pgptr[pg_idx];
529 			break;
530 		}
531 		return (NULL);
532 	default:
533 		panic("%s: wrong proto %d", __func__, proto);
534 	}
535 	if (pg == NULL)
536 		return (NULL);
537 
538 	if (FREEMASK_ISSET(pg, faddr, state_idx))
539 		return (NULL);
540 
541 	state = &STATES_CHUNK(pg, faddr)->state[state_idx];
542 	ck_pr_fence_load();
543 	if (ck_pr_load_32(&state->flags) & NAT64_FLAG_READY)
544 		return (state);
545 	return (NULL);
546 }
547 
548 /*
549  * Reassemble IPv4 fragments, make PULLUP if needed, get some ULP fields
550  * that might be unknown until reassembling is completed.
551  */
552 static struct mbuf*
553 nat64lsn_reassemble4(struct nat64lsn_cfg *cfg, struct mbuf *m,
554     uint16_t *port)
555 {
556 	struct ip *ip;
557 	int len;
558 
559 	m = ip_reass(m);
560 	if (m == NULL)
561 		return (NULL);
562 	/* IP header must be contigious after ip_reass() */
563 	ip = mtod(m, struct ip *);
564 	len = ip->ip_hl << 2;
565 	switch (ip->ip_p) {
566 	case IPPROTO_ICMP:
567 		len += ICMP_MINLEN; /* Enough to get icmp_id */
568 		break;
569 	case IPPROTO_TCP:
570 		len += sizeof(struct tcphdr);
571 		break;
572 	case IPPROTO_UDP:
573 		len += sizeof(struct udphdr);
574 		break;
575 	default:
576 		m_freem(m);
577 		NAT64STAT_INC(&cfg->base.stats, noproto);
578 		return (NULL);
579 	}
580 	if (m->m_len < len) {
581 		m = m_pullup(m, len);
582 		if (m == NULL) {
583 			NAT64STAT_INC(&cfg->base.stats, nomem);
584 			return (NULL);
585 		}
586 		ip = mtod(m, struct ip *);
587 	}
588 	switch (ip->ip_p) {
589 	case IPPROTO_TCP:
590 		*port = ntohs(L3HDR(ip, struct tcphdr *)->th_dport);
591 		break;
592 	case IPPROTO_UDP:
593 		*port = ntohs(L3HDR(ip, struct udphdr *)->uh_dport);
594 		break;
595 	}
596 	return (m);
597 }
598 
/*
 * Translate an IPv4 packet addressed to one of the alias addresses into
 * IPv6, using the state previously created by the IPv6->IPv4 direction.
 *
 * @f_id describes the packet's flow; @mp holds the packet and may be
 * replaced (reassembly, pullup) or set to NULL.
 *
 * Returns cfg->nomatch_verdict when the packet is not handled here (address
 * outside of the prefix, unsupported protocol, no matching state, or
 * nat64_do_handle_ip4() returned NAT64SKIP).  Returns IP_FW_DENY in all
 * other cases; *mp is NULL when reassembly or a pullup failed, or when
 * nat64_do_handle_ip4() returned NAT64RETURN.
 */
static int
nat64lsn_translate4(struct nat64lsn_cfg *cfg,
    const struct ipfw_flow_id *f_id, struct mbuf **mp)
{
	struct pfloghdr loghdr, *logdata;
	struct in6_addr src6;
	struct nat64lsn_state *state;
	struct nat64lsn_alias *alias;
	uint32_t addr, flags;
	uint16_t port, ts;
	int ret;
	uint8_t proto;

	addr = f_id->dst_ip;
	port = f_id->dst_port;
	proto = f_id->proto;
	/* The destination must be inside the configured IPv4 range. */
	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* Reassemble fragments if needed */
	/* ret temporarily holds the fragment offset/flags field. */
	ret = ntohs(mtod(*mp, struct ip *)->ip_off);
	if ((ret & (IP_MF | IP_OFFMASK)) != 0) {
		*mp = nat64lsn_reassemble4(cfg, *mp, &port);
		if (*mp == NULL)
			return (IP_FW_DENY);
	}

	/* Check if protocol is supported */
	switch (proto) {
	case IPPROTO_ICMP:
		/*
		 * For ICMP errors this replaces proto/addr/port with the
		 * values taken from the embedded packet.
		 */
		ret = inspect_icmp_mbuf(mp, &proto, &addr, &port);
		if (ret != 0) {
			if (ret == ENOMEM) {
				NAT64STAT_INC(&cfg->base.stats, nomem);
				return (IP_FW_DENY);
			}
			NAT64STAT_INC(&cfg->base.stats, noproto);
			return (cfg->nomatch_verdict);
		}
		/* addr may have changed, so check the range again. */
		if (addr < cfg->prefix4 || addr > cfg->pmask4) {
			NAT64STAT_INC(&cfg->base.stats, nomatch4);
			return (cfg->nomatch_verdict);
		}
		/* FALLTHROUGH */
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		break;
	default:
		NAT64STAT_INC(&cfg->base.stats, noproto);
		return (cfg->nomatch_verdict);
	}

	alias = &ALIAS_BYHASH(cfg, addr);
	MPASS(addr == alias->addr);

	/* Check that we have state for this port */
	state = nat64lsn_get_state4to6(cfg, alias, f_id->src_ip,
	    port, proto);
	if (state == NULL) {
		NAT64STAT_INC(&cfg->base.stats, nomatch4);
		return (cfg->nomatch_verdict);
	}

	/* TODO: Check flags to see if we need to do some static mapping */

	/* Update some state fields if need */
	SET_AGE(ts);
	/*
	 * The outer protocol is tested here, so TCP flags are not applied
	 * for ICMP errors that merely embed a TCP header.
	 */
	if (f_id->proto == IPPROTO_TCP)
		flags = convert_tcp_flags(f_id->_flags);
	else
		flags = 0;
	/* Store only when the value changes. */
	if (state->timestamp != ts)
		state->timestamp = ts;
	if ((state->flags & flags) != flags)
		state->flags |= flags;

	/* From here on port is the IPv6 host's port in network byte order. */
	port = htons(state->sport);
	src6 = state->ip6_dst;

	if (cfg->base.flags & NAT64_LOG) {
		logdata = &loghdr;
		nat64lsn_log(logdata, *mp, AF_INET, state);
	} else
		logdata = NULL;

	/*
	 * We already have src6 with embedded address, but it is possible,
	 * that src_ip is different than state->ip_dst, this is why we
	 * do embedding again.
	 */
	nat64_embed_ip4(&src6, cfg->base.plat_plen, htonl(f_id->src_ip));
	ret = nat64_do_handle_ip4(*mp, &src6, &state->host->addr, port,
	    &cfg->base, logdata);
	if (ret == NAT64SKIP)
		return (cfg->nomatch_verdict);
	if (ret == NAT64RETURN)
		*mp = NULL;
	return (IP_FW_DENY);
}
700 
701 /*
702  * Check if particular state is stale and should be deleted.
703  * Return 1 if true, 0 otherwise.
704  */
705 static int
706 nat64lsn_check_state(struct nat64lsn_cfg *cfg, struct nat64lsn_state *state)
707 {
708 	int age, ttl;
709 
710 	/* State was marked as stale in previous pass. */
711 	if (ISSET32(state->flags, NAT64_BIT_STALE))
712 		return (1);
713 
714 	/* State is not yet initialized, it is going to be READY */
715 	if (!ISSET32(state->flags, NAT64_BIT_READY_IPV4))
716 		return (0);
717 
718 	age = GET_AGE(state->timestamp);
719 	switch (state->proto) {
720 	case IPPROTO_TCP:
721 		if (ISSET32(state->flags, NAT64_BIT_TCP_FIN))
722 			ttl = cfg->st_close_ttl;
723 		else if (ISSET32(state->flags, NAT64_BIT_TCP_ESTAB))
724 			ttl = cfg->st_estab_ttl;
725 		else if (ISSET32(state->flags, NAT64_BIT_TCP_SYN))
726 			ttl = cfg->st_syn_ttl;
727 		else
728 			ttl = cfg->st_syn_ttl;
729 		if (age > ttl)
730 			return (1);
731 		break;
732 	case IPPROTO_UDP:
733 		if (age > cfg->st_udp_ttl)
734 			return (1);
735 		break;
736 	case IPPROTO_ICMP:
737 		if (age > cfg->st_icmp_ttl)
738 			return (1);
739 		break;
740 	}
741 	return (0);
742 }
743 
/*
 * Expire the states of portgroup @pg.
 *
 * Every state in use is checked with nat64lsn_check_state().  Expiry takes
 * two passes: on the first the state is unlinked from its host and
 * flagged STALE, on the next its flags are cleared and its bit in the
 * freemask is set again.
 *
 * Returns 1 when the age of @pg has reached cfg->pg_delete_delay, 0
 * otherwise.  The age restarts whenever a state that is not expired was
 * found.
 */
static int
nat64lsn_maintain_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_pg *pg)
{
	struct nat64lsn_state *state;
	struct nat64lsn_host *host;
	uint64_t freemask;
	int c, i, update_age;

	update_age = 0;
	for (c = 0; c < pg->chunks_count; c++) {
		/* Work on a snapshot of the chunk's freemask. */
		FREEMASK_COPY(pg, c, freemask);
		for (i = 0; i < 64; i++) {
			/* A set bit means the state is free, nothing to do. */
			if (ISSET64(freemask, i))
				continue;
			state = &STATES_CHUNK(pg, c)->state[i];
			if (nat64lsn_check_state(cfg, state) == 0) {
				update_age = 1;
				continue;
			}
			/*
			 * Expire state:
			 * 1. Mark as STALE and unlink from host's hash.
			 * 2. Set bit in freemask.
			 */
			if (ISSET32(state->flags, NAT64_BIT_STALE)) {
				/*
				 * State was marked as STALE in previous
				 * pass. Now it is safe to release it.
				 */
				state->flags = 0;
				/* Flags are cleared before the slot is freed. */
				ck_pr_fence_store();
				FREEMASK_BTS(pg, c, i);
				NAT64STAT_INC(&cfg->base.stats, sdeleted);
				continue;
			}
			MPASS(state->flags & NAT64_FLAG_READY);

			host = state->host;
			HOST_LOCK(host);
			CK_SLIST_REMOVE(&STATE_HASH(host, state->hval),
			    state, nat64lsn_state, entries);
			host->states_count--;
			HOST_UNLOCK(host);

			/* Reset READY flag */
			ck_pr_btr_32(&state->flags, NAT64_BIT_READY_IPV4);
			/* And set STALE flag */
			ck_pr_bts_32(&state->flags, NAT64_BIT_STALE);
			ck_pr_fence_store();
			/*
			 * Now translate6 will not use this state, wait
			 * until it become safe for translate4, then mark
			 * state as free.
			 */
		}
	}

	/*
	 * We have some alive states, update timestamp.
	 */
	if (update_age)
		SET_AGE(pg->timestamp);

	if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay)
		return (0);

	return (1);
}
812 
813 static void
814 nat64lsn_expire_portgroups(struct nat64lsn_cfg *cfg,
815     struct nat64lsn_pg_slist *portgroups)
816 {
817 	struct nat64lsn_alias *alias;
818 	struct nat64lsn_pg *pg, *tpg, *firstpg, **pgptr;
819 	uint32_t *pgmask, *pgidx;
820 	int i, idx;
821 
822 	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
823 		alias = &cfg->aliases[i];
824 		CK_SLIST_FOREACH_SAFE(pg, &alias->portgroups, entries, tpg) {
825 			if (nat64lsn_maintain_pg(cfg, pg) == 0)
826 				continue;
827 			/* Always keep first PG */
828 			if (pg->base_port == NAT64_MIN_PORT)
829 				continue;
830 			/*
831 			 * PG is expired, unlink it and schedule for
832 			 * deferred destroying.
833 			 */
834 			idx = (pg->base_port - NAT64_MIN_PORT) / 64;
835 			switch (pg->proto) {
836 			case IPPROTO_TCP:
837 				pgmask = alias->tcp_pgmask;
838 				pgptr = &alias->tcp_pg;
839 				pgidx = &alias->tcp_pgidx;
840 				firstpg = alias->tcp[0]->pgptr[0];
841 				break;
842 			case IPPROTO_UDP:
843 				pgmask = alias->udp_pgmask;
844 				pgptr = &alias->udp_pg;
845 				pgidx = &alias->udp_pgidx;
846 				firstpg = alias->udp[0]->pgptr[0];
847 				break;
848 			case IPPROTO_ICMP:
849 				pgmask = alias->icmp_pgmask;
850 				pgptr = &alias->icmp_pg;
851 				pgidx = &alias->icmp_pgidx;
852 				firstpg = alias->icmp[0]->pgptr[0];
853 				break;
854 			}
855 			/* Reset the corresponding bit in pgmask array. */
856 			ck_pr_btr_32(&pgmask[idx / 32], idx % 32);
857 			ck_pr_fence_store();
858 			/* If last used PG points to this PG, reset it. */
859 			ck_pr_cas_ptr(pgptr, pg, firstpg);
860 			ck_pr_cas_32(pgidx, idx, 0);
861 			/* Unlink PG from alias's chain */
862 			ALIAS_LOCK(alias);
863 			CK_SLIST_REMOVE(&alias->portgroups, pg,
864 			    nat64lsn_pg, entries);
865 			alias->portgroups_count--;
866 			ALIAS_UNLOCK(alias);
867 			/* And link to job's chain for deferred destroying */
868 			NAT64STAT_INC(&cfg->base.stats, spgdeleted);
869 			CK_SLIST_INSERT_HEAD(portgroups, pg, entries);
870 		}
871 	}
872 }
873 
/*
 * Walk the hosts hash and collect idle hosts on @hosts for deferred
 * destruction.
 *
 * Expiry takes two passes.  A host without states whose age has reached
 * cfg->host_delete_delay is first flagged NAT64LSN_DEADHOST.  On a later
 * pass a host that is still flagged and still has no states is unlinked
 * from the hash; if it got states in the meantime the flag is cleared.
 */
static void
nat64lsn_expire_hosts(struct nat64lsn_cfg *cfg,
    struct nat64lsn_hosts_slist *hosts)
{
	struct nat64lsn_host *host, *tmp;
	int i;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH_SAFE(host, &cfg->hosts_hash[i],
		    entries, tmp) {
			/* Is host was marked in previous call? */
			if (host->flags & NAT64LSN_DEADHOST) {
				if (host->states_count > 0) {
					/* The host is in use again. */
					host->flags &= ~NAT64LSN_DEADHOST;
					continue;
				}
				/*
				 * Unlink host from hash table and schedule
				 * it for deferred destroying.
				 */
				CFG_LOCK(cfg);
				CK_SLIST_REMOVE(&cfg->hosts_hash[i], host,
				    nat64lsn_host, entries);
				cfg->hosts_count--;
				CFG_UNLOCK(cfg);
				CK_SLIST_INSERT_HEAD(hosts, host, entries);
				continue;
			}
			/* Too young to expire. */
			if (GET_AGE(host->timestamp) < cfg->host_delete_delay)
				continue;
			/* Still has states. */
			if (host->states_count > 0)
				continue;
			/* Mark host as going to be expired in next pass */
			host->flags |= NAT64LSN_DEADHOST;
			ck_pr_fence_store();
		}
	}
}
912 
/*
 * Placeholder for pgchunk expiration.
 * The scan below is compiled out, so the function currently always
 * returns NULL and no pgchunk is ever scheduled for destruction here.
 */
static struct nat64lsn_pgchunk*
nat64lsn_expire_pgchunk(struct nat64lsn_cfg *cfg)
{
#if 0
	struct nat64lsn_alias *alias;
	struct nat64lsn_pgchunk *chunk;
	uint32_t pgmask;
	int i, c;

	for (i = 0; i < 1 << (32 - cfg->plen4); i++) {
		alias = &cfg->aliases[i];
		if (GET_AGE(alias->timestamp) < cfg->pgchunk_delete_delay)
			continue;
		/* Always keep single chunk allocated */
		for (c = 1; c < 32; c++) {
			if ((alias->tcp_chunkmask & (1 << c)) == 0)
				break;
			chunk = ck_pr_load_ptr(&alias->tcp[c]);
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
			ck_pr_btr_32(&alias->tcp_chunkmask, c);
			ck_pr_fence_load();
			if (ck_pr_load_32(&alias->tcp_pgmask[c]) != 0)
				continue;
		}
	}
#endif
	return (NULL);
}
942 
/*
 * Disabled sketch of growing a host's states hash once it gets crowded.
 * NOTE(review): this code is compiled out and unfinished: the new table
 * is neither installed nor freed, malloc() is called without a malloc
 * type argument, and the init loop runs over j but indexes hash[i].
 * All of this has to be fixed before the block is enabled.
 */
#if 0
static void
nat64lsn_maintain_hosts(struct nat64lsn_cfg *cfg)
{
	struct nat64lsn_host *h;
	struct nat64lsn_states_slist *hash;
	int i, j, hsize;

	for (i = 0; i < cfg->hosts_hashsize; i++) {
		CK_SLIST_FOREACH(h, &cfg->hosts_hash[i], entries) {
			 if (h->states_count / 2 < h->states_hashsize ||
			     h->states_hashsize >= NAT64LSN_MAX_HSIZE)
				 continue;
			 hsize = h->states_hashsize * 2;
			 hash = malloc(sizeof(*hash)* hsize, M_NOWAIT);
			 if (hash == NULL)
				 continue;
			 for (j = 0; j < hsize; j++)
				CK_SLIST_INIT(&hash[i]);

			 ck_pr_bts_32(&h->flags, NAT64LSN_GROWHASH);
		}
	}
}
#endif
968 
969 /*
970  * This procedure is used to perform various maintenance
971  * on dynamic hash list. Currently it is called every 4 seconds.
972  */
973 static void
974 nat64lsn_periodic(void *data)
975 {
976 	struct nat64lsn_job_item *ji;
977 	struct nat64lsn_cfg *cfg;
978 
979 	cfg = (struct nat64lsn_cfg *) data;
980 	CURVNET_SET(cfg->vp);
981 	if (cfg->hosts_count > 0) {
982 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
983 		if (ji != NULL) {
984 			ji->jtype = JTYPE_DESTROY;
985 			CK_SLIST_INIT(&ji->hosts);
986 			CK_SLIST_INIT(&ji->portgroups);
987 			nat64lsn_expire_hosts(cfg, &ji->hosts);
988 			nat64lsn_expire_portgroups(cfg, &ji->portgroups);
989 			ji->pgchunk = nat64lsn_expire_pgchunk(cfg);
990 			NAT64LSN_EPOCH_CALL(&ji->epoch_ctx,
991 			    nat64lsn_job_destroy);
992 		} else
993 			NAT64STAT_INC(&cfg->base.stats, jnomem);
994 	}
995 	callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
996 	CURVNET_RESTORE();
997 }
998 
/*
 * Allocation error codes: 0 when @stage is 0, otherwise 10 * type + stage,
 * so the tens digit names the object (1 - host, 2 - portgroup) and the
 * units digit the step that failed, e.g. HOST_ERROR(2) is 12.
 */
#define	ALLOC_ERROR(stage, type)	((stage) ? 10 * (type) + (stage): 0)
#define	HOST_ERROR(stage)		ALLOC_ERROR(stage, 1)
#define	PG_ERROR(stage)			ALLOC_ERROR(stage, 2)
1002 static int
1003 nat64lsn_alloc_host(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1004 {
1005 	char a[INET6_ADDRSTRLEN];
1006 	struct nat64lsn_aliaslink *link;
1007 	struct nat64lsn_host *host;
1008 	struct nat64lsn_state *state;
1009 	uint32_t hval, data[2];
1010 	int i;
1011 
1012 	/* Check that host was not yet added. */
1013 	NAT64LSN_EPOCH_ASSERT();
1014 	CK_SLIST_FOREACH(host, &HOSTS(cfg, ji->src6_hval), entries) {
1015 		if (IN6_ARE_ADDR_EQUAL(&ji->f_id.src_ip6, &host->addr)) {
1016 			/* The host was allocated in previous call. */
1017 			ji->host = host;
1018 			goto get_state;
1019 		}
1020 	}
1021 
1022 	host = ji->host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT);
1023 	if (ji->host == NULL)
1024 		return (HOST_ERROR(1));
1025 
1026 	host->states_hashsize = NAT64LSN_HSIZE;
1027 	host->states_hash = malloc(sizeof(struct nat64lsn_states_slist) *
1028 	    host->states_hashsize, M_NAT64LSN, M_NOWAIT);
1029 	if (host->states_hash == NULL) {
1030 		uma_zfree(nat64lsn_host_zone, host);
1031 		return (HOST_ERROR(2));
1032 	}
1033 
1034 	link = uma_zalloc(nat64lsn_aliaslink_zone, M_NOWAIT);
1035 	if (link == NULL) {
1036 		free(host->states_hash, M_NAT64LSN);
1037 		uma_zfree(nat64lsn_host_zone, host);
1038 		return (HOST_ERROR(3));
1039 	}
1040 
1041 	/* Initialize */
1042 	HOST_LOCK_INIT(host);
1043 	SET_AGE(host->timestamp);
1044 	host->addr = ji->f_id.src_ip6;
1045 	host->hval = ji->src6_hval;
1046 	host->flags = 0;
1047 	host->states_count = 0;
1048 	host->states_hashsize = NAT64LSN_HSIZE;
1049 	CK_SLIST_INIT(&host->aliases);
1050 	for (i = 0; i < host->states_hashsize; i++)
1051 		CK_SLIST_INIT(&host->states_hash[i]);
1052 
1053 	/* Determine alias from flow hash. */
1054 	hval = ALIASLINK_HVAL(cfg, &ji->f_id);
1055 	link->alias = &ALIAS_BYHASH(cfg, hval);
1056 	CK_SLIST_INSERT_HEAD(&host->aliases, link, host_entries);
1057 
1058 	ALIAS_LOCK(link->alias);
1059 	CK_SLIST_INSERT_HEAD(&link->alias->hosts, link, alias_entries);
1060 	link->alias->hosts_count++;
1061 	ALIAS_UNLOCK(link->alias);
1062 
1063 	CFG_LOCK(cfg);
1064 	CK_SLIST_INSERT_HEAD(&HOSTS(cfg, ji->src6_hval), host, entries);
1065 	cfg->hosts_count++;
1066 	CFG_UNLOCK(cfg);
1067 
1068 get_state:
1069 	data[0] = ji->faddr;
1070 	data[1] = (ji->f_id.dst_port << 16) | ji->port;
1071 	ji->state_hval = hval = STATE_HVAL(cfg, data);
1072 	state = nat64lsn_get_state6to4(cfg, host, &ji->f_id, hval,
1073 	    ji->faddr, ji->port, ji->proto);
1074 	/*
1075 	 * We failed to obtain new state, used alias needs new PG.
1076 	 * XXX: or another alias should be used.
1077 	 */
1078 	if (state == NULL) {
1079 		/* Try to allocate new PG */
1080 		if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1081 			return (HOST_ERROR(4));
1082 		/* We assume that nat64lsn_alloc_pg() got state */
1083 	} else
1084 		ji->state = state;
1085 
1086 	ji->done = 1;
1087 	DPRINTF(DP_OBJ, "ALLOC HOST %s %p",
1088 	    inet_ntop(AF_INET6, &host->addr, a, sizeof(a)), host);
1089 	return (HOST_ERROR(0));
1090 }
1091 
/*
 * Find the lowest clear bit in a bitmask made of 32 consecutive 32-bit
 * words.
 *
 * data: pointer to the first of the 32 words.
 *
 * Returns the zero-based index of the first clear bit (word index * 32
 * plus the bit position inside the word), or -1 when every bit in all
 * 32 words is set.
 */
static int
nat64lsn_find_pg_place(uint32_t *data)
{
	uint32_t avail;
	int word;

	for (word = 0; word < 32; word++) {
		/* A set bit in 'avail' marks a free position. */
		avail = ~data[word];
		if (avail != 0)
			return (word * 32 + ffs(avail) - 1);
	}
	return (-1);
}
1104 
1105 static int
1106 nat64lsn_alloc_proto_pg(struct nat64lsn_cfg *cfg,
1107     struct nat64lsn_alias *alias, uint32_t *chunkmask,
1108     uint32_t *pgmask, struct nat64lsn_pgchunk **chunks,
1109     struct nat64lsn_pg **pgptr, uint8_t proto)
1110 {
1111 	struct nat64lsn_pg *pg;
1112 	int i, pg_idx, chunk_idx;
1113 
1114 	/* Find place in pgchunk where PG can be added */
1115 	pg_idx = nat64lsn_find_pg_place(pgmask);
1116 	if (pg_idx < 0)	/* no more PGs */
1117 		return (PG_ERROR(1));
1118 	/* Check that we have allocated pgchunk for given PG index */
1119 	chunk_idx = pg_idx / 32;
1120 	if (!ISSET32(*chunkmask, chunk_idx)) {
1121 		chunks[chunk_idx] = uma_zalloc(nat64lsn_pgchunk_zone,
1122 		    M_NOWAIT);
1123 		if (chunks[chunk_idx] == NULL)
1124 			return (PG_ERROR(2));
1125 		ck_pr_bts_32(chunkmask, chunk_idx);
1126 		ck_pr_fence_store();
1127 	}
1128 	/* Allocate PG and states chunks */
1129 	pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT);
1130 	if (pg == NULL)
1131 		return (PG_ERROR(3));
1132 	pg->chunks_count = cfg->states_chunks;
1133 	if (pg->chunks_count > 1) {
1134 		pg->freemask_chunk = malloc(pg->chunks_count *
1135 		    sizeof(uint64_t), M_NAT64LSN, M_NOWAIT);
1136 		if (pg->freemask_chunk == NULL) {
1137 			uma_zfree(nat64lsn_pg_zone, pg);
1138 			return (PG_ERROR(4));
1139 		}
1140 		pg->states_chunk = malloc(pg->chunks_count *
1141 		    sizeof(struct nat64lsn_states_chunk *), M_NAT64LSN,
1142 		    M_NOWAIT | M_ZERO);
1143 		if (pg->states_chunk == NULL) {
1144 			free(pg->freemask_chunk, M_NAT64LSN);
1145 			uma_zfree(nat64lsn_pg_zone, pg);
1146 			return (PG_ERROR(5));
1147 		}
1148 		for (i = 0; i < pg->chunks_count; i++) {
1149 			pg->states_chunk[i] = uma_zalloc(
1150 			    nat64lsn_state_zone, M_NOWAIT);
1151 			if (pg->states_chunk[i] == NULL)
1152 				goto states_failed;
1153 		}
1154 		memset(pg->freemask_chunk, 0xff,
1155 		    sizeof(uint64_t) * pg->chunks_count);
1156 	} else {
1157 		pg->states = uma_zalloc(nat64lsn_state_zone, M_NOWAIT);
1158 		if (pg->states == NULL) {
1159 			uma_zfree(nat64lsn_pg_zone, pg);
1160 			return (PG_ERROR(6));
1161 		}
1162 		memset(&pg->freemask64, 0xff, sizeof(uint64_t));
1163 	}
1164 
1165 	/* Initialize PG and hook it to pgchunk */
1166 	SET_AGE(pg->timestamp);
1167 	pg->proto = proto;
1168 	pg->base_port = NAT64_MIN_PORT + 64 * pg_idx;
1169 	ck_pr_store_ptr(&chunks[chunk_idx]->pgptr[pg_idx % 32], pg);
1170 	ck_pr_fence_store();
1171 	ck_pr_bts_32(&pgmask[pg_idx / 32], pg_idx % 32);
1172 	ck_pr_store_ptr(pgptr, pg);
1173 
1174 	ALIAS_LOCK(alias);
1175 	CK_SLIST_INSERT_HEAD(&alias->portgroups, pg, entries);
1176 	SET_AGE(alias->timestamp);
1177 	alias->portgroups_count++;
1178 	ALIAS_UNLOCK(alias);
1179 	NAT64STAT_INC(&cfg->base.stats, spgcreated);
1180 	return (PG_ERROR(0));
1181 
1182 states_failed:
1183 	for (i = 0; i < pg->chunks_count; i++)
1184 		uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1185 	free(pg->freemask_chunk, M_NAT64LSN);
1186 	free(pg->states_chunk, M_NAT64LSN);
1187 	uma_zfree(nat64lsn_pg_zone, pg);
1188 	return (PG_ERROR(7));
1189 }
1190 
1191 static int
1192 nat64lsn_alloc_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1193 {
1194 	struct nat64lsn_aliaslink *link;
1195 	struct nat64lsn_alias *alias;
1196 	int ret;
1197 
1198 	link = nat64lsn_get_aliaslink(cfg, ji->host, &ji->f_id);
1199 	if (link == NULL)
1200 		return (PG_ERROR(1));
1201 
1202 	/*
1203 	 * TODO: check that we did not already allocated PG in
1204 	 *	 previous call.
1205 	 */
1206 
1207 	ret = 0;
1208 	alias = link->alias;
1209 	/* Find place in pgchunk where PG can be added */
1210 	switch (ji->proto) {
1211 	case IPPROTO_TCP:
1212 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1213 		    &alias->tcp_chunkmask, alias->tcp_pgmask,
1214 		    alias->tcp, &alias->tcp_pg, ji->proto);
1215 		break;
1216 	case IPPROTO_UDP:
1217 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1218 		    &alias->udp_chunkmask, alias->udp_pgmask,
1219 		    alias->udp, &alias->udp_pg, ji->proto);
1220 		break;
1221 	case IPPROTO_ICMP:
1222 		ret = nat64lsn_alloc_proto_pg(cfg, alias,
1223 		    &alias->icmp_chunkmask, alias->icmp_pgmask,
1224 		    alias->icmp, &alias->icmp_pg, ji->proto);
1225 		break;
1226 	default:
1227 		panic("%s: wrong proto %d", __func__, ji->proto);
1228 	}
1229 	if (ret == PG_ERROR(1)) {
1230 		/*
1231 		 * PG_ERROR(1) means that alias lacks free PGs
1232 		 * XXX: try next alias.
1233 		 */
1234 		printf("NAT64LSN: %s: failed to obtain PG\n",
1235 		    __func__);
1236 		return (ret);
1237 	}
1238 	if (ret == PG_ERROR(0)) {
1239 		ji->state = nat64lsn_get_state6to4(cfg, ji->host, &ji->f_id,
1240 		    ji->state_hval, ji->faddr, ji->port, ji->proto);
1241 		if (ji->state == NULL)
1242 			ret = PG_ERROR(8);
1243 		else
1244 			ji->done = 1;
1245 	}
1246 	return (ret);
1247 }
1248 
1249 static void
1250 nat64lsn_do_request(void *data)
1251 {
1252 	struct epoch_tracker et;
1253 	struct nat64lsn_job_head jhead;
1254 	struct nat64lsn_job_item *ji, *ji2;
1255 	struct nat64lsn_cfg *cfg;
1256 	int jcount;
1257 	uint8_t flags;
1258 
1259 	cfg = (struct nat64lsn_cfg *)data;
1260 	if (cfg->jlen == 0)
1261 		return;
1262 
1263 	CURVNET_SET(cfg->vp);
1264 	STAILQ_INIT(&jhead);
1265 
1266 	/* Grab queue */
1267 	JQUEUE_LOCK();
1268 	STAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item);
1269 	jcount = cfg->jlen;
1270 	cfg->jlen = 0;
1271 	JQUEUE_UNLOCK();
1272 
1273 	/* TODO: check if we need to resize hash */
1274 
1275 	NAT64STAT_INC(&cfg->base.stats, jcalls);
1276 	DPRINTF(DP_JQUEUE, "count=%d", jcount);
1277 
1278 	/*
1279 	 * TODO:
1280 	 * What we should do here is to build a hash
1281 	 * to ensure we don't have lots of duplicate requests.
1282 	 * Skip this for now.
1283 	 *
1284 	 * TODO: Limit per-call number of items
1285 	 */
1286 
1287 	NAT64LSN_EPOCH_ENTER(et);
1288 	STAILQ_FOREACH(ji, &jhead, entries) {
1289 		switch (ji->jtype) {
1290 		case JTYPE_NEWHOST:
1291 			if (nat64lsn_alloc_host(cfg, ji) != HOST_ERROR(0))
1292 				NAT64STAT_INC(&cfg->base.stats, jhostfails);
1293 			break;
1294 		case JTYPE_NEWPORTGROUP:
1295 			if (nat64lsn_alloc_pg(cfg, ji) != PG_ERROR(0))
1296 				NAT64STAT_INC(&cfg->base.stats, jportfails);
1297 			break;
1298 		default:
1299 			continue;
1300 		}
1301 		if (ji->done != 0) {
1302 			flags = ji->proto != IPPROTO_TCP ? 0 :
1303 			    convert_tcp_flags(ji->f_id._flags);
1304 			nat64lsn_translate6_internal(cfg, &ji->m,
1305 			    ji->state, flags);
1306 			NAT64STAT_INC(&cfg->base.stats, jreinjected);
1307 		}
1308 	}
1309 	NAT64LSN_EPOCH_EXIT(et);
1310 
1311 	ji = STAILQ_FIRST(&jhead);
1312 	while (ji != NULL) {
1313 		ji2 = STAILQ_NEXT(ji, entries);
1314 		/*
1315 		 * In any case we must free mbuf if
1316 		 * translator did not consumed it.
1317 		 */
1318 		m_freem(ji->m);
1319 		uma_zfree(nat64lsn_job_zone, ji);
1320 		ji = ji2;
1321 	}
1322 	CURVNET_RESTORE();
1323 }
1324 
1325 static struct nat64lsn_job_item *
1326 nat64lsn_create_job(struct nat64lsn_cfg *cfg, int jtype)
1327 {
1328 	struct nat64lsn_job_item *ji;
1329 
1330 	/*
1331 	 * Do not try to lock possibly contested mutex if we're near the
1332 	 * limit. Drop packet instead.
1333 	 */
1334 	ji = NULL;
1335 	if (cfg->jlen >= cfg->jmaxlen)
1336 		NAT64STAT_INC(&cfg->base.stats, jmaxlen);
1337 	else {
1338 		ji = uma_zalloc(nat64lsn_job_zone, M_NOWAIT);
1339 		if (ji == NULL)
1340 			NAT64STAT_INC(&cfg->base.stats, jnomem);
1341 	}
1342 	if (ji == NULL) {
1343 		NAT64STAT_INC(&cfg->base.stats, dropped);
1344 		DPRINTF(DP_DROPS, "failed to create job");
1345 	} else {
1346 		ji->jtype = jtype;
1347 		ji->done = 0;
1348 	}
1349 	return (ji);
1350 }
1351 
1352 static void
1353 nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
1354 {
1355 
1356 	JQUEUE_LOCK();
1357 	STAILQ_INSERT_TAIL(&cfg->jhead, ji, entries);
1358 	NAT64STAT_INC(&cfg->base.stats, jrequests);
1359 	cfg->jlen++;
1360 
1361 	if (callout_pending(&cfg->jcallout) == 0)
1362 		callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg);
1363 	JQUEUE_UNLOCK();
1364 }
1365 
1366 static void
1367 nat64lsn_job_destroy(epoch_context_t ctx)
1368 {
1369 	struct nat64lsn_job_item *ji;
1370 	struct nat64lsn_host *host;
1371 	struct nat64lsn_pg *pg;
1372 	int i;
1373 
1374 	ji = __containerof(ctx, struct nat64lsn_job_item, epoch_ctx);
1375 	MPASS(ji->jtype == JTYPE_DESTROY);
1376 	while (!CK_SLIST_EMPTY(&ji->hosts)) {
1377 		host = CK_SLIST_FIRST(&ji->hosts);
1378 		CK_SLIST_REMOVE_HEAD(&ji->hosts, entries);
1379 		if (host->states_count > 0) {
1380 			/*
1381 			 * XXX: The state has been created
1382 			 * during host deletion.
1383 			 */
1384 			printf("NAT64LSN: %s: destroying host with %d "
1385 			    "states\n", __func__, host->states_count);
1386 		}
1387 		nat64lsn_destroy_host(host);
1388 	}
1389 	while (!CK_SLIST_EMPTY(&ji->portgroups)) {
1390 		pg = CK_SLIST_FIRST(&ji->portgroups);
1391 		CK_SLIST_REMOVE_HEAD(&ji->portgroups, entries);
1392 		for (i = 0; i < pg->chunks_count; i++) {
1393 			if (FREEMASK_BITCOUNT(pg, i) != 64) {
1394 				/*
1395 				 * XXX: The state has been created during
1396 				 * PG deletion.
1397 				 */
1398 				printf("NAT64LSN: %s: destroying PG %p "
1399 				    "with non-empty chunk %d\n", __func__,
1400 				    pg, i);
1401 			}
1402 		}
1403 		nat64lsn_destroy_pg(pg);
1404 	}
1405 	uma_zfree(nat64lsn_pgchunk_zone, ji->pgchunk);
1406 	uma_zfree(nat64lsn_job_zone, ji);
1407 }
1408 
1409 static int
1410 nat64lsn_request_host(struct nat64lsn_cfg *cfg,
1411     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1412     in_addr_t faddr, uint16_t port, uint8_t proto)
1413 {
1414 	struct nat64lsn_job_item *ji;
1415 
1416 	ji = nat64lsn_create_job(cfg, JTYPE_NEWHOST);
1417 	if (ji != NULL) {
1418 		ji->m = *mp;
1419 		ji->f_id = *f_id;
1420 		ji->faddr = faddr;
1421 		ji->port = port;
1422 		ji->proto = proto;
1423 		ji->src6_hval = hval;
1424 
1425 		nat64lsn_enqueue_job(cfg, ji);
1426 		NAT64STAT_INC(&cfg->base.stats, jhostsreq);
1427 		*mp = NULL;
1428 	}
1429 	return (IP_FW_DENY);
1430 }
1431 
1432 static int
1433 nat64lsn_request_pg(struct nat64lsn_cfg *cfg, struct nat64lsn_host *host,
1434     const struct ipfw_flow_id *f_id, struct mbuf **mp, uint32_t hval,
1435     in_addr_t faddr, uint16_t port, uint8_t proto)
1436 {
1437 	struct nat64lsn_job_item *ji;
1438 
1439 	ji = nat64lsn_create_job(cfg, JTYPE_NEWPORTGROUP);
1440 	if (ji != NULL) {
1441 		ji->m = *mp;
1442 		ji->f_id = *f_id;
1443 		ji->faddr = faddr;
1444 		ji->port = port;
1445 		ji->proto = proto;
1446 		ji->state_hval = hval;
1447 		ji->host = host;
1448 
1449 		nat64lsn_enqueue_job(cfg, ji);
1450 		NAT64STAT_INC(&cfg->base.stats, jportreq);
1451 		*mp = NULL;
1452 	}
1453 	return (IP_FW_DENY);
1454 }
1455 
1456 static int
1457 nat64lsn_translate6_internal(struct nat64lsn_cfg *cfg, struct mbuf **mp,
1458     struct nat64lsn_state *state, uint8_t flags)
1459 {
1460 	struct pfloghdr loghdr, *logdata;
1461 	int ret;
1462 	uint16_t ts;
1463 
1464 	/* Update timestamp and flags if needed */
1465 	SET_AGE(ts);
1466 	if (state->timestamp != ts)
1467 		state->timestamp = ts;
1468 	if ((state->flags & flags) != 0)
1469 		state->flags |= flags;
1470 
1471 	if (cfg->base.flags & NAT64_LOG) {
1472 		logdata = &loghdr;
1473 		nat64lsn_log(logdata, *mp, AF_INET6, state);
1474 	} else
1475 		logdata = NULL;
1476 
1477 	ret = nat64_do_handle_ip6(*mp, htonl(state->ip_src),
1478 	    htons(state->aport), &cfg->base, logdata);
1479 	if (ret == NAT64SKIP)
1480 		return (cfg->nomatch_verdict);
1481 	if (ret == NAT64RETURN)
1482 		*mp = NULL;
1483 	return (IP_FW_DENY);
1484 }
1485 
1486 static int
1487 nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
1488     struct mbuf **mp)
1489 {
1490 	struct nat64lsn_state *state;
1491 	struct nat64lsn_host *host;
1492 	struct icmp6_hdr *icmp6;
1493 	uint32_t addr, hval, data[2];
1494 	int offset, proto;
1495 	uint16_t port;
1496 	uint8_t flags;
1497 
1498 	/* Check if protocol is supported */
1499 	port = f_id->src_port;
1500 	proto = f_id->proto;
1501 	switch (f_id->proto) {
1502 	case IPPROTO_ICMPV6:
1503 		/*
1504 		 * For ICMPv6 echo reply/request we use icmp6_id as
1505 		 * local port.
1506 		 */
1507 		offset = 0;
1508 		proto = nat64_getlasthdr(*mp, &offset);
1509 		if (proto < 0) {
1510 			NAT64STAT_INC(&cfg->base.stats, dropped);
1511 			DPRINTF(DP_DROPS, "mbuf isn't contigious");
1512 			return (IP_FW_DENY);
1513 		}
1514 		if (proto == IPPROTO_ICMPV6) {
1515 			icmp6 = mtodo(*mp, offset);
1516 			if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
1517 			    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
1518 				port = ntohs(icmp6->icmp6_id);
1519 		}
1520 		proto = IPPROTO_ICMP;
1521 		/* FALLTHROUGH */
1522 	case IPPROTO_TCP:
1523 	case IPPROTO_UDP:
1524 		break;
1525 	default:
1526 		NAT64STAT_INC(&cfg->base.stats, noproto);
1527 		return (cfg->nomatch_verdict);
1528 	}
1529 
1530 	/* Extract IPv4 from destination IPv6 address */
1531 	addr = nat64_extract_ip4(&f_id->dst_ip6, cfg->base.plat_plen);
1532 	if (addr == 0 || nat64_check_private_ip4(&cfg->base, addr) != 0) {
1533 		char a[INET_ADDRSTRLEN];
1534 
1535 		NAT64STAT_INC(&cfg->base.stats, dropped);
1536 		DPRINTF(DP_DROPS, "dropped due to embedded IPv4 address %s",
1537 		    inet_ntop(AF_INET, &addr, a, sizeof(a)));
1538 		return (IP_FW_DENY); /* XXX: add extra stats? */
1539 	}
1540 
1541 	/* Try to find host */
1542 	hval = HOST_HVAL(cfg, &f_id->src_ip6);
1543 	CK_SLIST_FOREACH(host, &HOSTS(cfg, hval), entries) {
1544 		if (IN6_ARE_ADDR_EQUAL(&f_id->src_ip6, &host->addr))
1545 			break;
1546 	}
1547 	/* We use IPv4 address in host byte order */
1548 	addr = ntohl(addr);
1549 	if (host == NULL)
1550 		return (nat64lsn_request_host(cfg, f_id, mp,
1551 		    hval, addr, port, proto));
1552 
1553 	flags = proto != IPPROTO_TCP ? 0 : convert_tcp_flags(f_id->_flags);
1554 
1555 	data[0] = addr;
1556 	data[1] = (f_id->dst_port << 16) | port;
1557 	hval = STATE_HVAL(cfg, data);
1558 	state = nat64lsn_get_state6to4(cfg, host, f_id, hval, addr,
1559 	    port, proto);
1560 	if (state == NULL)
1561 		return (nat64lsn_request_pg(cfg, host, f_id, mp, hval, addr,
1562 		    port, proto));
1563 	return (nat64lsn_translate6_internal(cfg, mp, state, flags));
1564 }
1565 
1566 /*
1567  * Main dataplane entry point.
1568  */
1569 int
1570 ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
1571     ipfw_insn *cmd, int *done)
1572 {
1573 	struct nat64lsn_cfg *cfg;
1574 	ipfw_insn *icmd;
1575 	int ret;
1576 
1577 	IPFW_RLOCK_ASSERT(ch);
1578 
1579 	*done = 0;	/* continue the search in case of failure */
1580 	icmd = cmd + 1;
1581 	if (cmd->opcode != O_EXTERNAL_ACTION ||
1582 	    cmd->arg1 != V_nat64lsn_eid ||
1583 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
1584 	    (cfg = NAT64_LOOKUP(ch, icmd)) == NULL)
1585 		return (IP_FW_DENY);
1586 
1587 	*done = 1;	/* terminate the search */
1588 
1589 	switch (args->f_id.addr_type) {
1590 	case 4:
1591 		ret = nat64lsn_translate4(cfg, &args->f_id, &args->m);
1592 		break;
1593 	case 6:
1594 		/*
1595 		 * Check that destination IPv6 address matches our prefix6.
1596 		 */
1597 		if ((cfg->base.flags & NAT64LSN_ANYPREFIX) == 0 &&
1598 		    memcmp(&args->f_id.dst_ip6, &cfg->base.plat_prefix,
1599 		    cfg->base.plat_plen / 8) != 0) {
1600 			ret = cfg->nomatch_verdict;
1601 			break;
1602 		}
1603 		ret = nat64lsn_translate6(cfg, &args->f_id, &args->m);
1604 		break;
1605 	default:
1606 		ret = cfg->nomatch_verdict;
1607 	}
1608 
1609 	if (ret != IP_FW_PASS && args->m != NULL) {
1610 		m_freem(args->m);
1611 		args->m = NULL;
1612 	}
1613 	return (ret);
1614 }
1615 
1616 static int
1617 nat64lsn_state_ctor(void *mem, int size, void *arg, int flags)
1618 {
1619 	struct nat64lsn_states_chunk *chunk;
1620 	int i;
1621 
1622 	chunk = (struct nat64lsn_states_chunk *)mem;
1623 	for (i = 0; i < 64; i++)
1624 		chunk->state[i].flags = 0;
1625 	return (0);
1626 }
1627 
1628 void
1629 nat64lsn_init_internal(void)
1630 {
1631 
1632 	nat64lsn_host_zone = uma_zcreate("NAT64LSN hosts",
1633 	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
1634 	    UMA_ALIGN_PTR, 0);
1635 	nat64lsn_pgchunk_zone = uma_zcreate("NAT64LSN portgroup chunks",
1636 	    sizeof(struct nat64lsn_pgchunk), NULL, NULL, NULL, NULL,
1637 	    UMA_ALIGN_PTR, 0);
1638 	nat64lsn_pg_zone = uma_zcreate("NAT64LSN portgroups",
1639 	    sizeof(struct nat64lsn_pg), NULL, NULL, NULL, NULL,
1640 	    UMA_ALIGN_PTR, 0);
1641 	nat64lsn_aliaslink_zone = uma_zcreate("NAT64LSN links",
1642 	    sizeof(struct nat64lsn_aliaslink), NULL, NULL, NULL, NULL,
1643 	    UMA_ALIGN_PTR, 0);
1644 	nat64lsn_state_zone = uma_zcreate("NAT64LSN states",
1645 	    sizeof(struct nat64lsn_states_chunk), nat64lsn_state_ctor,
1646 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
1647 	nat64lsn_job_zone = uma_zcreate("NAT64LSN jobs",
1648 	    sizeof(struct nat64lsn_job_item), NULL, NULL, NULL, NULL,
1649 	    UMA_ALIGN_PTR, 0);
1650 	JQUEUE_LOCK_INIT();
1651 }
1652 
1653 void
1654 nat64lsn_uninit_internal(void)
1655 {
1656 
1657 	/* XXX: epoch_task drain */
1658 	JQUEUE_LOCK_DESTROY();
1659 	uma_zdestroy(nat64lsn_host_zone);
1660 	uma_zdestroy(nat64lsn_pgchunk_zone);
1661 	uma_zdestroy(nat64lsn_pg_zone);
1662 	uma_zdestroy(nat64lsn_aliaslink_zone);
1663 	uma_zdestroy(nat64lsn_state_zone);
1664 	uma_zdestroy(nat64lsn_job_zone);
1665 }
1666 
1667 void
1668 nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
1669 {
1670 
1671 	CALLOUT_LOCK(cfg);
1672 	callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
1673 	    nat64lsn_periodic, cfg);
1674 	CALLOUT_UNLOCK(cfg);
1675 }
1676 
1677 struct nat64lsn_cfg *
1678 nat64lsn_init_instance(struct ip_fw_chain *ch, in_addr_t prefix, int plen)
1679 {
1680 	struct nat64lsn_cfg *cfg;
1681 	struct nat64lsn_alias *alias;
1682 	int i, naddr;
1683 
1684 	cfg = malloc(sizeof(struct nat64lsn_cfg), M_NAT64LSN,
1685 	    M_WAITOK | M_ZERO);
1686 
1687 	CFG_LOCK_INIT(cfg);
1688 	CALLOUT_LOCK_INIT(cfg);
1689 	STAILQ_INIT(&cfg->jhead);
1690 	cfg->vp = curvnet;
1691 	COUNTER_ARRAY_ALLOC(cfg->base.stats.cnt, NAT64STATS, M_WAITOK);
1692 
1693 	cfg->hash_seed = arc4random();
1694 	cfg->hosts_hashsize = NAT64LSN_HOSTS_HSIZE;
1695 	cfg->hosts_hash = malloc(sizeof(struct nat64lsn_hosts_slist) *
1696 	    cfg->hosts_hashsize, M_NAT64LSN, M_WAITOK | M_ZERO);
1697 	for (i = 0; i < cfg->hosts_hashsize; i++)
1698 		CK_SLIST_INIT(&cfg->hosts_hash[i]);
1699 
1700 	naddr = 1 << (32 - plen);
1701 	cfg->prefix4 = prefix;
1702 	cfg->pmask4 = prefix | (naddr - 1);
1703 	cfg->plen4 = plen;
1704 	cfg->aliases = malloc(sizeof(struct nat64lsn_alias) * naddr,
1705 	    M_NAT64LSN, M_WAITOK | M_ZERO);
1706 	for (i = 0; i < naddr; i++) {
1707 		alias = &cfg->aliases[i];
1708 		alias->addr = prefix + i; /* host byte order */
1709 		CK_SLIST_INIT(&alias->hosts);
1710 		ALIAS_LOCK_INIT(alias);
1711 	}
1712 
1713 	callout_init_mtx(&cfg->periodic, &cfg->periodic_lock, 0);
1714 	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
1715 
1716 	return (cfg);
1717 }
1718 
1719 static void
1720 nat64lsn_destroy_pg(struct nat64lsn_pg *pg)
1721 {
1722 	int i;
1723 
1724 	if (pg->chunks_count == 1) {
1725 		uma_zfree(nat64lsn_state_zone, pg->states);
1726 	} else {
1727 		for (i = 0; i < pg->chunks_count; i++)
1728 			uma_zfree(nat64lsn_state_zone, pg->states_chunk[i]);
1729 		free(pg->states_chunk, M_NAT64LSN);
1730 		free(pg->freemask_chunk, M_NAT64LSN);
1731 	}
1732 	uma_zfree(nat64lsn_pg_zone, pg);
1733 }
1734 
1735 static void
1736 nat64lsn_destroy_alias(struct nat64lsn_cfg *cfg,
1737     struct nat64lsn_alias *alias)
1738 {
1739 	struct nat64lsn_pg *pg;
1740 	int i;
1741 
1742 	while (!CK_SLIST_EMPTY(&alias->portgroups)) {
1743 		pg = CK_SLIST_FIRST(&alias->portgroups);
1744 		CK_SLIST_REMOVE_HEAD(&alias->portgroups, entries);
1745 		nat64lsn_destroy_pg(pg);
1746 	}
1747 	for (i = 0; i < 32; i++) {
1748 		if (ISSET32(alias->tcp_chunkmask, i))
1749 			uma_zfree(nat64lsn_pgchunk_zone, alias->tcp[i]);
1750 		if (ISSET32(alias->udp_chunkmask, i))
1751 			uma_zfree(nat64lsn_pgchunk_zone, alias->udp[i]);
1752 		if (ISSET32(alias->icmp_chunkmask, i))
1753 			uma_zfree(nat64lsn_pgchunk_zone, alias->icmp[i]);
1754 	}
1755 	ALIAS_LOCK_DESTROY(alias);
1756 }
1757 
1758 static void
1759 nat64lsn_destroy_host(struct nat64lsn_host *host)
1760 {
1761 	struct nat64lsn_aliaslink *link;
1762 
1763 	while (!CK_SLIST_EMPTY(&host->aliases)) {
1764 		link = CK_SLIST_FIRST(&host->aliases);
1765 		CK_SLIST_REMOVE_HEAD(&host->aliases, host_entries);
1766 
1767 		ALIAS_LOCK(link->alias);
1768 		CK_SLIST_REMOVE(&link->alias->hosts, link,
1769 		    nat64lsn_aliaslink, alias_entries);
1770 		link->alias->hosts_count--;
1771 		ALIAS_UNLOCK(link->alias);
1772 
1773 		uma_zfree(nat64lsn_aliaslink_zone, link);
1774 	}
1775 	HOST_LOCK_DESTROY(host);
1776 	free(host->states_hash, M_NAT64LSN);
1777 	uma_zfree(nat64lsn_host_zone, host);
1778 }
1779 
1780 void
1781 nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
1782 {
1783 	struct nat64lsn_host *host;
1784 	int i;
1785 
1786 	CALLOUT_LOCK(cfg);
1787 	callout_drain(&cfg->periodic);
1788 	CALLOUT_UNLOCK(cfg);
1789 	callout_drain(&cfg->jcallout);
1790 
1791 	for (i = 0; i < cfg->hosts_hashsize; i++) {
1792 		while (!CK_SLIST_EMPTY(&cfg->hosts_hash[i])) {
1793 			host = CK_SLIST_FIRST(&cfg->hosts_hash[i]);
1794 			CK_SLIST_REMOVE_HEAD(&cfg->hosts_hash[i], entries);
1795 			nat64lsn_destroy_host(host);
1796 		}
1797 	}
1798 
1799 	for (i = 0; i < (1 << (32 - cfg->plen4)); i++)
1800 		nat64lsn_destroy_alias(cfg, &cfg->aliases[i]);
1801 
1802 	CALLOUT_LOCK_DESTROY(cfg);
1803 	CFG_LOCK_DESTROY(cfg);
1804 	COUNTER_ARRAY_FREE(cfg->base.stats.cnt, NAT64STATS);
1805 	free(cfg->hosts_hash, M_NAT64LSN);
1806 	free(cfg->aliases, M_NAT64LSN);
1807 	free(cfg, M_NAT64LSN);
1808 }
1809