xref: /freebsd/sys/netpfil/ipfw/nptv6/nptv6.c (revision 9768746b)
1 /*-
2  * Copyright (c) 2016 Yandex LLC
3  * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/counter.h>
34 #include <sys/eventhandler.h>
35 #include <sys/errno.h>
36 #include <sys/kernel.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rmlock.h>
42 #include <sys/rwlock.h>
43 #include <sys/socket.h>
44 #include <sys/queue.h>
45 #include <sys/syslog.h>
46 #include <sys/sysctl.h>
47 
48 #include <net/if.h>
49 #include <net/if_var.h>
50 #include <net/if_private.h>
51 #include <net/netisr.h>
52 #include <net/pfil.h>
53 #include <net/vnet.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/ip_fw.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <netinet6/in6_var.h>
61 #include <netinet6/ip6_var.h>
62 
63 #include <netpfil/ipfw/ip_fw_private.h>
64 #include <netpfil/ipfw/nptv6/nptv6.h>
65 
/* Per-VNET external action id; set by nptv6_init(), zeroed by nptv6_uninit(). */
VNET_DEFINE_STATIC(uint16_t, nptv6_eid) = 0;
#define	V_nptv6_eid	VNET(nptv6_eid)
/* Named-object TLV type under which NPTv6 instances are stored and looked up. */
#define	IPFW_TLV_NPTV6_NAME	IPFW_TLV_EACTION_NAME(V_nptv6_eid)

/* Tag of the ifaddr_event_ext handler; registered lazily by nptv6_create(). */
static eventhandler_tag nptv6_ifaddr_event;

/* Forward declarations for functions used before their definition. */
static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set);
static void nptv6_free_config(struct nptv6_cfg *cfg);
static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni,
    const char *name, uint8_t set);
static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp,
    int offset);
static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp,
    int offset);

/* Fetch the instance whose kernel index is stored in cmd->arg1. */
#define	NPTV6_LOOKUP(chain, cmd)	\
    (struct nptv6_cfg *)SRV_OBJECT((chain), (cmd)->arg1)

/* a &= m, applied to all four 32-bit words of an IPv6 address. */
#ifndef IN6_MASK_ADDR
#define IN6_MASK_ADDR(a, m)	do { \
	(a)->s6_addr32[0] &= (m)->s6_addr32[0]; \
	(a)->s6_addr32[1] &= (m)->s6_addr32[1]; \
	(a)->s6_addr32[2] &= (m)->s6_addr32[2]; \
	(a)->s6_addr32[3] &= (m)->s6_addr32[3]; \
} while (0)
#endif
/* Non-zero when d and a are equal in every bit that is set in m. */
#ifndef IN6_ARE_MASKED_ADDR_EQUAL
#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m)	(	\
	(((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \
	(((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \
	(((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \
	(((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 )
#endif

/*
 * Debug output is compiled out by default: with "#if 0" both macros
 * expand to nothing.  NPTV6_IPDEBUG additionally declares the _s and _d
 * scratch buffers that callers pass to inet_ntop() in the arguments.
 */
#if 0
#define	NPTV6_DEBUG(fmt, ...)	do {			\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
#define	NPTV6_IPDEBUG(fmt, ...)	do {			\
	char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN];	\
	printf("%s: " fmt "\n", __func__, ## __VA_ARGS__);	\
} while (0)
#else
#define	NPTV6_DEBUG(fmt, ...)
#define	NPTV6_IPDEBUG(fmt, ...)
#endif
112 
113 static int
114 nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset)
115 {
116 	struct ip6_hdr *ip6;
117 	struct ip6_hbh *hbh;
118 	int proto, hlen;
119 
120 	hlen = (offset == NULL) ? 0: *offset;
121 	if (m->m_len < hlen)
122 		return (-1);
123 	ip6 = mtodo(m, hlen);
124 	hlen += sizeof(*ip6);
125 	proto = ip6->ip6_nxt;
126 	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
127 	    proto == IPPROTO_DSTOPTS) {
128 		hbh = mtodo(m, hlen);
129 		if (m->m_len < hlen)
130 			return (-1);
131 		proto = hbh->ip6h_nxt;
132 		hlen += (hbh->ip6h_len + 1) << 3;
133 	}
134 	if (offset != NULL)
135 		*offset = hlen;
136 	return (proto);
137 }
138 
139 static int
140 nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
141 {
142 	struct icmp6_hdr *icmp6;
143 	struct ip6_hdr *ip6;
144 	struct mbuf *m;
145 
146 	m = *mp;
147 	if (offset > m->m_len)
148 		return (-1);
149 	icmp6 = mtodo(m, offset);
150 	NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type);
151 	switch (icmp6->icmp6_type) {
152 	case ICMP6_DST_UNREACH:
153 	case ICMP6_PACKET_TOO_BIG:
154 	case ICMP6_TIME_EXCEEDED:
155 	case ICMP6_PARAM_PROB:
156 		break;
157 	case ICMP6_ECHO_REQUEST:
158 	case ICMP6_ECHO_REPLY:
159 		/* nothing to translate */
160 		return (0);
161 	default:
162 		/*
163 		 * XXX: We can add some checks to not translate NDP and MLD
164 		 * messages. Currently user must explicitly allow these message
165 		 * types, otherwise packets will be dropped.
166 		 */
167 		return (-1);
168 	}
169 	offset += sizeof(*icmp6);
170 	if (offset + sizeof(*ip6) > m->m_pkthdr.len)
171 		return (-1);
172 	if (offset + sizeof(*ip6) > m->m_len)
173 		*mp = m = m_pullup(m, offset + sizeof(*ip6));
174 	if (m == NULL)
175 		return (-1);
176 	ip6 = mtodo(m, offset);
177 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
178 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
179 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
180 	    ip6->ip6_nxt);
181 	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
182 	    &cfg->external, &cfg->mask))
183 		return (nptv6_rewrite_external(cfg, mp, offset));
184 	else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
185 	    &cfg->internal, &cfg->mask))
186 		return (nptv6_rewrite_internal(cfg, mp, offset));
187 	/*
188 	 * Addresses in the inner IPv6 header doesn't matched to
189 	 * our prefixes.
190 	 */
191 	return (-1);
192 }
193 
194 static int
195 nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a)
196 {
197 	int idx;
198 
199 	if (cfg->flags & NPTV6_48PLEN)
200 		return (3);
201 
202 	/* Search suitable word index for adjustment */
203 	for (idx = 4; idx < 8; idx++)
204 		if (a->s6_addr16[idx] != 0xffff)
205 			break;
206 	/*
207 	 * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with
208 	 * an IID of all-zeros while performing address mapping, that
209 	 * datagram MUST be dropped, and an ICMPv6 Parameter Problem error
210 	 * SHOULD be generated.
211 	 */
212 	if (idx == 8 ||
213 	    (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0))
214 		return (-1);
215 	return (idx);
216 }
217 
218 static void
219 nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst,
220     struct in6_addr *mask)
221 {
222 	int i;
223 
224 	for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) {
225 		dst->s6_addr8[i] &=  ~mask->s6_addr8[i];
226 		dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i];
227 	}
228 }
229 
230 static int
231 nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
232 {
233 	struct in6_addr *addr;
234 	struct ip6_hdr *ip6;
235 	int idx, proto;
236 	uint16_t adj;
237 
238 	ip6 = mtodo(*mp, offset);
239 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
240 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
241 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
242 	    ip6->ip6_nxt);
243 	if (offset == 0)
244 		addr = &ip6->ip6_src;
245 	else {
246 		/*
247 		 * When we rewriting inner IPv6 header, we need to rewrite
248 		 * destination address back to external prefix. The datagram in
249 		 * the ICMPv6 payload should looks like it was send from
250 		 * external prefix.
251 		 */
252 		addr = &ip6->ip6_dst;
253 	}
254 	idx = nptv6_search_index(cfg, addr);
255 	if (idx < 0) {
256 		/*
257 		 * Do not send ICMPv6 error when offset isn't zero.
258 		 * This means we are rewriting inner IPv6 header in the
259 		 * ICMPv6 error message.
260 		 */
261 		if (offset == 0) {
262 			icmp6_error2(*mp, ICMP6_DST_UNREACH,
263 			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
264 			*mp = NULL;
265 		}
266 		return (IP_FW_DENY);
267 	}
268 	adj = addr->s6_addr16[idx];
269 	nptv6_copy_addr(&cfg->external, addr, &cfg->mask);
270 	adj = cksum_add(adj, cfg->adjustment);
271 	if (adj == 0xffff)
272 		adj = 0;
273 	addr->s6_addr16[idx] = adj;
274 	if (offset == 0) {
275 		/*
276 		 * We may need to translate addresses in the inner IPv6
277 		 * header for ICMPv6 error messages.
278 		 */
279 		proto = nptv6_getlasthdr(cfg, *mp, &offset);
280 		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
281 		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
282 			return (IP_FW_DENY);
283 		NPTV6STAT_INC(cfg, in2ex);
284 	}
285 	return (0);
286 }
287 
288 static int
289 nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset)
290 {
291 	struct in6_addr *addr;
292 	struct ip6_hdr *ip6;
293 	int idx, proto;
294 	uint16_t adj;
295 
296 	ip6 = mtodo(*mp, offset);
297 	NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset,
298 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
299 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
300 	    ip6->ip6_nxt);
301 	if (offset == 0)
302 		addr = &ip6->ip6_dst;
303 	else {
304 		/*
305 		 * When we rewriting inner IPv6 header, we need to rewrite
306 		 * source address back to internal prefix. The datagram in
307 		 * the ICMPv6 payload should looks like it was send from
308 		 * internal prefix.
309 		 */
310 		addr = &ip6->ip6_src;
311 	}
312 	idx = nptv6_search_index(cfg, addr);
313 	if (idx < 0) {
314 		/*
315 		 * Do not send ICMPv6 error when offset isn't zero.
316 		 * This means we are rewriting inner IPv6 header in the
317 		 * ICMPv6 error message.
318 		 */
319 		if (offset == 0) {
320 			icmp6_error2(*mp, ICMP6_DST_UNREACH,
321 			    ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif);
322 			*mp = NULL;
323 		}
324 		return (IP_FW_DENY);
325 	}
326 	adj = addr->s6_addr16[idx];
327 	nptv6_copy_addr(&cfg->internal, addr, &cfg->mask);
328 	adj = cksum_add(adj, ~cfg->adjustment);
329 	if (adj == 0xffff)
330 		adj = 0;
331 	addr->s6_addr16[idx] = adj;
332 	if (offset == 0) {
333 		/*
334 		 * We may need to translate addresses in the inner IPv6
335 		 * header for ICMPv6 error messages.
336 		 */
337 		proto = nptv6_getlasthdr(cfg, *mp, &offset);
338 		if (proto < 0 || (proto == IPPROTO_ICMPV6 &&
339 		    nptv6_translate_icmpv6(cfg, mp, offset) != 0))
340 			return (IP_FW_DENY);
341 		NPTV6STAT_INC(cfg, ex2in);
342 	}
343 	return (0);
344 }
345 
346 /*
347  * ipfw external action handler.
348  */
349 static int
350 ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args,
351     ipfw_insn *cmd, int *done)
352 {
353 	struct ip6_hdr *ip6;
354 	struct nptv6_cfg *cfg;
355 	ipfw_insn *icmd;
356 	int ret;
357 
358 	*done = 0; /* try next rule if not matched */
359 	ret = IP_FW_DENY;
360 	icmd = cmd + 1;
361 	if (cmd->opcode != O_EXTERNAL_ACTION ||
362 	    cmd->arg1 != V_nptv6_eid ||
363 	    icmd->opcode != O_EXTERNAL_INSTANCE ||
364 	    (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL ||
365 	    (cfg->flags & NPTV6_READY) == 0)
366 		return (ret);
367 	/*
368 	 * We need act as router, so when forwarding is disabled -
369 	 * do nothing.
370 	 */
371 	if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6)
372 		return (ret);
373 	/*
374 	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
375 	 * protocol's headers. Also we skip some checks, that ip6_input(),
376 	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
377 	 */
378 	ip6 = mtod(args->m, struct ip6_hdr *);
379 	NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d",
380 	    cmd->arg1, icmd->arg1,
381 	    inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)),
382 	    inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)),
383 	    ip6->ip6_nxt);
384 	if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src,
385 	    &cfg->internal, &cfg->mask)) {
386 		/*
387 		 * XXX: Do not translate packets when both src and dst
388 		 * are from internal prefix.
389 		 */
390 		if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
391 		    &cfg->internal, &cfg->mask))
392 			return (ret);
393 		ret = nptv6_rewrite_internal(cfg, &args->m, 0);
394 	} else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst,
395 	    &cfg->external, &cfg->mask))
396 		ret = nptv6_rewrite_external(cfg, &args->m, 0);
397 	else
398 		return (ret);
399 	/*
400 	 * If address wasn't rewrited - free mbuf and terminate the search.
401 	 */
402 	if (ret != 0) {
403 		if (args->m != NULL) {
404 			m_freem(args->m);
405 			args->m = NULL; /* mark mbuf as consumed */
406 		}
407 		NPTV6STAT_INC(cfg, dropped);
408 		*done = 1;
409 	} else {
410 		/* Terminate the search if one_pass is set */
411 		*done = V_fw_one_pass;
412 		/* Update args->f_id when one_pass is off */
413 		if (*done == 0) {
414 			ip6 = mtod(args->m, struct ip6_hdr *);
415 			args->f_id.src_ip6 = ip6->ip6_src;
416 			args->f_id.dst_ip6 = ip6->ip6_dst;
417 		}
418 	}
419 	return (ret);
420 }
421 
422 static struct nptv6_cfg *
423 nptv6_alloc_config(const char *name, uint8_t set)
424 {
425 	struct nptv6_cfg *cfg;
426 
427 	cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO);
428 	COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK);
429 	cfg->no.name = cfg->name;
430 	cfg->no.etlv = IPFW_TLV_NPTV6_NAME;
431 	cfg->no.set = set;
432 	strlcpy(cfg->name, name, sizeof(cfg->name));
433 	return (cfg);
434 }
435 
436 static void
437 nptv6_free_config(struct nptv6_cfg *cfg)
438 {
439 
440 	COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS);
441 	free(cfg, M_IPFW);
442 }
443 
444 static void
445 nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
446     ipfw_nptv6_cfg *uc)
447 {
448 
449 	uc->internal = cfg->internal;
450 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
451 		memcpy(uc->if_name, cfg->if_name, IF_NAMESIZE);
452 	else
453 		uc->external = cfg->external;
454 	uc->plen = cfg->plen;
455 	uc->flags = cfg->flags & NPTV6_FLAGSMASK;
456 	uc->set = cfg->no.set;
457 	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
458 }
459 
/* Context handed to export_config_cb() while listing instances. */
struct nptv6_dump_arg {
	struct ip_fw_chain *ch;		/* chain being listed */
	struct sockopt_data *sd;	/* reply buffer the entries go into */
};
464 
465 static int
466 export_config_cb(struct namedobj_instance *ni, struct named_object *no,
467     void *arg)
468 {
469 	struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg;
470 	ipfw_nptv6_cfg *uc;
471 
472 	uc = (ipfw_nptv6_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc));
473 	nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc);
474 	return (0);
475 }
476 
477 static struct nptv6_cfg *
478 nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set)
479 {
480 	struct nptv6_cfg *cfg;
481 
482 	cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set,
483 	    IPFW_TLV_NPTV6_NAME, name);
484 
485 	return (cfg);
486 }
487 
488 static void
489 nptv6_calculate_adjustment(struct nptv6_cfg *cfg)
490 {
491 	uint16_t i, e;
492 	uint16_t *p;
493 
494 	/* Calculate checksum of internal prefix */
495 	for (i = 0, p = (uint16_t *)&cfg->internal;
496 	    p < (uint16_t *)(&cfg->internal + 1); p++)
497 		i = cksum_add(i, *p);
498 
499 	/* Calculate checksum of external prefix */
500 	for (e = 0, p = (uint16_t *)&cfg->external;
501 	    p < (uint16_t *)(&cfg->external + 1); p++)
502 		e = cksum_add(e, *p);
503 
504 	/* Adjustment value for Int->Ext direction */
505 	cfg->adjustment = cksum_add(~e, i);
506 }
507 
/*
 * Tell whether addr may be used as a translation prefix.
 *
 * Returns EINVAL for unspecified, loopback, link-local and multicast
 * addresses and 0 for everything else.
 */
static int
nptv6_check_prefix(const struct in6_addr *addr)
{

	if (IN6_IS_ADDR_UNSPECIFIED(addr))
		return (EINVAL);
	if (IN6_IS_ADDR_LOOPBACK(addr))
		return (EINVAL);
	if (IN6_IS_ADDR_LINKLOCAL(addr))
		return (EINVAL);
	if (IN6_IS_ADDR_MULTICAST(addr))
		return (EINVAL);
	return (0);
}
519 
520 static void
521 nptv6_set_external(struct nptv6_cfg *cfg, struct in6_addr *addr)
522 {
523 
524 	cfg->external = *addr;
525 	IN6_MASK_ADDR(&cfg->external, &cfg->mask);
526 	nptv6_calculate_adjustment(cfg);
527 	cfg->flags |= NPTV6_READY;
528 }
529 
530 /*
531  * Try to determine what prefix to use as external for
532  * configured interface name.
533  */
534 static void
535 nptv6_find_prefix(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
536     struct ifnet *ifp)
537 {
538 	struct epoch_tracker et;
539 	struct ifaddr *ifa;
540 	struct in6_ifaddr *ia;
541 
542 	MPASS(cfg->flags & NPTV6_DYNAMIC_PREFIX);
543 	IPFW_UH_WLOCK_ASSERT(ch);
544 
545 	if (ifp == NULL) {
546 		ifp = ifunit_ref(cfg->if_name);
547 		if (ifp == NULL)
548 			return;
549 	}
550 	NET_EPOCH_ENTER(et);
551 	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
552 		if (ifa->ifa_addr->sa_family != AF_INET6)
553 			continue;
554 		ia = (struct in6_ifaddr *)ifa;
555 		if (nptv6_check_prefix(&ia->ia_addr.sin6_addr) ||
556 		    IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
557 		    &cfg->internal, &cfg->mask))
558 			continue;
559 		/* Suitable address is found. */
560 		nptv6_set_external(cfg, &ia->ia_addr.sin6_addr);
561 		break;
562 	}
563 	NET_EPOCH_EXIT(et);
564 	if_rele(ifp);
565 }
566 
/* Description of one interface address event, passed to ifaddr_cb(). */
struct ifaddr_event_args {
	struct ifnet *ifp;		/* interface the event is about */
	const struct in6_addr *addr;	/* IPv6 address added or removed */
	int event;			/* IFADDR_EVENT_DEL, otherwise add */
};
572 
573 static int
574 ifaddr_cb(struct namedobj_instance *ni, struct named_object *no,
575     void *arg)
576 {
577 	struct ifaddr_event_args *args;
578 	struct ip_fw_chain *ch;
579 	struct nptv6_cfg *cfg;
580 
581 	ch = &V_layer3_chain;
582 	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
583 	if ((cfg->flags & NPTV6_DYNAMIC_PREFIX) == 0)
584 		return (0);
585 
586 	args = arg;
587 	/* If interface name doesn't match, ignore */
588 	if (strncmp(args->ifp->if_xname, cfg->if_name, IF_NAMESIZE))
589 		return (0);
590 	if (args->ifp->if_flags & IFF_DYING) { /* XXX: is it possible? */
591 		cfg->flags &= ~NPTV6_READY;
592 		return (0);
593 	}
594 	if (args->event == IFADDR_EVENT_DEL) {
595 		/* If instance is not ready, ignore */
596 		if ((cfg->flags & NPTV6_READY) == 0)
597 			return (0);
598 		/* If address does not match the external prefix, ignore */
599 		if (IN6_ARE_MASKED_ADDR_EQUAL(&cfg->external, args->addr,
600 		    &cfg->mask) != 0)
601 			return (0);
602 		/* Otherwise clear READY flag */
603 		cfg->flags &= ~NPTV6_READY;
604 	} else {/* IFADDR_EVENT_ADD */
605 		/* If instance is already ready, ignore */
606 		if (cfg->flags & NPTV6_READY)
607 			return (0);
608 		/* If address is not suitable for prefix, ignore */
609 		if (nptv6_check_prefix(args->addr) ||
610 		    IN6_ARE_MASKED_ADDR_EQUAL(args->addr, &cfg->internal,
611 		    &cfg->mask))
612 			return (0);
613 		/* FALLTHROUGH */
614 	}
615 	MPASS(!(cfg->flags & NPTV6_READY));
616 	/* Try to determine the prefix */
617 	if_ref(args->ifp);
618 	nptv6_find_prefix(ch, cfg, args->ifp);
619 	return (0);
620 }
621 
622 static void
623 nptv6_ifaddrevent_handler(void *arg __unused, struct ifnet *ifp,
624     struct ifaddr *ifa, int event)
625 {
626 	struct ifaddr_event_args args;
627 	struct ip_fw_chain *ch;
628 
629 	if (ifa->ifa_addr->sa_family != AF_INET6)
630 		return;
631 
632 	args.ifp = ifp;
633 	args.addr = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
634 	args.event = event;
635 
636 	ch = &V_layer3_chain;
637 	IPFW_UH_WLOCK(ch);
638 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), ifaddr_cb, &args,
639 	    IPFW_TLV_NPTV6_NAME);
640 	IPFW_UH_WUNLOCK(ch);
641 }
642 
643 /*
644  * Creates new NPTv6 instance.
645  * Data layout (v0)(current):
646  * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ]
647  *
648  * Returns 0 on success
649  */
650 static int
651 nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
652     struct sockopt_data *sd)
653 {
654 	struct in6_addr mask;
655 	ipfw_obj_lheader *olh;
656 	ipfw_nptv6_cfg *uc;
657 	struct namedobj_instance *ni;
658 	struct nptv6_cfg *cfg;
659 
660 	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
661 		return (EINVAL);
662 
663 	olh = (ipfw_obj_lheader *)sd->kbuf;
664 	uc = (ipfw_nptv6_cfg *)(olh + 1);
665 	if (ipfw_check_object_name_generic(uc->name) != 0)
666 		return (EINVAL);
667 	if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS)
668 		return (EINVAL);
669 	if (nptv6_check_prefix(&uc->internal))
670 		return (EINVAL);
671 	in6_prefixlen2mask(&mask, uc->plen);
672 	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) == 0 && (
673 	    nptv6_check_prefix(&uc->external) ||
674 	    IN6_ARE_MASKED_ADDR_EQUAL(&uc->external, &uc->internal, &mask)))
675 		return (EINVAL);
676 
677 	ni = CHAIN_TO_SRV(ch);
678 	IPFW_UH_RLOCK(ch);
679 	if (nptv6_find(ni, uc->name, uc->set) != NULL) {
680 		IPFW_UH_RUNLOCK(ch);
681 		return (EEXIST);
682 	}
683 	IPFW_UH_RUNLOCK(ch);
684 
685 	cfg = nptv6_alloc_config(uc->name, uc->set);
686 	cfg->plen = uc->plen;
687 	cfg->flags = uc->flags & NPTV6_FLAGSMASK;
688 	if (cfg->plen <= 48)
689 		cfg->flags |= NPTV6_48PLEN;
690 	cfg->mask = mask;
691 	cfg->internal = uc->internal;
692 	IN6_MASK_ADDR(&cfg->internal, &mask);
693 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
694 		memcpy(cfg->if_name, uc->if_name, IF_NAMESIZE);
695 	else
696 		nptv6_set_external(cfg, &uc->external);
697 
698 	if ((uc->flags & NPTV6_DYNAMIC_PREFIX) != 0 &&
699 	    nptv6_ifaddr_event == NULL)
700 		nptv6_ifaddr_event = EVENTHANDLER_REGISTER(
701 		    ifaddr_event_ext, nptv6_ifaddrevent_handler, NULL,
702 		    EVENTHANDLER_PRI_ANY);
703 
704 	IPFW_UH_WLOCK(ch);
705 	if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) {
706 		IPFW_UH_WUNLOCK(ch);
707 		nptv6_free_config(cfg);
708 		return (ENOSPC);
709 	}
710 	ipfw_objhash_add(ni, &cfg->no);
711 	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
712 	if (cfg->flags & NPTV6_DYNAMIC_PREFIX)
713 		nptv6_find_prefix(ch, cfg, NULL);
714 	IPFW_UH_WUNLOCK(ch);
715 
716 	return (0);
717 }
718 
719 /*
720  * Destroys NPTv6 instance.
721  * Data layout (v0)(current):
722  * Request: [ ipfw_obj_header ]
723  *
724  * Returns 0 on success
725  */
726 static int
727 nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
728     struct sockopt_data *sd)
729 {
730 	ipfw_obj_header *oh;
731 	struct nptv6_cfg *cfg;
732 
733 	if (sd->valsize != sizeof(*oh))
734 		return (EINVAL);
735 
736 	oh = (ipfw_obj_header *)sd->kbuf;
737 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0)
738 		return (EINVAL);
739 
740 	IPFW_UH_WLOCK(ch);
741 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
742 	if (cfg == NULL) {
743 		IPFW_UH_WUNLOCK(ch);
744 		return (ESRCH);
745 	}
746 	if (cfg->no.refcnt > 0) {
747 		IPFW_UH_WUNLOCK(ch);
748 		return (EBUSY);
749 	}
750 
751 	ipfw_reset_eaction_instance(ch, V_nptv6_eid, cfg->no.kidx);
752 	SRV_OBJECT(ch, cfg->no.kidx) = NULL;
753 	ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
754 	ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
755 	IPFW_UH_WUNLOCK(ch);
756 
757 	nptv6_free_config(cfg);
758 	return (0);
759 }
760 
761 /*
762  * Get or change nptv6 instance config.
763  * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ]
764  */
765 static int
766 nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
767     struct sockopt_data *sd)
768 {
769 
770 	return (EOPNOTSUPP);
771 }
772 
773 /*
774  * Lists all NPTv6 instances currently available in kernel.
775  * Data layout (v0)(current):
776  * Request: [ ipfw_obj_lheader ]
777  * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ]
778  *
779  * Returns 0 on success
780  */
781 static int
782 nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
783     struct sockopt_data *sd)
784 {
785 	ipfw_obj_lheader *olh;
786 	struct nptv6_dump_arg da;
787 
788 	/* Check minimum header size */
789 	if (sd->valsize < sizeof(ipfw_obj_lheader))
790 		return (EINVAL);
791 
792 	olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
793 
794 	IPFW_UH_RLOCK(ch);
795 	olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
796 	    IPFW_TLV_NPTV6_NAME);
797 	olh->objsize = sizeof(ipfw_nptv6_cfg);
798 	olh->size = sizeof(*olh) + olh->count * olh->objsize;
799 
800 	if (sd->valsize < olh->size) {
801 		IPFW_UH_RUNLOCK(ch);
802 		return (ENOMEM);
803 	}
804 	memset(&da, 0, sizeof(da));
805 	da.ch = ch;
806 	da.sd = sd;
807 	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
808 	    &da, IPFW_TLV_NPTV6_NAME);
809 	IPFW_UH_RUNLOCK(ch);
810 
811 	return (0);
812 }
813 
814 #define	__COPY_STAT_FIELD(_cfg, _stats, _field)	\
815 	(_stats)->_field = NPTV6STAT_FETCH(_cfg, _field)
816 static void
817 export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg,
818     struct ipfw_nptv6_stats *stats)
819 {
820 
821 	__COPY_STAT_FIELD(cfg, stats, in2ex);
822 	__COPY_STAT_FIELD(cfg, stats, ex2in);
823 	__COPY_STAT_FIELD(cfg, stats, dropped);
824 }
825 
826 /*
827  * Get NPTv6 statistics.
828  * Data layout (v0)(current):
829  * Request: [ ipfw_obj_header ]
830  * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
831  *
832  * Returns 0 on success
833  */
834 static int
835 nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
836     struct sockopt_data *sd)
837 {
838 	struct ipfw_nptv6_stats stats;
839 	struct nptv6_cfg *cfg;
840 	ipfw_obj_header *oh;
841 	ipfw_obj_ctlv *ctlv;
842 	size_t sz;
843 
844 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
845 	if (sd->valsize % sizeof(uint64_t))
846 		return (EINVAL);
847 	if (sd->valsize < sz)
848 		return (ENOMEM);
849 	oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
850 	if (oh == NULL)
851 		return (EINVAL);
852 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
853 	    oh->ntlv.set >= IPFW_MAX_SETS)
854 		return (EINVAL);
855 	memset(&stats, 0, sizeof(stats));
856 
857 	IPFW_UH_RLOCK(ch);
858 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
859 	if (cfg == NULL) {
860 		IPFW_UH_RUNLOCK(ch);
861 		return (ESRCH);
862 	}
863 	export_stats(ch, cfg, &stats);
864 	IPFW_UH_RUNLOCK(ch);
865 
866 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
867 	memset(ctlv, 0, sizeof(*ctlv));
868 	ctlv->head.type = IPFW_TLV_COUNTERS;
869 	ctlv->head.length = sz - sizeof(ipfw_obj_header);
870 	ctlv->count = sizeof(stats) / sizeof(uint64_t);
871 	ctlv->objsize = sizeof(uint64_t);
872 	ctlv->version = 1;
873 	memcpy(ctlv + 1, &stats, sizeof(stats));
874 	return (0);
875 }
876 
877 /*
878  * Reset NPTv6 statistics.
879  * Data layout (v0)(current):
880  * Request: [ ipfw_obj_header ]
881  *
882  * Returns 0 on success
883  */
884 static int
885 nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
886     struct sockopt_data *sd)
887 {
888 	struct nptv6_cfg *cfg;
889 	ipfw_obj_header *oh;
890 
891 	if (sd->valsize != sizeof(*oh))
892 		return (EINVAL);
893 	oh = (ipfw_obj_header *)sd->kbuf;
894 	if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 ||
895 	    oh->ntlv.set >= IPFW_MAX_SETS)
896 		return (EINVAL);
897 
898 	IPFW_UH_WLOCK(ch);
899 	cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
900 	if (cfg == NULL) {
901 		IPFW_UH_WUNLOCK(ch);
902 		return (ESRCH);
903 	}
904 	COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS);
905 	IPFW_UH_WUNLOCK(ch);
906 	return (0);
907 }
908 
/*
 * Socket option handlers of this module.  The table is registered by
 * nptv6_init() and removed by nptv6_uninit().
 */
static struct ipfw_sopt_handler	scodes[] = {
	{ IP_FW_NPTV6_CREATE, 0,	HDIR_SET,	nptv6_create },
	{ IP_FW_NPTV6_DESTROY,0,	HDIR_SET,	nptv6_destroy },
	{ IP_FW_NPTV6_CONFIG, 0,	HDIR_BOTH,	nptv6_config },
	{ IP_FW_NPTV6_LIST,   0,	HDIR_GET,	nptv6_list },
	{ IP_FW_NPTV6_STATS,  0,	HDIR_GET,	nptv6_stats },
	{ IP_FW_NPTV6_RESET_STATS,0,	HDIR_SET,	nptv6_reset_stats },
};
917 
918 static int
919 nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
920 {
921 	ipfw_insn *icmd;
922 
923 	icmd = cmd - 1;
924 	NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d",
925 	    cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1);
926 	if (icmd->opcode != O_EXTERNAL_ACTION ||
927 	    icmd->arg1 != V_nptv6_eid)
928 		return (1);
929 
930 	*puidx = cmd->arg1;
931 	*ptype = 0;
932 	return (0);
933 }
934 
935 static void
936 nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx)
937 {
938 
939 	cmd->arg1 = idx;
940 	NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1);
941 }
942 
943 static int
944 nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
945     struct named_object **pno)
946 {
947 	int err;
948 
949 	err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
950 	    IPFW_TLV_NPTV6_NAME, pno);
951 	NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err);
952 	return (err);
953 }
954 
955 static struct named_object *
956 nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
957 {
958 	struct namedobj_instance *ni;
959 	struct named_object *no;
960 
961 	IPFW_UH_WLOCK_ASSERT(ch);
962 	ni = CHAIN_TO_SRV(ch);
963 	no = ipfw_objhash_lookup_kidx(ni, idx);
964 	KASSERT(no != NULL, ("NPT with index %d not found", idx));
965 
966 	NPTV6_DEBUG("kidx %u -> %s", idx, no->name);
967 	return (no);
968 }
969 
970 static int
971 nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
972     enum ipfw_sets_cmd cmd)
973 {
974 
975 	return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME,
976 	    set, new_set, cmd));
977 }
978 
/*
 * Object rewriter for O_EXTERNAL_INSTANCE opcodes that refer to NPTv6
 * instances.  The table is registered by nptv6_init() and removed by
 * nptv6_uninit().
 */
static struct opcode_obj_rewrite opcodes[] = {
	{
		.opcode	= O_EXTERNAL_INSTANCE,
		.etlv = IPFW_TLV_EACTION /* just show it isn't table */,
		.classifier = nptv6_classify,
		.update = nptv6_update_arg1,
		.find_byname = nptv6_findbyname,
		.find_bykidx = nptv6_findbykidx,
		.manage_sets = nptv6_manage_sets,
	},
};
990 
991 static int
992 destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
993     void *arg)
994 {
995 	struct nptv6_cfg *cfg;
996 	struct ip_fw_chain *ch;
997 
998 	ch = (struct ip_fw_chain *)arg;
999 	IPFW_UH_WLOCK_ASSERT(ch);
1000 
1001 	cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx);
1002 	SRV_OBJECT(ch, no->kidx) = NULL;
1003 	ipfw_objhash_del(ni, &cfg->no);
1004 	ipfw_objhash_free_idx(ni, cfg->no.kidx);
1005 	nptv6_free_config(cfg);
1006 	return (0);
1007 }
1008 
1009 int
1010 nptv6_init(struct ip_fw_chain *ch, int first)
1011 {
1012 
1013 	V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6");
1014 	if (V_nptv6_eid == 0)
1015 		return (ENXIO);
1016 	IPFW_ADD_SOPT_HANDLER(first, scodes);
1017 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
1018 	return (0);
1019 }
1020 
/*
 * Undo nptv6_init() for this chain: drop the address event handler (only
 * when "last" is set and it was registered), remove the opcode rewriter
 * and socket option handlers, unregister the external action and finally
 * destroy every remaining NPTv6 instance.
 */
void
nptv6_uninit(struct ip_fw_chain *ch, int last)
{

	if (last && nptv6_ifaddr_event != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event_ext, nptv6_ifaddr_event);
	IPFW_DEL_OBJ_REWRITER(last, opcodes);
	IPFW_DEL_SOPT_HANDLER(last, scodes);
	ipfw_del_eaction(ch, V_nptv6_eid);
	/*
	 * The external action is already deregistered, so our named
	 * objects are no longer reachable through rules: all rules were
	 * truncated by ipfw_del_eaction().  We can therefore unlink and
	 * destroy the named objects without holding IPFW_WLOCK().
	 */
	IPFW_UH_WLOCK(ch);
	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
	    IPFW_TLV_NPTV6_NAME);
	/* The id is cleared only after the callback has used the TLV type. */
	V_nptv6_eid = 0;
	IPFW_UH_WUNLOCK(ch);
}
1043